diff options
Diffstat (limited to 'net')
168 files changed, 6558 insertions, 5337 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c index 1eb05d8..3ed6162 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -24,6 +24,11 @@ static unsigned int mrp_join_time __read_mostly = 200; module_param(mrp_join_time, uint, 0644); MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); + +static unsigned int mrp_periodic_time __read_mostly = 1000; +module_param(mrp_periodic_time, uint, 0644); +MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); + MODULE_LICENSE("GPL"); static const u8 @@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data) mrp_join_timer_arm(app); } +static void mrp_periodic_timer_arm(struct mrp_applicant *app) +{ + mod_timer(&app->periodic_timer, + jiffies + msecs_to_jiffies(mrp_periodic_time)); +} + +static void mrp_periodic_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_periodic_timer_arm(app); +} + static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) { __be16 endmark; @@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); mrp_join_timer_arm(app); + setup_timer(&app->periodic_timer, mrp_periodic_timer, + (unsigned long)app); + mrp_periodic_timer_arm(app); return 0; err3: @@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + del_timer_sync(&app->periodic_timer); spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 61fc573..b3d17d1 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -98,14 +98,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_request_leave(dev); vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); + + netdev_upper_dev_unlink(real_dev, dev); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. */ unregister_netdevice_queue(dev, head); - netdev_upper_dev_unlink(real_dev, dev); - if (grp->nr_vlan_devs == 0) { vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); @@ -169,13 +169,13 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - err = netdev_upper_dev_link(real_dev, dev); - if (err) - goto out_uninit_mvrp; - err = register_netdevice(dev); if (err < 0) - goto out_upper_dev_unlink; + goto out_uninit_mvrp; + + err = netdev_upper_dev_link(real_dev, dev); + if (err) + goto out_unregister_netdev; /* Account for reference in struct vlan_dev_priv */ dev_hold(real_dev); @@ -191,8 +191,8 @@ int register_vlan_dev(struct net_device *dev) return 0; -out_upper_dev_unlink: - netdev_upper_dev_unlink(real_dev, dev); +out_unregister_netdev: + unregister_netdevice(dev); out_uninit_mvrp: if (grp->nr_vlan_devs == 0) vlan_mvrp_uninit_applicant(real_dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 3091297..c7e634a 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ nla_total_size(2) + /* IFLA_VLAN_ID */ - sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ + nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 489bb36..8ddbfe6 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -38,4 +38,3 @@ batman-adv-y += soft-interface.o batman-adv-y += sysfs.o batman-adv-y += translation-table.o batman-adv-y += unicast.o -batman-adv-y += vis.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 0a8a80c..97b42d3 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -135,9 +135,8 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; batadv_ogm_packet->header.ttl = 2; batadv_ogm_packet->flags = BATADV_NO_FLAGS; + batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; - batadv_ogm_packet->tt_num_changes = 0; - batadv_ogm_packet->ttvn = 0; res = 0; @@ -207,12 +206,12 @@ static uint8_t batadv_hop_penalty(uint8_t tq, /* is there another aggregated packet here? */ static int batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, - int tt_num_changes) + __be16 tvlv_len) { int next_buff_pos = 0; next_buff_pos += buff_pos + BATADV_OGM_HLEN; - next_buff_pos += batadv_tt_len(tt_num_changes); + next_buff_pos += ntohs(tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -240,7 +239,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, - batadv_ogm_packet->tt_num_changes)) { + batadv_ogm_packet->tvlv_len)) { /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -256,18 +255,18 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, fwd_str = "Sending own"; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s, ttvn %d) on interface %s [%pM]\n", + "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl, (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? "on" : "off"), - batadv_ogm_packet->ttvn, hard_iface->net_dev->name, + hard_iface->net_dev->name, hard_iface->net_dev->dev_addr); buff_pos += BATADV_OGM_HLEN; - buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes); + buff_pos += ntohs(batadv_ogm_packet->tvlv_len); packet_num++; packet_pos = forw_packet->skb->data + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; @@ -601,7 +600,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - uint8_t tt_num_changes; + uint16_t tvlv_len; if (batadv_ogm_packet->header.ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); @@ -621,7 +620,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, return; } - tt_num_changes = batadv_ogm_packet->tt_num_changes; + tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); batadv_ogm_packet->header.ttl--; memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); @@ -642,7 +641,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK; batadv_iv_ogm_queue_add(bat_priv, (unsigned char *)batadv_ogm_packet, - BATADV_OGM_HLEN + batadv_tt_len(tt_num_changes), + BATADV_OGM_HLEN + tvlv_len, if_incoming, 0, batadv_iv_ogm_fwd_send_time()); } @@ -688,43 +687,29 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if; int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; - int vis_server, tt_num_changes = 0; uint32_t seqno; - uint8_t bandwidth; + uint16_t tvlv_len = 0; - vis_server = atomic_read(&bat_priv->vis_mode); primary_if = batadv_primary_if_get_selected(bat_priv); - if (hard_iface == primary_if) - tt_num_changes = batadv_tt_append_diff(bat_priv, ogm_buff, - ogm_buff_len, - BATADV_OGM_HLEN); + if (hard_iface == primary_if) { + /* tt changes have to be committed before the tvlv data is + * appended as it may alter the tt tvlv container + */ + batadv_tt_local_commit_changes(bat_priv); + tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM_HLEN); + } batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); + batadv_ogm_packet->tvlv_len = htons(tvlv_len); /* change sequence number to network order */ seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->seqno = htonl(seqno); atomic_inc(&hard_iface->bat_iv.ogm_seqno); - batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); - batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc); - if (tt_num_changes >= 0) - batadv_ogm_packet->tt_num_changes = tt_num_changes; - - if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC) - batadv_ogm_packet->flags |= BATADV_VIS_SERVER; - else - batadv_ogm_packet->flags &= ~BATADV_VIS_SERVER; - - if (hard_iface == primary_if && - atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER) { - bandwidth = (uint8_t)atomic_read(&bat_priv->gw_bandwidth); - batadv_ogm_packet->gw_flags = bandwidth; - } else { - batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; - } - batadv_iv_ogm_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, @@ -798,7 +783,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, rcu_read_unlock(); - orig_node->flags = batadv_ogm_packet->flags; neigh_node->last_seen = jiffies; spin_lock_bh(&neigh_node->lq_update_lock); @@ -820,11 +804,11 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, */ router = batadv_orig_node_get_router(orig_node); if (router == neigh_node) - goto update_tt; + goto out; /* if this neighbor does not offer a better TQ we won't consider it */ if (router && (router->tq_avg > neigh_node->tq_avg)) - goto update_tt; + goto out; /* if the TQ is the same and the link not more symmetric we * won't consider it either @@ -843,35 +827,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); if (sum_orig >= sum_neigh) - goto update_tt; + goto out; } batadv_update_route(bat_priv, orig_node, neigh_node); - -update_tt: - /* I have to check for transtable changes only if the OGM has been - * sent through a primary interface - */ - if (((batadv_ogm_packet->orig != ethhdr->h_source) && - (batadv_ogm_packet->header.ttl > 2)) || - (batadv_ogm_packet->flags & BATADV_PRIMARIES_FIRST_HOP)) - batadv_tt_update_orig(bat_priv, orig_node, tt_buff, - batadv_ogm_packet->tt_num_changes, - batadv_ogm_packet->ttvn, - ntohs(batadv_ogm_packet->tt_crc)); - - if (orig_node->gw_flags != batadv_ogm_packet->gw_flags) - batadv_gw_node_update(bat_priv, orig_node, - batadv_ogm_packet->gw_flags); - - orig_node->gw_flags = batadv_ogm_packet->gw_flags; - - /* restart gateway selection if fast or late switching was enabled */ - if ((orig_node->gw_flags) && - (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw_sel_class) > 2)) - batadv_gw_check_election(bat_priv, orig_node); - goto out; unlock: @@ -1122,13 +1081,11 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, is_single_hop_neigh = true; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %#.4x, changes %u, tq %d, TTL %d, V %d, IDF %d)\n", + "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, - ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->ttvn, - ntohs(batadv_ogm_packet->tt_crc), - batadv_ogm_packet->tt_num_changes, batadv_ogm_packet->tq, + ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl, batadv_ogm_packet->header.version, has_directlink_flag); @@ -1254,6 +1211,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, goto out; } + batadv_tvlv_ogm_receive(bat_priv, batadv_ogm_packet, orig_node); + /* if sender is a direct neighbor the sender mac equals * originator mac */ @@ -1350,9 +1309,9 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_ogm_packet *batadv_ogm_packet; struct ethhdr *ethhdr; int buff_pos = 0, packet_len; - unsigned char *tt_buff, *packet_buff; - bool ret; + unsigned char *tvlv_buff, *packet_buff; uint8_t *packet_pos; + bool ret; ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); if (!ret) @@ -1375,14 +1334,14 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, /* unpack the aggregated packets and process them one by one */ while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)) { - tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; + batadv_ogm_packet->tvlv_len)) { + tvlv_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; - batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, - if_incoming); + batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, + tvlv_buff, if_incoming); buff_pos += BATADV_OGM_HLEN; - buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes); + buff_pos += ntohs(batadv_ogm_packet->tvlv_len); packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index f186a55..049a7a2 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -28,7 +28,6 @@ #include "gateway_common.h" #include "gateway_client.h" #include "soft-interface.h" -#include "vis.h" #include "icmp_socket.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -300,12 +299,6 @@ static int batadv_transtable_local_open(struct inode *inode, struct file *file) return single_open(file, batadv_tt_local_seq_print_text, net_dev); } -static int batadv_vis_data_open(struct inode *inode, struct file *file) -{ - struct net_device *net_dev = (struct net_device *)inode->i_private; - return single_open(file, batadv_vis_seq_print_text, net_dev); -} - struct batadv_debuginfo { struct attribute attr; const struct file_operations fops; @@ -356,7 +349,6 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); #endif static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); -static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); #ifdef CONFIG_BATMAN_ADV_NC static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); #endif @@ -373,7 +365,6 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_dat_cache, #endif &batadv_debuginfo_transtable_local, - &batadv_debuginfo_vis_data, #ifdef CONFIG_BATMAN_ADV_NC &batadv_debuginfo_nc_nodes, #endif diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 06345d4..f07ec32 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -419,6 +419,10 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, bool ret = false; int j; + /* check if orig node candidate is running DAT */ + if (!(candidate->capabilities & BATADV_ORIG_CAPA_HAS_DAT)) + goto out; + /* Check if this node has already been selected... */ for (j = 0; j < select; j++) if (res[j].orig_node == candidate) @@ -626,6 +630,59 @@ out: } /** + * batadv_dat_tvlv_container_update - update the dat tvlv container after dat + * setting change + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv) +{ + char dat_mode; + + dat_mode = atomic_read(&bat_priv->distributed_arp_table); + + switch (dat_mode) { + case 0: + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_DAT, 1); + break; + case 1: + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_DAT, 1, + NULL, 0); + break; + } +} + +/** + * batadv_dat_status_update - update the dat tvlv container after dat + * setting change + * @net_dev: the soft interface net device + */ +void batadv_dat_status_update(struct net_device *net_dev) +{ + struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_dat_tvlv_container_update(bat_priv); +} + +/** + * batadv_gw_tvlv_ogm_handler_v1 - process incoming dat tvlv container + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node of the ogm + * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) + * @tvlv_value: tvlv buffer containing the gateway data + * @tvlv_value_len: tvlv buffer length + */ +static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) + orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_DAT; + else + orig->capabilities |= BATADV_ORIG_CAPA_HAS_DAT; +} + +/** * batadv_dat_hash_free - free the local DAT hash table * @bat_priv: the bat priv with all the soft interface information */ @@ -657,6 +714,10 @@ int batadv_dat_init(struct batadv_priv *bat_priv) batadv_dat_start_timer(bat_priv); + batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1, + NULL, BATADV_TVLV_DAT, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + batadv_dat_tvlv_container_update(bat_priv); return 0; } @@ -666,6 +727,9 @@ int batadv_dat_init(struct batadv_priv *bat_priv) */ void batadv_dat_free(struct batadv_priv *bat_priv) { + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_DAT, 1); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_DAT, 1); + cancel_delayed_work_sync(&bat_priv->dat.work); batadv_dat_hash_free(bat_priv); diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index 125c8c6..60d853b 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -29,6 +29,7 @@ #define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) +void batadv_dat_status_update(struct net_device *net_dev); bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb); bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, @@ -98,6 +99,10 @@ static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, #else +static inline void batadv_dat_status_update(struct net_device *net_dev) +{ +} + static inline bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 1ce4b87..1bce63aa 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -118,7 +118,6 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; uint8_t max_tq = 0; - int down, up; uint8_t tq_avg; struct batadv_orig_node *orig_node; @@ -142,10 +141,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ - batadv_gw_bandwidth_to_kbit(orig_node->gw_flags, - &down, &up); - - tmp_gw_factor = tq_avg * tq_avg * down * 100 * 100; + tmp_gw_factor = tq_avg * tq_avg; + tmp_gw_factor *= gw_node->bandwidth_down; + tmp_gw_factor *= 100 * 100; tmp_gw_factor /= gw_divisor; if ((tmp_gw_factor > max_gw_factor) || @@ -258,16 +256,22 @@ void batadv_gw_election(struct batadv_priv *bat_priv) NULL); } else if ((!curr_gw) && (next_gw)) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Adding route to gateway %pM (gw_flags: %i, tq: %i)\n", + "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n", next_gw->orig_node->orig, - next_gw->orig_node->gw_flags, router->tq_avg); + next_gw->bandwidth_down / 10, + next_gw->bandwidth_down % 10, + next_gw->bandwidth_up / 10, + next_gw->bandwidth_up % 10, router->tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_ADD, gw_addr); } else { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Changing route to gateway %pM (gw_flags: %i, tq: %i)\n", + "Changing route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n", next_gw->orig_node->orig, - next_gw->orig_node->gw_flags, router->tq_avg); + next_gw->bandwidth_down / 10, + next_gw->bandwidth_down % 10, + next_gw->bandwidth_up / 10, + next_gw->bandwidth_up % 10, router->tq_avg); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE, gw_addr); } @@ -337,12 +341,20 @@ out: return; } +/** + * batadv_gw_node_add - add gateway node to list of available gateways + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: originator announcing gateway capabilities + * @gateway: announced bandwidth information + */ static void batadv_gw_node_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - uint8_t new_gwflags) + struct batadv_tvlv_gateway_data *gateway) { struct batadv_gw_node *gw_node; - int down, up; + + if (gateway->bandwidth_down == 0) + return; gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); if (!gw_node) @@ -356,73 +368,116 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); spin_unlock_bh(&bat_priv->gw.list_lock); - batadv_gw_bandwidth_to_kbit(new_gwflags, &down, &up); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Found new gateway %pM -> gw_class: %i - %i%s/%i%s\n", - orig_node->orig, new_gwflags, - (down > 2048 ? down / 1024 : down), - (down > 2048 ? "MBit" : "KBit"), - (up > 2048 ? up / 1024 : up), - (up > 2048 ? "MBit" : "KBit")); + "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n", + orig_node->orig, + ntohl(gateway->bandwidth_down) / 10, + ntohl(gateway->bandwidth_down) % 10, + ntohl(gateway->bandwidth_up) / 10, + ntohl(gateway->bandwidth_up) % 10); } -void batadv_gw_node_update(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - uint8_t new_gwflags) +/** + * batadv_gw_node_get - retrieve gateway node from list of available gateways + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: originator announcing gateway capabilities + * + * Returns gateway node if found or NULL otherwise. + */ +static struct batadv_gw_node * +batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) { - struct batadv_gw_node *gw_node, *curr_gw; - - /* Note: We don't need a NULL check here, since curr_gw never gets - * dereferenced. If curr_gw is NULL we also should not exit as we may - * have this gateway in our list (duplication check!) even though we - * have no currently selected gateway. - */ - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - if (gw_node->orig_node != orig_node) + hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.list, list) { + if (gw_node_tmp->orig_node != orig_node) continue; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Gateway class of originator %pM changed from %i to %i\n", - orig_node->orig, gw_node->orig_node->gw_flags, - new_gwflags); + if (gw_node_tmp->deleted) + continue; - gw_node->deleted = 0; + if (!atomic_inc_not_zero(&gw_node_tmp->refcount)) + continue; - if (new_gwflags == BATADV_NO_FLAGS) { - gw_node->deleted = jiffies; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Gateway %pM removed from gateway list\n", - orig_node->orig); + gw_node = gw_node_tmp; + break; + } + rcu_read_unlock(); - if (gw_node == curr_gw) - goto deselect; - } + return gw_node; +} - goto unlock; +/** + * batadv_gw_node_update - update list of available gateways with changed + * bandwidth information + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: originator announcing gateway capabilities + * @gateway: announced bandwidth information + */ +void batadv_gw_node_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_tvlv_gateway_data *gateway) +{ + struct batadv_gw_node *gw_node, *curr_gw = NULL; + + gw_node = batadv_gw_node_get(bat_priv, orig_node); + if (!gw_node) { + batadv_gw_node_add(bat_priv, orig_node, gateway); + goto out; } - if (new_gwflags == BATADV_NO_FLAGS) - goto unlock; + if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) && + (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))) + goto out; - batadv_gw_node_add(bat_priv, orig_node, new_gwflags); - goto unlock; + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Gateway bandwidth of originator %pM changed from %u.%u/%u.%u MBit to %u.%u/%u.%u MBit\n", + orig_node->orig, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10, + ntohl(gateway->bandwidth_down) / 10, + ntohl(gateway->bandwidth_down) % 10, + ntohl(gateway->bandwidth_up) / 10, + ntohl(gateway->bandwidth_up) % 10); + + gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); + gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); + + gw_node->deleted = 0; + if (ntohl(gateway->bandwidth_down) == 0) { + gw_node->deleted = jiffies; + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Gateway %pM removed from gateway list\n", + orig_node->orig); -deselect: - batadv_gw_deselect(bat_priv); -unlock: - rcu_read_unlock(); + /* Note: We don't need a NULL check here, since curr_gw never + * gets dereferenced. + */ + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + if (gw_node == curr_gw) + batadv_gw_deselect(bat_priv); + } +out: if (curr_gw) batadv_gw_node_free_ref(curr_gw); + if (gw_node) + batadv_gw_node_free_ref(gw_node); } void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { - batadv_gw_node_update(bat_priv, orig_node, 0); + struct batadv_tvlv_gateway_data gateway; + + gateway.bandwidth_down = 0; + gateway.bandwidth_up = 0; + + batadv_gw_node_update(bat_priv, orig_node, &gateway); } void batadv_gw_node_purge(struct batadv_priv *bat_priv) @@ -467,9 +522,7 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv, { struct batadv_gw_node *curr_gw; struct batadv_neigh_node *router; - int down, up, ret = -1; - - batadv_gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up); + int ret = -1; router = batadv_orig_node_get_router(gw_node->orig_node); if (!router) @@ -477,16 +530,15 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", + ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", (curr_gw == gw_node ? "=>" : " "), gw_node->orig_node->orig, router->tq_avg, router->addr, router->if_incoming->net_dev->name, - gw_node->orig_node->gw_flags, - (down > 2048 ? down / 1024 : down), - (down > 2048 ? "MBit" : "KBit"), - (up > 2048 ? up / 1024 : up), - (up > 2048 ? "MBit" : "KBit")); + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); batadv_neigh_node_free_ref(router); if (curr_gw) @@ -508,7 +560,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, - " %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", + " %-12s (%s/%i) %17s [%10s]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", "Gateway", "#", BATADV_TQ_MAX_VALUE, "Nexthop", "outgoingIF", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name); @@ -675,7 +727,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, { struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL; struct batadv_orig_node *orig_dst_node = NULL; - struct batadv_gw_node *curr_gw = NULL; + struct batadv_gw_node *gw_node = NULL, *curr_gw = NULL; struct ethhdr *ethhdr; bool ret, out_of_range = false; unsigned int header_len = 0; @@ -691,7 +743,8 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!orig_dst_node) goto out; - if (!orig_dst_node->gw_flags) + gw_node = batadv_gw_node_get(bat_priv, orig_dst_node); + if (!gw_node->bandwidth_down == 0) goto out; ret = batadv_is_type_dhcprequest(skb, header_len); @@ -742,6 +795,8 @@ out: batadv_orig_node_free_ref(orig_dst_node); if (curr_gw) batadv_gw_node_free_ref(curr_gw); + if (gw_node) + batadv_gw_node_free_ref(gw_node); if (neigh_old) batadv_neigh_node_free_ref(neigh_old); if (neigh_curr) diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index ceef4eb..d95c2d2 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -29,7 +29,7 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - uint8_t new_gwflags); + struct batadv_tvlv_gateway_data *gateway); void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_purge(struct batadv_priv *bat_priv); diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 84bb2b1..b211b0f 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -21,64 +21,23 @@ #include "gateway_common.h" #include "gateway_client.h" -/* calculates the gateway class from kbit */ -static void batadv_kbit_to_gw_bandwidth(int down, int up, long *gw_srv_class) -{ - int mdown = 0, tdown, tup, difference; - uint8_t sbit, part; - - *gw_srv_class = 0; - difference = 0x0FFFFFFF; - - /* test all downspeeds */ - for (sbit = 0; sbit < 2; sbit++) { - for (part = 0; part < 16; part++) { - tdown = 32 * (sbit + 2) * (1 << part); - - if (abs(tdown - down) < difference) { - *gw_srv_class = (sbit << 7) + (part << 3); - difference = abs(tdown - down); - mdown = tdown; - } - } - } - - /* test all upspeeds */ - difference = 0x0FFFFFFF; - - for (part = 0; part < 8; part++) { - tup = ((part + 1) * (mdown)) / 8; - - if (abs(tup - up) < difference) { - *gw_srv_class = (*gw_srv_class & 0xF8) | part; - difference = abs(tup - up); - } - } -} - -/* returns the up and downspeeds in kbit, calculated from the class */ -void batadv_gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up) -{ - int sbit = (gw_srv_class & 0x80) >> 7; - int dpart = (gw_srv_class & 0x78) >> 3; - int upart = (gw_srv_class & 0x07); - - if (!gw_srv_class) { - *down = 0; - *up = 0; - return; - } - - *down = 32 * (sbit + 2) * (1 << dpart); - *up = ((upart + 1) * (*down)) / 8; -} - +/** + * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download + * and upload bandwidth information + * @net_dev: the soft interface net device + * @buff: string buffer to parse + * @down: pointer holding the returned download bandwidth information + * @up: pointer holding the returned upload bandwidth information + * + * Returns false on parse error and true otherwise. + */ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, - int *up, int *down) + uint32_t *down, uint32_t *up) { - int ret, multi = 1; + enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT; char *slash_ptr, *tmp_ptr; long ldown, lup; + int ret; slash_ptr = strchr(buff, '/'); if (slash_ptr) @@ -88,10 +47,10 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, tmp_ptr = buff + strlen(buff) - 4; if (strnicmp(tmp_ptr, "mbit", 4) == 0) - multi = 1024; + bw_unit_type = BATADV_BW_UNIT_MBIT; if ((strnicmp(tmp_ptr, "kbit", 4) == 0) || - (multi > 1)) + (bw_unit_type == BATADV_BW_UNIT_MBIT)) *tmp_ptr = '\0'; } @@ -103,20 +62,28 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, return false; } - *down = ldown * multi; + switch (bw_unit_type) { + case BATADV_BW_UNIT_MBIT: + *down = ldown * 10; + break; + case BATADV_BW_UNIT_KBIT: + default: + *down = ldown / 100; + break; + } /* we also got some upload info */ if (slash_ptr) { - multi = 1; + bw_unit_type = BATADV_BW_UNIT_KBIT; if (strlen(slash_ptr + 1) > 4) { tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1); if (strnicmp(tmp_ptr, "mbit", 4) == 0) - multi = 1024; + bw_unit_type = BATADV_BW_UNIT_MBIT; if ((strnicmp(tmp_ptr, "kbit", 4) == 0) || - (multi > 1)) + (bw_unit_type == BATADV_BW_UNIT_MBIT)) *tmp_ptr = '\0'; } @@ -128,52 +95,149 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, return false; } - *up = lup * multi; + switch (bw_unit_type) { + case BATADV_BW_UNIT_MBIT: + *up = lup * 10; + break; + case BATADV_BW_UNIT_KBIT: + default: + *up = lup / 100; + break; + } } return true; } +/** + * batadv_gw_tvlv_container_update - update the gw tvlv container after gateway + * setting change + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) +{ + struct batadv_tvlv_gateway_data gw; + uint32_t down, up; + char gw_mode; + + gw_mode = atomic_read(&bat_priv->gw_mode); + + switch (gw_mode) { + case BATADV_GW_MODE_OFF: + case BATADV_GW_MODE_CLIENT: + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_GW, 1); + break; + case BATADV_GW_MODE_SERVER: + down = atomic_read(&bat_priv->gw.bandwidth_down); + up = atomic_read(&bat_priv->gw.bandwidth_up); + gw.bandwidth_down = htonl(down); + gw.bandwidth_up = htonl(up); + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_GW, 1, + &gw, sizeof(gw)); + break; + } +} + ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) { struct batadv_priv *bat_priv = netdev_priv(net_dev); - long gw_bandwidth_tmp = 0; - int up = 0, down = 0; + uint32_t down_curr, up_curr, down_new = 0, up_new = 0; bool ret; - ret = batadv_parse_gw_bandwidth(net_dev, buff, &up, &down); + down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down); + up_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_up); + + ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new); if (!ret) goto end; - if ((!down) || (down < 256)) - down = 2000; - - if (!up) - up = down / 5; + if (!down_new) + down_new = 1; - batadv_kbit_to_gw_bandwidth(down, up, &gw_bandwidth_tmp); + if (!up_new) + up_new = down_new / 5; - /* the gw bandwidth we guessed above might not match the given - * speeds, hence we need to calculate it back to show the number - * that is going to be propagated - */ - batadv_gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, &down, &up); + if (!up_new) + up_new = 1; - if (atomic_read(&bat_priv->gw_bandwidth) == gw_bandwidth_tmp) + if ((down_curr == down_new) && (up_curr == up_new)) return count; batadv_gw_deselect(bat_priv); batadv_info(net_dev, - "Changing gateway bandwidth from: '%i' to: '%ld' (propagating: %d%s/%d%s)\n", - atomic_read(&bat_priv->gw_bandwidth), gw_bandwidth_tmp, - (down > 2048 ? down / 1024 : down), - (down > 2048 ? "MBit" : "KBit"), - (up > 2048 ? up / 1024 : up), - (up > 2048 ? "MBit" : "KBit")); + "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n", + down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10, + down_new / 10, down_new % 10, up_new / 10, up_new % 10); - atomic_set(&bat_priv->gw_bandwidth, gw_bandwidth_tmp); + atomic_set(&bat_priv->gw.bandwidth_down, down_new); + atomic_set(&bat_priv->gw.bandwidth_up, up_new); + batadv_gw_tvlv_container_update(bat_priv); end: return count; } + +/** + * batadv_gw_tvlv_ogm_handler_v1 - process incoming gateway tvlv container + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node of the ogm + * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) + * @tvlv_value: tvlv buffer containing the gateway data + * @tvlv_value_len: tvlv buffer length + */ +static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + struct batadv_tvlv_gateway_data gateway, *gateway_ptr; + + /* only fetch the tvlv value if the handler wasn't called via the + * CIFNOTFND flag and if there is data to fetch + */ + if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) || + (tvlv_value_len < sizeof(gateway))) { + gateway.bandwidth_down = 0; + gateway.bandwidth_up = 0; + } else { + gateway_ptr = tvlv_value; + gateway.bandwidth_down = gateway_ptr->bandwidth_down; + gateway.bandwidth_up = gateway_ptr->bandwidth_up; + if ((gateway.bandwidth_down == 0) || + (gateway.bandwidth_up == 0)) { + gateway.bandwidth_down = 0; + gateway.bandwidth_up = 0; + } + } + + batadv_gw_node_update(bat_priv, orig, &gateway); + + /* restart gateway selection if fast or late switching was enabled */ + if ((gateway.bandwidth_down != 0) && + (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) && + (atomic_read(&bat_priv->gw_sel_class) > 2)) + batadv_gw_check_election(bat_priv, orig); +} + +/** + * batadv_gw_init - initialise the gateway handling internals + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_gw_init(struct batadv_priv *bat_priv) +{ + batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1, + NULL, BATADV_TVLV_GW, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); +} + +/** + * batadv_gw_free - free the gateway handling internals + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_gw_free(struct batadv_priv *bat_priv) +{ + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_GW, 1); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_GW, 1); +} diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 509b2bf..56384a4c 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -26,12 +26,24 @@ enum batadv_gw_modes { BATADV_GW_MODE_SERVER, }; +/** + * enum batadv_bandwidth_units - bandwidth unit types + * @BATADV_BW_UNIT_KBIT: unit type kbit + * @BATADV_BW_UNIT_MBIT: unit type mbit + */ +enum batadv_bandwidth_units { + BATADV_BW_UNIT_KBIT, + BATADV_BW_UNIT_MBIT, +}; + #define BATADV_GW_MODE_OFF_NAME "off" #define BATADV_GW_MODE_CLIENT_NAME "client" #define BATADV_GW_MODE_SERVER_NAME "server" -void batadv_gw_bandwidth_to_kbit(uint8_t gw_class, int *down, int *up); ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count); +void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv); +void batadv_gw_init(struct batadv_priv *bat_priv); +void batadv_gw_free(struct batadv_priv *bat_priv); #endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c478e6b..eeb6671 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -194,22 +194,13 @@ out: static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, struct batadv_hard_iface *oldif) { - struct batadv_vis_packet *vis_packet; struct batadv_hard_iface *primary_if; - struct sk_buff *skb; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; batadv_dat_init_own_addr(bat_priv, primary_if); - - skb = bat_priv->vis.my_info->skb_packet; - vis_packet = (struct batadv_vis_packet *)skb->data; - memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(vis_packet->sender_orig, - primary_if->net_dev->dev_addr, ETH_ALEN); - batadv_bla_update_orig_address(bat_priv, primary_if, oldif); out: if (primary_if) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c72d1bc..8b195e6 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -36,7 +36,8 @@ #include "gateway_client.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" -#include "vis.h" +#include "unicast.h" +#include "gateway_common.h" #include "hash.h" #include "bat_algo.h" #include "network-coding.h" @@ -65,6 +66,7 @@ static int __init batadv_init(void) batadv_recv_handler_init(); batadv_iv_init(); + batadv_nc_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -109,8 +111,8 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->tt.roam_list_lock); spin_lock_init(&bat_priv->tt.last_changeset_lock); spin_lock_init(&bat_priv->gw.list_lock); - spin_lock_init(&bat_priv->vis.hash_lock); - spin_lock_init(&bat_priv->vis.list_lock); + spin_lock_init(&bat_priv->tvlv.container_list_lock); + spin_lock_init(&bat_priv->tvlv.handler_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); @@ -118,6 +120,8 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_LIST_HEAD(&bat_priv->tt.changes_list); INIT_LIST_HEAD(&bat_priv->tt.req_list); INIT_LIST_HEAD(&bat_priv->tt.roam_list); + INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); + INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); ret = batadv_originator_init(bat_priv); if (ret < 0) @@ -130,10 +134,6 @@ int batadv_mesh_init(struct net_device *soft_iface) batadv_tt_local_add(soft_iface, soft_iface->dev_addr, BATADV_NULL_IFINDEX); - ret = batadv_vis_init(bat_priv); - if (ret < 0) - goto err; - ret = batadv_bla_init(bat_priv); if (ret < 0) goto err; @@ -142,10 +142,12 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - ret = batadv_nc_init(bat_priv); + ret = batadv_nc_mesh_init(bat_priv); if (ret < 0) goto err; + batadv_gw_init(bat_priv); + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -164,10 +166,8 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_purge_outstanding_packets(bat_priv, NULL); - batadv_vis_quit(bat_priv); - batadv_gw_node_purge(bat_priv); - batadv_nc_free(bat_priv); + batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); @@ -184,6 +184,8 @@ void batadv_mesh_free(struct net_device *soft_iface) */ batadv_originator_free(bat_priv); + batadv_gw_free(bat_priv); + free_percpu(bat_priv->bat_counters); bat_priv->bat_counters = NULL; @@ -391,22 +393,31 @@ static void batadv_recv_handler_init(void) for (i = 0; i < ARRAY_SIZE(batadv_rx_handler); i++) batadv_rx_handler[i] = batadv_recv_unhandled_packet; - /* batman icmp packet */ - batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; + for (i = BATADV_UNICAST_MIN; i <= BATADV_UNICAST_MAX; i++) + batadv_rx_handler[i] = batadv_recv_unhandled_unicast_packet; + + /* compile time checks for struct member offsets */ + BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10); + BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); + BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4); + BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); + + /* broadcast packet */ + batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; + + /* unicast packets ... */ /* unicast with 4 addresses packet */ batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet; /* unicast packet */ batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; /* fragmented unicast packet */ batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_ucast_frag_packet; - /* broadcast packet */ - batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; - /* vis packet */ - batadv_rx_handler[BATADV_VIS] = batadv_recv_vis_packet; - /* Translation table query (request or response) */ - batadv_rx_handler[BATADV_TT_QUERY] = batadv_recv_tt_query; - /* Roaming advertisement */ - batadv_rx_handler[BATADV_ROAM_ADV] = batadv_recv_roam_adv; + /* unicast tvlv packet */ + batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv; + /* batman icmp packet */ + batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; } int @@ -414,7 +425,12 @@ batadv_recv_handler_register(uint8_t packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)) { - if (batadv_rx_handler[packet_type] != &batadv_recv_unhandled_packet) + int (*curr)(struct sk_buff *, + struct batadv_hard_iface *); + curr = batadv_rx_handler[packet_type]; + + if ((curr != batadv_recv_unhandled_packet) && + (curr != batadv_recv_unhandled_unicast_packet)) return -EBUSY; batadv_rx_handler[packet_type] = recv_handler; @@ -535,6 +551,574 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) return htonl(crc); } +/** + * batadv_tvlv_handler_free_ref - decrement the tvlv handler refcounter and + * possibly free it + * @tvlv_handler: the tvlv handler to free + */ +static void +batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler) +{ + if (atomic_dec_and_test(&tvlv_handler->refcount)) + kfree_rcu(tvlv_handler, rcu); +} + +/** + * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list + * based on the provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv handler type to look for + * @version: tvlv handler version to look for + * + * Returns tvlv handler if found or NULL otherwise. + */ +static struct batadv_tvlv_handler +*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version) +{ + struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tvlv_handler_tmp, + &bat_priv->tvlv.handler_list, list) { + if (tvlv_handler_tmp->type != type) + continue; + + if (tvlv_handler_tmp->version != version) + continue; + + if (!atomic_inc_not_zero(&tvlv_handler_tmp->refcount)) + continue; + + tvlv_handler = tvlv_handler_tmp; + break; + } + rcu_read_unlock(); + + return tvlv_handler; +} + +/** + * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and + * possibly free it + * @tvlv_handler: the tvlv container to free + */ +static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) +{ + if (atomic_dec_and_test(&tvlv->refcount)) + kfree(tvlv); +} + +/** + * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container + * list based on the provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv container type to look for + * @version: tvlv container version to look for + * + * Has to be called with the appropriate locks being acquired + * (tvlv.container_list_lock). + * + * Returns tvlv container if found or NULL otherwise. + */ +static struct batadv_tvlv_container +*batadv_tvlv_container_get(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version) +{ + struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; + + hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { + if (tvlv_tmp->tvlv_hdr.type != type) + continue; + + if (tvlv_tmp->tvlv_hdr.version != version) + continue; + + if (!atomic_inc_not_zero(&tvlv_tmp->refcount)) + continue; + + tvlv = tvlv_tmp; + break; + } + + return tvlv; +} + +/** + * batadv_tvlv_container_list_size - calculate the size of the tvlv container + * list entries + * @bat_priv: the bat priv with all the soft interface information + * + * Has to be called with the appropriate locks being acquired + * (tvlv.container_list_lock). + * + * Returns size of all currently registered tvlv containers in bytes. + */ +static uint16_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) +{ + struct batadv_tvlv_container *tvlv; + uint16_t tvlv_len = 0; + + hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { + tvlv_len += sizeof(struct batadv_tvlv_hdr); + tvlv_len += ntohs(tvlv->tvlv_hdr.len); + } + + return tvlv_len; +} + +/** + * batadv_tvlv_container_remove - remove tvlv container from the tvlv container + * list + * @tvlv: the to be removed tvlv container + * + * Has to be called with the appropriate locks being acquired + * (tvlv.container_list_lock). + */ +static void batadv_tvlv_container_remove(struct batadv_tvlv_container *tvlv) +{ + if (!tvlv) + return; + + hlist_del(&tvlv->list); + + /* first call to decrement the counter, second call to free */ + batadv_tvlv_container_free_ref(tvlv); + batadv_tvlv_container_free_ref(tvlv); +} + +/** + * batadv_tvlv_container_unregister - unregister tvlv container based on the + * provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv container type to unregister + * @version: tvlv container type to unregister + */ +void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version) +{ + struct batadv_tvlv_container *tvlv; + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv = batadv_tvlv_container_get(bat_priv, type, version); + batadv_tvlv_container_remove(tvlv); + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); +} + +/** + * batadv_tvlv_container_register - register tvlv type, version and content + * to be propagated with each (primary interface) OGM + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv container type + * @version: tvlv container version + * @tvlv_value: tvlv container content + * @tvlv_value_len: tvlv container content length + * + * If a container of the same type and version was already registered the new + * content is going to replace the old one. + */ +void batadv_tvlv_container_register(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version, + void *tvlv_value, uint16_t tvlv_value_len) +{ + struct batadv_tvlv_container *tvlv_old, *tvlv_new; + + if (!tvlv_value) + tvlv_value_len = 0; + + tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC); + if (!tvlv_new) + return; + + tvlv_new->tvlv_hdr.version = version; + tvlv_new->tvlv_hdr.type = type; + tvlv_new->tvlv_hdr.len = htons(tvlv_value_len); + + memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); + INIT_HLIST_NODE(&tvlv_new->list); + atomic_set(&tvlv_new->refcount, 1); + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); + batadv_tvlv_container_remove(tvlv_old); + hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); +} + +/** + * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accomodate + * requested packet size + * @packet_buff: packet buffer + * @packet_buff_len: packet buffer size + * @packet_min_len: requested packet minimum size + * @additional_packet_len: requested additional packet size on top of minimum + * size + * + * Returns true of the packet buffer could be changed to the requested size, + * false otherwise. + */ +static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, + int *packet_buff_len, + int min_packet_len, + int additional_packet_len) +{ + unsigned char *new_buff; + + new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC); + + /* keep old buffer if kmalloc should fail */ + if (new_buff) { + memcpy(new_buff, *packet_buff, min_packet_len); + kfree(*packet_buff); + *packet_buff = new_buff; + *packet_buff_len = min_packet_len + additional_packet_len; + return true; + } + + return false; +} + +/** + * batadv_tvlv_container_ogm_append - append tvlv container content to given + * OGM packet buffer + * @bat_priv: the bat priv with all the soft interface information + * @packet_buff: ogm packet buffer + * @packet_buff_len: ogm packet buffer size including ogm header and tvlv + * content + * @packet_min_len: ogm header size to be preserved for the OGM itself + * + * The ogm packet might be enlarged or shrunk depending on the current size + * and the size of the to-be-appended tvlv containers. + * + * Returns size of all appended tvlv containers in bytes. + */ +uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, + int packet_min_len) +{ + struct batadv_tvlv_container *tvlv; + struct batadv_tvlv_hdr *tvlv_hdr; + uint16_t tvlv_value_len; + void *tvlv_value; + bool ret; + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); + + ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, + packet_min_len, tvlv_value_len); + + if (!ret) + goto end; + + if (!tvlv_value_len) + goto end; + + tvlv_value = (*packet_buff) + packet_min_len; + + hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { + tvlv_hdr = tvlv_value; + tvlv_hdr->type = tvlv->tvlv_hdr.type; + tvlv_hdr->version = tvlv->tvlv_hdr.version; + tvlv_hdr->len = tvlv->tvlv_hdr.len; + tvlv_value = tvlv_hdr + 1; + memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len)); + tvlv_value = (uint8_t *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); + } + +end: + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); + return tvlv_value_len; +} + +/** + * batadv_tvlv_call_handler - parse the given tvlv buffer to call the + * appropriate handlers + * @bat_priv: the bat priv with all the soft interface information + * @tvlv_handler: tvlv callback function handling the tvlv content + * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet + * @orig_node: orig node emitting the ogm packet + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * + * Returns success if handler was not found or the return value of the handler + * callback. + */ +static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, + struct batadv_tvlv_handler *tvlv_handler, + bool ogm_source, + struct batadv_orig_node *orig_node, + uint8_t *src, uint8_t *dst, + void *tvlv_value, uint16_t tvlv_value_len) +{ + if (!tvlv_handler) + return NET_RX_SUCCESS; + + if (ogm_source) { + if (!tvlv_handler->ogm_handler) + return NET_RX_SUCCESS; + + if (!orig_node) + return NET_RX_SUCCESS; + + tvlv_handler->ogm_handler(bat_priv, orig_node, + BATADV_NO_FLAGS, + tvlv_value, tvlv_value_len); + tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED; + } else { + if (!src) + return NET_RX_SUCCESS; + + if (!dst) + return NET_RX_SUCCESS; + + if (!tvlv_handler->unicast_handler) + return NET_RX_SUCCESS; + + return tvlv_handler->unicast_handler(bat_priv, src, + dst, tvlv_value, + tvlv_value_len); + } + + return NET_RX_SUCCESS; +} + +/** + * batadv_tvlv_containers_process - parse the given tvlv buffer to call the + * appropriate handlers + * @bat_priv: the bat priv with all the soft interface information + * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet + * @orig_node: orig node emitting the ogm packet + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * + * Returns success when processing an OGM or the return value of all called + * handler callbacks. + */ +int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, + bool ogm_source, + struct batadv_orig_node *orig_node, + uint8_t *src, uint8_t *dst, + void *tvlv_value, uint16_t tvlv_value_len) +{ + struct batadv_tvlv_handler *tvlv_handler; + struct batadv_tvlv_hdr *tvlv_hdr; + uint16_t tvlv_value_cont_len; + uint8_t cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; + int ret = NET_RX_SUCCESS; + + while (tvlv_value_len >= sizeof(*tvlv_hdr)) { + tvlv_hdr = tvlv_value; + tvlv_value_cont_len = ntohs(tvlv_hdr->len); + tvlv_value = tvlv_hdr + 1; + tvlv_value_len -= sizeof(*tvlv_hdr); + + if (tvlv_value_cont_len > tvlv_value_len) + break; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, + tvlv_hdr->type, + tvlv_hdr->version); + + ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler, + ogm_source, orig_node, + src, dst, tvlv_value, + tvlv_value_cont_len); + if (tvlv_handler) + batadv_tvlv_handler_free_ref(tvlv_handler); + tvlv_value = (uint8_t *)tvlv_value + tvlv_value_cont_len; + tvlv_value_len -= tvlv_value_cont_len; + } + + if (!ogm_source) + return ret; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tvlv_handler, + &bat_priv->tvlv.handler_list, list) { + if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) && + !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED)) + tvlv_handler->ogm_handler(bat_priv, orig_node, + cifnotfound, NULL, 0); + + tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED; + } + rcu_read_unlock(); + + return NET_RX_SUCCESS; +} + +/** + * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate + * handlers + * @bat_priv: the bat priv with all the soft interface information + * @batadv_ogm_packet: ogm packet containing the tvlv containers + * @orig_node: orig node emitting the ogm packet + */ +void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_orig_node *orig_node) +{ + void *tvlv_value; + uint16_t tvlv_value_len; + + if (!batadv_ogm_packet) + return; + + tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len); + if (!tvlv_value_len) + return; + + tvlv_value = batadv_ogm_packet + 1; + + batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, + tvlv_value, tvlv_value_len); +} + +/** + * batadv_tvlv_handler_register - register tvlv handler based on the provided + * type and version (both need to match) for ogm tvlv payload and/or unicast + * payload + * @bat_priv: the bat priv with all the soft interface information + * @optr: ogm tvlv handler callback function. This function receives the orig + * node, flags and the tvlv content as argument to process. + * @uptr: unicast tvlv handler callback function. This function receives the + * source & destination of the unicast packet as well as the tvlv content + * to process. + * @type: tvlv handler type to be registered + * @version: tvlv handler version to be registered + * @flags: flags to enable or disable TVLV API behavior + */ +void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, + void (*optr)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len), + int (*uptr)(struct batadv_priv *bat_priv, + uint8_t *src, uint8_t *dst, + void *tvlv_value, + uint16_t tvlv_value_len), + uint8_t type, uint8_t version, uint8_t flags) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); + if (tvlv_handler) { + batadv_tvlv_handler_free_ref(tvlv_handler); + return; + } + + tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); + if (!tvlv_handler) + return; + + tvlv_handler->ogm_handler = optr; + tvlv_handler->unicast_handler = uptr; + tvlv_handler->type = type; + tvlv_handler->version = version; + tvlv_handler->flags = flags; + atomic_set(&tvlv_handler->refcount, 1); + INIT_HLIST_NODE(&tvlv_handler->list); + + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); +} + +/** + * batadv_tvlv_handler_unregister - unregister tvlv handler based on the + * provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv handler type to be unregistered + * @version: tvlv handler version to be unregistered + */ +void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); + if (!tvlv_handler) + return; + + batadv_tvlv_handler_free_ref(tvlv_handler); + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + hlist_del_rcu(&tvlv_handler->list); + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); + batadv_tvlv_handler_free_ref(tvlv_handler); +} + +/** + * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the + * specified host + * @bat_priv: the bat priv with all the soft interface information + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @type: tvlv type + * @version: tvlv version + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + */ +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, + uint8_t *dst, uint8_t type, uint8_t version, + void *tvlv_value, uint16_t tvlv_value_len) +{ + struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; + struct batadv_tvlv_hdr *tvlv_hdr; + struct batadv_orig_node *orig_node; + struct sk_buff *skb = NULL; + unsigned char *tvlv_buff; + unsigned int tvlv_len; + ssize_t hdr_len = sizeof(*unicast_tvlv_packet); + bool ret = false; + + orig_node = batadv_orig_hash_find(bat_priv, dst); + if (!orig_node) + goto out; + + tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len; + + skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len); + if (!skb) + goto out; + + skb->priority = TC_PRIO_CONTROL; + skb_reserve(skb, ETH_HLEN); + tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); + unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; + unicast_tvlv_packet->header.packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->header.version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->header.ttl = BATADV_TTL; + unicast_tvlv_packet->reserved = 0; + unicast_tvlv_packet->tvlv_len = htons(tvlv_len); + unicast_tvlv_packet->align = 0; + memcpy(unicast_tvlv_packet->src, src, ETH_ALEN); + memcpy(unicast_tvlv_packet->dst, dst, ETH_ALEN); + + tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1); + tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff; + tvlv_hdr->version = version; + tvlv_hdr->type = type; + tvlv_hdr->len = htons(tvlv_value_len); + tvlv_buff += sizeof(*tvlv_hdr); + memcpy(tvlv_buff, tvlv_value, tvlv_value_len); + + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) + ret = true; + +out: + if (skb && !ret) + kfree_skb(skb); + if (orig_node) + batadv_orig_node_free_ref(orig_node); +} + static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { struct batadv_algo_ops *bat_algo_ops; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2467552..e11c2ec 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -86,8 +86,6 @@ /* numbers of originator to contact for any PUT/GET DHT operation */ #define BATADV_DAT_CANDIDATES_NUM 3 -#define BATADV_VIS_INTERVAL 5000 /* 5 seconds */ - /* how much worse secondary interfaces may be to be considered as bonding * candidates */ @@ -326,4 +324,39 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv, */ #define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) +void batadv_tvlv_container_register(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version, + void *tvlv_value, uint16_t tvlv_value_len); +uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, + int packet_min_len); +void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_orig_node *orig_node); +void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version); + +void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, + void (*optr)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len), + int (*uptr)(struct batadv_priv *bat_priv, + uint8_t *src, uint8_t *dst, + void *tvlv_value, + uint16_t tvlv_value_len), + uint8_t type, uint8_t version, uint8_t flags); +void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, + uint8_t type, uint8_t version); +int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, + bool ogm_source, + struct batadv_orig_node *orig_node, + uint8_t *src, uint8_t *dst, + void *tvlv_buff, uint16_t tvlv_buff_len); +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, + uint8_t *dst, uint8_t type, uint8_t version, + void *tvlv_value, uint16_t tvlv_value_len); + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index a487d46..23f611b 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** + * batadv_nc_init - one-time initialization for network coding + */ +int __init batadv_nc_init(void) +{ + int ret; + + /* Register our packet type */ + ret = batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet); + + return ret; +} + +/** * batadv_nc_start_timer - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information */ @@ -45,10 +59,63 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_nc_init - initialise coding hash table and start house keeping + * batadv_nc_tvlv_container_update - update the network coding tvlv container + * after network coding setting change * @bat_priv: the bat priv with all the soft interface information */ -int batadv_nc_init(struct batadv_priv *bat_priv) +static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) +{ + char nc_mode; + + nc_mode = atomic_read(&bat_priv->network_coding); + + switch (nc_mode) { + case 0: + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_NC, 1); + break; + case 1: + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_NC, 1, + NULL, 0); + break; + } +} + +/** + * batadv_nc_status_update - update the network coding tvlv container after + * network coding setting change + * @net_dev: the soft interface net device + */ +void batadv_nc_status_update(struct net_device *net_dev) +{ + struct batadv_priv *bat_priv = netdev_priv(net_dev); + batadv_nc_tvlv_container_update(bat_priv); +} + +/** + * batadv_nc_tvlv_ogm_handler_v1 - process incoming nc tvlv container + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node of the ogm + * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) + * @tvlv_value: tvlv buffer containing the gateway data + * @tvlv_value_len: tvlv buffer length + */ +static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) + orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_NC; + else + orig->capabilities |= BATADV_ORIG_CAPA_HAS_NC; +} + +/** + * batadv_nc_mesh_init - initialise coding hash table and start house keeping + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { bat_priv->nc.timestamp_fwd_flush = jiffies; bat_priv->nc.timestamp_sniffed_purge = jiffies; @@ -70,14 +137,13 @@ int batadv_nc_init(struct batadv_priv *bat_priv) batadv_hash_set_lock_class(bat_priv->nc.coding_hash, &batadv_nc_decoding_hash_lock_class_key); - /* Register our packet type */ - if (batadv_recv_handler_register(BATADV_CODED, - batadv_nc_recv_coded_packet) < 0) - goto err; - INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); batadv_nc_start_timer(bat_priv); + batadv_tvlv_handler_register(bat_priv, batadv_nc_tvlv_ogm_handler_v1, + NULL, BATADV_TVLV_NC, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + batadv_nc_tvlv_container_update(bat_priv); return 0; err: @@ -793,6 +859,10 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, if (!atomic_read(&bat_priv->network_coding)) goto out; + /* check if orig node is network coding enabled */ + if (!(orig_node->capabilities & BATADV_ORIG_CAPA_HAS_NC)) + goto out; + /* accept ogms from 'good' neighbors and single hop neighbors */ if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) && !is_single_hop_neigh) @@ -1721,12 +1791,13 @@ free_nc_packet: } /** - * batadv_nc_free - clean up network coding memory + * batadv_nc_mesh_free - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ -void batadv_nc_free(struct batadv_priv *bat_priv) +void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - batadv_recv_handler_unregister(BATADV_CODED); + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_NC, 1); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_NC, 1); cancel_delayed_work_sync(&bat_priv->nc.work); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 85a4ec8..d4fd315 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -22,8 +22,10 @@ #ifdef CONFIG_BATMAN_ADV_NC -int batadv_nc_init(struct batadv_priv *bat_priv); -void batadv_nc_free(struct batadv_priv *bat_priv); +void batadv_nc_status_update(struct net_device *net_dev); +int batadv_nc_init(void); +int batadv_nc_mesh_init(struct batadv_priv *bat_priv); +void batadv_nc_mesh_free(struct batadv_priv *bat_priv); void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, @@ -46,12 +48,21 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_NC */ -static inline int batadv_nc_init(struct batadv_priv *bat_priv) +static inline void batadv_nc_status_update(struct net_device *net_dev) +{ +} + +static inline int batadv_nc_init(void) +{ + return 0; +} + +static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { return 0; } -static inline void batadv_nc_free(struct batadv_priv *bat_priv) +static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { return; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index f50553a..5d53d2f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -388,9 +388,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { if (batadv_purge_orig_node(bat_priv, orig_node)) { - if (orig_node->gw_flags) - batadv_gw_node_delete(bat_priv, - orig_node); + batadv_gw_node_delete(bat_priv, orig_node); hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); continue; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index a51ccfc..4361bae 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -20,17 +20,34 @@ #ifndef _NET_BATMAN_ADV_PACKET_H_ #define _NET_BATMAN_ADV_PACKET_H_ +/** + * enum batadv_packettype - types for batman-adv encapsulated packets + * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV + * @BATADV_BCAST: broadcast packets carrying broadcast payload + * @BATADV_CODED: network coded packets + * + * @BATADV_UNICAST: unicast packets carrying unicast payload traffic + * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original + * payload packet + * @BATADV_UNICAST_4ADDR: unicast packet including the originator address of + * the sender + * @BATADV_ICMP: unicast packet like IP ICMP used for ping or traceroute + * @BATADV_UNICAST_TVLV: unicast packet carrying TVLV containers + */ enum batadv_packettype { - BATADV_IV_OGM = 0x01, - BATADV_ICMP = 0x02, - BATADV_UNICAST = 0x03, - BATADV_BCAST = 0x04, - BATADV_VIS = 0x05, - BATADV_UNICAST_FRAG = 0x06, - BATADV_TT_QUERY = 0x07, - BATADV_ROAM_ADV = 0x08, - BATADV_UNICAST_4ADDR = 0x09, - BATADV_CODED = 0x0a, + /* 0x00 - 0x3f: local packets or special rules for handling */ + BATADV_IV_OGM = 0x00, + BATADV_BCAST = 0x01, + BATADV_CODED = 0x02, + /* 0x40 - 0x7f: unicast */ +#define BATADV_UNICAST_MIN 0x40 + BATADV_UNICAST = 0x40, + BATADV_UNICAST_FRAG = 0x41, + BATADV_UNICAST_4ADDR = 0x42, + BATADV_ICMP = 0x43, + BATADV_UNICAST_TVLV = 0x44, +#define BATADV_UNICAST_MAX 0x7f + /* 0x80 - 0xff: reserved */ }; /** @@ -48,13 +65,21 @@ enum batadv_subtype { }; /* this file is included by batctl which needs these defines */ -#define BATADV_COMPAT_VERSION 14 +#define BATADV_COMPAT_VERSION 15 +/** + * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets + * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was + * previously received from someone else than the best neighbor. + * @BATADV_PRIMARIES_FIRST_HOP: flag is set when the primary interface address + * is used, and the packet travels its first hop. + * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a + * one hop neighbor on the interface where it was originally received. + */ enum batadv_iv_flags { - BATADV_NOT_BEST_NEXT_HOP = BIT(3), - BATADV_PRIMARIES_FIRST_HOP = BIT(4), - BATADV_VIS_SERVER = BIT(5), - BATADV_DIRECTLINK = BIT(6), + BATADV_NOT_BEST_NEXT_HOP = BIT(0), + BATADV_PRIMARIES_FIRST_HOP = BIT(1), + BATADV_DIRECTLINK = BIT(2), }; /* ICMP message types */ @@ -66,29 +91,27 @@ enum batadv_icmp_packettype { BATADV_PARAMETER_PROBLEM = 12, }; -/* vis defines */ -enum batadv_vis_packettype { - BATADV_VIS_TYPE_SERVER_SYNC = 0, - BATADV_VIS_TYPE_CLIENT_UPDATE = 1, -}; - /* fragmentation defines */ enum batadv_unicast_frag_flags { BATADV_UNI_FRAG_HEAD = BIT(0), BATADV_UNI_FRAG_LARGETAIL = BIT(1), }; -/* TT_QUERY subtypes */ -#define BATADV_TT_QUERY_TYPE_MASK 0x3 +/* tt data subtypes */ +#define BATADV_TT_DATA_TYPE_MASK 0x0F -enum batadv_tt_query_packettype { - BATADV_TT_REQUEST = 0, - BATADV_TT_RESPONSE = 1, -}; - -/* TT_QUERY flags */ -enum batadv_tt_query_flags { - BATADV_TT_FULL_TABLE = BIT(2), +/** + * enum batadv_tt_data_flags - flags for tt data tvlv + * @BATADV_TT_OGM_DIFF: TT diff propagated through OGM + * @BATADV_TT_REQUEST: TT request message + * @BATADV_TT_RESPONSE: TT response message + * @BATADV_TT_FULL_TABLE: contains full table to replace existing table + */ +enum batadv_tt_data_flags { + BATADV_TT_OGM_DIFF = BIT(0), + BATADV_TT_REQUEST = BIT(1), + BATADV_TT_RESPONSE = BIT(2), + BATADV_TT_FULL_TABLE = BIT(4), }; /* BATADV_TT_CLIENT flags. @@ -99,10 +122,10 @@ enum batadv_tt_client_flags { BATADV_TT_CLIENT_DEL = BIT(0), BATADV_TT_CLIENT_ROAM = BIT(1), BATADV_TT_CLIENT_WIFI = BIT(2), - BATADV_TT_CLIENT_TEMP = BIT(3), BATADV_TT_CLIENT_NOPURGE = BIT(8), BATADV_TT_CLIENT_NEW = BIT(9), BATADV_TT_CLIENT_PENDING = BIT(10), + BATADV_TT_CLIENT_TEMP = BIT(11), }; /* claim frame types for the bridge loop avoidance */ @@ -113,6 +136,22 @@ enum batadv_bla_claimframe { BATADV_CLAIM_TYPE_REQUEST = 0x03, }; +/** + * enum batadv_tvlv_type - tvlv type definitions + * @BATADV_TVLV_GW: gateway tvlv + * @BATADV_TVLV_DAT: distributed arp table tvlv + * @BATADV_TVLV_NC: network coding tvlv + * @BATADV_TVLV_TT: translation table tvlv + * @BATADV_TVLV_ROAM: roaming advertisement tvlv + */ +enum batadv_tvlv_type { + BATADV_TVLV_GW = 0x01, + BATADV_TVLV_DAT = 0x02, + BATADV_TVLV_NC = 0x03, + BATADV_TVLV_TT = 0x04, + BATADV_TVLV_ROAM = 0x05, +}; + /* the destination hardware field in the ARP frame is used to * transport the claim type and the group id */ @@ -131,18 +170,25 @@ struct batadv_header { */ }; +/** + * struct batadv_ogm_packet - ogm (routing protocol) packet + * @header: common batman packet header + * @flags: contains routing relevant flags - see enum batadv_iv_flags + * @tvlv_len: length of tvlv data following the ogm header + */ struct batadv_ogm_packet { struct batadv_header header; - uint8_t flags; /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */ + uint8_t flags; __be32 seqno; uint8_t orig[ETH_ALEN]; uint8_t prev_sender[ETH_ALEN]; - uint8_t gw_flags; /* flags related to gateway class */ + uint8_t reserved; uint8_t tq; - uint8_t tt_num_changes; - uint8_t ttvn; /* translation table version number */ - __be16 tt_crc; -} __packed; + __be16 tvlv_len; + /* __packed is not needed as the struct size is divisible by 4, + * and the largest data type in this struct has a size of 4. + */ +}; #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) @@ -231,54 +277,6 @@ struct batadv_bcast_packet { #pragma pack() -struct batadv_vis_packet { - struct batadv_header header; - uint8_t vis_type; /* which type of vis-participant sent this? */ - __be32 seqno; /* sequence number */ - uint8_t entries; /* number of entries behind this struct */ - uint8_t reserved; - uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */ - uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */ - uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */ -}; - -struct batadv_tt_query_packet { - struct batadv_header header; - /* the flag field is a combination of: - * - TT_REQUEST or TT_RESPONSE - * - TT_FULL_TABLE - */ - uint8_t flags; - uint8_t dst[ETH_ALEN]; - uint8_t src[ETH_ALEN]; - /* the ttvn field is: - * if TT_REQUEST: ttvn that triggered the - * request - * if TT_RESPONSE: new ttvn for the src - * orig_node - */ - uint8_t ttvn; - /* tt_data field is: - * if TT_REQUEST: crc associated with the - * ttvn - * if TT_RESPONSE: table_size - */ - __be16 tt_data; -} __packed; - -struct batadv_roam_adv_packet { - struct batadv_header header; - uint8_t reserved; - uint8_t dst[ETH_ALEN]; - uint8_t src[ETH_ALEN]; - uint8_t client[ETH_ALEN]; -} __packed; - -struct batadv_tt_change { - uint8_t flags; - uint8_t addr[ETH_ALEN]; -} __packed; - /** * struct batadv_coded_packet - network coded packet * @header: common batman packet header and ttl of first included packet @@ -311,4 +309,82 @@ struct batadv_coded_packet { __be16 coded_len; }; +/** + * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload + * @header: common batman packet header + * @reserved: reserved field (for packet alignment) + * @src: address of the source + * @dst: address of the destination + * @tvlv_len: length of tvlv data following the unicast tvlv header + * @align: 2 bytes to align the header to a 4 byte boundry + */ +struct batadv_unicast_tvlv_packet { + struct batadv_header header; + uint8_t reserved; + uint8_t dst[ETH_ALEN]; + uint8_t src[ETH_ALEN]; + __be16 tvlv_len; + uint16_t align; +}; + +/** + * struct batadv_tvlv_hdr - base tvlv header struct + * @type: tvlv container type (see batadv_tvlv_type) + * @version: tvlv container version + * @len: tvlv container length + */ +struct batadv_tvlv_hdr { + uint8_t type; + uint8_t version; + __be16 len; +}; + +/** + * struct batadv_tvlv_gateway_data - gateway data propagated through gw tvlv + * container + * @bandwidth_down: advertised uplink download bandwidth + * @bandwidth_up: advertised uplink upload bandwidth + */ +struct batadv_tvlv_gateway_data { + __be32 bandwidth_down; + __be32 bandwidth_up; +}; + +/** + * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container + * @flags: translation table flags (see batadv_tt_data_flags) + * @ttvn: translation table version number + * @reserved: field reserved for future use + * @crc: crc32 checksum of the local translation table + */ +struct batadv_tvlv_tt_data { + uint8_t flags; + uint8_t ttvn; + uint16_t reserved; + __be32 crc; +}; + +/** + * struct batadv_tvlv_tt_change - translation table diff data + * @flags: status indicators concerning the non-mesh client (see + * batadv_tt_client_flags) + * @reserved: reserved field + * @addr: mac address of non-mesh client that triggered this tt change + */ +struct batadv_tvlv_tt_change { + uint8_t flags; + uint8_t reserved; + uint8_t addr[ETH_ALEN]; +}; + +/** + * struct batadv_tvlv_roam_adv - roaming advertisement + * @client: mac address of roaming client + * @reserved: field reserved for future use + */ +struct batadv_tvlv_roam_adv { + uint8_t client[ETH_ALEN]; + uint16_t reserved; +}; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 0439395..457dfef 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -25,7 +25,6 @@ #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "vis.h" #include "unicast.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -557,126 +556,6 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, return 0; } -int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if) -{ - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_tt_query_packet *tt_query; - uint16_t tt_size; - int hdr_size = sizeof(*tt_query); - char tt_flag; - size_t packet_size; - - if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) - return NET_RX_DROP; - - /* I could need to modify it */ - if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0) - goto out; - - tt_query = (struct batadv_tt_query_packet *)skb->data; - - switch (tt_query->flags & BATADV_TT_QUERY_TYPE_MASK) { - case BATADV_TT_REQUEST: - batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_RX); - - /* If we cannot provide an answer the tt_request is - * forwarded - */ - if (!batadv_send_tt_response(bat_priv, tt_query)) { - if (tt_query->flags & BATADV_TT_FULL_TABLE) - tt_flag = 'F'; - else - tt_flag = '.'; - - batadv_dbg(BATADV_DBG_TT, bat_priv, - "Routing TT_REQUEST to %pM [%c]\n", - tt_query->dst, - tt_flag); - return batadv_route_unicast_packet(skb, recv_if); - } - break; - case BATADV_TT_RESPONSE: - batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); - - if (batadv_is_my_mac(bat_priv, tt_query->dst)) { - /* packet needs to be linearized to access the TT - * changes - */ - if (skb_linearize(skb) < 0) - goto out; - /* skb_linearize() possibly changed skb->data */ - tt_query = (struct batadv_tt_query_packet *)skb->data; - - tt_size = batadv_tt_len(ntohs(tt_query->tt_data)); - - /* Ensure we have all the claimed data */ - packet_size = sizeof(struct batadv_tt_query_packet); - packet_size += tt_size; - if (unlikely(skb_headlen(skb) < packet_size)) - goto out; - - batadv_handle_tt_response(bat_priv, tt_query); - } else { - if (tt_query->flags & BATADV_TT_FULL_TABLE) - tt_flag = 'F'; - else - tt_flag = '.'; - batadv_dbg(BATADV_DBG_TT, bat_priv, - "Routing TT_RESPONSE to %pM [%c]\n", - tt_query->dst, - tt_flag); - return batadv_route_unicast_packet(skb, recv_if); - } - break; - } - -out: - /* returning NET_RX_DROP will make the caller function kfree the skb */ - return NET_RX_DROP; -} - -int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) -{ - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_roam_adv_packet *roam_adv_packet; - struct batadv_orig_node *orig_node; - - if (batadv_check_unicast_packet(bat_priv, skb, - sizeof(*roam_adv_packet)) < 0) - goto out; - - batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); - - roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data; - - if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst)) - return batadv_route_unicast_packet(skb, recv_if); - - /* check if it is a backbone gateway. we don't accept - * roaming advertisement from it, as it has the same - * entries as we have. - */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, roam_adv_packet->src)) - goto out; - - orig_node = batadv_orig_hash_find(bat_priv, roam_adv_packet->src); - if (!orig_node) - goto out; - - batadv_dbg(BATADV_DBG_TT, bat_priv, - "Received ROAMING_ADV from %pM (client %pM)\n", - roam_adv_packet->src, roam_adv_packet->client); - - batadv_tt_global_add(bat_priv, orig_node, roam_adv_packet->client, - BATADV_TT_CLIENT_ROAM, - atomic_read(&orig_node->last_ttvn) + 1); - - batadv_orig_node_free_ref(orig_node); -out: - /* returning NET_RX_DROP will make the caller function kfree the skb */ - return NET_RX_DROP; -} - /* find a suitable router for this originator, and use * bonding if possible. increases the found neighbors * refcount. @@ -1032,6 +911,34 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, return 1; } +/** + * batadv_recv_unhandled_unicast_packet - receive and process packets which + * are in the unicast number space but not yet known to the implementation + * @skb: unicast tvlv packet to process + * @recv_if: pointer to interface this packet was received on + * + * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * otherwise. + */ +int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_unicast_packet *unicast_packet; + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + int check, hdr_size = sizeof(*unicast_packet); + + check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); + if (check < 0) + return NET_RX_DROP; + + /* we don't know about this type, drop it. */ + unicast_packet = (struct batadv_unicast_packet *)skb->data; + if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) + return NET_RX_DROP; + + return batadv_route_unicast_packet(skb, recv_if); +} + int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -1139,6 +1046,54 @@ rx_success: return batadv_route_unicast_packet(skb, recv_if); } +/** + * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets + * @skb: unicast tvlv packet to process + * @recv_if: pointer to interface this packet was received on + * @dst_addr: the payload destination + * + * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP + * otherwise. + */ +int batadv_recv_unicast_tvlv(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; + unsigned char *tvlv_buff; + uint16_t tvlv_buff_len; + int hdr_size = sizeof(*unicast_tvlv_packet); + int ret = NET_RX_DROP; + + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) + return NET_RX_DROP; + + /* the header is likely to be modified while forwarding */ + if (skb_cow(skb, hdr_size) < 0) + return NET_RX_DROP; + + /* packet needs to be linearized to access the tvlv content */ + if (skb_linearize(skb) < 0) + return NET_RX_DROP; + + unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data; + + tvlv_buff = (unsigned char *)(skb->data + hdr_size); + tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len); + + if (tvlv_buff_len > skb->len - hdr_size) + return NET_RX_DROP; + + ret = batadv_tvlv_containers_process(bat_priv, false, NULL, + unicast_tvlv_packet->src, + unicast_tvlv_packet->dst, + tvlv_buff, tvlv_buff_len); + + if (ret != NET_RX_SUCCESS) + ret = batadv_route_unicast_packet(skb, recv_if); + + return ret; +} int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) @@ -1240,53 +1195,3 @@ out: batadv_orig_node_free_ref(orig_node); return ret; } - -int batadv_recv_vis_packet(struct sk_buff *skb, - struct batadv_hard_iface *recv_if) -{ - struct batadv_vis_packet *vis_packet; - struct ethhdr *ethhdr; - struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - int hdr_size = sizeof(*vis_packet); - - /* keep skb linear */ - if (skb_linearize(skb) < 0) - return NET_RX_DROP; - - if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; - - vis_packet = (struct batadv_vis_packet *)skb->data; - ethhdr = eth_hdr(skb); - - /* not for me */ - if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) - return NET_RX_DROP; - - /* ignore own packets */ - if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig)) - return NET_RX_DROP; - - if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig)) - return NET_RX_DROP; - - switch (vis_packet->vis_type) { - case BATADV_VIS_TYPE_SERVER_SYNC: - batadv_receive_server_sync_packet(bat_priv, vis_packet, - skb_headlen(skb)); - break; - - case BATADV_VIS_TYPE_CLIENT_UPDATE: - batadv_receive_client_update_packet(bat_priv, vis_packet, - skb_headlen(skb)); - break; - - default: /* ignore unknown packet */ - break; - } - - /* We take a copy of the data in the packet, so we should - * always free the skbuf. - */ - return NET_RX_DROP; -} diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 72a29bd..ea15fa6 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -34,12 +34,14 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -int batadv_recv_vis_packet(struct sk_buff *skb, - struct batadv_hard_iface *recv_if); int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +int batadv_recv_unicast_tvlv(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); +int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); struct batadv_neigh_node * batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 0266edd..81d69fb 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -24,7 +24,6 @@ #include "translation-table.h" #include "soft-interface.h" #include "hard-interface.h" -#include "vis.h" #include "gateway_common.h" #include "originator.h" #include "network-coding.h" diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 813db4e..25e6004 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -469,10 +469,10 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->distributed_arp_table, 1); #endif atomic_set(&bat_priv->ap_isolation, 0); - atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE); atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw_sel_class, 20); - atomic_set(&bat_priv->gw_bandwidth, 41); + atomic_set(&bat_priv->gw.bandwidth_down, 100); + atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); atomic_set(&bat_priv->hop_penalty, 30); #ifdef CONFIG_BATMAN_ADV_DEBUG diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 4114b96..869eb46 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -21,11 +21,11 @@ #include "sysfs.h" #include "translation-table.h" #include "distributed-arp-table.h" +#include "network-coding.h" #include "originator.h" #include "hard-interface.h" #include "gateway_common.h" #include "gateway_client.h" -#include "vis.h" static struct net_device *batadv_kobj_to_netdev(struct kobject *obj) { @@ -230,74 +230,6 @@ __batadv_store_uint_attr(const char *buff, size_t count, return ret; } -static ssize_t batadv_show_vis_mode(struct kobject *kobj, - struct attribute *attr, char *buff) -{ - struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - int vis_mode = atomic_read(&bat_priv->vis_mode); - const char *mode; - - if (vis_mode == BATADV_VIS_TYPE_CLIENT_UPDATE) - mode = "client"; - else - mode = "server"; - - return sprintf(buff, "%s\n", mode); -} - -static ssize_t batadv_store_vis_mode(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) -{ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); - struct batadv_priv *bat_priv = netdev_priv(net_dev); - unsigned long val; - int ret, vis_mode_tmp = -1; - const char *old_mode, *new_mode; - - ret = kstrtoul(buff, 10, &val); - - if (((count == 2) && (!ret) && - (val == BATADV_VIS_TYPE_CLIENT_UPDATE)) || - (strncmp(buff, "client", 6) == 0) || - (strncmp(buff, "off", 3) == 0)) - vis_mode_tmp = BATADV_VIS_TYPE_CLIENT_UPDATE; - - if (((count == 2) && (!ret) && - (val == BATADV_VIS_TYPE_SERVER_SYNC)) || - (strncmp(buff, "server", 6) == 0)) - vis_mode_tmp = BATADV_VIS_TYPE_SERVER_SYNC; - - if (vis_mode_tmp < 0) { - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - batadv_info(net_dev, - "Invalid parameter for 'vis mode' setting received: %s\n", - buff); - return -EINVAL; - } - - if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp) - return count; - - if (atomic_read(&bat_priv->vis_mode) == BATADV_VIS_TYPE_CLIENT_UPDATE) - old_mode = "client"; - else - old_mode = "server"; - - if (vis_mode_tmp == BATADV_VIS_TYPE_CLIENT_UPDATE) - new_mode = "client"; - else - new_mode = "server"; - - batadv_info(net_dev, "Changing vis mode from: %s to: %s\n", old_mode, - new_mode); - - atomic_set(&bat_priv->vis_mode, (unsigned int)vis_mode_tmp); - return count; -} - static ssize_t batadv_show_bat_algo(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -390,6 +322,7 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, */ batadv_gw_check_client_stop(bat_priv); atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp); + batadv_gw_tvlv_container_update(bat_priv); return count; } @@ -397,15 +330,13 @@ static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - int down, up; - int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth); - - batadv_gw_bandwidth_to_kbit(gw_bandwidth, &down, &up); - return sprintf(buff, "%i%s/%i%s\n", - (down > 2048 ? down / 1024 : down), - (down > 2048 ? "MBit" : "KBit"), - (up > 2048 ? up / 1024 : up), - (up > 2048 ? "MBit" : "KBit")); + uint32_t down, up; + + down = atomic_read(&bat_priv->gw.bandwidth_down); + up = atomic_read(&bat_priv->gw.bandwidth_up); + + return sprintf(buff, "%u.%u/%u.%u MBit\n", down / 10, + down % 10, up / 10, up % 10); } static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, @@ -426,12 +357,11 @@ BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL); #endif #ifdef CONFIG_BATMAN_ADV_DAT -BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, + batadv_dat_status_update); #endif BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); -static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode, - batadv_store_vis_mode); static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL); static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode, batadv_store_gw_mode); @@ -447,7 +377,8 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); #endif #ifdef CONFIG_BATMAN_ADV_NC -BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, + batadv_nc_status_update); #endif static struct batadv_attribute *batadv_mesh_attrs[] = { @@ -461,7 +392,6 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #endif &batadv_attr_fragmentation, &batadv_attr_ap_isolation, - &batadv_attr_vis_mode, &batadv_attr_routing_algo, &batadv_attr_gw_mode, &batadv_attr_orig_interval, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 34510f3..c741694 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -27,7 +27,7 @@ #include "routing.h" #include "bridge_loop_avoidance.h" -#include <linux/crc16.h> +#include <linux/crc32c.h> /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; @@ -180,11 +180,11 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, bool del_op_requested, del_op_entry; tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); - if (!tt_change_node) return; tt_change_node->change.flags = flags; + tt_change_node->change.reserved = 0; memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -229,9 +229,15 @@ unlock: atomic_inc(&bat_priv->tt.local_changes); } -int batadv_tt_len(int changes_num) +/** + * batadv_tt_len - compute length in bytes of given number of tt changes + * @changes_num: number of tt changes + * + * Returns computed length in bytes. + */ +static int batadv_tt_len(int changes_num) { - return changes_num * sizeof(struct batadv_tt_change); + return changes_num * sizeof(struct batadv_tvlv_tt_change); } static int batadv_tt_local_init(struct batadv_priv *bat_priv) @@ -376,71 +382,52 @@ out: batadv_tt_global_entry_free_ref(tt_global); } -static void batadv_tt_realloc_packet_buff(unsigned char **packet_buff, - int *packet_buff_len, - int min_packet_len, - int new_packet_len) -{ - unsigned char *new_buff; - - new_buff = kmalloc(new_packet_len, GFP_ATOMIC); - - /* keep old buffer if kmalloc should fail */ - if (new_buff) { - memcpy(new_buff, *packet_buff, min_packet_len); - kfree(*packet_buff); - *packet_buff = new_buff; - *packet_buff_len = new_packet_len; - } -} - -static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, - int min_packet_len) +/** + * batadv_tt_tvlv_container_update - update the translation table tvlv container + * after local tt changes have been committed + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) { - int req_len; + struct batadv_tt_change_node *entry, *safe; + struct batadv_tvlv_tt_data *tt_data; + struct batadv_tvlv_tt_change *tt_change; + int tt_diff_len = 0, tt_change_len = 0; + int tt_diff_entries_num = 0, tt_diff_entries_count = 0; - req_len = min_packet_len; - req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); + tt_diff_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ - if (req_len > bat_priv->soft_iface->mtu) - req_len = min_packet_len; + if (tt_diff_len > bat_priv->soft_iface->mtu) + tt_diff_len = 0; - batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, - min_packet_len, req_len); -} - -static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, - int min_packet_len) -{ - struct batadv_tt_change_node *entry, *safe; - int count = 0, tot_changes = 0, new_len; - unsigned char *tt_buff; + tt_data = kzalloc(sizeof(*tt_data) + tt_diff_len, GFP_ATOMIC); + if (!tt_data) + return; - batadv_tt_prepare_packet_buff(bat_priv, packet_buff, - packet_buff_len, min_packet_len); + tt_data->flags = BATADV_TT_OGM_DIFF; + tt_data->ttvn = atomic_read(&bat_priv->tt.vn); + tt_data->crc = htonl(bat_priv->tt.local_crc); - new_len = *packet_buff_len - min_packet_len; - tt_buff = *packet_buff + min_packet_len; + if (tt_diff_len == 0) + goto container_register; - if (new_len > 0) - tot_changes = new_len / batadv_tt_len(1); + tt_diff_entries_num = tt_diff_len / batadv_tt_len(1); spin_lock_bh(&bat_priv->tt.changes_list_lock); atomic_set(&bat_priv->tt.local_changes, 0); + tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1); + list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { - if (count < tot_changes) { - memcpy(tt_buff + batadv_tt_len(count), - &entry->change, sizeof(struct batadv_tt_change)); - count++; + if (tt_diff_entries_count < tt_diff_entries_num) { + memcpy(tt_change + tt_diff_entries_count, + &entry->change, + sizeof(struct batadv_tvlv_tt_change)); + tt_diff_entries_count++; } list_del(&entry->list); kfree(entry); @@ -452,20 +439,25 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, kfree(bat_priv->tt.last_changeset); bat_priv->tt.last_changeset_len = 0; bat_priv->tt.last_changeset = NULL; + tt_change_len = batadv_tt_len(tt_diff_entries_count); /* check whether this new OGM has no changes due to size problems */ - if (new_len > 0) { + if (tt_diff_entries_count > 0) { /* if kmalloc() fails we will reply with the full table * instead of providing the diff */ - bat_priv->tt.last_changeset = kmalloc(new_len, GFP_ATOMIC); + bat_priv->tt.last_changeset = kzalloc(tt_diff_len, GFP_ATOMIC); if (bat_priv->tt.last_changeset) { - memcpy(bat_priv->tt.last_changeset, tt_buff, new_len); - bat_priv->tt.last_changeset_len = new_len; + memcpy(bat_priv->tt.last_changeset, + tt_change, tt_change_len); + bat_priv->tt.last_changeset_len = tt_diff_len; } } spin_unlock_bh(&bat_priv->tt.last_changeset_lock); - return count; +container_register: + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data, + sizeof(*tt_data) + tt_change_len); + kfree(tt_data); } int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) @@ -489,7 +481,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, - "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.4x):\n", + "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.8x):\n", net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn), bat_priv->tt.local_crc); seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags", @@ -1001,7 +993,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, if (best_entry) { last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn); seq_printf(seq, - " %c %pM (%3u) via %pM (%3u) (%#.4x) [%c%c%c]\n", + " %c %pM (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n", '*', tt_global_entry->common.addr, best_entry->ttvn, best_entry->orig_node->orig, last_ttvn, best_entry->orig_node->tt_crc, @@ -1045,7 +1037,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s (%-6s) %s\n", + seq_printf(seq, " %-13s %s %-15s %s (%-10s) %s\n", "Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC", "Flags"); @@ -1402,17 +1394,19 @@ out: return orig_node; } -/* Calculates the checksum of the local table of a given orig_node */ -static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, +/** + * batadv_tt_global_crc - calculates the checksum of the local table belonging + * to the given orig_node + * @bat_priv: the bat priv with all the soft interface information + */ +static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { - uint16_t total = 0, total_one; struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - uint32_t i; - int j; + uint32_t i, crc = 0; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1443,27 +1437,24 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, orig_node)) continue; - total_one = 0; - for (j = 0; j < ETH_ALEN; j++) - total_one = crc16_byte(total_one, - tt_common->addr[j]); - total ^= total_one; + crc ^= crc32c(0, tt_common->addr, ETH_ALEN); } rcu_read_unlock(); } - return total; + return crc; } -/* Calculates the checksum of the local table */ -static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) +/** + * batadv_tt_local_crc - calculates the checksum of the local table + * @bat_priv: the bat priv with all the soft interface information + */ +static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv) { - uint16_t total = 0, total_one; struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct hlist_head *head; - uint32_t i; - int j; + uint32_t i, crc = 0; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1475,16 +1466,13 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) */ if (tt_common->flags & BATADV_TT_CLIENT_NEW) continue; - total_one = 0; - for (j = 0; j < ETH_ALEN; j++) - total_one = crc16_byte(total_one, - tt_common->addr[j]); - total ^= total_one; + + crc ^= crc32c(0, tt_common->addr, ETH_ALEN); } rcu_read_unlock(); } - return total; + return crc; } static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) @@ -1504,7 +1492,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *tt_buff, - uint8_t tt_num_changes) + uint16_t tt_num_changes) { uint16_t tt_buff_len = batadv_tt_len(tt_num_changes); @@ -1569,9 +1557,14 @@ unlock: return tt_req_node; } -/* data_ptr is useless here, but has to be kept to respect the prototype */ -static int batadv_tt_local_valid_entry(const void *entry_ptr, - const void *data_ptr) +/** + * batadv_tt_local_valid - verify that given tt entry is a valid one + * @entry_ptr: to be checked local tt entry + * @data_ptr: not used but definition required to satisfy the callback prototype + * + * Returns 1 if the entry is a valid, 0 otherwise. + */ +static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; @@ -1598,41 +1591,45 @@ static int batadv_tt_global_valid(const void *entry_ptr, return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); } -static struct sk_buff * -batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, - struct batadv_hashtable *hash, - struct batadv_priv *bat_priv, - int (*valid_cb)(const void *, const void *), - void *cb_data) +/** + * batadv_tt_tvlv_generate - creates tvlv tt data buffer to fill it with the + * tt entries from the specified tt hash + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the tt entries + * @tt_len: expected tvlv tt data buffer length in number of bytes + * @valid_cb: function to filter tt change entries + * @cb_data: data passed to the filter function as argument + * + * Returns pointer to allocated tvlv tt data buffer if operation was + * successful or NULL otherwise. + */ +static struct batadv_tvlv_tt_data * +batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, uint16_t tt_len, + int (*valid_cb)(const void *, const void *), + void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; - struct batadv_tt_query_packet *tt_response; - struct batadv_tt_change *tt_change; + struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; + struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; - struct sk_buff *skb = NULL; - uint16_t tt_tot, tt_count; - ssize_t tt_query_size = sizeof(struct batadv_tt_query_packet); + uint16_t tt_tot, tt_num_entries = 0; + ssize_t tvlv_tt_size = sizeof(struct batadv_tvlv_tt_data); uint32_t i; - size_t len; - if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) { - tt_len = bat_priv->soft_iface->mtu - tt_query_size; - tt_len -= tt_len % sizeof(struct batadv_tt_change); + if (tvlv_tt_size + tt_len > bat_priv->soft_iface->mtu) { + tt_len = bat_priv->soft_iface->mtu - tvlv_tt_size; + tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change); } - tt_tot = tt_len / sizeof(struct batadv_tt_change); - len = tt_query_size + tt_len; - skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); - if (!skb) - goto out; + tt_tot = tt_len / sizeof(struct batadv_tvlv_tt_change); - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); - tt_response->ttvn = ttvn; + tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, + GFP_ATOMIC); + if (!tvlv_tt_data) + goto out; - tt_change = (struct batadv_tt_change *)(skb->data + tt_query_size); - tt_count = 0; + tt_change = (struct batadv_tvlv_tt_change *)(tvlv_tt_data + 1); rcu_read_lock(); for (i = 0; i < hash->size; i++) { @@ -1640,7 +1637,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { - if (tt_count == tt_tot) + if (tt_tot == tt_num_entries) break; if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) @@ -1649,33 +1646,36 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, memcpy(tt_change->addr, tt_common_entry->addr, ETH_ALEN); tt_change->flags = tt_common_entry->flags; + tt_change->reserved = 0; - tt_count++; + tt_num_entries++; tt_change++; } } rcu_read_unlock(); - /* store in the message the number of entries we have successfully - * copied - */ - tt_response->tt_data = htons(tt_count); - out: - return skb; + return tvlv_tt_data; } +/** + * batadv_send_tt_request - send a TT Request message to a given node + * @bat_priv: the bat priv with all the soft interface information + * @dst_orig_node: the destination of the message + * @ttvn: the version number that the source of the message is looking for + * @tt_crc: the CRC associated with the version number + * @full_table: ask for the entire translation table if true, while only for the + * last TT diff otherwise + */ static int batadv_send_tt_request(struct batadv_priv *bat_priv, struct batadv_orig_node *dst_orig_node, - uint8_t ttvn, uint16_t tt_crc, + uint8_t ttvn, uint32_t tt_crc, bool full_table) { - struct sk_buff *skb = NULL; - struct batadv_tt_query_packet *tt_request; + struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_hard_iface *primary_if; struct batadv_tt_req_node *tt_req_node = NULL; - int ret = 1; - size_t tt_req_len; + bool ret = false; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1688,157 +1688,136 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN); - if (!skb) + tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data), GFP_ATOMIC); + if (!tvlv_tt_data) goto out; - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - - tt_req_len = sizeof(*tt_request); - tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); - - tt_request->header.packet_type = BATADV_TT_QUERY; - tt_request->header.version = BATADV_COMPAT_VERSION; - memcpy(tt_request->src, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(tt_request->dst, dst_orig_node->orig, ETH_ALEN); - tt_request->header.ttl = BATADV_TTL; - tt_request->ttvn = ttvn; - tt_request->tt_data = htons(tt_crc); - tt_request->flags = BATADV_TT_REQUEST; + tvlv_tt_data->flags = BATADV_TT_REQUEST; + tvlv_tt_data->ttvn = ttvn; + tvlv_tt_data->crc = htonl(tt_crc); if (full_table) - tt_request->flags |= BATADV_TT_FULL_TABLE; + tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_REQUEST to %pM [%c]\n", - dst_orig_node->orig, (full_table ? 'F' : '.')); + dst_orig_node->orig, full_table ? 'F' : '.'); batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); - - if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP) - ret = 0; + batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, + dst_orig_node->orig, BATADV_TVLV_TT, 1, + tvlv_tt_data, sizeof(*tvlv_tt_data)); + ret = true; out: if (primary_if) batadv_hardif_free_ref(primary_if); - if (ret) - kfree_skb(skb); if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); list_del(&tt_req_node->list); spin_unlock_bh(&bat_priv->tt.req_list_lock); kfree(tt_req_node); } + kfree(tvlv_tt_data); return ret; } -static bool -batadv_send_other_tt_response(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_request) +/** + * batadv_send_other_tt_response - send reply to tt request concerning another + * node's translation table + * @bat_priv: the bat priv with all the soft interface information + * @tt_data: tt data containing the tt request information + * @req_src: mac address of tt request sender + * @req_dst: mac address of tt request recipient + * + * Returns true if tt request reply was sent, false otherwise. + */ +static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, + struct batadv_tvlv_tt_data *tt_data, + uint8_t *req_src, uint8_t *req_dst) { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; - uint8_t orig_ttvn, req_ttvn, ttvn; - int res, ret = false; - unsigned char *tt_buff; - bool full_table; - uint16_t tt_len, tt_tot; - struct sk_buff *skb = NULL; - struct batadv_tt_query_packet *tt_response; - uint8_t *packet_pos; - size_t len; + struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; + uint8_t orig_ttvn, req_ttvn; + uint16_t tt_len; + bool ret = false, full_table; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n", - tt_request->src, tt_request->ttvn, tt_request->dst, - (tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); + req_src, tt_data->ttvn, req_dst, + (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); /* Let's get the orig node of the REAL destination */ - req_dst_orig_node = batadv_orig_hash_find(bat_priv, tt_request->dst); + req_dst_orig_node = batadv_orig_hash_find(bat_priv, req_dst); if (!req_dst_orig_node) goto out; - res_dst_orig_node = batadv_orig_hash_find(bat_priv, tt_request->src); + res_dst_orig_node = batadv_orig_hash_find(bat_priv, req_src); if (!res_dst_orig_node) goto out; orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); - req_ttvn = tt_request->ttvn; + req_ttvn = tt_data->ttvn; - /* I don't have the requested data */ + /* this node doesn't have the requested data */ if (orig_ttvn != req_ttvn || - tt_request->tt_data != htons(req_dst_orig_node->tt_crc)) + tt_data->crc != htonl(req_dst_orig_node->tt_crc)) goto out; /* If the full table has been explicitly requested */ - if (tt_request->flags & BATADV_TT_FULL_TABLE || + if (tt_data->flags & BATADV_TT_FULL_TABLE || !req_dst_orig_node->tt_buff) full_table = true; else full_table = false; - /* In this version, fragmentation is not implemented, then - * I'll send only one packet with as much TT entries as I can + /* TT fragmentation hasn't been implemented yet, so send as many + * TT entries fit a single packet as possible only */ if (!full_table) { spin_lock_bh(&req_dst_orig_node->tt_buff_lock); tt_len = req_dst_orig_node->tt_buff_len; - tt_tot = tt_len / sizeof(struct batadv_tt_change); - len = sizeof(*tt_response) + tt_len; - skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); - if (!skb) + tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, + GFP_ATOMIC); + if (!tvlv_tt_data) goto unlock; - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - packet_pos = skb_put(skb, len); - tt_response = (struct batadv_tt_query_packet *)packet_pos; - tt_response->ttvn = req_ttvn; - tt_response->tt_data = htons(tt_tot); - - tt_buff = skb->data + sizeof(*tt_response); /* Copy the last orig_node's OGM buffer */ - memcpy(tt_buff, req_dst_orig_node->tt_buff, + memcpy(tvlv_tt_data + 1, req_dst_orig_node->tt_buff, req_dst_orig_node->tt_buff_len); - spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); } else { tt_len = (uint16_t)atomic_read(&req_dst_orig_node->tt_size); - tt_len *= sizeof(struct batadv_tt_change); - ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); - - skb = batadv_tt_response_fill_table(tt_len, ttvn, - bat_priv->tt.global_hash, - bat_priv, - batadv_tt_global_valid, - req_dst_orig_node); - if (!skb) + tt_len = batadv_tt_len(tt_len); + + tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv, + bat_priv->tt.global_hash, + tt_len, + batadv_tt_global_valid, + req_dst_orig_node); + if (!tvlv_tt_data) goto out; - - tt_response = (struct batadv_tt_query_packet *)skb->data; } - tt_response->header.packet_type = BATADV_TT_QUERY; - tt_response->header.version = BATADV_COMPAT_VERSION; - tt_response->header.ttl = BATADV_TTL; - memcpy(tt_response->src, req_dst_orig_node->orig, ETH_ALEN); - memcpy(tt_response->dst, tt_request->src, ETH_ALEN); - tt_response->flags = BATADV_TT_RESPONSE; + tvlv_tt_data->flags = BATADV_TT_RESPONSE; + tvlv_tt_data->ttvn = req_ttvn; if (full_table) - tt_response->flags |= BATADV_TT_FULL_TABLE; + tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Sending TT_RESPONSE %pM for %pM (ttvn: %u)\n", - res_dst_orig_node->orig, req_dst_orig_node->orig, req_ttvn); + "Sending TT_RESPONSE %pM for %pM [%c] (ttvn: %u)\n", + res_dst_orig_node->orig, req_dst_orig_node->orig, + full_table ? 'F' : '.', req_ttvn); batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL); - if (res != NET_XMIT_DROP) - ret = true; + batadv_tvlv_unicast_send(bat_priv, req_dst_orig_node->orig, + req_src, BATADV_TVLV_TT, 1, + tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len); + ret = true; goto out; unlock: @@ -1849,37 +1828,40 @@ out: batadv_orig_node_free_ref(res_dst_orig_node); if (req_dst_orig_node) batadv_orig_node_free_ref(req_dst_orig_node); - if (!ret) - kfree_skb(skb); + kfree(tvlv_tt_data); return ret; } -static bool -batadv_send_my_tt_response(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_request) +/** + * batadv_send_my_tt_response - send reply to tt request concerning this node's + * translation table + * @bat_priv: the bat priv with all the soft interface information + * @tt_data: tt data containing the tt request information + * @req_src: mac address of tt request sender + * + * Returns true if tt request reply was sent, false otherwise. + */ +static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, + struct batadv_tvlv_tt_data *tt_data, + uint8_t *req_src) { + struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_orig_node *orig_node; struct batadv_hard_iface *primary_if = NULL; - uint8_t my_ttvn, req_ttvn, ttvn; - int ret = false; - unsigned char *tt_buff; + uint8_t my_ttvn, req_ttvn; bool full_table; - uint16_t tt_len, tt_tot; - struct sk_buff *skb = NULL; - struct batadv_tt_query_packet *tt_response; - uint8_t *packet_pos; - size_t len; + uint16_t tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n", - tt_request->src, tt_request->ttvn, - (tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); + req_src, tt_data->ttvn, + (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - req_ttvn = tt_request->ttvn; + req_ttvn = tt_data->ttvn; - orig_node = batadv_orig_hash_find(bat_priv, tt_request->src); + orig_node = batadv_orig_hash_find(bat_priv, req_src); if (!orig_node) goto out; @@ -1890,71 +1872,58 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, /* If the full table has been explicitly requested or the gap * is too big send the whole local translation table */ - if (tt_request->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn || + if (tt_data->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn || !bat_priv->tt.last_changeset) full_table = true; else full_table = false; - /* In this version, fragmentation is not implemented, then - * I'll send only one packet with as much TT entries as I can + /* TT fragmentation hasn't been implemented yet, so send as many + * TT entries fit a single packet as possible only */ if (!full_table) { spin_lock_bh(&bat_priv->tt.last_changeset_lock); tt_len = bat_priv->tt.last_changeset_len; - tt_tot = tt_len / sizeof(struct batadv_tt_change); - len = sizeof(*tt_response) + tt_len; - skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); - if (!skb) + tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, + GFP_ATOMIC); + if (!tvlv_tt_data) goto unlock; - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - packet_pos = skb_put(skb, len); - tt_response = (struct batadv_tt_query_packet *)packet_pos; - tt_response->ttvn = req_ttvn; - tt_response->tt_data = htons(tt_tot); - - tt_buff = skb->data + sizeof(*tt_response); - memcpy(tt_buff, bat_priv->tt.last_changeset, + /* Copy the last orig_node's OGM buffer */ + memcpy(tvlv_tt_data + 1, bat_priv->tt.last_changeset, bat_priv->tt.last_changeset_len); spin_unlock_bh(&bat_priv->tt.last_changeset_lock); } else { tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num); - tt_len *= sizeof(struct batadv_tt_change); - ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); - - skb = batadv_tt_response_fill_table(tt_len, ttvn, - bat_priv->tt.local_hash, - bat_priv, - batadv_tt_local_valid_entry, - NULL); - if (!skb) + tt_len = batadv_tt_len(tt_len); + req_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn); + + tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv, + bat_priv->tt.local_hash, + tt_len, + batadv_tt_local_valid, + NULL); + if (!tvlv_tt_data) goto out; - - tt_response = (struct batadv_tt_query_packet *)skb->data; } - tt_response->header.packet_type = BATADV_TT_QUERY; - tt_response->header.version = BATADV_COMPAT_VERSION; - tt_response->header.ttl = BATADV_TTL; - memcpy(tt_response->src, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(tt_response->dst, tt_request->src, ETH_ALEN); - tt_response->flags = BATADV_TT_RESPONSE; + tvlv_tt_data->flags = BATADV_TT_RESPONSE; + tvlv_tt_data->ttvn = req_ttvn; if (full_table) - tt_response->flags |= BATADV_TT_FULL_TABLE; + tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; batadv_dbg(BATADV_DBG_TT, bat_priv, - "Sending TT_RESPONSE to %pM [%c]\n", - orig_node->orig, - (tt_response->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); + "Sending TT_RESPONSE to %pM [%c] (ttvn: %u)\n", + orig_node->orig, full_table ? 'F' : '.', req_ttvn); batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = true; + batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, + req_src, BATADV_TVLV_TT, 1, + tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len); + goto out; unlock: @@ -1964,29 +1933,39 @@ out: batadv_orig_node_free_ref(orig_node); if (primary_if) batadv_hardif_free_ref(primary_if); - if (!ret) - kfree_skb(skb); - /* This packet was for me, so it doesn't need to be re-routed */ + kfree(tvlv_tt_data); + /* The packet was for this host, so it doesn't need to be re-routed */ return true; } -bool batadv_send_tt_response(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_request) +/** + * batadv_send_tt_response - send reply to tt request + * @bat_priv: the bat priv with all the soft interface information + * @tt_data: tt data containing the tt request information + * @req_src: mac address of tt request sender + * @req_dst: mac address of tt request recipient + * + * Returns true if tt request reply was sent, false otherwise. + */ +static bool batadv_send_tt_response(struct batadv_priv *bat_priv, + struct batadv_tvlv_tt_data *tt_data, + uint8_t *req_src, uint8_t *req_dst) { - if (batadv_is_my_mac(bat_priv, tt_request->dst)) { + if (batadv_is_my_mac(bat_priv, req_dst)) { /* don't answer backbone gws! */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src)) + if (batadv_bla_is_backbone_gw_orig(bat_priv, req_src)) return true; - return batadv_send_my_tt_response(bat_priv, tt_request); + return batadv_send_my_tt_response(bat_priv, tt_data, req_src); } else { - return batadv_send_other_tt_response(bat_priv, tt_request); + return batadv_send_other_tt_response(bat_priv, tt_data, + req_src, req_dst); } } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - struct batadv_tt_change *tt_change, + struct batadv_tvlv_tt_change *tt_change, uint16_t tt_num_changes, uint8_t ttvn) { int i; @@ -2016,11 +1995,12 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, } static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_response) + struct batadv_tvlv_tt_data *tt_data, + uint8_t *resp_src, uint16_t num_entries) { struct batadv_orig_node *orig_node; - orig_node = batadv_orig_hash_find(bat_priv, tt_response->src); + orig_node = batadv_orig_hash_find(bat_priv, resp_src); if (!orig_node) goto out; @@ -2028,9 +2008,8 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, batadv_tt_global_del_orig(bat_priv, orig_node, "Received full table"); _batadv_tt_update_changes(bat_priv, orig_node, - (struct batadv_tt_change *)(tt_response + 1), - ntohs(tt_response->tt_data), - tt_response->ttvn); + (struct batadv_tvlv_tt_change *)(tt_data + 1), + num_entries, tt_data->ttvn); spin_lock_bh(&orig_node->tt_buff_lock); kfree(orig_node->tt_buff); @@ -2038,7 +2017,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, orig_node->tt_buff = NULL; spin_unlock_bh(&orig_node->tt_buff_lock); - atomic_set(&orig_node->last_ttvn, tt_response->ttvn); + atomic_set(&orig_node->last_ttvn, tt_data->ttvn); out: if (orig_node) @@ -2048,7 +2027,7 @@ out: static void batadv_tt_update_changes(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, uint16_t tt_num_changes, uint8_t ttvn, - struct batadv_tt_change *tt_change) + struct batadv_tvlv_tt_change *tt_change) { _batadv_tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, ttvn); @@ -2079,40 +2058,46 @@ out: return ret; } -void batadv_handle_tt_response(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_response) +/** + * batadv_handle_tt_response - process incoming tt reply + * @bat_priv: the bat priv with all the soft interface information + * @tt_data: tt data containing the tt request information + * @resp_src: mac address of tt reply sender + * @num_entries: number of tt change entries appended to the tt data + */ +static void batadv_handle_tt_response(struct batadv_priv *bat_priv, + struct batadv_tvlv_tt_data *tt_data, + uint8_t *resp_src, uint16_t num_entries) { struct batadv_tt_req_node *node, *safe; struct batadv_orig_node *orig_node = NULL; - struct batadv_tt_change *tt_change; + struct batadv_tvlv_tt_change *tt_change; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", - tt_response->src, tt_response->ttvn, - ntohs(tt_response->tt_data), - (tt_response->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); + resp_src, tt_data->ttvn, num_entries, + (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.')); /* we should have never asked a backbone gw */ - if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_response->src)) + if (batadv_bla_is_backbone_gw_orig(bat_priv, resp_src)) goto out; - orig_node = batadv_orig_hash_find(bat_priv, tt_response->src); + orig_node = batadv_orig_hash_find(bat_priv, resp_src); if (!orig_node) goto out; - if (tt_response->flags & BATADV_TT_FULL_TABLE) { - batadv_tt_fill_gtable(bat_priv, tt_response); + if (tt_data->flags & BATADV_TT_FULL_TABLE) { + batadv_tt_fill_gtable(bat_priv, tt_data, resp_src, num_entries); } else { - tt_change = (struct batadv_tt_change *)(tt_response + 1); - batadv_tt_update_changes(bat_priv, orig_node, - ntohs(tt_response->tt_data), - tt_response->ttvn, tt_change); + tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1); + batadv_tt_update_changes(bat_priv, orig_node, num_entries, + tt_data->ttvn, tt_change); } /* Delete the tt_req_node from pending tt_requests list */ spin_lock_bh(&bat_priv->tt.req_list_lock); list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { - if (!batadv_compare_eth(node->addr, tt_response->src)) + if (!batadv_compare_eth(node->addr, resp_src)) continue; list_del(&node->list); kfree(node); @@ -2126,25 +2111,6 @@ out: batadv_orig_node_free_ref(orig_node); } -int batadv_tt_init(struct batadv_priv *bat_priv) -{ - int ret; - - ret = batadv_tt_local_init(bat_priv); - if (ret < 0) - return ret; - - ret = batadv_tt_global_init(bat_priv); - if (ret < 0) - return ret; - - INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); - queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, - msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); - - return 1; -} - static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; @@ -2228,11 +2194,12 @@ unlock: static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, struct batadv_orig_node *orig_node) { - struct sk_buff *skb = NULL; - struct batadv_roam_adv_packet *roam_adv_packet; - int ret = 1; struct batadv_hard_iface *primary_if; - size_t len = sizeof(*roam_adv_packet); + struct batadv_tvlv_roam_adv tvlv_roam; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; /* before going on we have to check whether the client has * already roamed to us too many times @@ -2240,40 +2207,22 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); - if (!skb) - goto out; - - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - - roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); - - roam_adv_packet->header.packet_type = BATADV_ROAM_ADV; - roam_adv_packet->header.version = BATADV_COMPAT_VERSION; - roam_adv_packet->header.ttl = BATADV_TTL; - roam_adv_packet->reserved = 0; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - memcpy(roam_adv_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); - batadv_hardif_free_ref(primary_if); - memcpy(roam_adv_packet->dst, orig_node->orig, ETH_ALEN); - memcpy(roam_adv_packet->client, client, ETH_ALEN); - batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending ROAMING_ADV to %pM (client %pM)\n", orig_node->orig, client); batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = 0; + memcpy(tvlv_roam.client, client, sizeof(tvlv_roam.client)); + tvlv_roam.reserved = 0; + + batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, + orig_node->orig, BATADV_TVLV_ROAM, 1, + &tvlv_roam, sizeof(tvlv_roam)); out: - if (ret && skb) - kfree_skb(skb); - return; + if (primary_if) + batadv_hardif_free_ref(primary_if); } static void batadv_tt_purge(struct work_struct *work) @@ -2297,6 +2246,9 @@ static void batadv_tt_purge(struct work_struct *work) void batadv_tt_free(struct batadv_priv *bat_priv) { + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_TT, 1); + cancel_delayed_work_sync(&bat_priv->tt.work); batadv_tt_local_table_free(bat_priv); @@ -2384,14 +2336,20 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) } } -static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, int packet_min_len) +/** + * batadv_tt_local_commit_changes - commit all pending local tt changes which + * have been queued in the time since the last commit + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) { uint16_t changed_num = 0; - if (atomic_read(&bat_priv->tt.local_changes) < 1) - return -ENOENT; + if (atomic_read(&bat_priv->tt.local_changes) < 1) { + if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) + batadv_tt_tvlv_container_update(bat_priv); + return; + } changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash, BATADV_TT_CLIENT_NEW, false); @@ -2409,32 +2367,7 @@ static int batadv_tt_commit_changes(struct batadv_priv *bat_priv, /* reset the sending counter */ atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); - - return batadv_tt_changes_fill_buff(bat_priv, packet_buff, - packet_buff_len, packet_min_len); -} - -/* when calling this function (hard_iface == primary_if) has to be true */ -int batadv_tt_append_diff(struct batadv_priv *bat_priv, - unsigned char **packet_buff, int *packet_buff_len, - int packet_min_len) -{ - int tt_num_changes; - - /* if at least one change happened */ - tt_num_changes = batadv_tt_commit_changes(bat_priv, packet_buff, - packet_buff_len, - packet_min_len); - - /* if the changes have been sent often enough */ - if ((tt_num_changes < 0) && - (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))) { - batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, - packet_min_len, packet_min_len); - tt_num_changes = 0; - } - - return tt_num_changes; + batadv_tt_tvlv_container_update(bat_priv); } bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, @@ -2468,14 +2401,25 @@ out: return ret; } -void batadv_tt_update_orig(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - const unsigned char *tt_buff, uint8_t tt_num_changes, - uint8_t ttvn, uint16_t tt_crc) +/** + * batadv_tt_update_orig - update global translation table with new tt + * information received via ogms + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node of the ogm + * @tt_buff: buffer holding the tt information + * @tt_num_changes: number of tt changes inside the tt buffer + * @ttvn: translation table version number of this changeset + * @tt_crc: crc32 checksum of orig node's translation table + */ +static void batadv_tt_update_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + const unsigned char *tt_buff, + uint16_t tt_num_changes, uint8_t ttvn, + uint32_t tt_crc) { uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); bool full_table = true; - struct batadv_tt_change *tt_change; + struct batadv_tvlv_tt_change *tt_change; /* don't care about a backbone gateways updates. */ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig)) @@ -2496,7 +2440,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv, goto request_table; } - tt_change = (struct batadv_tt_change *)tt_buff; + tt_change = (struct batadv_tvlv_tt_change *)tt_buff; batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, tt_change); @@ -2525,7 +2469,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv, orig_node->tt_crc != tt_crc) { request_table: batadv_dbg(BATADV_DBG_TT, bat_priv, - "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.4x last_crc: %#.4x num_changes: %u)\n", + "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.8x last_crc: %#.8x num_changes: %u)\n", orig_node->orig, ttvn, orig_ttvn, tt_crc, orig_node->tt_crc, tt_num_changes); batadv_send_tt_request(bat_priv, orig_node, ttvn, @@ -2605,3 +2549,198 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, out: return ret; } + +/** + * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node of the ogm + * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) + * @tvlv_value: tvlv buffer containing the gateway data + * @tvlv_value_len: tvlv buffer length + */ +static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + struct batadv_tvlv_tt_data *tt_data; + uint16_t num_entries; + + if (tvlv_value_len < sizeof(*tt_data)) + return; + + tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; + tvlv_value_len -= sizeof(*tt_data); + + num_entries = tvlv_value_len / batadv_tt_len(1); + + batadv_tt_update_orig(bat_priv, orig, + (unsigned char *)(tt_data + 1), + num_entries, tt_data->ttvn, ntohl(tt_data->crc)); +} + +/** + * batadv_tt_tvlv_unicast_handler_v1 - process incoming (unicast) tt tvlv + * container + * @bat_priv: the bat priv with all the soft interface information + * @src: mac address of tt tvlv sender + * @dst: mac address of tt tvlv recipient + * @tvlv_value: tvlv buffer containing the tt data + * @tvlv_value_len: tvlv buffer length + * + * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS + * otherwise. + */ +static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, + uint8_t *src, uint8_t *dst, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + struct batadv_tvlv_tt_data *tt_data; + uint16_t num_entries; + char tt_flag; + bool ret; + + if (tvlv_value_len < sizeof(*tt_data)) + return NET_RX_SUCCESS; + + tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; + tvlv_value_len -= sizeof(*tt_data); + + num_entries = tvlv_value_len / batadv_tt_len(1); + + switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) { + case BATADV_TT_REQUEST: + batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_RX); + + /* If this node cannot provide a TT response the tt_request is + * forwarded + */ + ret = batadv_send_tt_response(bat_priv, tt_data, src, dst); + if (!ret) { + if (tt_data->flags & BATADV_TT_FULL_TABLE) + tt_flag = 'F'; + else + tt_flag = '.'; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Routing TT_REQUEST to %pM [%c]\n", + dst, tt_flag); + /* tvlv API will re-route the packet */ + return NET_RX_DROP; + } + break; + case BATADV_TT_RESPONSE: + batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); + + if (batadv_is_my_mac(bat_priv, dst)) { + batadv_handle_tt_response(bat_priv, tt_data, + src, num_entries); + return NET_RX_SUCCESS; + } + + if (tt_data->flags & BATADV_TT_FULL_TABLE) + tt_flag = 'F'; + else + tt_flag = '.'; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Routing TT_RESPONSE to %pM [%c]\n", dst, tt_flag); + + /* tvlv API will re-route the packet */ + return NET_RX_DROP; + } + + return NET_RX_SUCCESS; +} + +/** + * batadv_roam_tvlv_unicast_handler_v1 - process incoming tt roam tvlv container + * @bat_priv: the bat priv with all the soft interface information + * @src: mac address of tt tvlv sender + * @dst: mac address of tt tvlv recipient + * @tvlv_value: tvlv buffer containing the tt data + * @tvlv_value_len: tvlv buffer length + * + * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS + * otherwise. + */ +static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, + uint8_t *src, uint8_t *dst, + void *tvlv_value, + uint16_t tvlv_value_len) +{ + struct batadv_tvlv_roam_adv *roaming_adv; + struct batadv_orig_node *orig_node = NULL; + + /* If this node is not the intended recipient of the + * roaming advertisement the packet is forwarded + * (the tvlv API will re-route the packet). + */ + if (!batadv_is_my_mac(bat_priv, dst)) + return NET_RX_DROP; + + /* check if it is a backbone gateway. we don't accept + * roaming advertisement from it, as it has the same + * entries as we have. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, src)) + goto out; + + if (tvlv_value_len < sizeof(*roaming_adv)) + goto out; + + orig_node = batadv_orig_hash_find(bat_priv, src); + if (!orig_node) + goto out; + + batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); + roaming_adv = (struct batadv_tvlv_roam_adv *)tvlv_value; + + batadv_dbg(BATADV_DBG_TT, bat_priv, + "Received ROAMING_ADV from %pM (client %pM)\n", + src, roaming_adv->client); + + batadv_tt_global_add(bat_priv, orig_node, roaming_adv->client, + BATADV_TT_CLIENT_ROAM, + atomic_read(&orig_node->last_ttvn) + 1); + +out: + if (orig_node) + batadv_orig_node_free_ref(orig_node); + return NET_RX_SUCCESS; +} + +/** + * batadv_tt_init - initialise the translation table internals + * @bat_priv: the bat priv with all the soft interface information + * + * Return 0 on success or negative error number in case of failure. + */ +int batadv_tt_init(struct batadv_priv *bat_priv) +{ + int ret; + + ret = batadv_tt_local_init(bat_priv); + if (ret < 0) + return ret; + + ret = batadv_tt_global_init(bat_priv); + if (ret < 0) + return ret; + + batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, + batadv_tt_tvlv_unicast_handler_v1, + BATADV_TVLV_TT, 1, BATADV_NO_FLAGS); + + batadv_tvlv_handler_register(bat_priv, NULL, + batadv_roam_tvlv_unicast_handler_v1, + BATADV_TVLV_ROAM, 1, BATADV_NO_FLAGS); + + INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, + msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); + + return 1; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 659a3bb..b4b6dea 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -20,7 +20,6 @@ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ -int batadv_tt_len(int changes_num); int batadv_tt_init(struct batadv_priv *bat_priv); void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, int ifindex); @@ -43,20 +42,10 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, const uint8_t *src, const uint8_t *addr); void batadv_tt_free(struct batadv_priv *bat_priv); -bool batadv_send_tt_response(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_request); bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr); -void batadv_handle_tt_response(struct batadv_priv *bat_priv, - struct batadv_tt_query_packet *tt_response); bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src, uint8_t *dst); -void batadv_tt_update_orig(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - const unsigned char *tt_buff, uint8_t tt_num_changes, - uint8_t ttvn, uint16_t tt_crc); -int batadv_tt_append_diff(struct batadv_priv *bat_priv, - unsigned char **packet_buff, int *packet_buff_len, - int packet_min_len); +void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv); bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, uint8_t *addr); bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index b2c94e1..8fbd89d 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -99,8 +99,7 @@ struct batadv_hard_iface { * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset * @batman_seqno_reset: time when the batman seqno window was reset - * @gw_flags: flags related to gateway class - * @flags: for now only VIS_SERVER flag + * @capabilities: announced capabilities of this originator * @last_ttvn: last seen translation table version number * @tt_crc: CRC of the translation table * @tt_buff: last tt changeset this node received from the orig node @@ -147,10 +146,9 @@ struct batadv_orig_node { unsigned long last_seen; unsigned long bcast_seqno_reset; unsigned long batman_seqno_reset; - uint8_t gw_flags; - uint8_t flags; + uint8_t capabilities; atomic_t last_ttvn; - uint16_t tt_crc; + uint32_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */ @@ -186,9 +184,21 @@ struct batadv_orig_node { }; /** + * enum batadv_orig_capabilities - orig node capabilities + * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled + * @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled + */ +enum batadv_orig_capabilities { + BATADV_ORIG_CAPA_HAS_DAT = BIT(0), + BATADV_ORIG_CAPA_HAS_NC = BIT(1), +}; + +/** * struct batadv_gw_node - structure for orig nodes announcing gw capabilities * @list: list node for batadv_priv_gw::list * @orig_node: pointer to corresponding orig node + * @bandwidth_down: advertised uplink download bandwidth + * @bandwidth_up: advertised uplink upload bandwidth * @deleted: this struct is scheduled for deletion * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner @@ -196,6 +206,8 @@ struct batadv_orig_node { struct batadv_gw_node { struct hlist_node list; struct batadv_orig_node *orig_node; + uint32_t bandwidth_down; + uint32_t bandwidth_up; unsigned long deleted; atomic_t refcount; struct rcu_head rcu; @@ -363,7 +375,7 @@ struct batadv_priv_tt { spinlock_t req_list_lock; /* protects req_list */ spinlock_t roam_list_lock; /* protects roam_list */ atomic_t local_entry_num; - uint16_t local_crc; + uint32_t local_crc; unsigned char *last_changeset; int16_t last_changeset_len; /* protects last_changeset & last_changeset_len */ @@ -420,31 +432,31 @@ struct batadv_priv_debug_log { * @list: list of available gateway nodes * @list_lock: lock protecting gw_list & curr_gw * @curr_gw: pointer to currently selected gateway node + * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server) + * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server) * @reselect: bool indicating a gateway re-selection is in progress */ struct batadv_priv_gw { struct hlist_head list; spinlock_t list_lock; /* protects gw_list & curr_gw */ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ + atomic_t bandwidth_down; + atomic_t bandwidth_up; atomic_t reselect; }; /** - * struct batadv_priv_vis - per mesh interface vis data - * @send_list: list of batadv_vis_info packets to sent - * @hash: hash table containing vis data from other nodes in the network - * @hash_lock: lock protecting the hash table - * @list_lock: lock protecting my_info::recv_list - * @work: work queue callback item for vis packet sending - * @my_info: holds this node's vis data sent on a regular basis + * struct batadv_priv_tvlv - per mesh interface tvlv data + * @container_list: list of registered tvlv containers to be sent with each OGM + * @handler_list: list of the various tvlv content handlers + * @container_list_lock: protects tvlv container list access + * @handler_list_lock: protects handler list access */ -struct batadv_priv_vis { - struct list_head send_list; - struct batadv_hashtable *hash; - spinlock_t hash_lock; /* protects hash */ - spinlock_t list_lock; /* protects my_info::recv_list */ - struct delayed_work work; - struct batadv_vis_info *my_info; +struct batadv_priv_tvlv { + struct hlist_head container_list; + struct hlist_head handler_list; + spinlock_t container_list_lock; /* protects container_list */ + spinlock_t handler_list_lock; /* protects handler_list */ }; /** @@ -504,10 +516,8 @@ struct batadv_priv_nc { * enabled * @distributed_arp_table: bool indicating whether distributed ARP table is * enabled - * @vis_mode: vis operation: client or server (see batadv_vis_packettype) * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) * @gw_sel_class: gateway selection class (applies if gw_mode client) - * @gw_bandwidth: gateway announced bandwidth (applies if gw_mode server) * @orig_interval: OGM broadcast interval in milliseconds * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop * @log_level: configured log level (see batadv_dbg_level) @@ -531,7 +541,7 @@ struct batadv_priv_nc { * @debug_log: holding debug logging relevant data * @gw: gateway data * @tt: translation table data - * @vis: vis data + * @tvlv: type-version-length-value data * @dat: distributed arp table data * @network_coding: bool indicating whether network coding is enabled * @batadv_priv_nc: network coding data @@ -551,10 +561,8 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DAT atomic_t distributed_arp_table; #endif - atomic_t vis_mode; atomic_t gw_mode; atomic_t gw_sel_class; - atomic_t gw_bandwidth; atomic_t orig_interval; atomic_t hop_penalty; #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -583,7 +591,7 @@ struct batadv_priv { #endif struct batadv_priv_gw gw; struct batadv_priv_tt tt; - struct batadv_priv_vis vis; + struct batadv_priv_tvlv tvlv; #ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat dat; #endif @@ -740,7 +748,7 @@ struct batadv_tt_orig_list_entry { */ struct batadv_tt_change_node { struct list_head list; - struct batadv_tt_change change; + struct batadv_tvlv_tt_change change; }; /** @@ -878,66 +886,6 @@ struct batadv_frag_packet_list_entry { }; /** - * struct batadv_vis_info - local data for vis information - * @first_seen: timestamp used for purging stale vis info entries - * @recv_list: List of server-neighbors we have received this packet from. This - * packet should not be re-forward to them again. List elements are struct - * batadv_vis_recvlist_node - * @send_list: list of packets to be forwarded - * @refcount: number of contexts the object is used - * @hash_entry: hlist node for batadv_priv_vis::hash - * @bat_priv: pointer to soft_iface this orig node belongs to - * @skb_packet: contains the vis packet - */ -struct batadv_vis_info { - unsigned long first_seen; - struct list_head recv_list; - struct list_head send_list; - struct kref refcount; - struct hlist_node hash_entry; - struct batadv_priv *bat_priv; - struct sk_buff *skb_packet; -} __packed; - -/** - * struct batadv_vis_info_entry - contains link information for vis - * @src: source MAC of the link, all zero for local TT entry - * @dst: destination MAC of the link, client mac address for local TT entry - * @quality: transmission quality of the link, or 0 for local TT entry - */ -struct batadv_vis_info_entry { - uint8_t src[ETH_ALEN]; - uint8_t dest[ETH_ALEN]; - uint8_t quality; -} __packed; - -/** - * struct batadv_vis_recvlist_node - list entry for batadv_vis_info::recv_list - * @list: list node for batadv_vis_info::recv_list - * @mac: MAC address of the originator from where the vis_info was received - */ -struct batadv_vis_recvlist_node { - struct list_head list; - uint8_t mac[ETH_ALEN]; -}; - -/** - * struct batadv_vis_if_list_entry - auxiliary data for vis data generation - * @addr: MAC address of the interface - * @primary: true if this interface is the primary interface - * @list: list node the interface list - * - * While scanning for vis-entries of a particular vis-originator - * this list collects its interfaces to create a subgraph/cluster - * out of them later - */ -struct batadv_vis_if_list_entry { - uint8_t addr[ETH_ALEN]; - bool primary; - struct hlist_node list; -}; - -/** * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list * @name: name of the algorithm @@ -992,4 +940,60 @@ struct batadv_dat_candidate { struct batadv_orig_node *orig_node; }; +/** + * struct batadv_tvlv_container - container for tvlv appended to OGMs + * @list: hlist node for batadv_priv_tvlv::container_list + * @tvlv_hdr: tvlv header information needed to construct the tvlv + * @value_len: length of the buffer following this struct which contains + * the actual tvlv payload + * @refcount: number of contexts the object is used + */ +struct batadv_tvlv_container { + struct hlist_node list; + struct batadv_tvlv_hdr tvlv_hdr; + atomic_t refcount; +}; + +/** + * struct batadv_tvlv_handler - handler for specific tvlv type and version + * @list: hlist node for batadv_priv_tvlv::handler_list + * @ogm_handler: handler callback which is given the tvlv payload to process on + * incoming OGM packets + * @unicast_handler: handler callback which is given the tvlv payload to process + * on incoming unicast tvlv packets + * @type: tvlv type this handler feels responsible for + * @version: tvlv version this handler feels responsible for + * @flags: tvlv handler flags + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_tvlv_handler { + struct hlist_node list; + void (*ogm_handler)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + uint8_t flags, + void *tvlv_value, uint16_t tvlv_value_len); + int (*unicast_handler)(struct batadv_priv *bat_priv, + uint8_t *src, uint8_t *dst, + void *tvlv_value, uint16_t tvlv_value_len); + uint8_t type; + uint8_t version; + uint8_t flags; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * enum batadv_tvlv_handler_flags - tvlv handler flags definitions + * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function will call + * this handler even if its type was not found (with no data) + * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the API marks + * a handler as being called, so it won't be called if the + * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set + */ +enum batadv_tvlv_handler_flags { + BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1), + BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c deleted file mode 100644 index d8ea31a..0000000 --- a/net/batman-adv/vis.c +++ /dev/null @@ -1,938 +0,0 @@ -/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors: - * - * Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "send.h" -#include "translation-table.h" -#include "vis.h" -#include "soft-interface.h" -#include "hard-interface.h" -#include "hash.h" -#include "originator.h" - -#define BATADV_MAX_VIS_PACKET_SIZE 1000 - -/* hash class keys */ -static struct lock_class_key batadv_vis_hash_lock_class_key; - -/* free the info */ -static void batadv_free_info(struct kref *ref) -{ - struct batadv_vis_info *info; - struct batadv_priv *bat_priv; - struct batadv_vis_recvlist_node *entry, *tmp; - - info = container_of(ref, struct batadv_vis_info, refcount); - bat_priv = info->bat_priv; - - list_del_init(&info->send_list); - spin_lock_bh(&bat_priv->vis.list_lock); - list_for_each_entry_safe(entry, tmp, &info->recv_list, list) { - list_del(&entry->list); - kfree(entry); - } - - spin_unlock_bh(&bat_priv->vis.list_lock); - kfree_skb(info->skb_packet); - kfree(info); -} - -/* Compare two vis packets, used by the hashing algorithm */ -static int batadv_vis_info_cmp(const struct hlist_node *node, const void *data2) -{ - const struct batadv_vis_info *d1, *d2; - const struct batadv_vis_packet *p1, *p2; - - d1 = container_of(node, struct batadv_vis_info, hash_entry); - d2 = data2; - p1 = (struct batadv_vis_packet *)d1->skb_packet->data; - p2 = (struct batadv_vis_packet *)d2->skb_packet->data; - return batadv_compare_eth(p1->vis_orig, p2->vis_orig); -} - -/* hash function to choose an entry in a hash table of given size - * hash algorithm from http://en.wikipedia.org/wiki/Hash_table - */ -static uint32_t batadv_vis_info_choose(const void *data, uint32_t size) -{ - const struct batadv_vis_info *vis_info = data; - const struct batadv_vis_packet *packet; - const unsigned char *key; - uint32_t hash = 0; - size_t i; - - packet = (struct batadv_vis_packet *)vis_info->skb_packet->data; - key = packet->vis_orig; - for (i = 0; i < ETH_ALEN; i++) { - hash += key[i]; - hash += (hash << 10); - hash ^= (hash >> 6); - } - - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - - return hash % size; -} - -static struct batadv_vis_info * -batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) -{ - struct batadv_hashtable *hash = bat_priv->vis.hash; - struct hlist_head *head; - struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; - uint32_t index; - - if (!hash) - return NULL; - - index = batadv_vis_info_choose(data, hash->size); - head = &hash->table[index]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(vis_info, head, hash_entry) { - if (!batadv_vis_info_cmp(&vis_info->hash_entry, data)) - continue; - - vis_info_tmp = vis_info; - break; - } - rcu_read_unlock(); - - return vis_info_tmp; -} - -/* insert interface to the list of interfaces of one originator, if it - * does not already exist in the list - */ -static void batadv_vis_data_insert_interface(const uint8_t *interface, - struct hlist_head *if_list, - bool primary) -{ - struct batadv_vis_if_list_entry *entry; - - hlist_for_each_entry(entry, if_list, list) { - if (batadv_compare_eth(entry->addr, interface)) - return; - } - - /* it's a new address, add it to the list */ - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) - return; - memcpy(entry->addr, interface, ETH_ALEN); - entry->primary = primary; - hlist_add_head(&entry->list, if_list); -} - -static void batadv_vis_data_read_prim_sec(struct seq_file *seq, - const struct hlist_head *if_list) -{ - struct batadv_vis_if_list_entry *entry; - - hlist_for_each_entry(entry, if_list, list) { - if (entry->primary) - seq_puts(seq, "PRIMARY, "); - else - seq_printf(seq, "SEC %pM, ", entry->addr); - } -} - -/* read an entry */ -static ssize_t -batadv_vis_data_read_entry(struct seq_file *seq, - const struct batadv_vis_info_entry *entry, - const uint8_t *src, bool primary) -{ - if (primary && entry->quality == 0) - return seq_printf(seq, "TT %pM, ", entry->dest); - else if (batadv_compare_eth(entry->src, src)) - return seq_printf(seq, "TQ %pM %d, ", entry->dest, - entry->quality); - - return 0; -} - -static void -batadv_vis_data_insert_interfaces(struct hlist_head *list, - struct batadv_vis_packet *packet, - struct batadv_vis_info_entry *entries) -{ - int i; - - for (i = 0; i < packet->entries; i++) { - if (entries[i].quality == 0) - continue; - - if (batadv_compare_eth(entries[i].src, packet->vis_orig)) - continue; - - batadv_vis_data_insert_interface(entries[i].src, list, false); - } -} - -static void batadv_vis_data_read_entries(struct seq_file *seq, - struct hlist_head *list, - struct batadv_vis_packet *packet, - struct batadv_vis_info_entry *entries) -{ - int i; - struct batadv_vis_if_list_entry *entry; - - hlist_for_each_entry(entry, list, list) { - seq_printf(seq, "%pM,", entry->addr); - - for (i = 0; i < packet->entries; i++) - batadv_vis_data_read_entry(seq, &entries[i], - entry->addr, entry->primary); - - /* add primary/secondary records */ - if (batadv_compare_eth(entry->addr, packet->vis_orig)) - batadv_vis_data_read_prim_sec(seq, list); - - seq_puts(seq, "\n"); - } -} - -static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, - const struct hlist_head *head) -{ - struct batadv_vis_info *info; - struct batadv_vis_packet *packet; - uint8_t *entries_pos; - struct batadv_vis_info_entry *entries; - struct batadv_vis_if_list_entry *entry; - struct hlist_node *n; - - HLIST_HEAD(vis_if_list); - - hlist_for_each_entry_rcu(info, head, hash_entry) { - packet = (struct batadv_vis_packet *)info->skb_packet->data; - entries_pos = (uint8_t *)packet + sizeof(*packet); - entries = (struct batadv_vis_info_entry *)entries_pos; - - batadv_vis_data_insert_interface(packet->vis_orig, &vis_if_list, - true); - batadv_vis_data_insert_interfaces(&vis_if_list, packet, - entries); - batadv_vis_data_read_entries(seq, &vis_if_list, packet, - entries); - - hlist_for_each_entry_safe(entry, n, &vis_if_list, list) { - hlist_del(&entry->list); - kfree(entry); - } - } -} - -int batadv_vis_seq_print_text(struct seq_file *seq, void *offset) -{ - struct batadv_hard_iface *primary_if; - struct hlist_head *head; - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hashtable *hash = bat_priv->vis.hash; - uint32_t i; - int ret = 0; - int vis_server = atomic_read(&bat_priv->vis_mode); - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - if (vis_server == BATADV_VIS_TYPE_CLIENT_UPDATE) - goto out; - - spin_lock_bh(&bat_priv->vis.hash_lock); - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - batadv_vis_seq_print_text_bucket(seq, head); - } - spin_unlock_bh(&bat_priv->vis.hash_lock); - -out: - if (primary_if) - batadv_hardif_free_ref(primary_if); - return ret; -} - -/* add the info packet to the send list, if it was not - * already linked in. - */ -static void batadv_send_list_add(struct batadv_priv *bat_priv, - struct batadv_vis_info *info) -{ - if (list_empty(&info->send_list)) { - kref_get(&info->refcount); - list_add_tail(&info->send_list, &bat_priv->vis.send_list); - } -} - -/* delete the info packet from the send list, if it was - * linked in. - */ -static void batadv_send_list_del(struct batadv_vis_info *info) -{ - if (!list_empty(&info->send_list)) { - list_del_init(&info->send_list); - kref_put(&info->refcount, batadv_free_info); - } -} - -/* tries to add one entry to the receive list. */ -static void batadv_recv_list_add(struct batadv_priv *bat_priv, - struct list_head *recv_list, const char *mac) -{ - struct batadv_vis_recvlist_node *entry; - - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) - return; - - memcpy(entry->mac, mac, ETH_ALEN); - spin_lock_bh(&bat_priv->vis.list_lock); - list_add_tail(&entry->list, recv_list); - spin_unlock_bh(&bat_priv->vis.list_lock); -} - -/* returns 1 if this mac is in the recv_list */ -static int batadv_recv_list_is_in(struct batadv_priv *bat_priv, - const struct list_head *recv_list, - const char *mac) -{ - const struct batadv_vis_recvlist_node *entry; - - spin_lock_bh(&bat_priv->vis.list_lock); - list_for_each_entry(entry, recv_list, list) { - if (batadv_compare_eth(entry->mac, mac)) { - spin_unlock_bh(&bat_priv->vis.list_lock); - return 1; - } - } - spin_unlock_bh(&bat_priv->vis.list_lock); - return 0; -} - -/* try to add the packet to the vis_hash. return NULL if invalid (e.g. too old, - * broken.. ). vis hash must be locked outside. is_new is set when the packet - * is newer than old entries in the hash. - */ -static struct batadv_vis_info * -batadv_add_packet(struct batadv_priv *bat_priv, - struct batadv_vis_packet *vis_packet, int vis_info_len, - int *is_new, int make_broadcast) -{ - struct batadv_vis_info *info, *old_info; - struct batadv_vis_packet *search_packet, *old_packet; - struct batadv_vis_info search_elem; - struct batadv_vis_packet *packet; - struct sk_buff *tmp_skb; - int hash_added; - size_t len; - size_t max_entries; - - *is_new = 0; - /* sanity check */ - if (!bat_priv->vis.hash) - return NULL; - - /* see if the packet is already in vis_hash */ - search_elem.skb_packet = dev_alloc_skb(sizeof(*search_packet)); - if (!search_elem.skb_packet) - return NULL; - len = sizeof(*search_packet); - tmp_skb = search_elem.skb_packet; - search_packet = (struct batadv_vis_packet *)skb_put(tmp_skb, len); - - memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); - old_info = batadv_vis_hash_find(bat_priv, &search_elem); - kfree_skb(search_elem.skb_packet); - - if (old_info) { - tmp_skb = old_info->skb_packet; - old_packet = (struct batadv_vis_packet *)tmp_skb->data; - if (!batadv_seq_after(ntohl(vis_packet->seqno), - ntohl(old_packet->seqno))) { - if (old_packet->seqno == vis_packet->seqno) { - batadv_recv_list_add(bat_priv, - &old_info->recv_list, - vis_packet->sender_orig); - return old_info; - } else { - /* newer packet is already in hash. */ - return NULL; - } - } - /* remove old entry */ - batadv_hash_remove(bat_priv->vis.hash, batadv_vis_info_cmp, - batadv_vis_info_choose, old_info); - batadv_send_list_del(old_info); - kref_put(&old_info->refcount, batadv_free_info); - } - - info = kmalloc(sizeof(*info), GFP_ATOMIC); - if (!info) - return NULL; - - len = sizeof(*packet) + vis_info_len; - info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); - if (!info->skb_packet) { - kfree(info); - return NULL; - } - info->skb_packet->priority = TC_PRIO_CONTROL; - skb_reserve(info->skb_packet, ETH_HLEN); - packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); - - kref_init(&info->refcount); - INIT_LIST_HEAD(&info->send_list); - INIT_LIST_HEAD(&info->recv_list); - info->first_seen = jiffies; - info->bat_priv = bat_priv; - memcpy(packet, vis_packet, len); - - /* initialize and add new packet. */ - *is_new = 1; - - /* Make it a broadcast packet, if required */ - if (make_broadcast) - memcpy(packet->target_orig, batadv_broadcast_addr, ETH_ALEN); - - /* repair if entries is longer than packet. */ - max_entries = vis_info_len / sizeof(struct batadv_vis_info_entry); - if (packet->entries > max_entries) - packet->entries = max_entries; - - batadv_recv_list_add(bat_priv, &info->recv_list, packet->sender_orig); - - /* try to add it */ - hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp, - batadv_vis_info_choose, info, - &info->hash_entry); - if (hash_added != 0) { - /* did not work (for some reason) */ - kref_put(&info->refcount, batadv_free_info); - info = NULL; - } - - return info; -} - -/* handle the server sync packet, forward if needed. */ -void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv, - struct batadv_vis_packet *vis_packet, - int vis_info_len) -{ - struct batadv_vis_info *info; - int is_new, make_broadcast; - int vis_server = atomic_read(&bat_priv->vis_mode); - - make_broadcast = (vis_server == BATADV_VIS_TYPE_SERVER_SYNC); - - spin_lock_bh(&bat_priv->vis.hash_lock); - info = batadv_add_packet(bat_priv, vis_packet, vis_info_len, - &is_new, make_broadcast); - if (!info) - goto end; - - /* only if we are server ourselves and packet is newer than the one in - * hash. - */ - if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && is_new) - batadv_send_list_add(bat_priv, info); -end: - spin_unlock_bh(&bat_priv->vis.hash_lock); -} - -/* handle an incoming client update packet and schedule forward if needed. */ -void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, - struct batadv_vis_packet *vis_packet, - int vis_info_len) -{ - struct batadv_vis_info *info; - struct batadv_vis_packet *packet; - int is_new; - int vis_server = atomic_read(&bat_priv->vis_mode); - int are_target = 0; - - /* clients shall not broadcast. */ - if (is_broadcast_ether_addr(vis_packet->target_orig)) - return; - - /* Are we the target for this VIS packet? */ - if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && - batadv_is_my_mac(bat_priv, vis_packet->target_orig)) - are_target = 1; - - spin_lock_bh(&bat_priv->vis.hash_lock); - info = batadv_add_packet(bat_priv, vis_packet, vis_info_len, - &is_new, are_target); - - if (!info) - goto end; - /* note that outdated packets will be dropped at this point. */ - - packet = (struct batadv_vis_packet *)info->skb_packet->data; - - /* send only if we're the target server or ... */ - if (are_target && is_new) { - packet->vis_type = BATADV_VIS_TYPE_SERVER_SYNC; /* upgrade! */ - batadv_send_list_add(bat_priv, info); - - /* ... we're not the recipient (and thus need to forward). */ - } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) { - batadv_send_list_add(bat_priv, info); - } - -end: - spin_unlock_bh(&bat_priv->vis.hash_lock); -} - -/* Walk the originators and find the VIS server with the best tq. Set the packet - * address to its address and return the best_tq. - * - * Must be called with the originator hash locked - */ -static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, - struct batadv_vis_info *info) -{ - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct batadv_neigh_node *router; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - struct batadv_vis_packet *packet; - int best_tq = -1; - uint32_t i; - - packet = (struct batadv_vis_packet *)info->skb_packet->data; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - router = batadv_orig_node_get_router(orig_node); - if (!router) - continue; - - if ((orig_node->flags & BATADV_VIS_SERVER) && - (router->tq_avg > best_tq)) { - best_tq = router->tq_avg; - memcpy(packet->target_orig, orig_node->orig, - ETH_ALEN); - } - batadv_neigh_node_free_ref(router); - } - rcu_read_unlock(); - } - - return best_tq; -} - -/* Return true if the vis packet is full. */ -static bool batadv_vis_packet_full(const struct batadv_vis_info *info) -{ - const struct batadv_vis_packet *packet; - size_t num; - - packet = (struct batadv_vis_packet *)info->skb_packet->data; - num = BATADV_MAX_VIS_PACKET_SIZE / sizeof(struct batadv_vis_info_entry); - - if (num < packet->entries + 1) - return true; - return false; -} - -/* generates a packet of own vis data, - * returns 0 on success, -1 if no packet could be generated - */ -static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) -{ - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - struct batadv_neigh_node *router; - struct batadv_vis_info *info = bat_priv->vis.my_info; - struct batadv_vis_packet *packet; - struct batadv_vis_info_entry *entry; - struct batadv_tt_common_entry *tt_common_entry; - uint8_t *packet_pos; - int best_tq = -1; - uint32_t i; - - info->first_seen = jiffies; - packet = (struct batadv_vis_packet *)info->skb_packet->data; - packet->vis_type = atomic_read(&bat_priv->vis_mode); - - memcpy(packet->target_orig, batadv_broadcast_addr, ETH_ALEN); - packet->header.ttl = BATADV_TTL; - packet->seqno = htonl(ntohl(packet->seqno) + 1); - packet->entries = 0; - packet->reserved = 0; - skb_trim(info->skb_packet, sizeof(*packet)); - - if (packet->vis_type == BATADV_VIS_TYPE_CLIENT_UPDATE) { - best_tq = batadv_find_best_vis_server(bat_priv, info); - - if (best_tq < 0) - return best_tq; - } - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - router = batadv_orig_node_get_router(orig_node); - if (!router) - continue; - - if (!batadv_compare_eth(router->addr, orig_node->orig)) - goto next; - - if (router->if_incoming->if_status != BATADV_IF_ACTIVE) - goto next; - - if (router->tq_avg < 1) - goto next; - - /* fill one entry into buffer. */ - packet_pos = skb_put(info->skb_packet, sizeof(*entry)); - entry = (struct batadv_vis_info_entry *)packet_pos; - memcpy(entry->src, - router->if_incoming->net_dev->dev_addr, - ETH_ALEN); - memcpy(entry->dest, orig_node->orig, ETH_ALEN); - entry->quality = router->tq_avg; - packet->entries++; - -next: - batadv_neigh_node_free_ref(router); - - if (batadv_vis_packet_full(info)) - goto unlock; - } - rcu_read_unlock(); - } - - hash = bat_priv->tt.local_hash; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, head, - hash_entry) { - packet_pos = skb_put(info->skb_packet, sizeof(*entry)); - entry = (struct batadv_vis_info_entry *)packet_pos; - memset(entry->src, 0, ETH_ALEN); - memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN); - entry->quality = 0; /* 0 means TT */ - packet->entries++; - - if (batadv_vis_packet_full(info)) - goto unlock; - } - rcu_read_unlock(); - } - - return 0; - -unlock: - rcu_read_unlock(); - return 0; -} - -/* free old vis packets. Must be called with this vis_hash_lock - * held - */ -static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) -{ - uint32_t i; - struct batadv_hashtable *hash = bat_priv->vis.hash; - struct hlist_node *node_tmp; - struct hlist_head *head; - struct batadv_vis_info *info; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - hlist_for_each_entry_safe(info, node_tmp, - head, hash_entry) { - /* never purge own data. */ - if (info == bat_priv->vis.my_info) - continue; - - if (batadv_has_timed_out(info->first_seen, - BATADV_VIS_TIMEOUT)) { - hlist_del(&info->hash_entry); - batadv_send_list_del(info); - kref_put(&info->refcount, batadv_free_info); - } - } - } -} - -static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, - struct batadv_vis_info *info) -{ - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - struct batadv_vis_packet *packet; - struct sk_buff *skb; - uint32_t i, res; - - - packet = (struct batadv_vis_packet *)info->skb_packet->data; - - /* send to all routers in range. */ - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* if it's a vis server and reachable, send it. */ - if (!(orig_node->flags & BATADV_VIS_SERVER)) - continue; - - /* don't send it if we already received the packet from - * this node. - */ - if (batadv_recv_list_is_in(bat_priv, &info->recv_list, - orig_node->orig)) - continue; - - memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); - skb = skb_clone(info->skb_packet, GFP_ATOMIC); - if (!skb) - continue; - - res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res == NET_XMIT_DROP) - kfree_skb(skb); - } - rcu_read_unlock(); - } -} - -static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv, - struct batadv_vis_info *info) -{ - struct batadv_orig_node *orig_node; - struct sk_buff *skb; - struct batadv_vis_packet *packet; - - packet = (struct batadv_vis_packet *)info->skb_packet->data; - - orig_node = batadv_orig_hash_find(bat_priv, packet->target_orig); - if (!orig_node) - goto out; - - skb = skb_clone(info->skb_packet, GFP_ATOMIC); - if (!skb) - goto out; - - if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) - kfree_skb(skb); - -out: - if (orig_node) - batadv_orig_node_free_ref(orig_node); -} - -/* only send one vis packet. called from batadv_send_vis_packets() */ -static void batadv_send_vis_packet(struct batadv_priv *bat_priv, - struct batadv_vis_info *info) -{ - struct batadv_hard_iface *primary_if; - struct batadv_vis_packet *packet; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - packet = (struct batadv_vis_packet *)info->skb_packet->data; - if (packet->header.ttl < 2) { - pr_debug("Error - can't send vis packet: ttl exceeded\n"); - goto out; - } - - memcpy(packet->sender_orig, primary_if->net_dev->dev_addr, ETH_ALEN); - packet->header.ttl--; - - if (is_broadcast_ether_addr(packet->target_orig)) - batadv_broadcast_vis_packet(bat_priv, info); - else - batadv_unicast_vis_packet(bat_priv, info); - packet->header.ttl++; /* restore TTL */ - -out: - if (primary_if) - batadv_hardif_free_ref(primary_if); -} - -/* called from timer; send (and maybe generate) vis packet. */ -static void batadv_send_vis_packets(struct work_struct *work) -{ - struct delayed_work *delayed_work; - struct batadv_priv *bat_priv; - struct batadv_priv_vis *priv_vis; - struct batadv_vis_info *info; - - delayed_work = container_of(work, struct delayed_work, work); - priv_vis = container_of(delayed_work, struct batadv_priv_vis, work); - bat_priv = container_of(priv_vis, struct batadv_priv, vis); - spin_lock_bh(&bat_priv->vis.hash_lock); - batadv_purge_vis_packets(bat_priv); - - if (batadv_generate_vis_packet(bat_priv) == 0) { - /* schedule if generation was successful */ - batadv_send_list_add(bat_priv, bat_priv->vis.my_info); - } - - while (!list_empty(&bat_priv->vis.send_list)) { - info = list_first_entry(&bat_priv->vis.send_list, - typeof(*info), send_list); - - kref_get(&info->refcount); - spin_unlock_bh(&bat_priv->vis.hash_lock); - - batadv_send_vis_packet(bat_priv, info); - - spin_lock_bh(&bat_priv->vis.hash_lock); - batadv_send_list_del(info); - kref_put(&info->refcount, batadv_free_info); - } - spin_unlock_bh(&bat_priv->vis.hash_lock); - - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, - msecs_to_jiffies(BATADV_VIS_INTERVAL)); -} - -/* init the vis server. this may only be called when if_list is already - * initialized (e.g. bat0 is initialized, interfaces have been added) - */ -int batadv_vis_init(struct batadv_priv *bat_priv) -{ - struct batadv_vis_packet *packet; - int hash_added; - unsigned int len; - unsigned long first_seen; - struct sk_buff *tmp_skb; - - if (bat_priv->vis.hash) - return 0; - - spin_lock_bh(&bat_priv->vis.hash_lock); - - bat_priv->vis.hash = batadv_hash_new(256); - if (!bat_priv->vis.hash) { - pr_err("Can't initialize vis_hash\n"); - goto err; - } - - batadv_hash_set_lock_class(bat_priv->vis.hash, - &batadv_vis_hash_lock_class_key); - - bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC); - if (!bat_priv->vis.my_info) - goto err; - - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; - bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL, - len); - if (!bat_priv->vis.my_info->skb_packet) - goto free_info; - - bat_priv->vis.my_info->skb_packet->priority = TC_PRIO_CONTROL; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); - tmp_skb = bat_priv->vis.my_info->skb_packet; - packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); - - /* prefill the vis info */ - first_seen = jiffies - msecs_to_jiffies(BATADV_VIS_INTERVAL); - bat_priv->vis.my_info->first_seen = first_seen; - INIT_LIST_HEAD(&bat_priv->vis.my_info->recv_list); - INIT_LIST_HEAD(&bat_priv->vis.my_info->send_list); - kref_init(&bat_priv->vis.my_info->refcount); - bat_priv->vis.my_info->bat_priv = bat_priv; - packet->header.version = BATADV_COMPAT_VERSION; - packet->header.packet_type = BATADV_VIS; - packet->header.ttl = BATADV_TTL; - packet->seqno = 0; - packet->reserved = 0; - packet->entries = 0; - - INIT_LIST_HEAD(&bat_priv->vis.send_list); - - hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp, - batadv_vis_info_choose, - bat_priv->vis.my_info, - &bat_priv->vis.my_info->hash_entry); - if (hash_added != 0) { - pr_err("Can't add own vis packet into hash\n"); - /* not in hash, need to remove it manually. */ - kref_put(&bat_priv->vis.my_info->refcount, batadv_free_info); - goto err; - } - - spin_unlock_bh(&bat_priv->vis.hash_lock); - - INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets); - queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work, - msecs_to_jiffies(BATADV_VIS_INTERVAL)); - - return 0; - -free_info: - kfree(bat_priv->vis.my_info); - bat_priv->vis.my_info = NULL; -err: - spin_unlock_bh(&bat_priv->vis.hash_lock); - batadv_vis_quit(bat_priv); - return -ENOMEM; -} - -/* Decrease the reference count on a hash item info */ -static void batadv_free_info_ref(struct hlist_node *node, void *arg) -{ - struct batadv_vis_info *info; - - info = container_of(node, struct batadv_vis_info, hash_entry); - batadv_send_list_del(info); - kref_put(&info->refcount, batadv_free_info); -} - -/* shutdown vis-server */ -void batadv_vis_quit(struct batadv_priv *bat_priv) -{ - if (!bat_priv->vis.hash) - return; - - cancel_delayed_work_sync(&bat_priv->vis.work); - - spin_lock_bh(&bat_priv->vis.hash_lock); - /* properly remove, kill timers ... */ - batadv_hash_delete(bat_priv->vis.hash, batadv_free_info_ref, NULL); - bat_priv->vis.hash = NULL; - bat_priv->vis.my_info = NULL; - spin_unlock_bh(&bat_priv->vis.hash_lock); -} diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h deleted file mode 100644 index ad92b0e..0000000 --- a/net/batman-adv/vis.h +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors: - * - * Simon Wunderlich, Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_VIS_H_ -#define _NET_BATMAN_ADV_VIS_H_ - -/* timeout of vis packets in milliseconds */ -#define BATADV_VIS_TIMEOUT 200000 - -int batadv_vis_seq_print_text(struct seq_file *seq, void *offset); -void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv, - struct batadv_vis_packet *vis_packet, - int vis_info_len); -void batadv_receive_client_update_packet(struct batadv_priv *bat_priv, - struct batadv_vis_packet *vis_packet, - int vis_info_len); -int batadv_vis_init(struct batadv_priv *bat_priv); -void batadv_vis_quit(struct batadv_priv *bat_priv); - -#endif /* _NET_BATMAN_ADV_VIS_H_ */ diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d1c5786..005d876 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -363,7 +363,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_reset_mac_header(skb); eth = eth_hdr(skb); - memcpy(eth->h_source, br->dev->dev_addr, 6); + memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN); eth->h_dest[0] = 1; eth->h_dest[1] = 0; eth->h_dest[2] = 0x5e; @@ -433,7 +433,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, skb_reset_mac_header(skb); eth = eth_hdr(skb); - memcpy(eth->h_source, br->dev->dev_addr, 6); + memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 8b84c58..3fb3c84 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -28,7 +28,7 @@ static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, uint32_t cmp[2] = { 0, 0 }; int key = ((const unsigned char *)mac)[5]; - memcpy(((char *) cmp) + 2, mac, 6); + memcpy(((char *) cmp) + 2, mac, ETH_ALEN); start = wh->table[key]; limit = wh->table[key + 1]; if (ip) { diff --git a/net/compat.c b/net/compat.c index f0a1ba6..8903258 100644 --- a/net/compat.c +++ b/net/compat.c @@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2..1b6eadf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head) ASSERT_RTNL(); might_sleep(); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head) dev_deactivate_many(head); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { const struct net_device_ops *ops = dev->netdev_ops; /* @@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev) /* Temporarily disable netpoll until the interface is down */ netpoll_rx_disable(dev); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); retval = __dev_close_many(&single); list_del(&single); @@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev) static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; - LIST_HEAD(tmp_list); - list_for_each_entry_safe(dev, tmp, head, unreg_list) + /* Remove the devices that don't need to be closed */ + list_for_each_entry_safe(dev, tmp, head, close_list) if (!(dev->flags & IFF_UP)) - list_move(&dev->unreg_list, &tmp_list); + list_del_init(&dev->close_list); __dev_close_many(head); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_DOWN, dev); + list_del_init(&dev->close_list); } - /* rollback_registered_many needs the complete original list */ - list_splice(&tmp_list, head); return 0; } @@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev) /* Block netpoll rx while the interface is going down */ netpoll_rx_disable(dev); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); dev_close_many(&single); list_del(&single); @@ -1917,7 +1916,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map, return new_map; } -int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index) +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, + u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; struct xps_map *map, *new_map; @@ -4373,42 +4373,40 @@ struct netdev_adjacent { /* upper master flag, there can only be one master device per list */ bool master; - /* indicates that this dev is our first-level lower/upper device */ - bool neighbour; - /* counter for the number of times this device was added to us */ u16 ref_nr; + /* private field for the users */ + void *private; + struct list_head list; struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, - struct net_device *adj_dev, - bool upper) +static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *adj_list) { struct netdev_adjacent *adj; - struct list_head *dev_list; - - dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list; - list_for_each_entry(adj, dev_list, list) { + list_for_each_entry_rcu(adj, adj_list, list) { if (adj->dev == adj_dev) return adj; } return NULL; } -static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, - struct net_device *udev) +static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *adj_list) { - return __netdev_find_adj(dev, udev, true); -} + struct netdev_adjacent *adj; -static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev, - struct net_device *ldev) -{ - return __netdev_find_adj(dev, ldev, false); + list_for_each_entry(adj, adj_list, list) { + if (adj->dev == adj_dev) + return adj; + } + return NULL; } /** @@ -4425,7 +4423,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_upper(dev, upper_dev); + return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -4440,7 +4438,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->upper_dev_list); + return !list_empty(&dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_any_upper_dev); @@ -4457,10 +4455,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) ASSERT_RTNL(); - if (list_empty(&dev->upper_dev_list)) + if (list_empty(&dev->adj_list.upper)) return NULL; - upper = list_first_entry(&dev->upper_dev_list, + upper = list_first_entry(&dev->adj_list.upper, struct netdev_adjacent, list); if (likely(upper->master)) return upper->dev; @@ -4468,15 +4466,26 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); -/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list +void *netdev_adjacent_get_private(struct list_head *adj_list) +{ + struct netdev_adjacent *adj; + + adj = list_entry(adj_list, struct netdev_adjacent, list); + + return adj->private; +} +EXPORT_SYMBOL(netdev_adjacent_get_private); + +/** + * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device * @iter: list_head ** of the current position * * Gets the next device from the dev's upper list, starting from iter * position. The caller must hold RCU read lock. */ -struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *upper; @@ -4484,14 +4493,71 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - if (&upper->list == &dev->upper_dev_list) + if (&upper->list == &dev->all_adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } -EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); +EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); + +/** + * netdev_lower_get_next_private - Get the next ->private from the + * lower neighbour list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent->private from the dev's lower neighbour + * list, starting from iter position. The caller must hold either hold the + * RTNL lock or its own locking that guarantees that the neighbour lower + * list will remain unchainged. + */ +void *netdev_lower_get_next_private(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + if (iter) + *iter = lower->list.next; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_get_next_private); + +/** + * netdev_lower_get_next_private_rcu - Get the next ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent->private from the dev's lower neighbour + * list, starting from iter position. The caller must hold RCU read lock. + */ +void *netdev_lower_get_next_private_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + if (iter) + *iter = &lower->list; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** * netdev_master_upper_dev_get_rcu - Get master upper device @@ -4504,7 +4570,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { struct netdev_adjacent *upper; - upper = list_first_or_null_rcu(&dev->upper_dev_list, + upper = list_first_or_null_rcu(&dev->adj_list.upper, struct netdev_adjacent, list); if (upper && likely(upper->master)) return upper->dev; @@ -4514,15 +4580,16 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, - bool neighbour, bool master, - bool upper) + struct list_head *dev_list, + void *private, bool master) { struct netdev_adjacent *adj; + char linkname[IFNAMSIZ+7]; + int ret; - adj = __netdev_find_adj(dev, adj_dev, upper); + adj = __netdev_find_adj(dev, adj_dev, dev_list); if (adj) { - BUG_ON(neighbour); adj->ref_nr++; return 0; } @@ -4533,124 +4600,178 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->neighbour = neighbour; adj->ref_nr = 1; - + adj->private = private; dev_hold(adj_dev); - pr_debug("dev_hold for %s, because of %s link added from %s to %s\n", - adj_dev->name, upper ? "upper" : "lower", dev->name, - adj_dev->name); - if (!upper) { - list_add_tail_rcu(&adj->list, &dev->lower_dev_list); - return 0; + pr_debug("dev_hold for %s, because of link added from %s to %s\n", + adj_dev->name, dev->name, adj_dev->name); + + if (dev_list == &dev->adj_list.lower) { + sprintf(linkname, "lower_%s", adj_dev->name); + ret = sysfs_create_link(&(dev->dev.kobj), + &(adj_dev->dev.kobj), linkname); + if (ret) + goto free_adj; + } else if (dev_list == &dev->adj_list.upper) { + sprintf(linkname, "upper_%s", adj_dev->name); + ret = sysfs_create_link(&(dev->dev.kobj), + &(adj_dev->dev.kobj), linkname); + if (ret) + goto free_adj; } - /* Ensure that master upper link is always the first item in list. */ - if (master) - list_add_rcu(&adj->list, &dev->upper_dev_list); - else - list_add_tail_rcu(&adj->list, &dev->upper_dev_list); + /* Ensure that master link is always the first item in list. */ + if (master) { + ret = sysfs_create_link(&(dev->dev.kobj), + &(adj_dev->dev.kobj), "master"); + if (ret) + goto remove_symlinks; + + list_add_rcu(&adj->list, dev_list); + } else { + list_add_tail_rcu(&adj->list, dev_list); + } return 0; -} -static inline int __netdev_upper_dev_insert(struct net_device *dev, - struct net_device *udev, - bool master, bool neighbour) -{ - return __netdev_adjacent_dev_insert(dev, udev, neighbour, master, - true); -} +remove_symlinks: + if (dev_list == &dev->adj_list.lower) { + sprintf(linkname, "lower_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } else if (dev_list == &dev->adj_list.upper) { + sprintf(linkname, "upper_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } -static inline int __netdev_lower_dev_insert(struct net_device *dev, - struct net_device *ldev, - bool neighbour) -{ - return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false, - false); +free_adj: + kfree(adj); + + return ret; } void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, bool upper) + struct net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; + char linkname[IFNAMSIZ+7]; - if (upper) - adj = __netdev_find_upper(dev, adj_dev); - else - adj = __netdev_find_lower(dev, adj_dev); + adj = __netdev_find_adj(dev, adj_dev, dev_list); - if (!adj) + if (!adj) { + pr_err("tried to remove device %s from %s\n", + dev->name, adj_dev->name); BUG(); + } if (adj->ref_nr > 1) { + pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, + adj->ref_nr-1); adj->ref_nr--; return; } + if (adj->master) + sysfs_remove_link(&(dev->dev.kobj), "master"); + + if (dev_list == &dev->adj_list.lower) { + sprintf(linkname, "lower_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } else if (dev_list == &dev->adj_list.upper) { + sprintf(linkname, "upper_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } + list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because of %s link removed from %s to %s\n", - adj_dev->name, upper ? "upper" : "lower", dev->name, - adj_dev->name); + pr_debug("dev_put for %s, because link removed from %s to %s\n", + adj_dev->name, dev->name, adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); } -static inline void __netdev_upper_dev_remove(struct net_device *dev, - struct net_device *udev) -{ - return __netdev_adjacent_dev_remove(dev, udev, true); -} - -static inline void __netdev_lower_dev_remove(struct net_device *dev, - struct net_device *ldev) -{ - return __netdev_adjacent_dev_remove(dev, ldev, false); -} - -int __netdev_adjacent_dev_insert_link(struct net_device *dev, - struct net_device *upper_dev, - bool master, bool neighbour) +int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; - ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, + master); if (ret) return ret; - ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, + false); if (ret) { - __netdev_upper_dev_remove(dev, upper_dev); + __netdev_adjacent_dev_remove(dev, upper_dev, up_list); return ret; } return 0; } -static inline int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *udev) +int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - return __netdev_adjacent_dev_insert_link(dev, udev, false, false); + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->all_adj_list.upper, + &upper_dev->all_adj_list.lower, + NULL, false); } -static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *udev, - bool master) +void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list) { - return __netdev_adjacent_dev_insert_link(dev, udev, master, true); + __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } void __netdev_adjacent_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - __netdev_upper_dev_remove(dev, upper_dev); - __netdev_lower_dev_remove(upper_dev, dev); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + &dev->all_adj_list.upper, + &upper_dev->all_adj_list.lower); +} + +int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) +{ + int ret = __netdev_adjacent_dev_link(dev, upper_dev); + + if (ret) + return ret; + + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); + if (ret) { + __netdev_adjacent_dev_unlink(dev, upper_dev); + return ret; + } + + return 0; } +void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) +{ + __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower); +} static int __netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev, bool master) + struct net_device *upper_dev, bool master, + void *private) { struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; @@ -4661,26 +4782,29 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_upper(upper_dev, dev)) + if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_upper(dev, upper_dev)) + if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + master); if (ret) return ret; /* Now that we linked these devs, make all the upper_dev's - * upper_dev_list visible to every dev's lower_dev_list and vice + * all_adj_list.upper visible to every dev's all_adj_list.lower an * versa, and don't forget the devices itself. All of these * links are non-neighbours. */ - list_for_each_entry(i, &dev->lower_dev_list, list) { - list_for_each_entry(j, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { + list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { + pr_debug("Interlinking %s with %s, non-neighbour\n", + i->dev->name, j->dev->name); ret = __netdev_adjacent_dev_link(i->dev, j->dev); if (ret) goto rollback_mesh; @@ -4688,14 +4812,18 @@ static int __netdev_upper_dev_link(struct net_device *dev, } /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { + pr_debug("linking %s's upper device %s with %s\n", + upper_dev->name, i->dev->name, dev->name); ret = __netdev_adjacent_dev_link(dev, i->dev); if (ret) goto rollback_upper_mesh; } /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { + pr_debug("linking %s's lower device %s with %s\n", dev->name, + i->dev->name, upper_dev->name); ret = __netdev_adjacent_dev_link(i->dev, upper_dev); if (ret) goto rollback_lower_mesh; @@ -4706,7 +4834,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, rollback_lower_mesh: to_i = i; - list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; __netdev_adjacent_dev_unlink(i->dev, upper_dev); @@ -4716,7 +4844,7 @@ rollback_lower_mesh: rollback_upper_mesh: to_i = i; - list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; __netdev_adjacent_dev_unlink(dev, i->dev); @@ -4727,8 +4855,8 @@ rollback_upper_mesh: rollback_mesh: to_i = i; to_j = j; - list_for_each_entry(i, &dev->lower_dev_list, list) { - list_for_each_entry(j, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { + list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; __netdev_adjacent_dev_unlink(i->dev, j->dev); @@ -4737,7 +4865,7 @@ rollback_mesh: break; } - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; } @@ -4755,7 +4883,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -4773,10 +4901,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, true); + return __netdev_upper_dev_link(dev, upper_dev, true, NULL); } EXPORT_SYMBOL(netdev_master_upper_dev_link); +int netdev_master_upper_dev_link_private(struct net_device *dev, + struct net_device *upper_dev, + void *private) +{ + return __netdev_upper_dev_link(dev, upper_dev, true, private); +} +EXPORT_SYMBOL(netdev_master_upper_dev_link_private); + /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device @@ -4791,29 +4927,59 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct netdev_adjacent *i, *j; ASSERT_RTNL(); - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower * devices from all upper_dev's upper devices and vice * versa, to maintain the graph relationship. */ - list_for_each_entry(i, &dev->lower_dev_list, list) - list_for_each_entry(j, &upper_dev->upper_dev_list, list) + list_for_each_entry(i, &dev->all_adj_list.lower, list) + list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(i->dev, j->dev); /* remove also the devices itself from lower/upper device * list */ - list_for_each_entry(i, &dev->lower_dev_list, list) + list_for_each_entry(i, &dev->all_adj_list.lower, list) __netdev_adjacent_dev_unlink(i->dev, upper_dev); - list_for_each_entry(i, &upper_dev->upper_dev_list, list) + list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(dev, i->dev); call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void *netdev_lower_dev_get_private_rcu(struct net_device *dev, + struct net_device *lower_dev) +{ + struct netdev_adjacent *lower; + + if (!lower_dev) + return NULL; + lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); + if (!lower) + return NULL; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); + +void *netdev_lower_dev_get_private(struct net_device *dev, + struct net_device *lower_dev) +{ + struct netdev_adjacent *lower; + + if (!lower_dev) + return NULL; + lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + if (!lower) + return NULL; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_dev_get_private); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; @@ -4822,7 +4988,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) ops->ndo_change_rx_flags(dev, flags); } -static int __dev_set_promiscuity(struct net_device *dev, int inc) +static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags; kuid_t uid; @@ -4865,6 +5031,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) dev_change_rx_flags(dev, IFF_PROMISC); } + if (notify) + __dev_notify_flags(dev, old_flags, IFF_PROMISC); return 0; } @@ -4884,7 +5052,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) unsigned int old_flags = dev->flags; int err; - err = __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc, true); if (err < 0) return err; if (dev->flags != old_flags) @@ -4893,22 +5061,9 @@ int dev_set_promiscuity(struct net_device *dev, int inc) } EXPORT_SYMBOL(dev_set_promiscuity); -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. - */ - -int dev_set_allmulti(struct net_device *dev, int inc) +static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) { - unsigned int old_flags = dev->flags; + unsigned int old_flags = dev->flags, old_gflags = dev->gflags; ASSERT_RTNL(); @@ -4931,9 +5086,30 @@ int dev_set_allmulti(struct net_device *dev, int inc) if (dev->flags ^ old_flags) { dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); + if (notify) + __dev_notify_flags(dev, old_flags, + dev->gflags ^ old_gflags); } return 0; } + +/** + * dev_set_allmulti - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. + */ + +int dev_set_allmulti(struct net_device *dev, int inc) +{ + return __dev_set_allmulti(dev, inc, true); +} EXPORT_SYMBOL(dev_set_allmulti); /* @@ -4958,10 +5134,10 @@ void __dev_set_rx_mode(struct net_device *dev) * therefore calling __dev_set_promiscuity here is safe. */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { - __dev_set_promiscuity(dev, 1); + __dev_set_promiscuity(dev, 1, false); dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { - __dev_set_promiscuity(dev, -1); + __dev_set_promiscuity(dev, -1, false); dev->uc_promisc = false; } } @@ -5050,9 +5226,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; + unsigned int old_flags = dev->flags; dev->gflags ^= IFF_PROMISC; - dev_set_promiscuity(dev, inc); + + if (__dev_set_promiscuity(dev, inc, false) >= 0) + if (dev->flags != old_flags) + dev_set_rx_mode(dev); } /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI @@ -5063,16 +5243,20 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) int inc = (flags & IFF_ALLMULTI) ? 1 : -1; dev->gflags ^= IFF_ALLMULTI; - dev_set_allmulti(dev, inc); + __dev_set_allmulti(dev, inc, false); } return ret; } -void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, + unsigned int gchanges) { unsigned int changes = dev->flags ^ old_flags; + if (gchanges) + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges); + if (changes & IFF_UP) { if (dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_UP, dev); @@ -5101,17 +5285,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) int dev_change_flags(struct net_device *dev, unsigned int flags) { int ret; - unsigned int changes, old_flags = dev->flags; + unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; ret = __dev_change_flags(dev, flags); if (ret < 0) return ret; - changes = old_flags ^ dev->flags; - if (changes) - rtmsg_ifinfo(RTM_NEWLINK, dev, changes); - - __dev_notify_flags(dev, old_flags); + changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); + __dev_notify_flags(dev, old_flags, changes); return ret; } EXPORT_SYMBOL(dev_change_flags); @@ -5247,15 +5428,18 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); +static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); + dev_net(dev)->dev_unreg_count++; } static void rollback_registered_many(struct list_head *head) { struct net_device *dev, *tmp; + LIST_HEAD(close_head); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -5278,7 +5462,9 @@ static void rollback_registered_many(struct list_head *head) } /* If device is running, close it first. */ - dev_close_many(head); + list_for_each_entry(dev, head, unreg_list) + list_add_tail(&dev->close_list, &close_head); + dev_close_many(&close_head); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -5918,6 +6104,12 @@ void netdev_run_todo(void) if (dev->destructor) dev->destructor(dev); + /* Report a network device has been unregistered */ + rtnl_lock(); + dev_net(dev)->dev_unreg_count--; + __rtnl_unlock(); + wake_up(&netdev_unregistering_wq); + /* Free network device */ kobject_put(&dev->dev.kobj); } @@ -6068,9 +6260,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->close_list); INIT_LIST_HEAD(&dev->link_watch_list); - INIT_LIST_HEAD(&dev->upper_dev_list); - INIT_LIST_HEAD(&dev->lower_dev_list); + INIT_LIST_HEAD(&dev->adj_list.upper); + INIT_LIST_HEAD(&dev->adj_list.lower); + INIT_LIST_HEAD(&dev->all_adj_list.upper); + INIT_LIST_HEAD(&dev->all_adj_list.lower); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); @@ -6603,6 +6798,34 @@ static void __net_exit default_device_exit(struct net *net) rtnl_unlock(); } +static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) +{ + /* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace in net_list. + */ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network @@ -6614,7 +6837,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); - rtnl_lock(); + /* To prevent network device cleanup code from dereferencing + * loopback devices or network devices that have been freed + * wait here for all pending unregistrations to complete, + * before unregistring the loopback device and allowing the + * network namespace be freed. + * + * The netdev todo list containing all network devices + * unregistrations that happen in default_device_exit_batch + * will run in the rtnl_unlock() at the end of + * default_device_exit_batch. + */ + rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops) diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29..01b7808 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu) struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); bpf_jit_free(fp); - kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); + fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); @@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; + unsigned int sk_fsize = sk_filter_size(fprog->len); int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); + fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL); if (!fp) return -ENOMEM; if (copy_from_user(fp->insns, fprog->filter, fsize)) { - sock_kfree_s(sk, fp, fsize+sizeof(*fp)); + sock_kfree_s(sk, fp, sk_fsize); return -EFAULT; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1929af8..f8e25ac 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -25,9 +25,35 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); } +/** + * skb_flow_get_ports - extract the upper layer ports and return them + * @skb: buffer to extract the ports from + * @thoff: transport header offset + * @ip_proto: protocol for which to get port offset + * + * The function will try to retrieve the ports at offset thoff + poff where poff + * is the protocol port offset returned from proto_ports_offset + */ +__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +{ + int poff = proto_ports_offset(ip_proto); + + if (poff >= 0) { + __be32 *ports, _ports; + + ports = skb_header_pointer(skb, thoff + poff, + sizeof(_ports), &_ports); + if (ports) + return *ports; + } + + return 0; +} +EXPORT_SYMBOL(skb_flow_get_ports); + bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) { - int poff, nhoff = skb_network_offset(skb); + int nhoff = skb_network_offset(skb); u8 ip_proto; __be16 proto = skb->protocol; @@ -150,16 +176,7 @@ ipv6: } flow->ip_proto = ip_proto; - poff = proto_ports_offset(ip_proto); - if (poff >= 0) { - __be32 *ports, _ports; - - nhoff += poff; - ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); - if (ports) - flow->ports = *ports; - } - + flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); flow->thoff = (u16) nhoff; return true; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6072610..ca15f32 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -867,7 +867,7 @@ static void neigh_invalidate(struct neighbour *neigh) static void neigh_probe(struct neighbour *neigh) __releases(neigh->lock) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); + struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); /* keep skb alive even if arp_queue overflows */ if (skb) skb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index d9cd627..9b7cf6c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -222,11 +222,10 @@ static void net_prio_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) { struct task_struct *p; - void *v; + void *v = (void *)(unsigned long)css->cgroup->id; cgroup_taskset_for_each(p, css, tset) { task_lock(p); - v = (void *)(unsigned long)task_netprioidx(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2a0e21d..4aedf03 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); - __dev_notify_flags(dev, old_flags); + __dev_notify_flags(dev, old_flags, ~0U); return 0; } EXPORT_SYMBOL(rtnl_configure_link); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13c..3f1ec15 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,11 +10,24 @@ #include <net/secure_seq.h> -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } #ifdef CONFIG_INET @@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force __u32) daddr; hash[1] = net_secret[13]; hash[2] = net_secret[14]; @@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) { __u32 hash[4]; + net_secret_init(); memcpy(hash, daddr, 16); md5_transform(hash, net_secret); @@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d81cff1..8ead744 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2936,32 +2936,30 @@ EXPORT_SYMBOL_GPL(skb_segment); int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - struct sk_buff *p = *head; - struct sk_buff *nskb; - struct skb_shared_info *skbinfo = skb_shinfo(skb); - struct skb_shared_info *pinfo = skb_shinfo(p); - unsigned int headroom; - unsigned int len = skb_gro_len(skb); + struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); + struct sk_buff *nskb, *lp, *p = *head; + unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; + unsigned int headroom; - if (p->len + len >= 65536) + if (unlikely(p->len + len >= 65536)) return -E2BIG; - if (pinfo->frag_list) - goto merge; - else if (headlen <= offset) { + lp = NAPI_GRO_CB(p)->last ?: p; + pinfo = skb_shinfo(lp); + + if (headlen <= offset) { skb_frag_t *frag; skb_frag_t *frag2; int i = skbinfo->nr_frags; int nr_frags = pinfo->nr_frags + i; - offset -= headlen; - if (nr_frags > MAX_SKB_FRAGS) - return -E2BIG; + goto merge; + offset -= headlen; pinfo->nr_frags = nr_frags; skbinfo->nr_frags = 0; @@ -2992,7 +2990,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int first_offset; if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) - return -E2BIG; + goto merge; first_offset = skb->data - (unsigned char *)page_address(page) + @@ -3010,7 +3008,10 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; - } else if (skb_gro_len(p) != pinfo->gso_size) + } + if (pinfo->frag_list) + goto merge; + if (skb_gro_len(p) != pinfo->gso_size) return -E2BIG; headroom = skb_headroom(p); @@ -3062,16 +3063,24 @@ merge: __skb_pull(skb, offset); - NAPI_GRO_CB(p)->last->next = skb; + if (!NAPI_GRO_CB(p)->last) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; NAPI_GRO_CB(p)->last = skb; skb_header_release(skb); + lp = p; done: NAPI_GRO_CB(p)->count++; p->data_len += len; p->truesize += delta_truesize; p->len += len; - + if (lp != p) { + lp->data_len += len; + lp->truesize += delta_truesize; + lp->len += len; + } NAPI_GRO_CB(skb)->same_flow = 1; return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 5b6beba..fd6afa2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -914,6 +914,13 @@ set_rcvbuf: } break; #endif + + case SO_MAX_PACING_RATE: + sk->sk_max_pacing_rate = val; + sk->sk_pacing_rate = min(sk->sk_pacing_rate, + sk->sk_max_pacing_rate); + break; + default: ret = -ENOPROTOOPT; break; @@ -1177,6 +1184,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; #endif + case SO_MAX_PACING_RATE: + v.val = sk->sk_max_pacing_rate; + break; + default: return -ENOPROTOOPT; } @@ -2319,6 +2330,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_ll_usec = sysctl_net_busy_read; #endif + sk->sk_max_pacing_rate = ~0U; + sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ebc54fe..720c362 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -409,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->rmt_addr; - newinet->inet_rcv_saddr = ireq->loc_addr; - newinet->inet_saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->ir_rmt_addr; + newinet->inet_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; newinet->inet_opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); @@ -516,10 +516,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, - ireq->rmt_addr); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); } @@ -641,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->loc_addr = ip_hdr(skb)->daddr; - ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; /* * Step 3: Process LISTEN state diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6cf9f77..4ac71ff 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); } static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) @@ -216,7 +216,7 @@ out: static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { - struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct in6_addr *final_p, final; @@ -226,12 +226,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = ireq6->rmt_addr; - fl6.saddr = ireq6->loc_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowlabel = 0; - fl6.flowi6_oif = ireq6->iif; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_oif = ireq->ir_iif; + fl6.fl6_dport = ireq->ir_rmt_port; + fl6.fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); @@ -249,9 +249,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) struct dccp_hdr *dh = dccp_hdr(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, - &ireq6->loc_addr, - &ireq6->rmt_addr); - fl6.daddr = ireq6->rmt_addr; + &ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr); + fl6.daddr = ireq->ir_v6_rmt_addr; err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); err = net_xmit_eval(err); } @@ -264,8 +264,7 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); - if (inet6_rsk(req)->pktopts != NULL) - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet_rsk(req)->pktopts); } static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) @@ -359,7 +358,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct request_sock *req; struct dccp_request_sock *dreq; - struct inet6_request_sock *ireq6; + struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -398,22 +397,22 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - ireq6 = inet6_rsk(req); - ireq6->rmt_addr = ipv6_hdr(skb)->saddr; - ireq6->loc_addr = ipv6_hdr(skb)->daddr; + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - ireq6->pktopts = skb; + ireq->pktopts = skb; } - ireq6->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq6->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); /* * Step 3: Process LISTEN state @@ -446,7 +445,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp6_sock *newdp6; @@ -467,11 +466,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -505,12 +504,12 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = ireq6->rmt_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq6->loc_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.fl6_dport = ireq->ir_rmt_port; + fl6.fl6_sport = htons(ireq->ir_num); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -538,10 +537,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = ireq6->rmt_addr; - newnp->saddr = ireq6->loc_addr; - newnp->rcv_saddr = ireq6->loc_addr; - newsk->sk_bound_dev_if = ireq6->iif; + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; + newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -554,10 +553,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq6->pktopts != NULL) { - newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); - consume_skb(ireq6->pktopts); - ireq6->pktopts = NULL; + if (ireq->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } @@ -885,7 +884,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -915,16 +914,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; @@ -941,7 +940,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -963,7 +962,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index 6eef81f..af259e1 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -25,12 +25,10 @@ struct dccp6_sock { struct dccp6_request_sock { struct dccp_request_sock dccp; - struct inet6_request_sock inet6; }; struct dccp6_timewait_sock { struct inet_timewait_sock inet; - struct inet6_timewait_sock tw6; }; #endif /* _DCCP_IPV6_H */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 662071b..9e2f78b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - tw6->tw_v6_daddr = np->daddr; - tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_ipv6only = np->ipv6only; } #endif @@ -269,10 +266,10 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); - inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; - inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; - inet_rsk(req)->acked = 0; - dreq->dreq_timestamp_echo = 0; + inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); + inet_rsk(req)->acked = 0; + dreq->dreq_timestamp_echo = 0; /* inherit feature negotiation options from listening socket */ return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); diff --git a/net/dccp/output.c b/net/dccp/output.c index d17fc90..8876078 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -424,8 +424,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_sport = inet_rsk(req)->loc_port; - dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_sport = htons(inet_rsk(req)->ir_num); + dh->dccph_dport = inet_rsk(req)->ir_rmt_port; dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ba64750..eb892b4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1158,10 +1158,8 @@ static int __init dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) { + for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); - INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); - } if (inet_ehash_locks_alloc(&dccp_hashinfo)) goto out_free_dccp_ehash; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index be1f64d..8f032ba 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,7 +58,7 @@ #include <net/ipv6.h> #include <net/ip.h> #include <net/dsa.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> __setup("ether=", netdev_boot_setup); @@ -133,7 +133,7 @@ int eth_rebuild_header(struct sk_buff *skb) return arp_find(eth->h_dest, skb); #endif default: - printk(KERN_DEBUG + netdev_dbg(dev, "%s: unable to resolve type %X addresses.\n", dev->name, ntohs(eth->h_proto)); @@ -169,20 +169,9 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) else skb->pkt_type = PACKET_MULTICAST; } - - /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. - * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. - */ - - else if (1 /*dev->flags&IFF_PROMISC */ ) { - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) - skb->pkt_type = PACKET_OTHERHOST; - } + else if (unlikely(!ether_addr_equal_64bits(eth->h_dest, + dev->dev_addr))) + skb->pkt_type = PACKET_OTHERHOST; /* * Some variants of DSA tagging don't have an ethertype field @@ -190,12 +179,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. */ - if (netdev_uses_dsa_tags(dev)) + if (unlikely(netdev_uses_dsa_tags(dev))) return htons(ETH_P_DSA); - if (netdev_uses_trailer_tags(dev)) + + if (unlikely(netdev_uses_trailer_tags(dev))) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) + if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) return eth->h_proto; /* @@ -204,7 +194,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. */ - if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF) + if (unlikely(skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)) return htons(ETH_P_802_3); /* diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index c85e71e..ff41b4d 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_IEEE802154) + return -EINVAL; lowpan_dev_info(dev)->real_dev = real_dev; lowpan_dev_info(dev)->fragment_tag = 0; @@ -1386,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, entry->ldev = dev; + /* Set the lowpan harware address to the wpan hardware address. */ + memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); INIT_LIST_HEAD(&entry->list); list_add_tail(&entry->list, &lowpan_devices); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7a1874b..35913fb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,10 +263,8 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); - net_secret_init(); - } } EXPORT_SYMBOL(build_ehash_secret); @@ -1548,6 +1546,7 @@ static const struct net_protocol tcp_protocol = { }; static const struct net_protocol udp_protocol = { + .early_demux = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3df6d3e..ec9a9ef 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -762,12 +762,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) if (IS_LEAF(node) || ((struct tnode *) node)->pos > tn->pos + tn->bits - 1) { - if (tkey_extract_bits(node->key, - oldtnode->pos + oldtnode->bits, - 1) == 0) - put_child(tn, 2*i, node); - else - put_child(tn, 2*i+1, node); + put_child(tn, + tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), + node); continue; } @@ -1120,12 +1117,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) * first tnode need some special handling */ - if (tp) - pos = tp->pos+tp->bits; - else - pos = 0; - if (n) { + pos = tp ? tp->pos+tp->bits : 0; newpos = tkey_mismatch(key, pos, n->key); tn = tnode_new(n->key, newpos, 1); } else { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5f7d11a..5c0e8bc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -353,6 +353,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; + if (icmp_param->replyopts.opt.opt.optlen) { ipc.opt = &icmp_param->replyopts.opt; if (ipc.opt->opt.srr) @@ -608,6 +611,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ipc.addr = iph->saddr; ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, type, code, icmp_param); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index dace87f..7defdc9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data) in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_timer_expire(unsigned long data) @@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data) igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6acb541..fc0e649 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -29,27 +29,19 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif -/* - * This struct holds the first and last local port number. - */ -struct local_ports sysctl_local_ports __read_mostly = { - .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), - .range = { 32768, 61000 }, -}; - unsigned long *sysctl_local_reserved_ports; EXPORT_SYMBOL(sysctl_local_reserved_ports); -void inet_get_local_port_range(int *low, int *high) +void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; do { - seq = read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); - *low = sysctl_local_ports.range[0]; - *high = sysctl_local_ports.range[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + *low = net->ipv4.sysctl_local_ports.range[0]; + *high = net->ipv4.sysctl_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -79,17 +71,16 @@ int inet_csk_bind_conflict(const struct sock *sk, (!reuseport || !sk2->sk_reuseport || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } } @@ -116,7 +107,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) int remaining, rover, low, high; again: - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; smallest_rover = rover = net_random() % remaining + low; @@ -421,8 +412,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, flags, - (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -457,8 +448,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), - (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -504,9 +495,9 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, prev = &req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); - if (ireq->rmt_port == rport && - ireq->rmt_addr == raddr && - ireq->loc_addr == laddr && + if (ireq->ir_rmt_port == rport && + ireq->ir_rmt_addr == raddr && + ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { WARN_ON(req->sk); *prevp = prev; @@ -523,7 +514,8 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr, + inet_rsk(req)->ir_rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); @@ -683,9 +675,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newsk->sk_state = TCP_SYN_RECV; newicsk->icsk_bind_hash = NULL; - inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; - inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); - inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; + inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; + inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; + inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5f64875..41e1c3e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #if IS_ENABLED(CONFIG_IPV6) if (r->idiag_family == AF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = np->daddr; + *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr; if (ext & (1 << (INET_DIAG_TCLASS - 1))) - if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0) + if (nla_put_u8(skb, INET_DIAG_TCLASS, + inet6_sk(sk)->tclass) < 0) goto errout; } #endif @@ -222,7 +222,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - long tmo; + s32 tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; @@ -234,7 +234,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_ttd - jiffies; + tmo = tw->tw_ttd - inet_tw_time_stamp(); if (tmo < 0) tmo = 0; @@ -248,18 +248,15 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->id.idiag_dst[0] = tw->tw_daddr; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; r->idiag_inode = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { - const struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - - *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; - *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; + *(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr; } #endif @@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, portid, seq, nlmsg_flags, - unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); + return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq, + nlmsg_flags, unlh); + + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, + nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; + entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32; + entry.daddr = sk->sk_v6_daddr.s6_addr32; } else #endif { @@ -635,22 +632,22 @@ static int inet_csk_diag_dump(struct sock *sk, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } -static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, +static int inet_twsk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, struct inet_diag_req_v2 *r, const struct nlattr *bc) { + struct inet_timewait_sock *tw = inet_twsk(sk); + if (bc != NULL) { struct inet_diag_entry entry; entry.family = tw->tw_family; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == AF_INET6) { - struct inet6_timewait_sock *tw6 = - inet6_twsk((struct sock *)tw); - entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32; - entry.daddr = tw6->tw_v6_daddr.s6_addr32; + entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32; + entry.daddr = tw->tw_v6_daddr.s6_addr32; } else #endif { @@ -682,12 +679,12 @@ static inline void inet_diag_req_addrs(const struct sock *sk, #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { if (req->rsk_ops->family == AF_INET6) { - entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32; - entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32; + entry->saddr = ireq->ir_v6_loc_addr.s6_addr32; + entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32; } else if (req->rsk_ops->family == AF_INET) { - ipv6_addr_set_v4mapped(ireq->loc_addr, + ipv6_addr_set_v4mapped(ireq->ir_loc_addr, &entry->saddr_storage); - ipv6_addr_set_v4mapped(ireq->rmt_addr, + ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, &entry->daddr_storage); entry->saddr = entry->saddr_storage.s6_addr32; entry->daddr = entry->daddr_storage.s6_addr32; @@ -695,8 +692,8 @@ static inline void inet_diag_req_addrs(const struct sock *sk, } else #endif { - entry->saddr = &ireq->loc_addr; - entry->daddr = &ireq->rmt_addr; + entry->saddr = &ireq->ir_loc_addr; + entry->daddr = &ireq->ir_rmt_addr; } } @@ -731,9 +728,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, tmo = 0; r->id.idiag_sport = inet->inet_sport; - r->id.idiag_dport = ireq->rmt_port; - r->id.idiag_src[0] = ireq->loc_addr; - r->id.idiag_dst[0] = ireq->rmt_addr; + r->id.idiag_dport = ireq->ir_rmt_port; + r->id.idiag_src[0] = ireq->ir_loc_addr; + r->id.idiag_dst[0] = ireq->ir_rmt_addr; r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; @@ -792,13 +789,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (reqnum < s_reqnum) continue; - if (r->id.idiag_dport != ireq->rmt_port && + if (r->id.idiag_dport != ireq->ir_rmt_port && r->id.idiag_dport) continue; if (bc) { inet_diag_req_addrs(sk, req, &entry); - entry.dport = ntohs(ireq->rmt_port); + entry.dport = ntohs(ireq->ir_rmt_port); if (!inet_diag_bc_run(bc, &entry)) continue; @@ -911,8 +908,7 @@ skip_listen_ht: num = 0; - if (hlist_nulls_empty(&head->chain) && - hlist_nulls_empty(&head->twchain)) + if (hlist_nulls_empty(&head->chain)) continue; if (i > s_i) @@ -920,7 +916,7 @@ skip_listen_ht: spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - struct inet_sock *inet = inet_sk(sk); + int res; if (!net_eq(sock_net(sk), net)) continue; @@ -929,15 +925,19 @@ skip_listen_ht: if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; if (r->sdiag_family != AF_UNSPEC && - sk->sk_family != r->sdiag_family) + sk->sk_family != r->sdiag_family) goto next_normal; - if (r->id.idiag_sport != inet->inet_sport && + if (r->id.idiag_sport != htons(sk->sk_num) && r->id.idiag_sport) goto next_normal; - if (r->id.idiag_dport != inet->inet_dport && + if (r->id.idiag_dport != sk->sk_dport && r->id.idiag_dport) goto next_normal; - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (sk->sk_state == TCP_TIME_WAIT) + res = inet_twsk_diag_dump(sk, skb, cb, r, bc); + else + res = inet_csk_diag_dump(sk, skb, cb, r, bc); + if (res < 0) { spin_unlock_bh(lock); goto done; } @@ -945,33 +945,6 @@ next_normal: ++num; } - if (r->idiag_states & TCPF_TIME_WAIT) { - struct inet_timewait_sock *tw; - - inet_twsk_for_each(tw, node, - &head->twchain) { - if (!net_eq(twsk_net(tw), net)) - continue; - - if (num < s_num) - goto next_dying; - if (r->sdiag_family != AF_UNSPEC && - tw->tw_family != r->sdiag_family) - goto next_dying; - if (r->id.idiag_sport != tw->tw_sport && - r->id.idiag_sport) - goto next_dying; - if (r->id.idiag_dport != tw->tw_dport && - r->id.idiag_dport) - goto next_dying; - if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { - spin_unlock_bh(lock); - goto done; - } -next_dying: - ++num; - } - } spin_unlock_bh(lock); } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 7bd8983..a4b66bb 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -230,6 +230,19 @@ begin: } EXPORT_SYMBOL_GPL(__inet_lookup_listener); +/* All sockets share common refcount, but have different destructors */ +void sock_gen_put(struct sock *sk) +{ + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_free(inet_twsk(sk)); + else + sk_free(sk); +} +EXPORT_SYMBOL_GPL(sock_gen_put); + struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, @@ -255,13 +268,13 @@ begin: if (likely(INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) - goto begintw; + goto out; if (unlikely(!INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - sock_put(sk); + sock_gen_put(sk); goto begin; } - goto out; + goto found; } } /* @@ -271,37 +284,9 @@ begin: */ if (get_nulls_value(node) != slot) goto begin; - -begintw: - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_nulls_for_each_rcu(sk, node, &head->twchain) { - if (sk->sk_hash != hash) - continue; - if (likely(INET_TW_MATCH(sk, net, acookie, - saddr, daddr, ports, - dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { - sk = NULL; - goto out; - } - if (unlikely(!INET_TW_MATCH(sk, net, acookie, - saddr, daddr, ports, - dif))) { - sock_put(sk); - goto begintw; - } - goto out; - } - } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begintw; - sk = NULL; out: + sk = NULL; +found: rcu_read_unlock(); return sk; } @@ -326,39 +311,29 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; - struct inet_timewait_sock *tw; + struct inet_timewait_sock *tw = NULL; int twrefcnt = 0; spin_lock(lock); - /* Check TIME-WAIT sockets first. */ - sk_nulls_for_each(sk2, node, &head->twchain) { - if (sk2->sk_hash != hash) - continue; - - if (likely(INET_TW_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif))) { - tw = inet_twsk(sk2); - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; + if (likely(INET_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif))) + saddr, daddr, ports, dif))) { + if (sk2->sk_state == TCP_TIME_WAIT) { + tw = inet_twsk(sk2); + if (twsk_unique(sk, sk2, twp)) + break; + } goto not_unique; + } } -unique: /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ + * in hash table socket with a funny identity. + */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; @@ -494,7 +469,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, u32 offset = hint + port_offset; struct inet_timewait_sock *tw = NULL; - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; local_bh_disable(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1f27c9f..6d592f8 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -87,19 +87,11 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, refcnt += inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); -#ifdef SOCK_REFCNT_DEBUG - if (atomic_read(&tw->tw_refcnt) != 1) { - pr_debug("%s timewait_sock %p refcnt=%d\n", - tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); - } -#endif - while (refcnt) { - inet_twsk_put(tw); - refcnt--; - } + BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); + atomic_sub(refcnt, &tw->tw_refcnt); } -static noinline void inet_twsk_free(struct inet_timewait_sock *tw) +void inet_twsk_free(struct inet_timewait_sock *tw) { struct module *owner = tw->tw_prot->owner; twsk_destructor((struct sock *)tw); @@ -118,6 +110,18 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); +static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) +{ + hlist_nulls_add_head_rcu(&tw->tw_node, list); +} + +static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -146,26 +150,21 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); /* - * Step 2: Hash TW into TIMEWAIT chain. - * Should be done before removing sk from established chain - * because readers are lockless and search established first. + * Step 2: Hash TW into tcp ehash chain. + * Notes : + * - tw_refcnt is set to 3 because : + * - We have one reference from bhash chain. + * - We have one reference from ehash chain. + * We can use atomic_set() because prior spin_lock()/spin_unlock() + * committed into memory all tw fields. */ - inet_twsk_add_node_rcu(tw, &ehead->twchain); + atomic_set(&tw->tw_refcnt, 1 + 1 + 1); + inet_twsk_add_node_rcu(tw, &ehead->chain); - /* Step 3: Remove SK from established hash. */ + /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - /* - * Notes : - * - We initially set tw_refcnt to 0 in inet_twsk_alloc() - * - We add one reference for the bhash link - * - We add one reference for the ehash link - * - We want this refcnt update done before allowing other - * threads to find this tw in ehash chain. - */ - atomic_add(1 + 1 + 1, &tw->tw_refcnt); - spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); @@ -387,11 +386,11 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, if (slot >= INET_TWDR_TWKILL_SLOTS) slot = INET_TWDR_TWKILL_SLOTS - 1; } - tw->tw_ttd = jiffies + timeo; + tw->tw_ttd = inet_tw_time_stamp() + timeo; slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); list = &twdr->cells[slot]; } else { - tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); + tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK); if (twdr->twcal_hand < 0) { twdr->twcal_hand = 0; @@ -490,7 +489,9 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, restart_rcu: rcu_read_lock(); restart: - sk_nulls_for_each_rcu(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->chain) { + if (sk->sk_state != TCP_TIME_WAIT) + continue; tw = inet_twsk(sk); if ((tw->tw_family != family) || atomic_read(&twsk_net(tw)->count)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a04d872..7d8357b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1060,6 +1060,9 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, rt->dst.dev->mtu : dst_mtu(&rt->dst); cork->dst = &rt->dst; cork->length = 0; + cork->ttl = ipc->ttl; + cork->tos = ipc->tos; + cork->priority = ipc->priority; cork->tx_flags = ipc->tx_flags; return 0; @@ -1311,7 +1314,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk, if (cork->flags & IPCORK_OPT) opt = cork->opt; - if (rt->rt_type == RTN_MULTICAST) + if (cork->ttl != 0) + ttl = cork->ttl; + else if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; else ttl = ip_select_ttl(inet, &rt->dst); @@ -1319,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; - iph->tos = inet->tos; + iph->tos = (cork->tos != -1) ? cork->tos : inet->tos; iph->frag_off = df; iph->ttl = ttl; iph->protocol = sk->sk_protocol; @@ -1331,7 +1336,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt, 0); } - skb->priority = sk->sk_priority; + skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; skb->mark = sk->sk_mark; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec @@ -1481,6 +1486,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, ipc.addr = daddr; ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; if (replyopts.opt.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d9c4f11..0626f2c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -189,7 +189,7 @@ EXPORT_SYMBOL(ip_cmsg_recv); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) { - int err; + int err, val; struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { @@ -215,6 +215,24 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) ipc->addr = info->ipi_spec_dst.s_addr; break; } + case IP_TTL: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 1 || val > 255) + return -EINVAL; + ipc->ttl = val; + break; + case IP_TOS: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 0 || val > 255) + return -EINVAL; + ipc->tos = val; + ipc->priority = rt_tos2priority(ipc->tos); + break; + default: return -EINVAL; } @@ -1034,11 +1052,12 @@ e_inval: * destination in skb->cb[] before dst drop. * This way, receiver doesnt make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - if (skb_rtable(skb)) { + if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) && + skb_rtable(skb)) { pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ac9fabe0..63a6d6d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); ttl = tnl_params->ttl; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len; - if (max_headroom > dev->needed_headroom) { + if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; - if (skb_cow_head(skb, dev->needed_headroom)) { - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return; - } + + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, - ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df, - !net_eq(tunnel->net, dev_net(dev))); + tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return; @@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - if (!IS_ERR(itn->fb_tunnel_dev)) + if (!IS_ERR(itn->fb_tunnel_dev)) { itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); + } rtnl_unlock(); return PTR_RET(itn->fb_tunnel_dev); @@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, if (!net_eq(dev_net(t->dev), net)) unregister_netdevice_queue(t->dev, head); } - if (itn->fb_tunnel_dev) - unregister_netdevice_queue(itn->fb_tunnel_dev, head); } void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d6c856b..c31e3ad 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, memset(IPCB(skb), 0, sizeof(*IPCB(skb))); /* Push down and install the IP header. */ - __skb_push(skb, sizeof(struct iphdr)); + skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index e805e7b..91f69bc 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -49,70 +49,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; static int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); -static int vti_err(struct sk_buff *skb, u32 info) -{ - - /* All the routers (except for Linux) return only - * 8 bytes of packet payload. It means, that precise relaying of - * ICMP in the real Internet is absolutely infeasible. - */ - struct net *net = dev_net(skb->dev); - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - struct iphdr *iph = (struct iphdr *)skb->data; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct ip_tunnel *t; - int err; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return 0; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - default: - /* All others are translated to HOST_UNREACH. */ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - err = -ENOENT; - - t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); - if (t == NULL) - goto out; - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPIP, 0); - err = 0; - goto out; - } - - err = 0; - if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - goto out; - - if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) - t->err_count++; - else - t->err_count = 1; - t->err_time = jiffies; -out: - return err; -} - /* We dont digest the packet therefore let the packet pass */ static int vti_rcv(struct sk_buff *skb) { @@ -296,9 +232,8 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; } -static struct xfrm_tunnel vti_handler __read_mostly = { +static struct xfrm_tunnel_notifier vti_handler __read_mostly = { .handler = vti_rcv, - .err_handler = vti_err, .priority = 1, }; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 67e17dc..b6346bf 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index d7d9882..9afbdb1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6) && sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, (int) isk->inet_num, - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, sk->sk_bound_dev_if); - if (!ipv6_addr_any(&np->rcv_saddr) && - !ipv6_addr_equal(&np->rcv_saddr, + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, &ipv6_hdr(skb)->daddr)) continue; #endif @@ -237,11 +236,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, unsigned int seq; do { - seq = read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); } @@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) } else if (saddr->sa_family == AF_INET6) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; struct ipv6_pinfo *np = inet6_sk(sk); - np->rcv_saddr = np->saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr; #endif } } @@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif) #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr)); memset(&np->saddr, 0, sizeof(np->saddr)); #endif } @@ -416,10 +415,12 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) (int)sk->sk_bound_dev_if); err = 0; - if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || - (sk->sk_family == AF_INET6 && - !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) + if (sk->sk_family == AF_INET && isk->inet_rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6 && !ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + sk->sk_userlocks |= SOCK_BINDADDR_LOCK; +#endif if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; @@ -429,7 +430,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); + memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); #endif sk_dst_reset(sk); @@ -713,6 +714,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; sock_tx_timestamp(sk, &ipc.tx_flags); @@ -744,7 +747,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EINVAL; faddr = ipc.opt->opt.faddr; } - tos = RT_TOS(inet->tos); + tos = get_rttos(&ipc, inet); if (sock_flag(sk, SOCK_LOCALROUTE) || (msg->msg_flags & MSG_DONTROUTE) || (ipc.opt && ipc.opt->opt.is_strictroute)) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bfec521..41e1d28 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_sk_update_pmtu(skb, sk, info); - else if (type == ICMP_REDIRECT) + else if (type == ICMP_REDIRECT) { ipv4_sk_redirect(skb, sk); + return; + } /* Report error on raw socket, if: 1. User requested ip_recverr. @@ -297,7 +299,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { /* Charge it to the socket. */ - ipv4_pktinfo_prepare(skb); + ipv4_pktinfo_prepare(sk, skb); if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; @@ -517,6 +519,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { @@ -556,7 +560,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, daddr = ipc.opt->opt.faddr; } } - tos = RT_CONN_FLAGS(sk); + tos = get_rtconn_flags(&ipc, sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 727f436..6011615 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2072,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) RT_SCOPE_LINK); goto make_route; } - if (fl4->saddr) { + if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 14a15c4..3b64c59 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -89,8 +89,7 @@ __u32 cookie_init_timestamp(struct request_sock *req) static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, - __be16 dport, __u32 sseq, __u32 count, - __u32 data) + __be16 dport, __u32 sseq, __u32 data) { /* * Compute the secure sequence number. @@ -102,7 +101,7 @@ static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, * As an extra hack, we add a small "data" value that encodes the * MSS into the second hash value. */ - + u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) @@ -114,22 +113,21 @@ static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, * If the syncookie is bad, the data returned will be out of * range. This must be checked by the caller. * - * The count value used to generate the cookie must be within - * "maxdiff" if the current (passed-in) "count". The return value - * is (__u32)-1 if this test fails. + * The count value used to generate the cookie must be less than + * MAX_SYNCOOKIE_AGE minutes in the past. + * The return value (__u32)-1 if this test fails. */ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, __u32 sseq, - __u32 count, __u32 maxdiff) + __be16 sport, __be16 dport, __u32 sseq) { - __u32 diff; + u32 diff, count = tcp_cookie_time(); /* Strip away the layers from the cookie */ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); - if (diff >= maxdiff) + if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - @@ -138,22 +136,22 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, } /* - * MSS Values are taken from the 2009 paper - * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: - * - values 1440 to 1460 accounted for 80% of observed mss values - * - values outside the 536-1460 range are rare (<0.2%). + * MSS Values are chosen based on the 2011 paper + * 'An Analysis of TCP Maximum Segement Sizes' by S. Alcock and R. Nelson. + * Values .. + * .. lower than 536 are rare (< 0.2%) + * .. between 537 and 1299 account for less than < 1.5% of observed values + * .. in the 1300-1349 range account for about 15 to 20% of observed mss values + * .. exceeding 1460 are very rare (< 0.04%) * - * Table must be sorted. + * 1460 is the single most frequently announced mss value (30 to 46% depending + * on monitor location). Table must be sorted. */ static __u16 const msstab[] = { - 64, - 512, 536, - 1024, - 1440, + 1300, + 1440, /* 1440, 1452: PPPoE */ 1460, - 4312, - 8960, }; /* @@ -173,7 +171,7 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), - jiffies / (HZ * 60), mssind); + mssind); } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); @@ -189,13 +187,6 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) } /* - * This (misnamed) value is the age of syncookie which is permitted. - * Its ideal value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula (exponential - * backoff) to compute at runtime so it's currently hardcoded here. - */ -#define COUNTER_TRIES 4 -/* * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ @@ -204,9 +195,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, { __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, - th->source, th->dest, seq, - jiffies / (HZ * 60), - COUNTER_TRIES); + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -315,10 +304,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; - ireq->loc_port = th->dest; - ireq->rmt_port = th->source; - ireq->loc_addr = ip_hdr(skb)->daddr; - ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_num = ntohs(th->dest); + ireq->ir_rmt_port = th->source; + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -358,8 +347,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 540279f..c08f096 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -43,12 +43,12 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ -static void set_local_port_range(int range[2]) +static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&sysctl_local_ports.lock); - sysctl_local_ports.range[0] = range[0]; - sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&sysctl_local_ports.lock); + write_seqlock(&net->ipv4.sysctl_local_ports.lock); + net->ipv4.sysctl_local_ports.range[0] = range[0]; + net->ipv4.sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -56,6 +56,8 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = + container_of(table->data, struct net, ipv4.sysctl_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -66,14 +68,15 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, .extra2 = &ip_local_port_range_max, }; - inet_get_local_port_range(range, range + 1); + inet_get_local_port_range(net, &range[0], &range[1]); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) ret = -EINVAL; else - set_local_port_range(range); + set_local_port_range(net, range); } return ret; @@ -83,23 +86,27 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) { kgid_t *data = table->data; + struct net *net = + container_of(table->data, struct net, ipv4.sysctl_ping_group_range); unsigned int seq; do { - seq = read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); } /* Update system visible IP port range */ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) { kgid_t *data = table->data; - write_seqlock(&sysctl_local_ports.lock); + struct net *net = + container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + write_seqlock(&net->ipv4.sysctl_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -475,13 +482,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "ip_local_port_range", - .data = &sysctl_local_ports.range, - .maxlen = sizeof(sysctl_local_ports.range), - .mode = 0644, - .proc_handler = ipv4_local_port_range, - }, - { .procname = "ip_local_reserved_ports", .data = NULL, /* initialized in sysctl_ipv4_init */ .maxlen = 65536, @@ -854,6 +854,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_local_port_range", + .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), + .data = &init_net.ipv4.sysctl_local_ports.range, + .mode = 0644, + .proc_handler = ipv4_local_port_range, + }, + { .procname = "tcp_mem", .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), .mode = 0644, @@ -888,6 +895,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_ping_group_range; table[7].data = &net->ipv4.sysctl_tcp_ecn; + table[8].data = + &net->ipv4.sysctl_local_ports.range; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -901,6 +910,13 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.sysctl_local_ports.lock); + net->ipv4.sysctl_local_ports.range[0] = 32768; + net->ipv4.sysctl_local_ports.range[1] = 61000; + tcp_init_mem(net); net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6e5617b..be4b161 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3137,10 +3137,9 @@ void __init tcp_init(void) &tcp_hashinfo.ehash_mask, 0, thash_entries ? 0 : 512 * 1024); - for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { + for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); - INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); - } + if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); tcp_hashinfo.bhash = diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 25a89ea..eb651a0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr * 1. Tuning sk->sk_sndbuf, when connection enters established state. */ -static void tcp_fixup_sndbuf(struct sock *sk) +static void tcp_sndbuf_expand(struct sock *sk) { - int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); + const struct tcp_sock *tp = tcp_sk(sk); + int sndmem, per_mss; + u32 nr_segs; + + /* Worst case is non GSO/TSO : each frame consumes one skb + * and skb->head is kmalloced using power of two area of memory + */ + per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + + MAX_TCP_HEADER + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + per_mss = roundup_pow_of_two(per_mss) + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + + nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); + nr_segs = max_t(u32, nr_segs, tp->reordering + 1); + + /* Fast Recovery (RFC 5681 3.2) : + * Cubic needs 1.7 factor, rounded to 2 to include + * extra cushion (application might react slowly to POLLOUT) + */ + sndmem = 2 * nr_segs * per_mss; - sndmem *= TCP_INIT_CWND; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); } @@ -355,6 +375,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk) rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * tcp_default_init_rwnd(mss); + /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency + * Allow enough cushion so that sender is not limited by our window + */ + if (sysctl_tcp_moderate_rcvbuf) + rcvmem <<= 2; + if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); } @@ -370,9 +396,11 @@ void tcp_init_buffer_space(struct sock *sk) if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) tcp_fixup_rcvbuf(sk); if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) - tcp_fixup_sndbuf(sk); + tcp_sndbuf_expand(sk); tp->rcvq_space.space = tp->rcv_wnd; + tp->rcvq_space.time = tcp_time_stamp; + tp->rcvq_space.seq = tp->copied_seq; maxwin = tcp_full_space(sk); @@ -512,48 +540,62 @@ void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int time; - int space; - - if (tp->rcvq_space.time == 0) - goto new_measure; + int copied; time = tcp_time_stamp - tp->rcvq_space.time; if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) return; - space = 2 * (tp->copied_seq - tp->rcvq_space.seq); + /* Number of bytes copied to user in last RTT */ + copied = tp->copied_seq - tp->rcvq_space.seq; + if (copied <= tp->rcvq_space.space) + goto new_measure; - space = max(tp->rcvq_space.space, space); + /* A bit of theory : + * copied = bytes received in previous RTT, our base window + * To cope with packet losses, we need a 2x factor + * To cope with slow start, and sender growing its cwin by 100 % + * every RTT, we need a 4x factor, because the ACK we are sending + * now is for the next RTT, not the current one : + * <prev RTT . ><current RTT .. ><next RTT .... > + */ - if (tp->rcvq_space.space != space) { - int rcvmem; + if (sysctl_tcp_moderate_rcvbuf && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int rcvwin, rcvmem, rcvbuf; - tp->rcvq_space.space = space; + /* minimal window to cope with packet losses, assuming + * steady state. Add some cushion because of small variations. + */ + rcvwin = (copied << 1) + 16 * tp->advmss; - if (sysctl_tcp_moderate_rcvbuf && - !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int new_clamp = space; + /* If rate increased by 25%, + * assume slow start, rcvwin = 3 * copied + * If rate increased by 50%, + * assume sender can use 2x growth, rcvwin = 4 * copied + */ + if (copied >= + tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) { + if (copied >= + tp->rcvq_space.space + (tp->rcvq_space.space >> 1)) + rcvwin <<= 1; + else + rcvwin += (rcvwin >> 1); + } - /* Receive space grows, normalize in order to - * take into account packet headers and sk_buff - * structure overhead. - */ - space /= tp->advmss; - if (!space) - space = 1; - rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < tp->advmss) - rcvmem += 128; - space *= rcvmem; - space = min(space, sysctl_tcp_rmem[2]); - if (space > sk->sk_rcvbuf) { - sk->sk_rcvbuf = space; - - /* Make the window clamp follow along. */ - tp->window_clamp = new_clamp; - } + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); + while (tcp_win_from_space(rcvmem) < tp->advmss) + rcvmem += 128; + + rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + if (rcvbuf > sk->sk_rcvbuf) { + sk->sk_rcvbuf = rcvbuf; + + /* Make the window clamp follow along. */ + tp->window_clamp = rcvwin; } } + tp->rcvq_space.space = copied; new_measure: tp->rcvq_space.seq = tp->copied_seq; @@ -713,7 +755,12 @@ static void tcp_update_pacing_rate(struct sock *sk) if (tp->srtt > 8 + 2) do_div(rate, tp->srtt); - sk->sk_pacing_rate = min_t(u64, rate, ~0U); + /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate + * without any lock. We want to make sure compiler wont store + * intermediate values in this location. + */ + ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate, + sk->sk_max_pacing_rate); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -1284,7 +1331,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; + TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + TCP_SKB_CB(prev)->end_seq++; + if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); @@ -2970,7 +3020,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; u32 now = tcp_time_stamp; - int fully_acked = true; + bool fully_acked = true; int flag = 0; u32 pkts_acked = 0; u32 reord = tp->packets_out; @@ -4701,15 +4751,7 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk)) { - int sndmem = SKB_TRUESIZE(max_t(u32, - tp->rx_opt.mss_clamp, - tp->mss_cache) + - MAX_TCP_HEADER); - int demanded = max_t(unsigned int, tp->snd_cwnd, - tp->reordering + 1); - sndmem *= 2 * demanded; - if (sndmem > sk->sk_sndbuf) - sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); + tcp_sndbuf_expand(sk); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -5674,8 +5716,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_congestion_control(sk); tcp_mtup_init(sk); - tcp_init_buffer_space(sk); tp->copied_seq = tp->rcv_nxt; + tcp_init_buffer_space(sk); } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b14266b..114d1b74 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -835,11 +835,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { - __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); skb_set_queue_mapping(skb, queue_mapping); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); if (!tcp_rsk(req)->snt_synack && !err) @@ -972,7 +972,7 @@ static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, { union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; + addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr; return tcp_md5_do_lookup(sk, addr, AF_INET); } @@ -1149,8 +1149,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, saddr = inet_sk(sk)->inet_saddr; daddr = inet_sk(sk)->inet_daddr; } else if (req) { - saddr = inet_rsk(req)->loc_addr; - daddr = inet_rsk(req)->rmt_addr; + saddr = inet_rsk(req)->ir_loc_addr; + daddr = inet_rsk(req)->ir_rmt_addr; } else { const struct iphdr *iph = ip_hdr(skb); saddr = iph->saddr; @@ -1366,8 +1366,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, kfree_skb(skb_synack); return -1; } - err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, - ireq->rmt_addr, ireq->opt); + err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); if (!err) tcp_rsk(req)->snt_synack = tcp_time_stamp; @@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); - tcp_init_buffer_space(child); tcp_init_metrics(child); + tcp_init_buffer_space(child); /* Queue the data carried in the SYN packet. We need to first * bump skb's refcnt because the caller will attempt to free it. @@ -1502,8 +1502,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; + ireq->ir_loc_addr = daddr; + ireq->ir_rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(skb); @@ -1578,15 +1578,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { - __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr); + __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr); skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb)); } else goto drop_and_free; if (likely(!do_fastopen)) { int err; - err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, - ireq->rmt_addr, ireq->opt); + err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); if (err || want_cookie) goto drop_and_free; @@ -1644,9 +1644,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->rmt_addr; - newinet->inet_rcv_saddr = ireq->loc_addr; - newinet->inet_saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->ir_rmt_addr; + newinet->inet_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); ireq->opt = NULL; @@ -2194,18 +2194,6 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) -{ - return hlist_nulls_empty(head) ? NULL : - list_entry(head->first, struct inet_timewait_sock, tw_node); -} - -static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) -{ - return !is_a_nulls(tw->tw_node.next) ? - hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; -} - /* * Get next listener socket follow cur. If cur is NULL, get first socket * starting from bucket given in st->bucket; when st->bucket is zero the @@ -2309,10 +2297,9 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) return rc; } -static inline bool empty_bucket(struct tcp_iter_state *st) +static inline bool empty_bucket(const struct tcp_iter_state *st) { - return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && - hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); } /* @@ -2329,7 +2316,6 @@ static void *established_get_first(struct seq_file *seq) for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; - struct inet_timewait_sock *tw; spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); /* Lockless fast path for the common case of empty buckets */ @@ -2345,18 +2331,7 @@ static void *established_get_first(struct seq_file *seq) rc = sk; goto out; } - st->state = TCP_SEQ_STATE_TIME_WAIT; - inet_twsk_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket].twchain) { - if (tw->tw_family != st->family || - !net_eq(twsk_net(tw), net)) { - continue; - } - rc = tw; - goto out; - } spin_unlock_bh(lock); - st->state = TCP_SEQ_STATE_ESTABLISHED; } out: return rc; @@ -2365,7 +2340,6 @@ out: static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; - struct inet_timewait_sock *tw; struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); @@ -2373,45 +2347,16 @@ static void *established_get_next(struct seq_file *seq, void *cur) ++st->num; ++st->offset; - if (st->state == TCP_SEQ_STATE_TIME_WAIT) { - tw = cur; - tw = tw_next(tw); -get_tw: - while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { - tw = tw_next(tw); - } - if (tw) { - cur = tw; - goto out; - } - spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - st->state = TCP_SEQ_STATE_ESTABLISHED; - - /* Look for next non empty bucket */ - st->offset = 0; - while (++st->bucket <= tcp_hashinfo.ehash_mask && - empty_bucket(st)) - ; - if (st->bucket > tcp_hashinfo.ehash_mask) - return NULL; - - spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); - } else - sk = sk_nulls_next(sk); + sk = sk_nulls_next(sk); sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) - goto found; + return sk; } - st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); - goto get_tw; -found: - cur = sk; -out: - return cur; + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + ++st->bucket; + return established_get_first(seq); } static void *established_get_idx(struct seq_file *seq, loff_t pos) @@ -2464,10 +2409,9 @@ static void *tcp_seek_last_pos(struct seq_file *seq) if (rc) break; st->bucket = 0; + st->state = TCP_SEQ_STATE_ESTABLISHED; /* Fallthrough */ case TCP_SEQ_STATE_ESTABLISHED: - case TCP_SEQ_STATE_TIME_WAIT: - st->state = TCP_SEQ_STATE_ESTABLISHED; if (st->bucket > tcp_hashinfo.ehash_mask) break; rc = established_get_first(seq); @@ -2524,7 +2468,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } break; case TCP_SEQ_STATE_ESTABLISHED: - case TCP_SEQ_STATE_TIME_WAIT: rc = established_get_next(seq, v); break; } @@ -2548,7 +2491,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) if (v != SEQ_START_TOKEN) spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); break; - case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); @@ -2606,10 +2548,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n", i, - ireq->loc_addr, + ireq->ir_loc_addr, ntohs(inet_sk(sk)->inet_sport), - ireq->rmt_addr, - ntohs(ireq->rmt_port), + ireq->ir_rmt_addr, + ntohs(ireq->ir_rmt_port), TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ @@ -2707,6 +2649,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, static int tcp4_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; + struct sock *sk = v; int len; if (v == SEQ_START_TOKEN) { @@ -2721,14 +2664,14 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_ESTABLISHED: - get_tcp4_sock(v, seq, st->num, &len); + if (sk->sk_state == TCP_TIME_WAIT) + get_timewait4_sock(v, seq, st->num, &len); + else + get_tcp4_sock(v, seq, st->num, &len); break; case TCP_SEQ_STATE_OPENREQ: get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); break; - case TCP_SEQ_STATE_TIME_WAIT: - get_timewait4_sock(v, seq, st->num, &len); - break; } seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); out: diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 52f3c6b..4a2a841 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -215,13 +215,15 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, addr.family = req->rsk_ops->family; switch (addr.family) { case AF_INET: - addr.addr.a4 = inet_rsk(req)->rmt_addr; + addr.addr.a4 = inet_rsk(req)->ir_rmt_addr; hash = (__force unsigned int) addr.addr.a4; break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; - hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr); + *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; + hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); break; +#endif default: return NULL; } @@ -240,7 +242,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) { - struct inet6_timewait_sock *tw6; struct tcp_metrics_block *tm; struct inetpeer_addr addr; unsigned int hash; @@ -252,11 +253,12 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock addr.addr.a4 = tw->tw_daddr; hash = (__force unsigned int) addr.addr.a4; break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - tw6 = inet6_twsk((struct sock *)tw); - *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; - hash = ipv6_addr_hash(&tw6->tw_v6_daddr); + *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr; + hash = ipv6_addr_hash(&tw->tw_v6_daddr); break; +#endif default: return NULL; } @@ -288,10 +290,12 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, addr.addr.a4 = inet_sk(sk)->inet_daddr; hash = (__force unsigned int) addr.addr.a4; break; +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; - hash = ipv6_addr_hash(&inet6_sk(sk)->daddr); + *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr; + hash = ipv6_addr_hash(&sk->sk_v6_daddr); break; +#endif default: return NULL; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 58a3e69..97b6841 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -293,12 +293,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - tw6->tw_v6_daddr = np->daddr; - tw6->tw_v6_rcv_saddr = np->rcv_saddr; + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_ipv6only = np->ipv6only; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7c83cb8..e5ce0e1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb unsigned int size = 0; unsigned int eff_sacks; + opts->options = 0; + #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { @@ -848,15 +850,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); - /* If congestion control is doing timestamping, we must - * take such a timestamp before we potentially clone/copy. - */ - if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) - __net_timestamp(skb); - - if (likely(clone_it)) { + if (clone_it) { const struct sk_buff *fclone = skb + 1; + /* If congestion control is doing timestamping, we must + * take such a timestamp before we potentially clone/copy. + */ + if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) + __net_timestamp(skb); + if (unlikely(skb->fclone == SKB_FCLONE_ORIG && fclone->fclone == SKB_FCLONE_CLONE)) NET_INC_STATS_BH(sock_net(sk), @@ -895,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? - tcp_wfree : sock_wfree; + skb->destructor = tcp_wfree; atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -1840,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1869,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - /* TSQ : sk_wmem_alloc accounts skb truesize, - * including skb overhead. But thats OK. + /* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates */ - if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + limit = max(skb->truesize, sk->sk_pacing_rate >> 10); + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); break; } + limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, @@ -2727,8 +2734,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = ireq->loc_port; - th->dest = ireq->rmt_port; + th->source = htons(ireq->ir_num); + th->dest = ireq->ir_rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) */ diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 611beab..8b97d71e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -101,22 +101,6 @@ static inline int tcp_probe_avail(void) si4.sin_addr.s_addr = inet->inet_##mem##addr; \ } while (0) \ -#if IS_ENABLED(CONFIG_IPV6) -#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \ - do { \ - struct ipv6_pinfo *pi6 = inet->pinet6; \ - si6.sin6_family = AF_INET6; \ - si6.sin6_port = inet->inet_##mem##port; \ - si6.sin6_addr = pi6->mem##addr; \ - si6.sin6_flowinfo = 0; /* No need here. */ \ - si6.sin6_scope_id = 0; /* No need here. */ \ - } while (0) -#else -#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \ - do { \ - memset(&si6, 0, sizeof(si6)); \ - } while (0) -#endif /* * Hook inserted to be called before each receive packet. @@ -147,8 +131,17 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d); break; case AF_INET6: - tcp_probe_copy_fl_to_si6(inet, p->src.v6, s); - tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d); + memset(&p->src.v6, 0, sizeof(p->src.v6)); + memset(&p->dst.v6, 0, sizeof(p->dst.v6)); +#if IS_ENABLED(CONFIG_IPV6) + p->src.v6.sin6_family = AF_INET6; + p->src.v6.sin6_port = inet->inet_sport; + p->src.v6.sin6_addr = inet6_sk(sk)->saddr; + + p->dst.v6.sin6_family = AF_INET6; + p->dst.v6.sin6_port = inet->inet_dport; + p->dst.v6.sin6_addr = sk->sk_v6_daddr; +#endif break; default: BUG(); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4b85e6f..af07b5b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -374,9 +374,8 @@ void tcp_retransmit_timer(struct sock *sk) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), - &np->daddr, + &sk->sk_v6_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 74d2c95..9f27bb8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -103,6 +103,7 @@ #include <linux/seq_file.h> #include <net/net_namespace.h> #include <net/icmp.h> +#include <net/inet_hashtables.h> #include <net/route.h> #include <net/checksum.h> #include <net/xfrm.h> @@ -219,7 +220,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned short first, last; DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rand = net_random(); @@ -565,6 +566,26 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, } EXPORT_SYMBOL_GPL(udp4_lib_lookup); +static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif, unsigned short hnum) +{ + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || + (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || + ipv6_only_sock(sk) || + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + return false; + if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) + return false; + return true; +} + static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, @@ -575,20 +596,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, unsigned short hnum = ntohs(loc_port); sk_nulls_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (!net_eq(sock_net(s), net) || - udp_sk(s)->udp_port_hash != hnum || - (inet->inet_daddr && inet->inet_daddr != rmt_addr) || - (inet->inet_dport != rmt_port && inet->inet_dport) || - (inet->inet_rcv_saddr && - inet->inet_rcv_saddr != loc_addr) || - ipv6_only_sock(s) || - (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) - continue; - if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) - continue; - goto found; + if (__udp_is_mcast_sock(net, s, + loc_port, loc_addr, + rmt_port, rmt_addr, + dif, hnum)) + goto found; } s = NULL; found: @@ -658,7 +670,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_REDIRECT: ipv4_sk_redirect(skb, sk); - break; + goto out; } /* @@ -855,6 +867,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; @@ -938,7 +952,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, faddr = ipc.opt->opt.faddr; connected = 0; } - tos = RT_TOS(inet->tos); + tos = get_rttos(&ipc, inet); if (sock_flag(sk, SOCK_LOCALROUTE) || (msg->msg_flags & MSG_DONTROUTE) || (ipc.opt && ipc.opt->opt.is_strictroute)) { @@ -1403,8 +1417,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (inet_sk(sk)->inet_daddr) + if (inet_sk(sk)->inet_daddr) { sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); + } rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -1528,7 +1544,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = 0; - ipv4_pktinfo_prepare(skb); + ipv4_pktinfo_prepare(sk, skb); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); @@ -1577,6 +1593,14 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } +static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; +} + /* * Multicasts and broadcasts go to each listener. * @@ -1705,16 +1729,32 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(net, skb, uh, - saddr, daddr, udptable); + if (skb->sk) { + int ret; + sk = skb->sk; + + if (unlikely(sk->sk_rx_dst == NULL)) + udp_sk_rx_dst_set(sk, skb); - sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + ret = udp_queue_rcv_skb(sk, skb); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; + } else { + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) + return __udp4_lib_mcast_deliver(net, skb, uh, + saddr, daddr, udptable); + + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + } if (sk != NULL) { int ret; - sk_mark_napi_id(sk, skb); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); @@ -1768,6 +1808,135 @@ drop: return 0; } +/* We can only early demux multicast if there is a single matching socket. + * If more than one socket found returns NULL + */ +static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(loc_port); + unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); + struct udp_hslot *hslot = &udp_table.hash[slot]; + + rcu_read_lock(); +begin: + count = 0; + result = NULL; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { + if (__udp_is_mcast_sock(net, sk, + loc_port, loc_addr, + rmt_port, rmt_addr, + dif, hnum)) { + result = sk; + ++count; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; + + if (result) { + if (count != 1 || + unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) + result = NULL; + else if (unlikely(!__udp_is_mcast_sock(net, result, + loc_port, loc_addr, + rmt_port, rmt_addr, + dif, hnum))) { + sock_put(result); + result = NULL; + } + } + rcu_read_unlock(); + return result; +} + +/* For unicast we should only early demux connected sockets or we can + * break forwarding setups. The chains here can be long so only check + * if the first socket is an exact match and if not move on. + */ +static struct sock *__udp4_lib_demux_lookup(struct net *net, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(loc_port); + unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); + unsigned int slot2 = hash2 & udp_table.mask; + struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; + INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) + const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + + rcu_read_lock(); + result = NULL; + udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + if (INET_MATCH(sk, net, acookie, + rmt_addr, loc_addr, ports, dif)) + result = sk; + /* Only check first socket in chain */ + break; + } + + if (result) { + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) + result = NULL; + else if (unlikely(!INET_MATCH(sk, net, acookie, + rmt_addr, loc_addr, + ports, dif))) { + sock_put(result); + result = NULL; + } + } + rcu_read_unlock(); + return result; +} + +void udp_v4_early_demux(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *uh = udp_hdr(skb); + struct sock *sk; + struct dst_entry *dst; + struct net *net = dev_net(skb->dev); + int dif = skb->dev->ifindex; + + /* validate the packet */ + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) + return; + + if (skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, + uh->source, iph->saddr, dif); + else if (skb->pkt_type == PACKET_HOST) + sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, + uh->source, iph->saddr, dif); + else + return; + + if (!sk) + return; + + skb->sk = sk; + skb->destructor = sock_edemux; + dst = sk->sk_rx_dst; + + if (dst) + dst = dst_check(dst, 0); + if (dst) + skb_dst_set_noref(skb, dst); +} + int udp_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index b5663c3..31b1815 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -16,13 +16,13 @@ #include <net/xfrm.h> /* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly; +static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex); -int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler) +int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) { - struct xfrm_tunnel __rcu **pprev; - struct xfrm_tunnel *t; + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; int ret = -EEXIST; int priority = handler->priority; @@ -50,10 +50,10 @@ err: } EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register); -int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler) +int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) { - struct xfrm_tunnel __rcu **pprev; - struct xfrm_tunnel *t; + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; int ret = -ENOENT; mutex_lock(&xfrm4_mode_tunnel_input_mutex); @@ -134,7 +134,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_tunnel *handler; + struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6ff126..cd3fb30 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return false; } +/* Compares an address/prefix_len with addresses on device @dev. + * If one is found it returns true. + */ +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool ret = false; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); + if (ret) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ipv6_chk_custom_prefix); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { struct inet6_dev *idev; @@ -2193,43 +2220,21 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - if (valid_lft > MIN_VALID_LIFETIME || - valid_lft > stored_lft) - update_lft = 1; - else if (stored_lft <= MIN_VALID_LIFETIME) { - /* valid_lft <= stored_lft is always true */ - /* - * RFC 4862 Section 5.5.3e: - * "Note that the preferred lifetime of - * the corresponding address is always - * reset to the Preferred Lifetime in - * the received Prefix Information - * option, regardless of whether the - * valid lifetime is also reset or - * ignored." - * - * So if the preferred lifetime in - * this advertisement is different - * than what we have stored, but the - * valid lifetime is invalid, just - * reset prefered_lft. - * - * We must set the valid lifetime - * to the stored lifetime since we'll - * be updating the timestamp below, - * else we'll set it back to the - * minimum. - */ - if (prefered_lft != ifp->prefered_lft) { - valid_lft = stored_lft; - update_lft = 1; - } - } else { - valid_lft = MIN_VALID_LIFETIME; - if (valid_lft < prefered_lft) - prefered_lft = valid_lft; - update_lft = 1; - } + const u32 minimum_lft = min( + stored_lft, (u32)MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; } if (update_lft) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100..a2cb07c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -364,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; @@ -461,14 +461,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, peer == 1) return -ENOTCONN; sin->sin6_port = inet->inet_dport; - sin->sin6_addr = np->daddr; + sin->sin6_addr = sk->sk_v6_daddr; if (np->sndflow) sin->sin6_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else - sin->sin6_addr = np->rcv_saddr; + sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; } @@ -655,7 +655,7 @@ int inet6_sk_rebuild_header(struct sock *sk) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; @@ -1028,52 +1028,4 @@ out_unregister_tcp_proto: } module_init(inet6_init); -static void __exit inet6_exit(void) -{ - if (disable_ipv6_mod) - return; - - /* First of all disallow new sockets creation. */ - sock_unregister(PF_INET6); - /* Disallow any further netlink messages */ - rtnl_unregister_all(PF_INET6); - - udpv6_exit(); - udplitev6_exit(); - tcpv6_exit(); - - /* Cleanup code parts. */ - ipv6_packet_cleanup(); - ipv6_frag_exit(); - ipv6_exthdrs_exit(); - addrconf_cleanup(); - ip6_flowlabel_cleanup(); - ndisc_late_cleanup(); - ip6_route_cleanup(); -#ifdef CONFIG_PROC_FS - - /* Cleanup code parts. */ - if6_proc_exit(); - ipv6_misc_proc_exit(); - udplite6_proc_exit(); - raw6_proc_exit(); -#endif - ipv6_netfilter_fini(); - ipv6_stub = NULL; - igmp6_cleanup(); - ndisc_cleanup(); - ip6_mr_cleanup(); - icmpv6_cleanup(); - rawv6_exit(); - - unregister_pernet_subsys(&inet6_net_ops); - proto_unregister(&rawv6_prot); - proto_unregister(&udplitev6_prot); - proto_unregister(&udpv6_prot); - proto_unregister(&tcpv6_prot); - - rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} -module_exit(inet6_exit); - MODULE_ALIAS_NETPROTO(PF_INET6); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 48b6bd2..a454b0f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -107,16 +107,16 @@ ipv4_connected: if (err) goto out; - ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr); if (ipv6_addr_any(&np->saddr) || ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr) || - ipv6_mapped_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } @@ -145,7 +145,7 @@ ipv4_connected: } } - np->daddr = *daddr; + sk->sk_v6_daddr = *daddr; np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -156,7 +156,7 @@ ipv4_connected: */ fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -183,16 +183,16 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) np->saddr = fl6.saddr; - if (ipv6_addr_any(&np->rcv_saddr)) { - np->rcv_saddr = fl6.saddr; + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + sk->sk_v6_rcv_saddr = fl6.saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : @@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket) { - struct ipv6_pinfo *np = inet6_sk(sp); const struct in6_addr *dest, *src; - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; seq_printf(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e4311cb..77bb8af 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -70,20 +70,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, const struct request_sock *req) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_TCP; - fl6->daddr = treq->rmt_addr; + fl6->daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(fl6, np->opt, &final); - fl6->saddr = treq->loc_addr; - fl6->flowi6_oif = treq->iif; + fl6->saddr = ireq->ir_v6_loc_addr; + fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = sk->sk_mark; - fl6->fl6_dport = inet_rsk(req)->rmt_port; - fl6->fl6_sport = inet_rsk(req)->loc_port; + fl6->fl6_dport = ireq->ir_rmt_port; + fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); @@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { - const struct inet6_request_sock *treq = inet6_rsk(req); + const struct inet_request_sock *ireq = inet_rsk(req); - if (inet_rsk(req)->rmt_port == rport && + if (ireq->ir_rmt_port == rport && req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&treq->rmt_addr, raddr) && - ipv6_addr_equal(&treq->loc_addr, laddr) && - (!treq->iif || treq->iif == iif)) { + ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && + ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && + (!ireq->ir_iif || ireq->ir_iif == iif)) { WARN_ON(req->sk != NULL); *prevp = prev; return req; @@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, - inet_rsk(req)->rmt_port, + const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, + inet_rsk(req)->ir_rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); @@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { - struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; sin6->sin6_family = AF_INET6; - sin6->sin6_addr = np->daddr; + sin6->sin6_addr = sk->sk_v6_daddr; sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; @@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = sk->sk_protocol; - fl6->daddr = np->daddr; + fl6->daddr = sk->sk_v6_daddr; fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); @@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); rcu_read_unlock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 32b4a16..842d833 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -89,43 +89,22 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) - goto begintw; - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_put(sk); - goto begin; - } - goto out; - } - } - if (get_nulls_value(node) != slot) - goto begin; - -begintw: - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_nulls_for_each_rcu(sk, node, &head->twchain) { - if (sk->sk_hash != hash) + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) continue; - if (likely(INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { - sk = NULL; - goto out; - } - if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { - sock_put(sk); - goto begintw; - } + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) goto out; + + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { + sock_gen_put(sk); + goto begin; } + goto found; } if (get_nulls_value(node) != slot) - goto begintw; - sk = NULL; + goto begin; out: + sk = NULL; +found: rcu_read_unlock(); return sk; } @@ -140,11 +119,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } @@ -236,9 +214,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; + const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; + const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); @@ -248,38 +225,28 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; - struct inet_timewait_sock *tw; + struct inet_timewait_sock *tw = NULL; int twrefcnt = 0; spin_lock(lock); - /* Check TIME-WAIT sockets first. */ - sk_nulls_for_each(sk2, node, &head->twchain) { - if (sk2->sk_hash != hash) - continue; - - if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, - ports, dif))) { - tw = inet_twsk(sk2); - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) + + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { + if (sk2->sk_state == TCP_TIME_WAIT) { + tw = inet_twsk(sk2); + if (twsk_unique(sk, sk2, twp)) + break; + } goto not_unique; + } } -unique: /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ + * in hash table socket with a funny identity. + */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; @@ -312,9 +279,9 @@ not_unique: static inline u32 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, + + return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5bec666..5550a81 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1529,25 +1529,6 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, fib6_walk(&c.w); } -void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - read_lock_bh(&table->tb6_lock); - fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); - read_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { @@ -1782,3 +1763,189 @@ void fib6_gc_cleanup(void) unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } + +#ifdef CONFIG_PROC_FS + +struct ipv6_route_iter { + struct seq_net_private p; + struct fib6_walker_t w; + loff_t skip; + struct fib6_table *tbl; + __u32 sernum; +}; + +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ + struct rt6_info *rt = v; + struct ipv6_route_iter *iter = seq->private; + + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); +#else + seq_puts(seq, "00000000000000000000000000000000 00 "); +#endif + if (rt->rt6i_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->rt6i_gateway); + else + seq_puts(seq, "00000000000000000000000000000000"); + + seq_printf(seq, " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, + rt->dst.dev ? rt->dst.dev->name : ""); + iter->w.leaf = NULL; + return 0; +} + +static int ipv6_route_yield(struct fib6_walker_t *w) +{ + struct ipv6_route_iter *iter = w->args; + + if (!iter->skip) + return 1; + + do { + iter->w.leaf = iter->w.leaf->dst.rt6_next; + iter->skip--; + if (!iter->skip && iter->w.leaf) + return 1; + } while (iter->w.leaf); + + return 0; +} + +static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) +{ + memset(&iter->w, 0, sizeof(iter->w)); + iter->w.func = ipv6_route_yield; + iter->w.root = &iter->tbl->tb6_root; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + iter->w.args = iter; + iter->sernum = iter->w.root->fn_sernum; + INIT_LIST_HEAD(&iter->w.lh); + fib6_walker_link(&iter->w); +} + +static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, + struct net *net) +{ + unsigned int h; + struct hlist_node *node; + + if (tbl) { + h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; + node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist)); + } else { + h = 0; + node = NULL; + } + + while (!node && h < FIB6_TABLE_HASHSZ) { + node = rcu_dereference_bh( + hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); + } + return hlist_entry_safe(node, struct fib6_table, tb6_hlist); +} + +static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) +{ + if (iter->sernum != iter->w.root->fn_sernum) { + iter->sernum = iter->w.root->fn_sernum; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + WARN_ON(iter->w.skip); + iter->w.skip = iter->w.count; + } +} + +static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int r; + struct rt6_info *n; + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + if (!v) + goto iter_table; + + n = ((struct rt6_info *)v)->dst.rt6_next; + if (n) { + ++*pos; + return n; + } + +iter_table: + ipv6_route_check_sernum(iter); + read_lock(&iter->tbl->tb6_lock); + r = fib6_walk_continue(&iter->w); + read_unlock(&iter->tbl->tb6_lock); + if (r > 0) { + if (v) + ++*pos; + return iter->w.leaf; + } else if (r < 0) { + fib6_walker_unlink(&iter->w); + return NULL; + } + fib6_walker_unlink(&iter->w); + + iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); + if (!iter->tbl) + return NULL; + + ipv6_route_seq_setup_walk(iter); + goto iter_table; +} + +static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU_BH) +{ + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + rcu_read_lock_bh(); + iter->tbl = ipv6_route_seq_next_table(NULL, net); + iter->skip = *pos; + + if (iter->tbl) { + ipv6_route_seq_setup_walk(iter); + return ipv6_route_seq_next(seq, NULL, pos); + } else { + return NULL; + } +} + +static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) +{ + struct fib6_walker_t *w = &iter->w; + return w->node && !(w->state == FWS_U && w->node == w->root); +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) + __releases(RCU_BH) +{ + struct ipv6_route_iter *iter = seq->private; + + if (ipv6_route_iter_active(iter)) + fib6_walker_unlink(&iter->w); + + rcu_read_unlock_bh(); +} + +static const struct seq_operations ipv6_route_seq_ops = { + .start = ipv6_route_seq_start, + .next = ipv6_route_seq_next, + .stop = ipv6_route_seq_stop, + .show = ipv6_route_seq_show +}; + +int ipv6_route_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter)); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6b26e9f..1ef1fa2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct ip6_tnl *tunnel = netdev_priv(dev); struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ + unsigned int max_headroom = 0; /* The extra header space needed */ int gre_hlen; struct ipv6_tel_txoption opt; int mtu; @@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -1173,9 +1173,8 @@ done: static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip6_tnl *tunnel = netdev_priv(dev); if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + new_mtu > 0xFFF8 - dev->hard_header_len) return -EINVAL; dev->mtu = new_mtu; return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3a692d5..a54c45c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + struct frag_hdr fhdr; + skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - } - - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; /* Specify the length of each IPv6 datagram fragment. * It has to be a multiple of 8. @@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, ipv6_select_ident(&fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - cork->length += length; - if (length > mtu) { - int proto = sk->sk_protocol; - if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - - if (proto == IPPROTO_UDP && - (rt->dst.dev->features & NETIF_F_UFO)) { + if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, - hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); - if (err) - goto error; - return 0; - } + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); + if (err) + goto error; + return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2d8f482..583b77e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU) { - return -EINVAL; + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPIP) { + if (new_mtu < 68) + return -EINVAL; + } else { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; dev->mtu = new_mtu; return 0; } @@ -1731,8 +1739,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) } } - t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); unregister_netdevice_many(&list); } @@ -1752,6 +1758,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1e2e8e..4919a8e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } if (ipv6_only_sock(sk) || - !ipv6_addr_v4mapped(&np->daddr)) { + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } @@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : + np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 096cd67..d18f9f9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data) if (idev->mc_dad_count) mld_dad_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, @@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data) idev->mc_gq_running = 0; mld_send_report(idev, NULL); - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_timer_expire(unsigned long data) @@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) mld_ifc_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_event(struct inet6_dev *idev) diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 19cfea8..2748b04 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (th == NULL) return NF_DROP; - synproxy_parse_options(skb, par->thoff, th, &opts); + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; if (th->syn && !(th->ack || th->fin || th->rst)) { /* Initial SYN from client */ @@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, /* fall through */ case TCP_CONNTRACK_SYN_SENT: - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { @@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, if (!th->syn || !th->ack) break; - synproxy_parse_options(skb, thoff, th, &opts); + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->tsoff = opts.tsval - synproxy->its; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index d6e4dd8..54b75ea 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -297,9 +297,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; struct nf_conn *ct; - tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; tuple.src.u.tcp.port = inet->inet_sport; - tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u3.in6 = sk->sk_v6_daddr; tuple.dst.u.tcp.port = inet->inet_dport; tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 18f19df..8815e31 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } if (!iif) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 58916bb..3c00842 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { - struct ipv6_pinfo *np = inet6_sk(sk); if (!net_eq(sock_net(sk), net)) continue; - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) goto found; if (is_multicast && inet6_mc_check(sk, loc_addr, rmt_addr)) @@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->sin6_addr; + sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; @@ -335,8 +334,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + return; + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) @@ -802,8 +803,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -814,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EDESTADDRREQ; proto = inet->inet_num; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd9..c3130ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1137,7 +1137,6 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1236,7 +1235,6 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1258,7 +1256,6 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; @@ -2800,56 +2797,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -struct rt6_proc_arg -{ - char *buffer; - int offset; - int length; - int skip; - int len; -}; - -static int rt6_info_route(struct rt6_info *rt, void *p_arg) -{ - struct seq_file *m = p_arg; - - seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); - -#ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); -#else - seq_puts(m, "00000000000000000000000000000000 00 "); -#endif - if (rt->rt6i_flags & RTF_GATEWAY) { - seq_printf(m, "%pi6", &rt->rt6i_gateway); - } else { - seq_puts(m, "00000000000000000000000000000000"); - } - seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); - return 0; -} - -static int ipv6_route_show(struct seq_file *m, void *v) -{ - struct net *net = (struct net *)m->private; - fib6_clean_all_ro(net, rt6_info_route, 0, m); - return 0; -} - -static int ipv6_route_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, ipv6_route_show); -} - static const struct file_operations ipv6_route_proc_fops = { .owner = THIS_MODULE, .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release_net, + .release = seq_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7ee5cb9..1926945 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, return false; } +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + int prefix_len; + +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} + +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); - if (tunnel->dev->priv_flags & IFF_ISATAP) { - if (!isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; - } - } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { - if (is_spoofed_6rd(tunnel, iph->saddr, - &ipv6_hdr(skb)->saddr) || - is_spoofed_6rd(tunnel, iph->daddr, - &ipv6_hdr(skb)->daddr)) { - tunnel->dev->stats.rx_errors++; - goto out; - } + if (packet_is_spoofed(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; } __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); @@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ @@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net) rtnl_lock(); sit_destroy_tunnels(sitn, &list); - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bf63ac8..d04d3f1d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,26 +24,21 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* Table must be sorted. */ +/* RFC 2460, Section 8.3: + * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] + * + * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows + * using higher values than ipv4 tcp syncookies. + * The other values are chosen based on ethernet (1500 and 9k MTU), plus + * one that accounts for common encap (PPPoe) overhead. Table must be sorted. + */ static __u16 const msstab[] = { - 64, - 512, - 536, - 1280 - 60, + 1280 - 60, /* IPV6_MIN_MTU - 60 */ 1480 - 60, 1500 - 60, - 4460 - 60, 9000 - 60, }; -/* - * This (misnamed) value is the age of syncookie which is permitted. - * Its ideal value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula (exponential - * backoff) to compute at runtime so it's currently hardcoded here. - */ -#define COUNTER_TRIES 4 - static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) @@ -86,8 +81,9 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, __u32 sseq, - __u32 count, __u32 data) + __u32 data) { + u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) @@ -96,15 +92,14 @@ static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, - __be16 dport, __u32 sseq, __u32 count, - __u32 maxdiff) + __be16 dport, __u32 sseq) { - __u32 diff; + __u32 diff, count = tcp_cookie_time(); cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); - if (diff >= maxdiff) + if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - @@ -125,8 +120,7 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, *mssp = msstab[mssind]; return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, - th->dest, ntohl(th->seq), - jiffies / (HZ * 60), mssind); + th->dest, ntohl(th->seq), mssind); } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); @@ -146,8 +140,7 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, { __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, - th->source, th->dest, seq, - jiffies / (HZ * 60), COUNTER_TRIES); + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -157,7 +150,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; struct inet_request_sock *ireq; - struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -194,7 +186,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ireq = inet_rsk(req); - ireq6 = inet6_rsk(req); treq = tcp_rsk(req); treq->listener = NULL; @@ -202,22 +193,22 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; req->mss = mss; - ireq->rmt_port = th->source; - ireq->loc_port = th->dest; - ireq6->rmt_addr = ipv6_hdr(skb)->saddr; - ireq6->loc_addr = ipv6_hdr(skb)->daddr; + ireq->ir_rmt_port = th->source; + ireq->ir_num = ntohs(th->dest); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - ireq6->pktopts = skb; + ireq->pktopts = skb; } - ireq6->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq6->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); req->expires = 0UL; req->num_retrans = 0; @@ -241,12 +232,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = ireq6->rmt_addr; + fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq6->loc_addr; + fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c71501..b996ee2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (tp->rx_opt.ts_recent_stamp && - !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { + !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; tp->write_seq = 0; } - np->daddr = usin->sin6_addr; + sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* @@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } else { ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &np->rcv_saddr); + &sk->sk_v6_rcv_saddr); } return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; + fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; @@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (saddr == NULL) { saddr = &fl6.saddr; - np->rcv_saddr = *saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ @@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; @@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); @@ -465,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, u16 queue_mapping) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; int err = -ENOMEM; @@ -477,9 +477,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { - __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); + __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr); - fl6->daddr = treq->rmt_addr; + fl6->daddr = ireq->ir_v6_rmt_addr; skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); @@ -502,7 +503,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) static void tcp_v6_reqsk_destructor(struct request_sock *req) { - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG @@ -515,13 +516,13 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr); + return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, struct request_sock *req) { - return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); + return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, @@ -621,10 +622,10 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, if (sk) { saddr = &inet6_sk(sk)->saddr; - daddr = &inet6_sk(sk)->daddr; + daddr = &sk->sk_v6_daddr; } else if (req) { - saddr = &inet6_rsk(req)->loc_addr; - daddr = &inet6_rsk(req)->rmt_addr; + saddr = &inet_rsk(req)->ir_v6_loc_addr; + daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -949,7 +950,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tmp_opt; struct request_sock *req; - struct inet6_request_sock *treq; + struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; @@ -994,25 +995,25 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = inet6_rsk(req); - treq->rmt_addr = ipv6_hdr(skb)->saddr; - treq->loc_addr = ipv6_hdr(skb)->daddr; + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, skb, sock_net(sk)); - treq->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - treq->pktopts = skb; + ireq->pktopts = skb; } if (want_cookie) { @@ -1051,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) * to the moment of synflood. */ LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); + &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } @@ -1086,7 +1087,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *treq; + struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1116,11 +1117,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - newnp->rcv_saddr = newnp->saddr; + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1151,7 +1152,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } - treq = inet6_rsk(req); + ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) goto out_overflow; @@ -1185,10 +1186,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - newnp->daddr = treq->rmt_addr; - newnp->saddr = treq->loc_addr; - newnp->rcv_saddr = treq->loc_addr; - newsk->sk_bound_dev_if = treq->iif; + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; + newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... @@ -1203,11 +1204,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, + if (ireq->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq->pktopts, sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(treq->pktopts); - treq->pktopts = NULL; + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } @@ -1244,13 +1245,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) { + if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key * across. Shucks. */ - tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -1722,8 +1723,8 @@ static void get_openreq6(struct seq_file *seq, const struct sock *sk, struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; - const struct in6_addr *src = &inet6_rsk(req)->loc_addr; - const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; + const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; + const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; if (ttd < 0) ttd = 0; @@ -1734,10 +1735,10 @@ static void get_openreq6(struct seq_file *seq, i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], - ntohs(inet_rsk(req)->loc_port), + inet_rsk(req)->ir_num, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], - ntohs(inet_rsk(req)->rmt_port), + ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, 0,0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ @@ -1758,10 +1759,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - const struct ipv6_pinfo *np = inet6_sk(sp); - dest = &np->daddr; - src = &np->rcv_saddr; + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); @@ -1810,11 +1810,10 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); - dest = &tw6->tw_v6_daddr; - src = &tw6->tw_v6_rcv_saddr; + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -1834,6 +1833,7 @@ static void get_timewait6_sock(struct seq_file *seq, static int tcp6_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; + struct sock *sk = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1849,14 +1849,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_ESTABLISHED: - get_tcp6_sock(seq, v, st->num); + if (sk->sk_state == TCP_TIME_WAIT) + get_timewait6_sock(seq, v, st->num); + else + get_tcp6_sock(seq, v, st->num); break; case TCP_SEQ_STATE_OPENREQ: get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); break; - case TCP_SEQ_STATE_TIME_WAIT: - get_timewait6_sock(seq, v, st->num); - break; } out: return 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f405815..b496de1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -55,20 +55,17 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); - __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk1_rcv_saddr || !sk2_rcv_saddr || - sk1_rcv_saddr == sk2_rcv_saddr)); + (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || + sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -79,7 +76,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) return 1; return 0; @@ -107,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) unsigned int hash2_nulladdr = udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -117,7 +114,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { u16 new_hash = udp6_portaddr_hash(sock_net(sk), - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -133,7 +130,6 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); score = 0; @@ -142,13 +138,13 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -171,10 +167,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; if (inet->inet_dport) { @@ -182,8 +177,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -525,8 +520,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + goto out; + } np = inet6_sk(sk); @@ -549,8 +546,10 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); + } rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -688,20 +687,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (udp_sk(s)->udp_port_hash == num && s->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(s); if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; } - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } if (!inet6_mc_check(s, loc_addr, rmt_addr)) @@ -844,7 +842,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_napi_id(sk, skb); ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -1062,7 +1059,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, } else if (!up->pending) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } else daddr = NULL; @@ -1132,8 +1129,8 @@ do_udp_sendmsg: * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && @@ -1144,7 +1141,7 @@ do_udp_sendmsg: return -EDESTADDRREQ; fl6.fl6_dport = inet->inet_dport; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; connected = 1; } @@ -1260,8 +1257,8 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index feae495a..9af77d9 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -115,6 +115,11 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) +{ + return sk->sk_user_data; +} + static inline struct l2tp_net *l2tp_pernet(struct net *net) { BUG_ON(!net); @@ -504,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { + if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1128,7 +1133,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6) + if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(skb, NULL); else #endif @@ -1176,7 +1181,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { __wsum csum = skb_checksum(skb, 0, udp_len, 0); skb->ip_summed = CHECKSUM_UNNECESSARY; - uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len, + uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1184,7 +1189,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, + uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len, IPPROTO_UDP, 0); } } @@ -1255,7 +1260,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) l2tp_xmit_ipv6_csum(sk, skb, udp_len); else #endif @@ -1304,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); */ static void l2tp_tunnel_destruct(struct sock *sk) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); struct l2tp_net *pn; - tunnel = sk->sk_user_data; if (tunnel == NULL) goto end; @@ -1675,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } /* Check if this socket has already been prepped */ - tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + tunnel = l2tp_tunnel(sk); if (tunnel != NULL) { /* This socket has already been prepped */ err = -EBUSY; @@ -1704,6 +1708,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (ipv6_addr_v4mapped(&np->saddr) && + ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { + struct inet_sock *inet = inet_sk(sk); + + tunnel->v4mapped = true; + inet->inet_saddr = np->saddr.s6_addr32[3]; + inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3]; + inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3]; + } else { + tunnel->v4mapped = false; + } + } +#endif + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { @@ -1712,7 +1734,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) udpv6_encap_enable(); else #endif diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 66a559b..6f251cb 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -194,6 +194,9 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ +#if IS_ENABLED(CONFIG_IPV6) + bool v4mapped; +#endif struct work_struct del_work; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 072d720..2d6760a 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -127,9 +127,10 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) #if IS_ENABLED(CONFIG_IPV6) if (tunnel->sock->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); + const struct ipv6_pinfo *np = inet6_sk(tunnel->sock); + seq_printf(m, " from %pI6c to %pI6c\n", - &np->saddr, &np->daddr); + &np->saddr, &tunnel->sock->sk_v6_daddr); } else #endif seq_printf(m, " from %pI4 to %pI4\n", diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b8a6039..cfd6530 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -63,7 +63,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct sock *sk; sk_for_each_bound(sk, &l2tp_ip6_bind_table) { - struct in6_addr *addr = inet6_rcv_saddr(sk); + const struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); if (l2tp == NULL) @@ -331,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) rcu_read_unlock(); inet->inet_rcv_saddr = inet->inet_saddr = v4addr; - np->rcv_saddr = addr->l2tp_addr; + sk->sk_v6_rcv_saddr = addr->l2tp_addr; np->saddr = addr->l2tp_addr; l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id; @@ -421,14 +421,14 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, if (!lsk->peer_conn_id) return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; - lsa->l2tp_addr = np->daddr; + lsa->l2tp_addr = sk->sk_v6_daddr; if (np->sndflow) lsa->l2tp_flowinfo = np->flow_label; } else { - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) lsa->l2tp_addr = np->saddr; else - lsa->l2tp_addr = np->rcv_saddr; + lsa->l2tp_addr = sk->sk_v6_rcv_saddr; lsa->l2tp_conn_id = lsk->conn_id; } @@ -537,8 +537,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && lsa->l2tp_scope_id && @@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0825ff2..be446d5 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -306,8 +306,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (np) { if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr), &np->saddr) || - nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(np->daddr), - &np->daddr)) + nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr), + &sk->sk_v6_daddr)) goto nla_put_failure; } else #endif diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 5ebee2d..f0a7ada 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -906,8 +906,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, #if IS_ENABLED(CONFIG_IPV6) } else if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET6)) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); struct sockaddr_pppol2tpin6 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -920,13 +920,13 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.d_session = session->peer_session_id; sp.pppol2tp.addr.sin6_family = AF_INET6; sp.pppol2tp.addr.sin6_port = inet->inet_dport; - memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr, - sizeof(np->daddr)); + memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, + sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); } else if ((tunnel->version == 3) && (tunnel->sock->sk_family == AF_INET6)) { - struct ipv6_pinfo *np = inet6_sk(tunnel->sock); struct sockaddr_pppol2tpv3in6 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -939,8 +939,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.d_session = session->peer_session_id; sp.pppol2tp.addr.sin6_family = AF_INET6; sp.pppol2tp.addr.sin6_port = inet->inet_dport; - memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr, - sizeof(np->daddr)); + memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, + sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); #endif } else if (tunnel->version == 3) { diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 54563ad..355cc3b 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param) } else { lapb->n2count++; lapb_requeue_frames(lapb); + lapb_kick(lapb); } break; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f0247a4..0011ac8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3073,6 +3073,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; + if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + return 0; if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 5d62c58..d4cee98 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -77,13 +77,13 @@ DECLARE_EVENT_CLASS(local_sdata_addr_evt, TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __array(char, addr, 6) + __array(char, addr, ETH_ALEN) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - memcpy(__entry->addr, sdata->vif.addr, 6); + memcpy(__entry->addr, sdata->vif.addr, ETH_ALEN); ), TP_printk( diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 550a688..1220f5a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2101,7 +2101,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, skip, shift; + int rate, shift; u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; @@ -2129,14 +2129,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, exrates + 2); *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = exrates; - skip = 0; for (i = 8; i < sband->n_bitrates; i++) { u8 basic = 0; if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; - if (skip++ < 8) - continue; if (need_basic && basic_rates & BIT(i)) basic = 0x80; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index ba36c28..a2d6263 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -1,7 +1,7 @@ menuconfig IP_SET tristate "IP set support" depends on INET && NETFILTER - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK help This option adds IP set support to the kernel. In order to define and use the sets, you need the userspace utility @@ -90,6 +90,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETPORTNET + tristate "hash:net,port,net set support" + depends on IP_SET + help + This option adds the hash:net,port,net set type support, by which + one can store two IPv4/IPv6 subnets, and a protocol/port in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NET tristate "hash:net set support" depends on IP_SET @@ -99,6 +108,15 @@ config IP_SET_HASH_NET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETNET + tristate "hash:net,net set support" + depends on IP_SET + help + This option adds the hash:net,net set type support, by which + one can store IPv4/IPv6 network address/prefix pairs in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NETPORT tristate "hash:net,port set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 6e965ec..44b2d38 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o +obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o +obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 2524337..a13e15b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -8,38 +8,32 @@ #ifndef __IP_SET_BITMAP_IP_GEN_H #define __IP_SET_BITMAP_IP_GEN_H -#define CONCAT(a, b) a##b -#define TOKEN(a,b) CONCAT(a, b) - -#define mtype_do_test TOKEN(MTYPE, _do_test) -#define mtype_gc_test TOKEN(MTYPE, _gc_test) -#define mtype_is_filled TOKEN(MTYPE, _is_filled) -#define mtype_do_add TOKEN(MTYPE, _do_add) -#define mtype_do_del TOKEN(MTYPE, _do_del) -#define mtype_do_list TOKEN(MTYPE, _do_list) -#define mtype_do_head TOKEN(MTYPE, _do_head) -#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) -#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test) +#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) +#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) +#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) +#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) +#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) +#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) +#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE -#define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) -#define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) -#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) +#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) static void mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) @@ -49,11 +43,22 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } static void +mtype_ext_cleanup(struct ip_set *set) +{ + struct mtype *map = set->data; + u32 id; + + for (id = 0; id < map->elements; id++) + if (test_bit(id, map->members)) + ip_set_ext_destroy(set, get_ext(set, map, id)); +} + +static void mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; @@ -62,8 +67,11 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (map->dsize) + if (set->dsize) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); ip_set_free(map->extensions); + } kfree(map); set->data = NULL; @@ -74,6 +82,8 @@ mtype_flush(struct ip_set *set) { struct mtype *map = set->data; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); } @@ -91,12 +101,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - map->dsize * map->elements)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + set->dsize * map->elements))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -111,16 +118,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_test(e, map); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_test(e, map, set->dsize); if (ret <= 0) return ret; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) return 0; if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(x, map), ext, mext, flags); + ip_set_update_counter(ext_counter(x, set), ext, mext, flags); return 1; } @@ -130,26 +137,30 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_add(e, map, flags); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_add(e, map, flags, set->dsize); if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; + /* Element is re-added, cleanup extensions */ + ip_set_ext_destroy(set, x); } if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); + mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret); #else - ip_set_timeout_set(ext_timeout(x, map), ext->timeout); + ip_set_timeout_set(ext_timeout(x, set), ext->timeout); #endif if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(x, map), ext); + ip_set_init_counter(ext_counter(x, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(x, set), ext); return 0; } @@ -159,16 +170,27 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(map, e->id); + void *x = get_ext(set, map, e->id); - if (mtype_do_del(e, map) || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map)))) + if (mtype_do_del(e, map)) + return -IPSET_ERR_EXIST; + + ip_set_ext_destroy(set, x); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; return 0; } +#ifndef IP_SET_BITMAP_STORED_TIMEOUT +static inline bool +mtype_is_filled(const struct mtype_elem *x) +{ + return true; +} +#endif + static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) @@ -183,13 +205,13 @@ mtype_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->elements; cb->args[2]++) { id = cb->args[2]; - x = get_ext(map, id); + x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT mtype_is_filled((const struct mtype_elem *) x) && #endif - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -199,23 +221,10 @@ mtype_list(const struct ip_set *set, } else goto nla_put_failure; } - if (mtype_do_list(skb, map, id)) + if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set)) { -#ifdef IP_SET_BITMAP_STORED_TIMEOUT - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, map))))) - goto nla_put_failure; -#else - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(x, map))))) - goto nla_put_failure; -#endif - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, map))) + if (ip_set_put_extensions(skb, set, x, + mtype_is_filled((const struct mtype_elem *) x))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -228,11 +237,11 @@ mtype_list(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, adt); if (unlikely(id == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, adt); return 0; } @@ -241,21 +250,23 @@ mtype_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct mtype *map = set->data; - const void *x; + void *x; u32 id; /* We run parallel with other readers (test element) * but adding/deleting new entries is locked out */ read_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) - if (mtype_gc_test(id, map)) { - x = get_ext(map, id); - if (ip_set_timeout_expired(ext_timeout(x, map))) + if (mtype_gc_test(id, map, set->dsize)) { + x = get_ext(set, map, id); + if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); + ip_set_ext_destroy(set, x); + } } read_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index f1a8128..6f1f9f4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -25,12 +25,13 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); #define MTYPE bitmap_ip @@ -44,10 +45,7 @@ struct bitmap_ip { u32 elements; /* number of max elements in the set */ u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -65,20 +63,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) /* Common functions */ static inline int -bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, + struct bitmap_ip *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map) +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, - u32 flags) + u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -90,7 +89,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) } static inline int -bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id) +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id, + size_t dsize) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); @@ -113,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); @@ -131,9 +131,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 ip, ip_to; + u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -200,7 +200,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && x->netmask == y->netmask && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -209,25 +209,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_ip_elem { }; -/* Timeout variant */ - -struct bitmap_ipt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -240,8 +221,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -252,7 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->elements = elements; map->hosts = hosts; map->netmask = netmask; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -261,10 +242,11 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, } static int -bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts; u64 elements; u8 netmask = 32; int ret; @@ -336,61 +318,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ip; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipct_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - map->dsize = sizeof(struct bitmap_ipc_elem); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipc_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipt_elem, timeout); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - map->dsize = 0; - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } } return 0; } @@ -401,8 +337,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -420,6 +356,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 3b30e0b..740eabe 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -25,12 +25,13 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac @@ -48,11 +49,8 @@ struct bitmap_ipmac { u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ - u32 timeout; /* timeout value */ - struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ - size_t dsize; /* size of element */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collector */ }; /* ADT structure for generic function args */ @@ -82,13 +80,13 @@ get_elem(void *extensions, u16 id, size_t dsize) static inline int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, - const struct bitmap_ipmac *map) + const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) return e->ether == NULL || ether_addr_equal(e->ether, elem->ether); @@ -97,13 +95,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, } static inline int -bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map) +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; - elem = get_elem(map->extensions, id, map->dsize); + elem = get_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } @@ -117,13 +115,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) static inline int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, - const struct ip_set_ext *ext, + const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { - if (t == map->timeout) + if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); @@ -142,11 +140,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, static inline int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, - struct bitmap_ipmac *map, u32 flags) + struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (test_and_set_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->ether && (flags & IPSET_FLAG_EXIST)) @@ -178,22 +176,12 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, return !test_and_clear_bit(e->id, map->members); } -static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) -{ - const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); - - return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : - *timeout; -} - static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, - u32 id) + u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || @@ -216,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; /* MAC can be src only */ @@ -245,8 +233,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); - u32 ip; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -285,43 +273,12 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } /* Plain variant */ -/* Timeout variant */ - -struct bitmap_ipmact_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipmacc_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipmacct_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ @@ -330,11 +287,11 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -343,7 +300,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -352,10 +309,10 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, } static int -bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], +bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; @@ -399,57 +356,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ipmac; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipmacct_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - map->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipmacc_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipmact_elem, timeout); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct bitmap_ipmac_elem)); + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - map->dsize = sizeof(struct bitmap_ipmac_elem); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - set->variant = &bitmap_ipmac; } return 0; } @@ -460,8 +375,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -478,6 +393,7 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 8207d1f..e7603c5 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -20,12 +20,13 @@ #include <linux/netfilter/ipset/ip_set_bitmap.h> #include <linux/netfilter/ipset/ip_set_getport.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); #define MTYPE bitmap_port @@ -38,9 +39,6 @@ struct bitmap_port { u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -59,20 +57,20 @@ port_to_id(const struct bitmap_port *m, u16 port) static inline int bitmap_port_do_test(const struct bitmap_port_adt_elem *e, - const struct bitmap_port *map) + const struct bitmap_port *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_port_gc_test(u16 id, const struct bitmap_port *map) +bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, - struct bitmap_port *map, u32 flags) + struct bitmap_port *map, u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -85,7 +83,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e, } static inline int -bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id) +bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id, + size_t dsize) { return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port + id)); @@ -106,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; @@ -131,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; int ret = 0; @@ -191,7 +190,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_port == y->first_port && x->last_port == y->last_port && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -200,25 +199,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_port_elem { }; -/* Timeout variant */ - -struct bitmap_portt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_portc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_portct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -230,8 +210,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * map->elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * map->elements); if (!map->extensions) { kfree(map->members); return false; @@ -239,7 +219,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } map->first_port = first_port; map->last_port = last_port; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_UNSPEC; @@ -248,11 +228,11 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_port *map; u16 first_port, last_port; - u32 cadt_flags = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -276,53 +256,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->elements = last_port - first_port + 1; map->memsize = map->elements * sizeof(unsigned long); set->variant = &bitmap_port; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_portct_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_port_gc_init(set, bitmap_port_gc); - } else { - map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_portc_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_portt_elem, timeout); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_port_gc_init(set, bitmap_port_gc); - } else { - map->dsize = 0; - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } return 0; } @@ -333,8 +274,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, @@ -349,6 +290,7 @@ static struct ip_set_type bitmap_port_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f2e30fb..dc9284b 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -17,6 +17,8 @@ #include <linux/spinlock.h> #include <linux/rculist.h> #include <net/netlink.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> @@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ -static struct ip_set * __rcu *ip_set_list; /* all individual sets */ -static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ +struct ip_set_net { + struct ip_set * __rcu *ip_set_list; /* all individual sets */ + ip_set_id_t ip_set_max; /* max number of sets */ + int is_deleted; /* deleted by ip_set_net_exit */ +}; +static int ip_set_net_id __read_mostly; + +static inline struct ip_set_net *ip_set_pernet(struct net *net) +{ + return net_generic(net, ip_set_net_id); +} #define IP_SET_INC 64 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) @@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ #define nfnl_dereference(p) \ rcu_dereference_protected(p, 1) -#define nfnl_set(id) \ - nfnl_dereference(ip_set_list)[id] +#define nfnl_set(inst, id) \ + nfnl_dereference((inst)->ip_set_list)[id] /* * The set types are implemented in modules and registered set types @@ -315,6 +326,60 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +typedef void (*destroyer)(void *); +/* ipset data extension types, in size order */ + +const struct ip_set_ext_type ip_set_extensions[] = { + [IPSET_EXT_ID_COUNTER] = { + .type = IPSET_EXT_COUNTER, + .flag = IPSET_FLAG_WITH_COUNTERS, + .len = sizeof(struct ip_set_counter), + .align = __alignof__(struct ip_set_counter), + }, + [IPSET_EXT_ID_TIMEOUT] = { + .type = IPSET_EXT_TIMEOUT, + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, + [IPSET_EXT_ID_COMMENT] = { + .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, + .flag = IPSET_FLAG_WITH_COMMENT, + .len = sizeof(struct ip_set_comment), + .align = __alignof__(struct ip_set_comment), + .destroy = (destroyer) ip_set_comment_free, + }, +}; +EXPORT_SYMBOL_GPL(ip_set_extensions); + +static inline bool +add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) +{ + return ip_set_extensions[id].flag ? + (flags & ip_set_extensions[id].flag) : + !!tb[IPSET_ATTR_TIMEOUT]; +} + +size_t +ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) +{ + enum ip_set_ext_id id; + size_t offset = 0; + u32 cadt_flags = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + for (id = 0; id < IPSET_EXT_ID_MAX; id++) { + if (!add_extension(id, cadt_flags, tb)) + continue; + offset += ALIGN(len + offset, ip_set_extensions[id].align); + set->offset[id] = offset; + set->extensions |= ip_set_extensions[id].type; + offset += ip_set_extensions[id].len; + } + return len + offset; +} +EXPORT_SYMBOL_GPL(ip_set_elem_len); + int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) @@ -334,6 +399,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], ext->packets = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_PACKETS])); } + if (tb[IPSET_ATTR_COMMENT]) { + if (!(set->extensions & IPSET_EXT_COMMENT)) + return -IPSET_ERR_COMMENT; + ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); + } + return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); @@ -374,13 +445,14 @@ __ip_set_put(struct ip_set *set) */ static inline struct ip_set * -ip_set_rcu_get(ip_set_id_t index) +ip_set_rcu_get(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); /* ip_set_list itself needs to be protected */ - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; rcu_read_unlock(); return set; @@ -390,7 +462,8 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -428,7 +501,8 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret; BUG_ON(set == NULL); @@ -450,7 +524,8 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -474,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); - for (i = 0; i < ip_set_max; i++) { - s = rcu_dereference(ip_set_list)[i]; + for (i = 0; i < inst->ip_set_max; i++) { + s = rcu_dereference(inst->ip_set_list)[i]; if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -501,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * to be valid, after calling this function. * */ -void -ip_set_put_byindex(ip_set_id_t index) + +static inline void +__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) { struct ip_set *set; rcu_read_lock(); - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; if (set != NULL) __ip_set_put(set); rcu_read_unlock(); } + +void +ip_set_put_byindex(struct net *net, ip_set_id_t index) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + __ip_set_put_byindex(inst, index); +} EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* @@ -522,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * */ const char * -ip_set_name_byindex(ip_set_id_t index) +ip_set_name_byindex(struct net *net, ip_set_id_t index) { - const struct ip_set *set = ip_set_rcu_get(index); + const struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(set == NULL); BUG_ON(set->ref == 0); @@ -546,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get(const char *name) +ip_set_nfnl_get(struct net *net, const char *name) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -573,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get_byindex(ip_set_id_t index) +ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); - if (index > ip_set_max) + if (index > inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set) __ip_set_get(set); else @@ -600,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); * The nfnl mutex is used in the function. */ void -ip_set_nfnl_put(ip_set_id_t index) +ip_set_nfnl_put(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); - if (set != NULL) - __ip_set_put(set); + if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ + set = nfnl_set(inst, index); + if (set != NULL) + __ip_set_put(set); + } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -664,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static struct ip_set * -find_set_and_id(const char *name, ip_set_id_t *id) +find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) { struct ip_set *set = NULL; ip_set_id_t i; *id = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - set = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); if (set != NULL && STREQ(set->name, name)) { *id = i; break; @@ -681,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id) } static inline struct ip_set * -find_set(const char *name) +find_set(struct ip_set_net *inst, const char *name) { ip_set_id_t id; - return find_set_and_id(name, &id); + return find_set_and_id(inst, name, &id); } static int -find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, + struct ip_set **set) { struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; @@ -725,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct net *net = sock_net(ctnl); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; @@ -783,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto put_out; } - ret = set->type->create(set, tb, flags); + ret = set->type->create(net, set, tb, flags); if (ret != 0) goto put_out; @@ -794,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - ret = find_free_id(set->name, &index, &clash); + ret = find_free_id(inst, set->name, &index, &clash); if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && @@ -807,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto cleanup; } else if (ret == -IPSET_ERR_MAX_SETS) { struct ip_set **list, **tmp; - ip_set_id_t i = ip_set_max + IP_SET_INC; + ip_set_id_t i = inst->ip_set_max + IP_SET_INC; - if (i < ip_set_max || i == IPSET_INVALID_ID) + if (i < inst->ip_set_max || i == IPSET_INVALID_ID) /* Wraparound */ goto cleanup; @@ -817,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = nfnl_dereference(ip_set_list); - memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max); - rcu_assign_pointer(ip_set_list, list); + tmp = nfnl_dereference(inst->ip_set_list); + memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); + rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ synchronize_net(); /* Use new list */ - index = ip_set_max; - ip_set_max = i; + index = inst->ip_set_max; + inst->ip_set_max = i; kfree(tmp); ret = 0; } else if (ret) @@ -834,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); - nfnl_set(index) = set; + nfnl_set(inst, index) = set; return ret; @@ -857,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static void -ip_set_destroy_set(ip_set_id_t index) +ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) { - struct ip_set *set = nfnl_set(index); + struct ip_set *set = nfnl_set(inst, index); pr_debug("set: %s\n", set->name); - nfnl_set(index) = NULL; + nfnl_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -875,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -894,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, */ read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } } read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } } else { - s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i); + s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &i); if (s == NULL) { ret = -ENOENT; goto out; @@ -918,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } read_unlock_bh(&ip_set_ref_lock); - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } return 0; out: @@ -943,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; @@ -950,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) ip_set_flush_set(s); } } else { - s = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (s == NULL) return -ENOENT; @@ -982,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -992,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1003,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; @@ -1031,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1040,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); + from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &from_id); if (from == NULL) return -ENOENT; - to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); + to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), + &to_id); if (to == NULL) return -IPSET_ERR_EXIST_SETNAME2; @@ -1061,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - nfnl_set(from_id) = to; - nfnl_set(to_id) = from; + nfnl_set(inst, from_id) = to; + nfnl_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -1081,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(cb->args[1])->name); - ip_set_put_byindex((ip_set_id_t) cb->args[1]); + pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); + __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); } return 0; } @@ -1109,6 +1211,7 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, @@ -1122,7 +1225,7 @@ dump_init(struct netlink_callback *cb) if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; - set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]), + set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); if (set == NULL) return -ENOENT; @@ -1150,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (!cb->args[0]) { ret = dump_init(cb); @@ -1163,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= ip_set_max) + if (cb->args[1] >= inst->ip_set_max) goto out; dump_type = DUMP_TYPE(cb->args[0]); dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; dump_last: pr_debug("args[0]: %u %u args[1]: %ld\n", dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1252,8 +1356,8 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(index)->name); - ip_set_put_byindex(index); + pr_debug("release set %s\n", nfnl_set(inst, index)->name); + __ip_set_put_byindex(inst, index); cb->args[2] = 0; } out: @@ -1271,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1278,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, + .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1356,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1374,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1410,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1428,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1464,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; int ret = 0; @@ -1474,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1499,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1508,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1733,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) unsigned int *op; void *data; int copylen = *len, ret = 0; + struct net *net = sock_net(sk); + struct ip_set_net *inst = ip_set_pernet(net); - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optval != SO_IP_SET) return -EBADF; @@ -1783,22 +1896,39 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } + case IP_SET_OP_GET_FNAME: { + struct ip_set_req_get_set_family *req_get = data; + ip_set_id_t id; + + if (*len != sizeof(struct ip_set_req_get_set_family)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(NFNL_SUBSYS_IPSET); + find_set_and_id(inst, req_get->set.name, &id); + req_get->set.index = id; + if (id != IPSET_INVALID_ID) + req_get->family = nfnl_set(inst, id)->family; + nfnl_unlock(NFNL_SUBSYS_IPSET); + goto copy; + } case IP_SET_OP_GET_BYINDEX: { struct ip_set_req_get_set *req_get = data; struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || - req_get->set.index >= ip_set_max) { + req_get->set.index >= inst->ip_set_max) { ret = -EINVAL; goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(req_get->set.index); + set = nfnl_set(inst, req_get->set.index); strncpy(req_get->set.name, set ? set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); @@ -1827,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = { .owner = THIS_MODULE, }; -static int __init -ip_set_init(void) +static int __net_init +ip_set_net_init(struct net *net) { + struct ip_set_net *inst = ip_set_pernet(net); + struct ip_set **list; - int ret; - if (max_sets) - ip_set_max = max_sets; - if (ip_set_max >= IPSET_INVALID_ID) - ip_set_max = IPSET_INVALID_ID - 1; + inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; + if (inst->ip_set_max >= IPSET_INVALID_ID) + inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); + list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); if (!list) return -ENOMEM; + inst->is_deleted = 0; + rcu_assign_pointer(inst->ip_set_list, list); + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + struct ip_set *set = NULL; + ip_set_id_t i; + + inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ + + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); + if (set != NULL) + ip_set_destroy_set(inst, i); + } + kfree(rcu_dereference_protected(inst->ip_set_list, 1)); +} + +static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net) +}; + - rcu_assign_pointer(ip_set_list, list); - ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); +static int __init +ip_set_init(void) +{ + int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); - kfree(list); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); return ret; } - - pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + return ret; + } return 0; } static void __exit ip_set_fini(void) { - struct ip_set **list = rcu_dereference_protected(ip_set_list, 1); - - /* There can't be any existing set */ + unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index dac156f..29fb01d 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -102,9 +102,25 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, int protocol = iph->protocol; /* See comments at tcp_match in ip_tables.c */ - if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET)) + if (protocol <= 0) return false; + if (ntohs(iph->frag_off) & IP_OFFSET) + switch (protocol) { + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_ICMP: + /* Port info not available for fragment offset > 0 */ + return false; + default: + /* Other protocols doesn't have ports, + so we can match fragments */ + *proto = protocol; + return true; + } + return get_port(skb, protocol, protooff, src, port, proto); } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 707bc52..6a80dbd 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -15,8 +15,7 @@ #define rcu_dereference_bh(p) rcu_dereference(p) #endif -#define CONCAT(a, b) a##b -#define TOKEN(a, b) CONCAT(a, b) +#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. @@ -78,10 +77,14 @@ struct htable { #define hbucket(h, i) (&((h)->bucket[i])) +#ifndef IPSET_NET_COUNT +#define IPSET_NET_COUNT 1 +#endif + /* Book-keeping of the prefixes added to the set */ struct net_prefixes { - u8 cidr; /* the different cidr values in the set */ - u32 nets; /* number of elements per cidr */ + u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ + u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ }; /* Compute the hash table size */ @@ -114,23 +117,6 @@ htable_bits(u32 hashsize) return bits; } -/* Destroy the hashtable part of the set */ -static void -ahash_destroy(struct htable *t) -{ - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) - /* FIXME: use slab cache */ - kfree(n->value); - } - - ip_set_free(t); -} - static int hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) { @@ -156,30 +142,30 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) } #ifdef IP_SET_HASH_WITH_NETS +#if IPSET_NET_COUNT > 1 +#define __CIDR(cidr, i) (cidr[i]) +#else +#define __CIDR(cidr, i) (cidr) +#endif #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ -#define CIDR(cidr) (cidr + 1) +#define CIDR(cidr, i) (__CIDR(cidr, i) + 1) #else -#define CIDR(cidr) (cidr) +#define CIDR(cidr, i) (__CIDR(cidr, i)) #endif #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) #ifdef IP_SET_HASH_WITH_MULTI -#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#define NLEN(family) (SET_HOST_MASK(family) + 1) #else -#define NETS_LENGTH(family) SET_HOST_MASK(family) +#define NLEN(family) SET_HOST_MASK(family) #endif #else -#define NETS_LENGTH(family) 0 +#define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ -#define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) -#define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) - #endif /* _IP_SET_HASH_GEN_H */ /* Family dependent templates */ @@ -194,6 +180,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_data_next #undef mtype_elem +#undef mtype_ahash_destroy +#undef mtype_ext_cleanup #undef mtype_add_cidr #undef mtype_del_cidr #undef mtype_ahash_memsize @@ -220,41 +208,44 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef HKEY -#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) #ifdef IP_SET_HASH_WITH_NETS -#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) #else #define mtype_do_data_match(d) 1 #endif -#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) -#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) -#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) -#define mtype_data_list TOKEN(MTYPE, _data_list) -#define mtype_data_next TOKEN(MTYPE, _data_next) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) -#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) -#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) +#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem) +#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) +#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) +#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) +#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_expire TOKEN(MTYPE, _expire) -#define mtype_resize TOKEN(MTYPE, _resize) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) -#define mtype_variant TOKEN(MTYPE, _variant) -#define mtype_data_match TOKEN(MTYPE, _data_match) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_expire IPSET_TOKEN(MTYPE, _expire) +#define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_variant IPSET_TOKEN(MTYPE, _variant) +#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) @@ -269,13 +260,10 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) /* The generic hash structure */ struct htype { - struct htable *table; /* the hash table */ + struct htable __rcu *table; /* the hash table */ u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ - u32 timeout; /* timeout value, if enabled */ - size_t dsize; /* data struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -297,49 +285,49 @@ struct htype { /* Network cidr size book keeping when the hash stores different * sized networks */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) { if (j != -1) continue; - else if (h->nets[i].cidr < cidr) + else if (h->nets[i].cidr[n] < cidr) j = i; - else if (h->nets[i].cidr == cidr) { - h->nets[i].nets++; + else if (h->nets[i].cidr[n] == cidr) { + h->nets[i].nets[n]++; return; } } if (j != -1) { for (; i > j; i--) { - h->nets[i].cidr = h->nets[i - 1].cidr; - h->nets[i].nets = h->nets[i - 1].nets; + h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; + h->nets[i].nets[n] = h->nets[i - 1].nets[n]; } } - h->nets[i].cidr = cidr; - h->nets[i].nets = 1; + h->nets[i].cidr[n] = cidr; + h->nets[i].nets[n] = 1; } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr != cidr) + if (h->nets[i].cidr[n] != cidr) continue; - if (h->nets[i].nets > 1 || i == net_end || - h->nets[i + 1].nets == 0) { - h->nets[i].nets--; + if (h->nets[i].nets[n] > 1 || i == net_end || + h->nets[i + 1].nets[n] == 0) { + h->nets[i].nets[n]--; return; } - for (j = i; j < net_end && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; + for (j = i; j < net_end && h->nets[j].nets[n]; j++) { + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].nets[n] = h->nets[j + 1].nets[n]; } - h->nets[j].nets = 0; + h->nets[j].nets[n] = 0; return; } } @@ -347,10 +335,10 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) /* Calculate the actual memory size of the set data */ static size_t -mtype_ahash_memsize(const struct htype *h, u8 nets_length) +mtype_ahash_memsize(const struct htype *h, const struct htable *t, + u8 nets_length, size_t dsize) { u32 i; - struct htable *t = h->table; size_t memsize = sizeof(*h) + sizeof(*t) #ifdef IP_SET_HASH_WITH_NETS @@ -359,35 +347,70 @@ mtype_ahash_memsize(const struct htype *h, u8 nets_length) + jhash_size(t->htable_bits) * sizeof(struct hbucket); for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * h->dsize; + memsize += t->bucket[i].size * dsize; return memsize; } +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +static void +mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) +{ + int i; + + for (i = 0; i < n->pos; i++) + ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); +} + /* Flush a hash type of set: destroy all elements */ static void mtype_flush(struct ip_set *set) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct hbucket *n; u32 i; + t = rcu_dereference_bh_nfnl(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); n->size = n->pos = 0; /* FIXME: use slab cache */ kfree(n->value); } } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) - * NETS_LENGTH(set->family)); + memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif h->elements = 0; } +/* Destroy the hashtable part of the set */ +static void +mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + kfree(n->value); + } + } + + ip_set_free(t); +} + /* Destroy a hash type of set */ static void mtype_destroy(struct ip_set *set) @@ -397,7 +420,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(h->table); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -414,10 +437,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&h->gc); h->gc.data = (unsigned long) set; h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(h->timeout)); + IPSET_GC_PERIOD(set->timeout)); } static bool @@ -428,37 +451,40 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Resizing changes htable_bits, so we ignore it */ return x->maxelem == y->maxelem && - x->timeout == y->timeout && + a->timeout == b->timeout && #ifdef IP_SET_HASH_WITH_NETMASK x->netmask == y->netmask && #endif a->extensions == b->extensions; } -/* Get the ith element from the array block n */ -#define ahash_data(n, i, dsize) \ - ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) - /* Delete expired elements from the hashtable */ static void -mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) { - struct htable *t = h->table; + struct htable *t; struct hbucket *n; struct mtype_elem *data; u32 i; int j; +#ifdef IP_SET_HASH_WITH_NETS + u8 k; +#endif + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, h))) { + if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), - nets_length); + for (k = 0; k < IPSET_NET_COUNT; k++) + mtype_del_cidr(h, CIDR(data->cidr, k), + nets_length, k); #endif + ip_set_ext_destroy(set, data); if (j != n->pos - 1) /* Not last one */ memcpy(data, @@ -481,6 +507,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n->value = tmp; } } + rcu_read_unlock_bh(); } static void @@ -491,10 +518,10 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); } @@ -505,7 +532,7 @@ static int mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; - struct htable *t, *orig = h->table; + struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table); u8 htable_bits = orig->htable_bits; #ifdef IP_SET_HASH_WITH_NETS u8 flags; @@ -520,8 +547,7 @@ mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NETS_LENGTH(set->family), - h->dsize); + mtype_expire(set, set->data, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -548,25 +574,25 @@ retry: for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS flags = 0; mtype_data_reset_flags(data, &flags); #endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); if (ret < 0) { #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - ahash_destroy(t); + mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; return ret; } - d = ahash_data(m, m->pos++, h->dsize); - memcpy(d, data, h->dsize); + d = ahash_data(m, m->pos++, set->dsize); + memcpy(d, data, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif @@ -581,7 +607,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - ahash_destroy(orig); + mtype_ahash_destroy(set, orig, false); return 0; } @@ -604,7 +630,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -618,11 +644,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { /* Just the extensions could be overwritten */ j = i; goto reuse_slot; @@ -633,30 +659,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } /* Reuse first timed out entry */ if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)) && + ip_set_timeout_expired(ext_timeout(data, set)) && j != AHASH_MAX(h) + 1) j = i; } reuse_slot: if (j != AHASH_MAX(h) + 1) { /* Fill out reused slot */ - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + for (i = 0; i < IPSET_NET_COUNT; i++) { + mtype_del_cidr(h, CIDR(data->cidr, i), + NLEN(set->family), i); + mtype_add_cidr(h, CIDR(d->cidr, i), + NLEN(set->family), i); + } #endif + ip_set_ext_destroy(set, data); } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); - ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); if (ret != 0) { if (ret == -EAGAIN) mtype_data_next(&h->next, d); goto out; } - data = ahash_data(n, n->pos++, h->dsize); + data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + for (i = 0; i < IPSET_NET_COUNT; i++) + mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family), + i); #endif h->elements++; } @@ -665,9 +698,11 @@ reuse_slot: mtype_data_set_flags(data, flags); #endif if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(data, h), ext); + ip_set_init_counter(ext_counter(data, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(data, set), ext); out: rcu_read_unlock_bh(); @@ -682,47 +717,60 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i; + int i, ret = -IPSET_ERR_EXIST; +#ifdef IP_SET_HASH_WITH_NETS + u8 j; +#endif u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h))) - return -IPSET_ERR_EXIST; + ip_set_timeout_expired(ext_timeout(data, set))) + goto out; if (i != n->pos - 1) /* Not last one */ - memcpy(data, ahash_data(n, n->pos - 1, h->dsize), - h->dsize); + memcpy(data, ahash_data(n, n->pos - 1, set->dsize), + set->dsize); n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + for (j = 0; j < IPSET_NET_COUNT; j++) + mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family), + j); #endif + ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * h->dsize, + * set->dsize, GFP_ATOMIC); - if (!tmp) - return 0; + if (!tmp) { + ret = 0; + goto out; + } n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * h->dsize); + memcpy(tmp, n->value, n->size * set->dsize); kfree(n->value); n->value = tmp; } - return 0; + ret = 0; + goto out; } - return -IPSET_ERR_EXIST; +out: + rcu_read_unlock_bh(); + return ret; } static inline int @@ -730,8 +778,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(data, - (struct htype *)(set->data)), + ip_set_update_counter(ext_counter(data, set), ext, mext, flags); return mtype_do_data_match(data); } @@ -745,25 +792,38 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; +#if IPSET_NET_COUNT == 2 + struct mtype_elem orig = *d; + int i, j = 0, k; +#else int i, j = 0; +#endif u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); + u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - mtype_data_netmask(d, h->nets[j].cidr); + for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { +#if IPSET_NET_COUNT == 2 + mtype_data_reset_elem(d, &orig); + mtype_data_netmask(d, h->nets[j].cidr[0], false); + for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi; + k++) { + mtype_data_netmask(d, h->nets[k].cidr[1], true); +#else + mtype_data_netmask(d, h->nets[j].cidr[0]); +#endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set)) { if (!ip_set_timeout_expired( - ext_timeout(data, h))) + ext_timeout(data, set))) return mtype_data_match(data, ext, mext, set, flags); @@ -774,6 +834,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, return mtype_data_match(data, ext, mext, set, flags); } +#if IPSET_NET_COUNT == 2 + } +#endif } return 0; } @@ -785,30 +848,41 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct mtype_elem *d = value; struct hbucket *n; struct mtype_elem *data; - int i; + int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) - return mtype_test_cidrs(set, d, ext, mext, flags); + for (i = 0; i < IPSET_NET_COUNT; i++) + if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + break; + if (i == IPSET_NET_COUNT) { + ret = mtype_test_cidrs(set, d, ext, mext, flags); + goto out; + } #endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) - return mtype_data_match(data, ext, mext, set, flags); + ip_set_timeout_expired(ext_timeout(data, set)))) { + ret = mtype_data_match(data, ext, mext, set, flags); + goto out; + } } - return 0; +out: + rcu_read_unlock_bh(); + return ret; } /* Reply a HEADER request: fill out the header part of the set */ @@ -816,18 +890,18 @@ static int mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct htype *h = set->data; + const struct htable *t; struct nlattr *nested; size_t memsize; - read_lock_bh(&set->lock); - memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); - read_unlock_bh(&set->lock); + t = rcu_dereference_bh_nfnl(h->table); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, - htonl(jhash_size(h->table->htable_bits))) || + htonl(jhash_size(t->htable_bits))) || nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) goto nla_put_failure; #ifdef IP_SET_HASH_WITH_NETMASK @@ -836,12 +910,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || - ((set->extensions & IPSET_EXT_COUNTER) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -856,7 +927,7 @@ mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { const struct htype *h = set->data; - const struct htable *t = h->table; + const struct htable *t = rcu_dereference_bh_nfnl(h->table); struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -874,9 +945,9 @@ mtype_list(const struct ip_set *set, n = hbucket(t, cb->args[2]); pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); for (i = 0; i < n->pos; i++) { - e = ahash_data(n, i, h->dsize); + e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, h))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[2], n, i, e); @@ -890,13 +961,7 @@ mtype_list(const struct ip_set *set, } if (mtype_data_list(skb, e)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, h))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, h))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -909,24 +974,24 @@ mtype_list(const struct ip_set *set, nla_put_failure: nlmsg_trim(skb, incomplete); - ipset_nest_end(skb, atd); if (unlikely(first == cb->args[2])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } static int -TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); +IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int -TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); +IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, @@ -946,16 +1011,17 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u32 cadt_flags = 0; u8 hbits; #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; #endif size_t hsize; struct HTYPE *h; + struct htable *t; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -1005,7 +1071,7 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->netmask = netmask; #endif get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); hsize = htable_size(hbits); @@ -1013,91 +1079,37 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) kfree(h); return -ENOMEM; } - h->table = ip_set_alloc(hsize); - if (!h->table) { + t = ip_set_alloc(hsize); + if (!t) { kfree(h); return -ENOMEM; } - h->table->htable_bits = hbits; + t->htable_bits = hbits; + rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) - set->variant = &TOKEN(HTYPE, 4_variant); - else - set->variant = &TOKEN(HTYPE, 6_variant); - - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - timeout); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - counter); - TOKEN(HTYPE, 4_gc_init)(set, - TOKEN(HTYPE, 4_gc)); - } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - timeout); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - counter); - TOKEN(HTYPE, 6_gc_init)(set, - TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4c_elem), - counter); - } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6c_elem), - counter); - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4t_elem), - timeout); - TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); - } else { - h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6t_elem), - timeout); - TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); - } + if (set->family == NFPROTO_IPV4) { + set->variant = &IPSET_TOKEN(HTYPE, 4_variant); + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); } else { + set->variant = &IPSET_TOKEN(HTYPE, 6_variant); + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); else - h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); + set->name, jhash_size(t->htable_bits), + t->htable_bits, h->maxelem, set->data, t); return 0; } diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index c74e6e1..e65fc24 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -23,19 +23,20 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counters support */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ #define HTYPE hash_ip #define IP_SET_HASH_WITH_NETMASK -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ip4_elem { @@ -43,22 +44,6 @@ struct hash_ip4_elem { __be32 ip; }; -struct hash_ip4t_elem { - __be32 ip; - unsigned long timeout; -}; - -struct hash_ip4c_elem { - __be32 ip; - struct ip_set_counter counter; -}; - -struct hash_ip4ct_elem { - __be32 ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -99,7 +84,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -118,8 +103,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, hosts; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, hosts; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -178,29 +163,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; -struct hash_ip6t_elem { - union nf_inet_addr ip; - unsigned long timeout; -}; - -struct hash_ip6c_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; -}; - -struct hash_ip6ct_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -253,7 +222,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); hash_ip6_netmask(&e.ip, h->netmask); @@ -270,7 +239,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -304,8 +273,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -324,6 +293,7 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7a2d2bd..525a595 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -24,19 +24,20 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ #define HTYPE hash_ipport -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipport4_elem { @@ -46,31 +47,6 @@ struct hash_ipport4_elem { u8 padding; }; -struct hash_ipport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -116,10 +92,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -136,8 +111,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -222,7 +197,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipport6_elem { union nf_inet_addr ip; @@ -231,31 +206,6 @@ struct hash_ipport6_elem { u8 padding; }; -struct hash_ipport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -306,10 +256,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -326,7 +275,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; @@ -396,8 +345,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -419,6 +368,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 34e8a1a..f563663 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -24,19 +24,20 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ #define HTYPE hash_ipportip -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportip4_elem { @@ -47,34 +48,6 @@ struct hash_ipportip4_elem { u8 padding; }; -struct hash_ipportip4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - static inline bool hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, const struct hash_ipportip4_elem *ip2, @@ -120,10 +93,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -141,8 +113,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -231,7 +203,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; @@ -241,34 +213,6 @@ struct hash_ipportip6_elem { u8 padding; }; -struct hash_ipportip6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -319,10 +263,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -340,7 +283,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; @@ -414,8 +357,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -437,6 +380,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f15f3e2..5d87fe8 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -24,15 +24,16 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -46,7 +47,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportnet4_elem { @@ -58,37 +59,6 @@ struct hash_ipportnet4_elem { u8 proto; }; -struct hash_ipportnet4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -170,9 +140,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -195,9 +165,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; - u32 ip2_from, ip2_to, ip2_last, ip2; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; u8 cidr; int ret; @@ -272,7 +242,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > 32) return -IPSET_ERR_INVALID_CIDR; @@ -306,9 +276,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried - && ip == ntohl(h->next.ip) - && p == ntohs(h->next.port) + ip2 = retried && + ip == ntohl(h->next.ip) && + p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { e.ip2 = htonl(ip2); @@ -328,7 +298,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportnet6_elem { union nf_inet_addr ip; @@ -339,37 +309,6 @@ struct hash_ipportnet6_elem { u8 proto; }; -struct hash_ipportnet6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -454,9 +393,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -479,7 +418,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; @@ -574,8 +513,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -600,6 +539,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 223e9f5..8295cf4 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -22,21 +22,22 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 Range as input support for IPv4 added */ -/* 2 nomatch flag support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Range as input support for IPv4 added */ +/* 2 nomatch flag support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ #define HTYPE hash_net #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_net4_elem { @@ -46,31 +47,6 @@ struct hash_net4_elem { u8 cidr; }; -struct hash_net4t_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net4c_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net4ct_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -143,9 +119,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -165,8 +141,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -228,7 +204,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_net6_elem { union nf_inet_addr ip; @@ -237,31 +213,6 @@ struct hash_net6_elem { u8 cidr; }; -struct hash_net6t_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net6c_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net6ct_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -338,9 +289,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -357,10 +308,9 @@ static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -406,8 +356,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -425,6 +375,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7d798d5..3f64a66 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -23,14 +23,15 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 nomatch flag support added */ -/* 2 /0 support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 nomatch flag support added */ +/* 2 /0 support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -134,7 +135,7 @@ iface_add(struct rb_root *root, const char **iface) #define STREQ(a, b) (strcmp(a, b) == 0) -/* IPv4 variants */ +/* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; @@ -144,7 +145,7 @@ struct hash_netiface4_elem_hashed { u8 elem; }; -/* Member elements without timeout */ +/* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; @@ -154,37 +155,6 @@ struct hash_netiface4_elem { const char *iface; }; -struct hash_netiface4t_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface4c_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface4ct_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -265,10 +235,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -319,8 +289,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; @@ -399,7 +369,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; @@ -418,37 +388,6 @@ struct hash_netiface6_elem { const char *iface; }; -struct hash_netiface6t_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface6c_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface6ct_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -534,10 +473,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -584,7 +523,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); char iface[IFNAMSIZ]; int ret; @@ -645,8 +584,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -668,6 +607,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c new file mode 100644 index 0000000..4260327 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -0,0 +1,483 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); +IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netnet +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variants */ + +/* Member elements */ +struct hash_netnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, + const struct hash_netnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip2->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} + +static inline void +hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, + struct hash_netnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netnet4_data_list(struct sk_buff *skb, + const struct hash_netnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netnet4_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet4_elem *d) +{ + next->ipcmp = d->ipcmp; +} + +#define MTYPE hash_netnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK, + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr2 || cidr2 > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr2; + } + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] && + tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_to < ip2_from) + swap(ip2_from, ip2_to); + if (ip2_from + UINT_MAX == ip2_to) + return -IPSET_ERR_HASH_RANGE; + + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + ip2 = (retried && + ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = last2 + 1; + } + ip = last + 1; + } + return ret; +} + +/* IPv6 variants */ + +struct hash_netnet6_elem { + union nf_inet_addr ip[2]; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, + const struct hash_netnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} + +static inline void +hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, + struct hash_netnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netnet6_data_list(struct sk_buff *skb, + const struct hash_netnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netnet6_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet6_elem *d) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + ret = adtfn(set, &e, &ext, &ext, flags); + + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; +} + +static struct ip_set_type hash_netnet_type __read_mostly = { + .name = "hash:net,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netnet_init(void) +{ + return ip_set_type_register(&hash_netnet_type); +} + +static void __exit +hash_netnet_fini(void) +{ + ip_set_type_unregister(&hash_netnet_type); +} + +module_init(hash_netnet_init); +module_exit(hash_netnet_fini); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09d6690..7097fb0 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -23,15 +23,16 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -45,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:net,port"); */ #define IP_SET_HASH_WITH_NETS_PACKED -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_netport4_elem { @@ -56,34 +57,6 @@ struct hash_netport4_elem { u8 nomatch:1; }; -struct hash_netport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -162,9 +135,9 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -186,8 +159,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 port, port_to, p = 0, ip = 0, ip_to, last; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; int ret; @@ -287,7 +260,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netport6_elem { union nf_inet_addr ip; @@ -297,34 +270,6 @@ struct hash_netport6_elem { u8 nomatch:1; }; -struct hash_netport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -407,9 +352,9 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -431,7 +376,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; @@ -518,8 +463,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -542,6 +487,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c new file mode 100644 index 0000000..363fab9 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -0,0 +1,588 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> +#include <net/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_getport.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); +IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,port,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netportnet +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variant */ + +/* Member elements */ +struct hash_netportnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, + const struct hash_netportnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, + struct hash_netportnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet4_data_list(struct sk_buff *skb, + const struct hash_netportnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet4_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet4_elem *d) +{ + next->ipcmp = d->ipcmp; + next->port = d->port; +} + +#define MTYPE hash_netportnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + bool with_ports = false; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr; + } + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; + if (adt == IPSET_TEST || + !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + if (unlikely(ip + UINT_MAX == ip_to)) + return -IPSET_ERR_HASH_RANGE; + } + + port_to = port = ntohs(e.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_from > ip2_to) + swap(ip2_from, ip2_to); + if (unlikely(ip2_from + UINT_MAX == ip2_to)) + return -IPSET_ERR_HASH_RANGE; + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) + : port; + for (; p <= port_to; p++) { + e.port = htons(p); + ip2 = (retried && ip == ntohl(h->next.ip[0]) && + p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + ip2_last = ip_set_range_to_cidr(ip2, ip2_to, + &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = ip2_last + 1; + } + } + ip = ip_last + 1; + } + return ret; +} + +/* IPv6 variant */ + +struct hash_netportnet6_elem { + union nf_inet_addr ip[2]; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, + const struct hash_netportnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, + struct hash_netportnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet6_data_list(struct sk_buff *skb, + const struct hash_netportnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet6_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet6_elem *d) +{ + next->port = d->port; +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netportnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; + + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 port, port_to; + bool with_ports = false; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK)) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(e.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + if (retried) + port = ntohs(h->next.port); + for (; port <= port_to; port++) { + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static struct ip_set_type hash_netportnet_type __read_mostly = { + .name = "hash:net,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | + IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_THREE, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netportnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netportnet_init(void) +{ + return ip_set_type_register(&hash_netportnet_type); +} + +static void __exit +hash_netportnet_fini(void) +{ + ip_set_type_unregister(&hash_netportnet_type); +} + +module_init(hash_netportnet_init); +module_exit(hash_netportnet_fini); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 979b8c9..ec6f6d1 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -15,12 +15,13 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_list.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counters support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements */ @@ -28,28 +29,6 @@ struct set_elem { ip_set_id_t id; }; -struct sett_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -struct setc_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -struct setct_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; - unsigned long timeout; -}; - struct set_adt_elem { ip_set_id_t id; ip_set_id_t refid; @@ -58,24 +37,14 @@ struct set_adt_elem { /* Type structure */ struct list_set { - size_t dsize; /* element size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ - u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ + struct net *net; /* namespace */ struct set_elem members[0]; /* the set members */ }; -static inline struct set_elem * -list_set_elem(const struct list_set *map, u32 id) -{ - return (struct set_elem *)((void *)map->members + id * map->dsize); -} - -#define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) -#define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) +#define list_set_elem(set, map, id) \ + (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -92,16 +61,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_test(e->id, skb, par, opt); if (ret > 0) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(e, map), + ip_set_update_counter(ext_counter(e, set), ext, &opt->ext, cmdflags); return ret; @@ -121,11 +90,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_add(e->id, skb, par, opt); if (ret == 0) @@ -145,11 +114,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_del(e->id, skb, par, opt); if (ret == 0) @@ -163,8 +132,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct list_set *map = set->data; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); switch (adt) { case IPSET_TEST: @@ -188,10 +156,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) if (i >= map->size) return 0; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); return !!(e->id == id && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map)))); + ip_set_timeout_expired(ext_timeout(e, set)))); } static int @@ -199,28 +167,36 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, const struct ip_set_ext *ext) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) + if (i == map->size - 1) { /* Last element replaced: e.g. add new,before,last */ - ip_set_put_byindex(e->id); - else { - struct set_elem *x = list_set_elem(map, map->size - 1); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); + } else { + struct set_elem *x = list_set_elem(set, map, + map->size - 1); /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) - ip_set_put_byindex(x->id); - memmove(list_set_elem(map, i + 1), e, - map->dsize * (map->size - (i + 1))); + if (x->id != IPSET_INVALID_ID) { + ip_set_put_byindex(map->net, x->id); + ip_set_ext_destroy(set, x); + } + memmove(list_set_elem(set, map, i + 1), e, + set->dsize * (map->size - (i + 1))); + /* Extensions must be initialized to zero */ + memset(e, 0, set->dsize); } } e->id = d->id; if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); return 0; } @@ -228,16 +204,17 @@ static int list_set_del(struct ip_set *set, u32 i) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); if (i < map->size - 1) - memmove(e, list_set_elem(map, i + 1), - map->dsize * (map->size - (i + 1))); + memmove(e, list_set_elem(set, map, i + 1), + set->dsize * (map->size - (i + 1))); /* Last element */ - e = list_set_elem(map, map->size - 1); + e = list_set_elem(set, map, map->size - 1); e->id = IPSET_INVALID_ID; return 0; } @@ -247,13 +224,16 @@ set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; struct set_elem *e; - u32 i; + u32 i = 0; - for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + while (i < map->size) { + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); + /* Check element moved to position i in next loop */ + else + i++; } } @@ -268,11 +248,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -299,14 +279,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 i, ret = 0; + if (SET_WITH_TIMEOUT(set)) + set_cleanup_entries(set); + /* Check already added element */ for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) - continue; else if (e->id != d->id) continue; @@ -319,18 +299,22 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ + ip_set_ext_destroy(set, e); + if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ - ip_set_put_byindex(d->id); + ip_set_put_byindex(map->net, d->id); return 0; } insert: ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) ret = d->before != 0 ? -IPSET_ERR_REF_EXIST : list_set_add(set, i, d, ext); @@ -355,12 +339,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -386,7 +370,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set *s; int ret = 0; @@ -403,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -423,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } if (tb[IPSET_ATTR_NAMEREF]) { - e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + e.refid = ip_set_get_byname(map->net, + nla_data(tb[IPSET_ATTR_NAMEREF]), &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; @@ -439,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], finish: if (e.refid != IPSET_INVALID_ID) - ip_set_put_byindex(e.refid); + ip_set_put_byindex(map->net, e.refid); if (adt != IPSET_ADD || ret) - ip_set_put_byindex(e.id); + ip_set_put_byindex(map->net, e.id); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -454,9 +439,10 @@ list_set_flush(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } } @@ -485,14 +471,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * map->dsize))) + htonl(sizeof(*map) + map->size * set->dsize))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -515,11 +498,11 @@ list_set_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->size; cb->args[2]++) { i = cb->args[2]; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -530,15 +513,9 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(e->id))) - goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, map))))) + ip_set_name_byindex(map->net, e->id))) goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, map))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -550,11 +527,11 @@ finish: nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); if (unlikely(i == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } @@ -565,7 +542,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) const struct list_set *y = b->data; return x->size == y->size && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -594,7 +571,7 @@ list_set_gc(unsigned long ul_set) set_cleanup_entries(set); write_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -606,43 +583,40 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } /* Create list:set type of sets */ -static struct list_set * -init_list_set(struct ip_set *set, u32 size, size_t dsize, - unsigned long timeout) +static bool +init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); if (!map) - return NULL; + return false; map->size = size; - map->dsize = dsize; - map->timeout = timeout; + map->net = net; set->data = map; for (i = 0; i < size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); e->id = IPSET_INVALID_ID; } - return map; + return true; } static int -list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { - struct list_set *map; - u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; - unsigned long timeout = 0; + u32 size = IP_SET_LIST_DEFAULT_SIZE; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || @@ -654,45 +628,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (tb[IPSET_ATTR_TIMEOUT]) - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &set_variant; - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct setct_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct setct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct setct_elem, counter); - list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, - sizeof(struct setc_elem), 0); - if (!map) - return -ENOMEM; - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct setc_elem, counter); - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct sett_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct sett_elem, timeout); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + if (!init_list_set(net, set, size)) + return -ENOMEM; + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, sizeof(struct set_elem), 0); - if (!map) - return -ENOMEM; } return 0; } @@ -703,8 +645,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, @@ -721,6 +663,7 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f69e83..74fd00c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; + struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); s->ustats.inpkts++; @@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(dest->svc->stats.cpustats); + rcu_read_lock(); + svc = rcu_dereference(dest->svc); + s = this_cpu_ptr(svc->stats.cpustats); s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); + rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.inpkts++; @@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; + struct ip_vs_service *svc; s = this_cpu_ptr(dest->stats.cpustats); s->ustats.outpkts++; @@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(dest->svc->stats.cpustats); + rcu_read_lock(); + svc = rcu_dereference(dest->svc); + s = this_cpu_ptr(svc->stats.cpustats); s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); + rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.outpkts++; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8148e4..a3df9bd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -460,7 +460,7 @@ static inline void __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) { atomic_inc(&svc->refcnt); - dest->svc = svc; + rcu_assign_pointer(dest->svc, svc); } static void ip_vs_service_free(struct ip_vs_service *svc) @@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc) kfree(svc); } -static void -__ip_vs_unbind_svc(struct ip_vs_dest *dest) +static void ip_vs_service_rcu_free(struct rcu_head *head) { - struct ip_vs_service *svc = dest->svc; + struct ip_vs_service *svc; + + svc = container_of(head, struct ip_vs_service, rcu_head); + ip_vs_service_free(svc); +} - dest->svc = NULL; +static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) +{ if (atomic_dec_and_test(&svc->refcnt)) { IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port)); - ip_vs_service_free(svc); + if (do_delay) + call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); + else + ip_vs_service_free(svc); } } @@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - /* We can not reuse dest while in grace period - * because conns still can use dest->svc - */ - if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) - continue; if (dest->af == svc->af && ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && @@ -697,8 +699,10 @@ out: static void ip_vs_dest_free(struct ip_vs_dest *dest) { + struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); + __ip_vs_dst_cache_reset(dest); - __ip_vs_unbind_svc(dest); + __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); kfree(dest); } @@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_service *old_svc; struct ip_vs_scheduler *sched; int conn_flags; @@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, atomic_set(&dest->conn_flags, conn_flags); /* bind the service */ - if (!dest->svc) { + old_svc = rcu_dereference_protected(dest->svc, 1); + if (!old_svc) { __ip_vs_bind_svc(dest, svc); } else { - if (dest->svc != svc) { - __ip_vs_unbind_svc(dest); + if (old_svc != svc) { ip_vs_zero_stats(&dest->stats); __ip_vs_bind_svc(dest, svc); + __ip_vs_svc_put(old_svc, true); } } @@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } -static void ip_vs_dest_wait_readers(struct rcu_head *head) -{ - struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, - rcu_head); - - /* End of grace period after unlinking */ - clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); -} - - /* * Delete a destination (must be already unlinked from the service) */ @@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, */ ip_vs_rs_unhash(dest); - if (!cleanup) { - set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); - call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); - } - spin_lock_bh(&ipvs->dest_trash_lock); IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, - jiffies + IP_VS_DEST_TRASH_PERIOD); + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); /* dest lives in trash without reference */ list_add(&dest->t_list, &ipvs->dest_trash); + dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); ip_vs_dest_put(dest); } @@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data) struct net *net = (struct net *) data; struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_dest *dest, *next; + unsigned long now = jiffies; spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - /* Skip if dest is in grace period */ - if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) - continue; if (atomic_read(&dest->refcnt) > 0) continue; + if (dest->idle_start) { + if (time_before(now, dest->idle_start + + IP_VS_DEST_TRASH_PERIOD)) + continue; + } else { + dest->idle_start = max(1UL, now); + continue; + } IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", dest->vfwmark, - IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), + IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); list_del(&dest->t_list); ip_vs_dest_free(dest); } if (!list_empty(&ipvs->dest_trash)) mod_timer(&ipvs->dest_trash_timer, - jiffies + IP_VS_DEST_TRASH_PERIOD); + jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); spin_unlock(&ipvs->dest_trash_lock); } @@ -1320,14 +1318,6 @@ out: return ret; } -static void ip_vs_service_rcu_free(struct rcu_head *head) -{ - struct ip_vs_service *svc; - - svc = container_of(head, struct ip_vs_service, rcu_head); - ip_vs_service_free(svc); -} - /* * Delete a service from the service list * - The service must be unlinked, unlocked and not referenced! @@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* * Free the service if nobody refers to it */ - if (atomic_dec_and_test(&svc->refcnt)) { - IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", - svc->fwmark, - IP_VS_DBG_ADDR(svc->af, &svc->addr), - ntohs(svc->port)); - call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); - } + __ip_vs_svc_put(svc, true); /* decrease the module use count */ ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 6bee6d0..1425e9a 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, struct ip_vs_cpu_stats __percpu *stats) { int i; + bool add = false; for_each_possible_cpu(i) { struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); unsigned int start; __u64 inbytes, outbytes; - if (i) { + if (add) { sum->conns += s->ustats.conns; sum->inpkts += s->ustats.inpkts; sum->outpkts += s->ustats.outpkts; @@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inbytes += inbytes; sum->outbytes += outbytes; } else { + add = true; sum->conns = s->ustats.conns; sum->inpkts = s->ustats.inpkts; sum->outpkts = s->ustats.outpkts; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1383b0e..eff13c9 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -93,7 +93,7 @@ struct ip_vs_lblc_entry { struct hlist_node list; int af; /* address family */ union nf_inet_addr addr; /* destination IP address */ - struct ip_vs_dest __rcu *dest; /* real server (cache) */ + struct ip_vs_dest *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ struct rcu_head rcu_head; }; @@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = { }; #endif -static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) +static void ip_vs_lblc_rcu_free(struct rcu_head *head) { - struct ip_vs_dest *dest; + struct ip_vs_lblc_entry *en = container_of(head, + struct ip_vs_lblc_entry, + rcu_head); - hlist_del_rcu(&en->list); - /* - * We don't kfree dest because it is referred either by its service - * or the trash dest list. - */ - dest = rcu_dereference_protected(en->dest, 1); - ip_vs_dest_put(dest); - kfree_rcu(en, rcu_head); + ip_vs_dest_put(en->dest); + kfree(en); } +static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en) +{ + hlist_del_rcu(&en->list); + call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free); +} /* * Returns hash value for IPVS LBLC entry @@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, struct ip_vs_lblc_entry *en; en = ip_vs_lblc_get(dest->af, tbl, daddr); - if (!en) { - en = kmalloc(sizeof(*en), GFP_ATOMIC); - if (!en) - return NULL; - - en->af = dest->af; - ip_vs_addr_copy(dest->af, &en->addr, daddr); - en->lastuse = jiffies; + if (en) { + if (en->dest == dest) + return en; + ip_vs_lblc_del(en); + } + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) + return NULL; - ip_vs_dest_hold(dest); - RCU_INIT_POINTER(en->dest, dest); + en->af = dest->af; + ip_vs_addr_copy(dest->af, &en->addr, daddr); + en->lastuse = jiffies; - ip_vs_lblc_hash(tbl, en); - } else { - struct ip_vs_dest *old_dest; + ip_vs_dest_hold(dest); + en->dest = dest; - old_dest = rcu_dereference_protected(en->dest, 1); - if (old_dest != dest) { - ip_vs_dest_put(old_dest); - ip_vs_dest_hold(dest); - /* No ordering constraints for refcnt */ - RCU_INIT_POINTER(en->dest, dest); - } - } + ip_vs_lblc_hash(tbl, en); return en; } @@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) tbl->dead = 1; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { - ip_vs_lblc_free(en); + ip_vs_lblc_del(en); atomic_dec(&tbl->entries); } } @@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) sysctl_lblc_expiration(svc))) continue; - ip_vs_lblc_free(en); + ip_vs_lblc_del(en); atomic_dec(&tbl->entries); } spin_unlock(&svc->sched_lock); @@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; - ip_vs_lblc_free(en); + ip_vs_lblc_del(en); atomic_dec(&tbl->entries); goal--; } @@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } @@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, * free up entries from the trash at any time. */ - dest = rcu_dereference(en->dest); + dest = en->dest; if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) goto out; @@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_pernet_subsys(&ip_vs_lblc_ops); - synchronize_rcu(); + rcu_barrier(); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 5199448..0b85500 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -89,7 +89,7 @@ */ struct ip_vs_dest_set_elem { struct list_head list; /* list link */ - struct ip_vs_dest __rcu *dest; /* destination server */ + struct ip_vs_dest *dest; /* destination server */ struct rcu_head rcu_head; }; @@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, if (check) { list_for_each_entry(e, &set->list, list) { - struct ip_vs_dest *d; - - d = rcu_dereference_protected(e->dest, 1); - if (d == dest) - /* already existed */ + if (e->dest == dest) return; } } @@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, return; ip_vs_dest_hold(dest); - RCU_INIT_POINTER(e->dest, dest); + e->dest = dest; list_add_rcu(&e->list, &set->list); atomic_inc(&set->size); @@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, set->lastmod = jiffies; } +static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) +{ + struct ip_vs_dest_set_elem *e; + + e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); + ip_vs_dest_put(e->dest); + kfree(e); +} + static void ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { struct ip_vs_dest_set_elem *e; list_for_each_entry(e, &set->list, list) { - struct ip_vs_dest *d; - - d = rcu_dereference_protected(e->dest, 1); - if (d == dest) { + if (e->dest == dest) { /* HIT */ atomic_dec(&set->size); set->lastmod = jiffies; - ip_vs_dest_put(dest); list_del_rcu(&e->list); - kfree_rcu(e, rcu_head); + call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); break; } } @@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) struct ip_vs_dest_set_elem *e, *ep; list_for_each_entry_safe(e, ep, &set->list, list) { - struct ip_vs_dest *d; - - d = rcu_dereference_protected(e->dest, 1); - /* - * We don't kfree dest because it is referred either - * by its service or by the trash dest list. - */ - ip_vs_dest_put(d); list_del_rcu(&e->list); - kfree_rcu(e, rcu_head); + call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); } } @@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) struct ip_vs_dest *dest, *least; int loh, doh; - if (set == NULL) - return NULL; - /* select the first destination server, whose weight > 0 */ list_for_each_entry_rcu(e, &set->list, list) { - least = rcu_dereference(e->dest); + least = e->dest; if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: list_for_each_entry_continue_rcu(e, &set->list, list) { - dest = rcu_dereference(e->dest); + dest = e->dest; if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); - if ((loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) + if (((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { least = dest; loh = doh; @@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* select the first destination server, whose weight > 0 */ list_for_each_entry(e, &set->list, list) { - most = rcu_dereference_protected(e->dest, 1); + most = e->dest; if (atomic_read(&most->weight) > 0) { moh = ip_vs_dest_conn_overhead(most); goto nextstage; @@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: list_for_each_entry_continue(e, &set->list, list) { - dest = rcu_dereference_protected(e->dest, 1); + dest = e->dest; doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ - if ((moh * atomic_read(&dest->weight) < - doh * atomic_read(&most->weight)) + if (((__s64)moh * atomic_read(&dest->weight) < + (__s64)doh * atomic_read(&most->weight)) && (atomic_read(&dest->weight) > 0)) { most = dest; moh = doh; @@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } @@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_pernet_subsys(&ip_vs_lblcr_ops); - synchronize_rcu(); + rcu_barrier(); } diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index d8d9860..961a6de 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -40,7 +40,7 @@ #include <net/ip_vs.h> -static inline unsigned int +static inline int ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) { /* @@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; - unsigned int loh = 0, doh; + int loh = 0, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); @@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } if (!least || - (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight))) { + ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight))) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index a5284cc..e446b9f 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -44,7 +44,7 @@ #include <net/ip_vs.h> -static inline unsigned int +static inline int ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) { /* @@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; - unsigned int loh, doh; + int loh, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); @@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_sed_dest_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6dc1fa1..b5b4650 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; - unsigned int loh, doh; + int loh, doh; IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); @@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; doh = ip_vs_dest_conn_overhead(dest); - if (loh * atomic_read(&dest->weight) > - doh * atomic_read(&least->weight)) { + if ((__s64)loh * atomic_read(&dest->weight) > + (__s64)doh * atomic_read(&least->weight)) { least = dest; loh = doh; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index e0c4373..466410e 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_hook); - -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, - s16 off) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); - -unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); - -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); - -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); - -unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); - -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook); +const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; - typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; - typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; + const struct nf_nat_sip_hooks *hooks; saddr = NULL; if (sip_direct_media) { @@ -966,22 +907,23 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, #endif skip_expect = 1; } while (!skip_expect); - rcu_read_unlock(); base_port = ntohs(tuple.dst.u.udp.port) & ~1; rtp_port = htons(base_port); rtcp_port = htons(base_port + 1); if (direct_rtp) { - nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); - if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && + !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } - if (skip_expect) + if (skip_expect) { + rcu_read_unlock(); return NF_ACCEPT; + } rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) @@ -995,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); - nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, - rtp_exp, rtcp_exp, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp) + ret = hooks->sdp_media(skb, protoff, dataoff, dptr, + datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -1012,6 +954,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, err2: nf_ct_expect_put(rtp_exp); err1: + rcu_read_unlock(); return ret; } @@ -1051,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int caddr_len, maddr_len; unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; + const struct nf_nat_sip_hooks *hooks; unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; - typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; - typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; - nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); + hooks = rcu_dereference(nf_nat_sip_hooks); /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1125,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, protoff, dataoff, + if (maddr_len && hooks && ct->status & IPS_NAT_MASK) { + ret = hooks->sdp_addr(skb, protoff, dataoff, dptr, datalen, mediaoff, - SDP_HDR_CONNECTION, SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, + SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) { nf_ct_helper_log(skb, ct, "cannot mangle SDP"); @@ -1139,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update session connection and owner addresses */ - nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, protoff, dataoff, - dptr, datalen, sdpoff, &rtp_addr); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, + &rtp_addr); return ret; } @@ -1242,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; + const struct nf_nat_sip_hooks *hooks; __be16 port; u8 proto; unsigned int expires = 0; int ret; - typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; /* Expected connections can not register again. */ if (ct->status & IPS_EXPECTED) @@ -1309,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; - nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, - exp, matchoff, matchlen); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); @@ -1515,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen) { - typeof(nf_nat_sip_hook) nf_nat_sip; + const struct nf_nat_sip_hooks *hooks; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) @@ -1524,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) { + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && !hooks->msg(skb, protoff, dataoff, + dptr, datalen)) { nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; } @@ -1546,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, s16 diff, tdiff = 0; int ret = NF_ACCEPT; bool term; - typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -1610,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, } if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); - if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, protoff, tdiff); + const struct nf_nat_sip_hooks *hooks; + + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks) + hooks->seq_adjust(skb, protoff, tdiff); } return ret; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f979040..b4d691d 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { - RCU_INIT_POINTER(nf_nat_sip_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } +static const struct nf_nat_sip_hooks sip_hooks = { + .msg = nf_nat_sip, + .seq_adjust = nf_nat_sip_seq_adjust, + .expect = nf_nat_sip_expect, + .sdp_addr = nf_nat_sdp_addr, + .sdp_port = nf_nat_sdp_port, + .sdp_session = nf_nat_sdp_session, + .sdp_media = nf_nat_sdp_media, +}; + static int __init nf_nat_sip_init(void) { - BUG_ON(nf_nat_sip_hook != NULL); - BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); - BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_addr_hook != NULL); - BUG_ON(nf_nat_sdp_port_hook != NULL); - BUG_ON(nf_nat_sdp_session_hook != NULL); - BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); + BUG_ON(nf_nat_sip_hooks != NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 6fd967c..cdf4567 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -24,7 +24,7 @@ int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); -void +bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts) { @@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, u8 buf[40], *ptr; ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); - BUG_ON(ptr == NULL); + if (ptr == NULL) + return false; opts->options = 0; while (length > 0) { @@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, switch (opcode) { case TCPOPT_EOL: - return; + return true; case TCPOPT_NOP: length--; continue; default: opsize = *ptr++; if (opsize < 2) - return; + return true; if (opsize > length) - return; + return true; switch (opcode) { case TCPOPT_MSS: @@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, length -= opsize; } } + return true; } EXPORT_SYMBOL_GPL(synproxy_parse_options); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 5058049..476accd 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -49,10 +49,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { }; static int -ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, - struct nf_conntrack_l4proto *l4proto, - struct net *net, - const struct nlattr *attr) +ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, + struct net *net, const struct nlattr *attr) { int ret = 0; @@ -64,8 +62,7 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (ret < 0) return ret; - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, - &timeout->data); + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); } return ret; } @@ -123,7 +120,8 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(matching, l4proto, net, + ret = ctnl_timeout_parse_policy(&matching->data, + l4proto, net, cda[CTA_TIMEOUT_DATA]); return ret; } @@ -138,7 +136,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(timeout, l4proto, net, + ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; @@ -342,6 +340,147 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } +static int +cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + unsigned int *timeouts; + int ret; + + if (!cda[CTA_TIMEOUT_L3PROTO] || + !cda[CTA_TIMEOUT_L4PROTO] || + !cda[CTA_TIMEOUT_DATA]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err; + } + + timeouts = l4proto->get_timeouts(net); + + ret = ctnl_timeout_parse_policy(timeouts, l4proto, net, + cda[CTA_TIMEOUT_DATA]); + if (ret < 0) + goto err; + + nf_ct_l4proto_put(l4proto); + return 0; +err: + nf_ct_l4proto_put(l4proto); + return ret; +} + +static int +cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, + u32 seq, u32 type, int event, + struct nf_conntrack_l4proto *l4proto) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) || + nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) + goto nla_put_failure; + + if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { + struct nlattr *nest_parms; + unsigned int *timeouts = l4proto->get_timeouts(net); + int ret; + + nest_parms = nla_nest_start(skb, + CTA_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); + if (ret < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + } + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + struct sk_buff *skb2; + int ret, err; + + if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + err = -EOPNOTSUPP; + goto err; + } + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + err = -ENOMEM; + goto err; + } + + ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + l4proto); + if (ret <= 0) { + kfree_skb(skb2); + err = -ENOMEM; + goto err; + } + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; +err: + nf_ct_l4proto_put(l4proto); + return err; +} + #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) { @@ -384,6 +523,12 @@ static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d92cc31..3c4b69e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -319,7 +319,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, + unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -328,13 +329,13 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(&init_net, pkt_size, + skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", @@ -702,8 +703,8 @@ nfulnl_log_packet(struct net *net, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, - size); + inst->skb = nfulnl_alloc_skb(net, inst->peer_portid, + inst->nlbufsiz, size); if (!inst->skb) goto alloc_failure; } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index ae2e5c1..21258cf 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -298,7 +298,7 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, } static struct sk_buff * -nfqnl_build_packet_message(struct nfqnl_instance *queue, +nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { @@ -372,7 +372,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) return NULL; @@ -525,7 +525,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, __be32 *packet_id_ptr; int failopen = 0; - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); + nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr); if (nskb == NULL) { err = -ENOMEM; goto err_out; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index cd24290..e762de5 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -43,10 +43,42 @@ optlen(const u_int8_t *opt, unsigned int offset) return opt[offset+1]; } +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + const struct nf_afinfo *ai; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; + + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; + } + rcu_read_lock(); + ai = nf_get_afinfo(family); + if (ai != NULL) + ai->route(net, (struct dst_entry **)&rt, &fl, false); + rcu_read_unlock(); + + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} + static int tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_action_param *par, - unsigned int in_mtu, + unsigned int family, unsigned int tcphoff, unsigned int minlen) { @@ -76,6 +108,9 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + struct net *net = dev_net(par->in ? par->in : par->out); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); @@ -165,37 +200,6 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, - unsigned int family) -{ - struct flowi fl; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - if (family == PF_INET) { - struct flowi4 *fl4 = &fl.u.ip4; - memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = ip_hdr(skb)->saddr; - } else { - struct flowi6 *fl6 = &fl.u.ip6; - - memset(fl6, 0, sizeof(*fl6)); - fl6->daddr = ipv6_hdr(skb)->saddr; - } - rcu_read_lock(); - ai = nf_get_afinfo(family); - if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); - } - return mtu; -} - static unsigned int tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) { @@ -204,7 +208,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) int ret; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET), + PF_INET, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -233,7 +237,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (tcphoff < 0) return NF_DROP; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET6), + PF_INET6, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 5d8a3a3..ef8a926 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, in->ifindex); if (sk) { int connected = (sk->sk_state == TCP_ESTABLISHED); - int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr); /* NOTE: we return listeners even if bound to * 0.0.0.0, those are filtered out in diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 31790e7..e7c4e0e 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v0 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -106,9 +106,104 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v0 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } +/* Revision 1 match */ + +static bool +set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, 0, UINT_MAX); + + if (opt.flags & IPSET_RETURN_NOMATCH) + opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; + + return match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); +} + +static int +set_match_v1_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + ip_set_nfnl_put(par->net, info->match_set.index); + return -ERANGE; + } + + return 0; +} + +static void +set_match_v1_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + + ip_set_nfnl_put(par->net, info->match_set.index); +} + +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v3 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry set_match_v1_checkentry +#define set_match_v3_destroy set_match_v1_destroy + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { @@ -133,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -142,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -156,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -175,57 +270,12 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v0 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } -/* Revision 1 match and target */ - -static bool -set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, 0, UINT_MAX); - - if (opt.flags & IPSET_RETURN_NOMATCH) - opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; - - return match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); -} - -static int -set_match_v1_checkentry(const struct xt_mtchk_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_id_t index; - - index = ip_set_nfnl_get_byindex(info->match_set.index); - - if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set indentified by id %u to match\n", - info->match_set.index); - return -ENOENT; - } - if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warning("Protocol error: set match dimension " - "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); - return -ERANGE; - } - - return 0; -} - -static void -set_match_v1_destroy(const struct xt_mtdtor_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - - ip_set_nfnl_put(info->match_set.index); -} +/* Revision 1 target */ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) @@ -251,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -260,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -274,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -289,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 2 target */ @@ -320,52 +370,6 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy -/* Revision 3 match */ - -static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) -{ - switch (info->op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == info->value; - case IPSET_COUNTER_NE: - return counter != info->value; - case IPSET_COUNTER_LT: - return counter < info->value; - case IPSET_COUNTER_GT: - return counter > info->value; - } - return false; -} - -static bool -set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v3 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, info->flags, UINT_MAX); - int ret; - - if (info->packets.op != IPSET_COUNTER_NONE || - info->bytes.op != IPSET_COUNTER_NONE) - opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; - - ret = match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); - - if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) - return ret; - - if (!match_counter(opt.ext.packets, &info->packets)) - return 0; - return match_counter(opt.ext.bytes, &info->bytes); -} - -#define set_match_v3_checkentry set_match_v1_checkentry -#define set_match_v3_destroy set_match_v1_destroy - static struct xt_match set_matches[] __read_mostly = { { .name = "set", diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 06df2b9..3dd0e37 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && sk->sk_state != TCP_TIME_WAIT && - ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); + ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 96a458e..dce1beb 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -817,7 +817,7 @@ int netlbl_req_setattr(struct request_sock *req, switch (req->rsk_ops->family) { case AF_INET: entry = netlbl_domhsh_getentry_af4(secattr->domain, - inet_rsk(req)->rmt_addr); + inet_rsk(req)->ir_rmt_addr); if (entry == NULL) { ret_val = -ENOENT; goto req_setattr_return; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index a481c03..56e22b7 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -173,7 +173,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - inet_get_local_port_range(&port_min, &port_max); + inet_get_local_port_range(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 189e3c5..272d8e9 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,14 +231,14 @@ override: } if (R_tab) { police->rate_present = true; - psched_ratecfg_precompute(&police->rate, &R_tab->rate); + psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0); qdisc_put_rtab(R_tab); } else { police->rate_present = false; } if (P_tab) { police->peak_present = true; - psched_ratecfg_precompute(&police->peak, &P_tab->rate); + psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0); qdisc_put_rtab(P_tab); } else { police->peak_present = false; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d76a35d..636d913 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -137,7 +137,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *est) { - int err = -EINVAL; + int err; struct tcf_exts e; struct tcf_ematch_tree t; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 867b4a3..16006c9 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -72,11 +72,11 @@ static void cgrp_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) { struct task_struct *p; - void *v; + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; cgroup_taskset_for_each(p, css, tset) { task_lock(p); - v = (void *)(unsigned long)task_cls_classid(p); iterate_fd(p->files, 0, update_classid, v); task_unlock(p); } diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 938b7cb..1ac41d3 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, { struct xt_set_info *set = data; ip_set_id_t index; + struct net *net = qdisc_dev(tp->q)->nd_net; if (data_len != sizeof(*set)) return -EINVAL; - index = ip_set_nfnl_get_byindex(set->index); + index = ip_set_nfnl_get_byindex(net, set->index); if (index == IPSET_INVALID_ID) return -ENOENT; @@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, if (em->data) return 0; - ip_set_nfnl_put(index); + ip_set_nfnl_put(net, index); return -ENOMEM; } @@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) { const struct xt_set_info *set = (const void *) em->data; if (set) { - ip_set_nfnl_put(set->index); + ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index); kfree((void *) em->data); } } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 7c3de6f..e5cef956 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -793,8 +793,10 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len, goto errout; meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (meta == NULL) + if (meta == NULL) { + err = -ENOMEM; goto errout; + } memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left)); memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right)); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2adda7f..cd81505 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -737,9 +737,11 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; + int drops; if (n == 0) return; + drops = max_t(int, n, 0); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) return; @@ -756,6 +758,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) cops->put(sch, cl); } sch->q.qlen -= n; + sch->qstats.drops += drops; } } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 32ad015..a9dfdda 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* remove one skb from head of flow queue */ -static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) +static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) { struct sk_buff *skb = flow->head; @@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) flow->head = skb->next; skb->next = NULL; flow->qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; } return skb; } @@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; + u32 rate; - skb = fq_dequeue_head(&q->internal); + skb = fq_dequeue_head(sch, &q->internal); if (skb) goto out; fq_check_throttled(q, now); @@ -449,7 +452,7 @@ begin: goto begin; } - skb = fq_dequeue_head(f); + skb = fq_dequeue_head(sch, f); if (!skb) { head->first = f->next; /* force a pass through old_flows to prevent starvation */ @@ -466,43 +469,70 @@ begin: f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); - if (f->credit <= 0 && - q->rate_enable && - skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { - u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; + if (f->credit > 0 || !q->rate_enable) + goto out; - rate = min(rate, q->flow_max_rate); - if (rate) { - u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC; + rate = q->flow_max_rate; + if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) + rate = min(skb->sk->sk_pacing_rate, rate); - do_div(len, rate); - /* Since socket rate can change later, - * clamp the delay to 125 ms. - * TODO: maybe segment the too big skb, as in commit - * e43ac79a4bc ("sch_tbf: segment too big GSO packets") - */ - if (unlikely(len > 125 * NSEC_PER_MSEC)) { - len = 125 * NSEC_PER_MSEC; - q->stat_pkts_too_long++; - } + if (rate != ~0U) { + u32 plen = max(qdisc_pkt_len(skb), q->quantum); + u64 len = (u64)plen * NSEC_PER_SEC; - f->time_next_packet = now + len; + if (likely(rate)) + do_div(len, rate); + /* Since socket rate can change later, + * clamp the delay to 125 ms. + * TODO: maybe segment the too big skb, as in commit + * e43ac79a4bc ("sch_tbf: segment too big GSO packets") + */ + if (unlikely(len > 125 * NSEC_PER_MSEC)) { + len = 125 * NSEC_PER_MSEC; + q->stat_pkts_too_long++; } + + f->time_next_packet = now + len; } out: - sch->qstats.backlog -= qdisc_pkt_len(skb); qdisc_bstats_update(sch, skb); - sch->q.qlen--; qdisc_unthrottled(sch); return skb; } static void fq_reset(struct Qdisc *sch) { + struct fq_sched_data *q = qdisc_priv(sch); + struct rb_root *root; struct sk_buff *skb; + struct rb_node *p; + struct fq_flow *f; + unsigned int idx; - while ((skb = fq_dequeue(sch)) != NULL) + while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) kfree_skb(skb); + + if (!q->fq_root) + return; + + for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { + root = &q->fq_root[idx]; + while ((p = rb_first(root)) != NULL) { + f = container_of(p, struct fq_flow, fq_node); + rb_erase(p, root); + + while ((skb = fq_dequeue_head(sch, f)) != NULL) + kfree_skb(skb); + + kmem_cache_free(fq_flow_cachep, f); + } + } + q->new_flows.first = NULL; + q->old_flows.first = NULL; + q->delayed = RB_ROOT; + q->flows = 0; + q->inactive_flows = 0; + q->throttled_flows = 0; } static void fq_rehash(struct fq_sched_data *q, @@ -622,7 +652,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); if (tb[TCA_FQ_INITIAL_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); + q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); if (tb[TCA_FQ_FLOW_DEFAULT_RATE]) q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]); @@ -645,6 +675,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); + if (!skb) + break; kfree_skb(skb); drop_count++; } @@ -657,21 +689,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) static void fq_destroy(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - struct rb_root *root; - struct rb_node *p; - unsigned int idx; - if (q->fq_root) { - for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { - root = &q->fq_root[idx]; - while ((p = rb_first(root)) != NULL) { - rb_erase(p, root); - kmem_cache_free(fq_flow_cachep, - container_of(p, struct fq_flow, fq_node)); - } - } - kfree(q->fq_root); - } + fq_reset(sch); + kfree(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } @@ -711,12 +731,14 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; + /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore, + * do not bother giving its value + */ if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) || nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) || nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) || nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) || nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) || - nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) || nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a74e278..7fc899a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head) struct net_device *dev; bool sync_needed = false; - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); if (dev_ingress_queue(dev)) @@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head) synchronize_net(); /* Wait for outstanding qdisc_run calls. */ - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, close_list) while (some_qdisc_is_busy(dev)) yield(); } @@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev) { LIST_HEAD(single); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); dev_deactivate_many(&single); list_del(&single); } @@ -910,11 +910,12 @@ void dev_shutdown(struct net_device *dev) } void psched_ratecfg_precompute(struct psched_ratecfg *r, - const struct tc_ratespec *conf) + const struct tc_ratespec *conf, + u64 rate64) { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bytes_ps = conf->rate; + r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 863846c..0e1e38b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -997,6 +997,8 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, + [TCA_HTB_RATE64] = { .type = NLA_U64 }, + [TCA_HTB_CEIL64] = { .type = NLA_U64 }, }; static void htb_work_func(struct work_struct *work) @@ -1114,6 +1116,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps)) + goto nla_put_failure; + if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps)) + goto nla_put_failure; nla_nest_end(skb, nest); spin_unlock_bh(root_lock); @@ -1332,6 +1340,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; + u64 rate64, ceil64; /* extract all subattrs from opt attr */ if (!opt) @@ -1491,8 +1500,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - psched_ratecfg_precompute(&cl->rate, &hopt->rate); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 1aaf1b6..b057122 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -341,9 +341,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - psched_ratecfg_precompute(&q->rate, &rtab->rate); + psched_ratecfg_precompute(&q->rate, &rtab->rate, 0); if (ptab) { - psched_ratecfg_precompute(&q->peak, &ptab->rate); + psched_ratecfg_precompute(&q->peak, &ptab->rate, 0); q->peak_present = true; } else { q->peak_present = false; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e7b2d4f..f6334aa 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -426,20 +426,20 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v6.sin6_family = AF_INET6; addr->v6.sin6_port = 0; - addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; + addr->v6.sin6_addr = sk->sk_v6_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { - inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0; - inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0; - inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); - inet6_sk(sk)->rcv_saddr.s6_addr32[3] = + sk->sk_v6_rcv_saddr.s6_addr32[0] = 0; + sk->sk_v6_rcv_saddr.s6_addr32[1] = 0; + sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff); + sk->sk_v6_rcv_saddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; + sk->sk_v6_rcv_saddr = addr->v6.sin6_addr; } } @@ -447,12 +447,12 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk) static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) { - inet6_sk(sk)->daddr.s6_addr32[0] = 0; - inet6_sk(sk)->daddr.s6_addr32[1] = 0; - inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); - inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; + sk->sk_v6_daddr.s6_addr32[0] = 0; + sk->sk_v6_daddr.s6_addr32[1] = 0; + sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff); + sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; } else { - inet6_sk(sk)->daddr = addr->v6.sin6_addr; + sk->sk_v6_daddr = addr->v6.sin6_addr; } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 911b71b..72046b9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5890,7 +5890,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) int low, high, remaining, index; unsigned int rover; - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(sock_net(sk), &low, &high); remaining = (high - low) + 1; rover = net_random() % remaining + low; diff --git a/net/socket.c b/net/socket.c index ebed4b6..c226ace 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1964,6 +1964,16 @@ struct used_address { unsigned int name_len; }; +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct msghdr __user *umsg) +{ + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + return -EINVAL; + return 0; +} + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) @@ -1982,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2191,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fcac5d1..0846566 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1075,6 +1075,15 @@ gss_destroy(struct rpc_auth *auth) kref_put(&gss_auth->kref, gss_free_callback); } +/* + * Auths may be shared between rpc clients that were cloned from a + * common client with the same xprt, if they also share the flavor and + * target_name. + * + * The auth is looked up from the oldest parent sharing the same + * cl_xprt, and the auth itself references only that common parent + * (which is guaranteed to last as long as any of its descendants). + */ static struct gss_auth * gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt, @@ -1088,6 +1097,8 @@ gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args, gss_auth, hash, hashval) { + if (gss_auth->client != clnt) + continue; if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor) continue; if (gss_auth->target_name != args->target_name) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9c9caaa..b6e59f0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -291,12 +291,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) &inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); break; +#if IS_ENABLED(CONFIG_IPV6) case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", proto_name, - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); break; +#endif default: len = snprintf(buf, remaining, "*unknown-%d*\n", sk->sk_family); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9bc6db04..e7000be 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head, /* Allow network administrator to have same access as root. */ if (ns_capable(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_uid())) { + uid_eq(root_uid, current_euid())) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } /* Allow netns root group to have the same access as the root group */ - if (gid_eq(root_gid, current_gid())) { + if (in_egroup_p(root_gid)) { int mode = (table->mode >> 3) & 7; return (mode << 3) | mode; } diff --git a/net/unix/diag.c b/net/unix/diag.c index d591091..86fa0f3 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; + rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); diff --git a/net/wireless/core.c b/net/wireless/core.c index 6715396..fe8d4f2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -566,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - + rtnl_lock(); res = device_add(&rdev->wiphy.dev); - if (res) - return res; - - res = rfkill_register(rdev->rfkill); if (res) { - device_del(&rdev->wiphy.dev); + rtnl_unlock(); return res; } - rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -606,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy) rdev->wiphy.registered = true; rtnl_unlock(); + + res = rfkill_register(rdev->rfkill); + if (res) { + rfkill_destroy(rdev->rfkill); + rdev->rfkill = NULL; + wiphy_unregister(&rdev->wiphy); + return res; + } + return 0; } EXPORT_SYMBOL(wiphy_register); @@ -640,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy) rtnl_unlock(); __count == 0; })); - rfkill_unregister(rdev->rfkill); + if (rdev->rfkill) + rfkill_unregister(rdev->rfkill); rtnl_lock(); rdev->wiphy.registered = false; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 39bff7d..403fe29 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (chan->flags & IEEE80211_CHAN_DISABLED) continue; wdev->wext.ibss.chandef.chan = chan; + wdev->wext.ibss.chandef.center_freq1 = + chan->center_freq; break; } @@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + wdev->wext.ibss.chandef.center_freq1 = freq; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2838206..cbbef88 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2421,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } - if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + if (flags && (*flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; @@ -2483,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + if (!err && (flags & MONITOR_FLAG_ACTIVE) && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b9c3f9e..d6e7f98 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -468,7 +468,7 @@ expired: } err = __xfrm_state_delete(x); - if (!err && x->id.spi) + if (!err) km_state_expired(x, 1, 0); xfrm_audit_state_delete(x, err ? 0 : 1, |