summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/mrp.c27
-rw-r--r--net/8021q/vlan.c18
-rw-r--r--net/8021q/vlan_netlink.c2
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bat_iv_ogm.c111
-rw-r--r--net/batman-adv/debugfs.c9
-rw-r--r--net/batman-adv/distributed-arp-table.c64
-rw-r--r--net/batman-adv/distributed-arp-table.h5
-rw-r--r--net/batman-adv/gateway_client.c187
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c230
-rw-r--r--net/batman-adv/gateway_common.h14
-rw-r--r--net/batman-adv/hard-interface.c9
-rw-r--r--net/batman-adv/main.c628
-rw-r--r--net/batman-adv/main.h37
-rw-r--r--net/batman-adv/network-coding.c91
-rw-r--r--net/batman-adv/network-coding.h19
-rw-r--r--net/batman-adv/originator.c4
-rw-r--r--net/batman-adv/packet.h248
-rw-r--r--net/batman-adv/routing.c247
-rw-r--r--net/batman-adv/routing.h6
-rw-r--r--net/batman-adv/send.c1
-rw-r--r--net/batman-adv/soft-interface.c4
-rw-r--r--net/batman-adv/sysfs.c96
-rw-r--r--net/batman-adv/translation-table.c905
-rw-r--r--net/batman-adv/translation-table.h13
-rw-r--r--net/batman-adv/types.h178
-rw-r--r--net/batman-adv/vis.c938
-rw-r--r--net/batman-adv/vis.h36
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/netfilter/ebt_among.c2
-rw-r--r--net/compat.c2
-rw-r--r--net/core/dev.c552
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/flow_dissector.c39
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/netprio_cgroup.c3
-rw-r--r--net/core/rtnetlink.c3
-rw-r--r--net/core/secure_seq.c27
-rw-r--r--net/core/skbuff.c43
-rw-r--r--net/core/sock.c13
-rw-r--r--net/dccp/ipv4.c18
-rw-r--r--net/dccp/ipv6.c83
-rw-r--r--net/dccp/ipv6.h2
-rw-r--r--net/dccp/minisocks.c15
-rw-r--r--net/dccp/output.c4
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/ethernet/eth.c30
-rw-r--r--net/ieee802154/6lowpan.c5
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/fib_trie.c15
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_connection_sock.c54
-rw-r--r--net/ipv4/inet_diag.c111
-rw-r--r--net/ipv4/inet_hashtables.c85
-rw-r--r--net/ipv4/inet_timewait_sock.c59
-rw-r--r--net/ipv4/ip_output.c13
-rw-r--r--net/ipv4/ip_sockglue.c25
-rw-r--r--net/ipv4/ip_tunnel.c22
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ip_vti.c67
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c10
-rw-r--r--net/ipv4/ping.c29
-rw-r--r--net/ipv4/raw.c10
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/syncookies.c65
-rw-r--r--net/ipv4/sysctl_net_ipv4.c52
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_input.c136
-rw-r--r--net/ipv4/tcp_ipv4.c123
-rw-r--r--net/ipv4/tcp_metrics.c22
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c37
-rw-r--r--net/ipv4/tcp_probe.c29
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/udp.c217
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c16
-rw-r--r--net/ipv6/addrconf.c79
-rw-r--r--net/ipv6/af_inet6.c58
-rw-r--r--net/ipv6/datagram.c25
-rw-r--r--net/ipv6/inet6_connection_sock.c33
-rw-r--r--net/ipv6/inet6_hashtables.c89
-rw-r--r--net/ipv6/ip6_fib.c205
-rw-r--r--net/ipv6/ip6_gre.c7
-rw-r--r--net/ipv6/ip6_output.c53
-rw-r--r--net/ipv6/ip6_tunnel.c15
-rw-r--r--net/ipv6/ipv6_sockglue.c7
-rw-r--r--net/ipv6/mcast.c6
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c4
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c21
-rw-r--r--net/ipv6/route.c49
-rw-r--r--net/ipv6/sit.c86
-rw-r--r--net/ipv6/syncookies.c63
-rw-r--r--net/ipv6/tcp_ipv6.c112
-rw-r--r--net/ipv6/udp.c61
-rw-r--r--net/l2tp/l2tp_core.c40
-rw-r--r--net/l2tp/l2tp_core.h3
-rw-r--r--net/l2tp/l2tp_debugfs.c5
-rw-r--r--net/l2tp/l2tp_ip6.c16
-rw-r--r--net/l2tp/l2tp_netlink.c4
-rw-r--r--net/l2tp/l2tp_ppp.c12
-rw-r--r--net/lapb/lapb_timer.c1
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mac80211/trace.h4
-rw-r--r--net/mac80211/util.c5
-rw-r--r--net/netfilter/ipset/Kconfig20
-rw-r--r--net/netfilter/ipset/Makefile2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h163
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c125
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c156
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c112
-rw-r--r--net/netfilter/ipset/ip_set_core.c361
-rw-r--r--net/netfilter/ipset/ip_set_getport.c18
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h526
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c58
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c80
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c86
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c108
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c85
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c98
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c483
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c92
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c588
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c263
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c62
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c6
-rw-r--r--net/netfilter/nf_conntrack_sip.c133
-rw-r--r--net/netfilter/nf_nat_sip.c35
-rw-r--r--net/netfilter/nf_synproxy_core.c12
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c161
-rw-r--r--net/netfilter/nfnetlink_log.c11
-rw-r--r--net/netfilter/nfnetlink_queue_core.c6
-rw-r--r--net/netfilter/xt_TCPMSS.c72
-rw-r--r--net/netfilter/xt_TPROXY.c2
-rw-r--r--net/netfilter/xt_set.c222
-rw-r--r--net/netfilter/xt_socket.c2
-rw-r--r--net/netlabel/netlabel_kapi.c2
-rw-r--r--net/openvswitch/vport-vxlan.c2
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/cls_basic.c2
-rw-r--r--net/sched/cls_cgroup.c4
-rw-r--r--net/sched/em_ipset.c7
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_fq.c102
-rw-r--r--net/sched/sch_generic.c11
-rw-r--r--net/sched/sch_htb.c17
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/sctp/ipv6.c22
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/socket.c24
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c11
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sysctl_net.c4
-rw-r--r--net/unix/diag.c1
-rw-r--r--net/wireless/core.c21
-rw-r--r--net/wireless/ibss.c3
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/xfrm/xfrm_state.c2
168 files changed, 6558 insertions, 5337 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d8..3ed6162 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
static unsigned int mrp_join_time __read_mostly = 200;
module_param(mrp_join_time, uint, 0644);
MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
MODULE_LICENSE("GPL");
static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
mrp_join_timer_arm(app);
}
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+ mod_timer(&app->periodic_timer,
+ jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+ struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+ spin_lock(&app->lock);
+ mrp_mad_event(app, MRP_EVENT_PERIODIC);
+ mrp_pdu_queue(app);
+ spin_unlock(&app->lock);
+
+ mrp_periodic_timer_arm(app);
+}
+
static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
{
__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
mrp_join_timer_arm(app);
+ setup_timer(&app->periodic_timer, mrp_periodic_timer,
+ (unsigned long)app);
+ mrp_periodic_timer_arm(app);
return 0;
err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
* all pending messages before the applicant is gone.
*/
del_timer_sync(&app->join_timer);
+ del_timer_sync(&app->periodic_timer);
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 61fc573..b3d17d1 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -98,14 +98,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
vlan_gvrp_request_leave(dev);
vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
+
+ netdev_upper_dev_unlink(real_dev, dev);
/* Because unregister_netdevice_queue() makes sure at least one rcu
* grace period is respected before device freeing,
* we dont need to call synchronize_net() here.
*/
unregister_netdevice_queue(dev, head);
- netdev_upper_dev_unlink(real_dev, dev);
-
if (grp->nr_vlan_devs == 0) {
vlan_mvrp_uninit_applicant(real_dev);
vlan_gvrp_uninit_applicant(real_dev);
@@ -169,13 +169,13 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
- err = netdev_upper_dev_link(real_dev, dev);
- if (err)
- goto out_uninit_mvrp;
-
err = register_netdevice(dev);
if (err < 0)
- goto out_upper_dev_unlink;
+ goto out_uninit_mvrp;
+
+ err = netdev_upper_dev_link(real_dev, dev);
+ if (err)
+ goto out_unregister_netdev;
/* Account for reference in struct vlan_dev_priv */
dev_hold(real_dev);
@@ -191,8 +191,8 @@ int register_vlan_dev(struct net_device *dev)
return 0;
-out_upper_dev_unlink:
- netdev_upper_dev_unlink(real_dev, dev);
+out_unregister_netdev:
+ unregister_netdevice(dev);
out_uninit_mvrp:
if (grp->nr_vlan_devs == 0)
vlan_mvrp_uninit_applicant(real_dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 3091297..c7e634a 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev)
return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */
nla_total_size(2) + /* IFLA_VLAN_ID */
- sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
+ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */
vlan_qos_map_size(vlan->nr_ingress_mappings) +
vlan_qos_map_size(vlan->nr_egress_mappings);
}
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 489bb36..8ddbfe6 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -38,4 +38,3 @@ batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
batman-adv-y += translation-table.o
batman-adv-y += unicast.o
-batman-adv-y += vis.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 0a8a80c..97b42d3 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -135,9 +135,8 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION;
batadv_ogm_packet->header.ttl = 2;
batadv_ogm_packet->flags = BATADV_NO_FLAGS;
+ batadv_ogm_packet->reserved = 0;
batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
- batadv_ogm_packet->tt_num_changes = 0;
- batadv_ogm_packet->ttvn = 0;
res = 0;
@@ -207,12 +206,12 @@ static uint8_t batadv_hop_penalty(uint8_t tq,
/* is there another aggregated packet here? */
static int batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
- int tt_num_changes)
+ __be16 tvlv_len)
{
int next_buff_pos = 0;
next_buff_pos += buff_pos + BATADV_OGM_HLEN;
- next_buff_pos += batadv_tt_len(tt_num_changes);
+ next_buff_pos += ntohs(tvlv_len);
return (next_buff_pos <= packet_len) &&
(next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
@@ -240,7 +239,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
/* adjust all flags and log packets */
while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
- batadv_ogm_packet->tt_num_changes)) {
+ batadv_ogm_packet->tvlv_len)) {
/* we might have aggregated direct link packets with an
* ordinary base packet
*/
@@ -256,18 +255,18 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
fwd_str = "Sending own";
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s, ttvn %d) on interface %s [%pM]\n",
+ "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s) on interface %s [%pM]\n",
fwd_str, (packet_num > 0 ? "aggregated " : ""),
batadv_ogm_packet->orig,
ntohl(batadv_ogm_packet->seqno),
batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl,
(batadv_ogm_packet->flags & BATADV_DIRECTLINK ?
"on" : "off"),
- batadv_ogm_packet->ttvn, hard_iface->net_dev->name,
+ hard_iface->net_dev->name,
hard_iface->net_dev->dev_addr);
buff_pos += BATADV_OGM_HLEN;
- buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes);
+ buff_pos += ntohs(batadv_ogm_packet->tvlv_len);
packet_num++;
packet_pos = forw_packet->skb->data + buff_pos;
batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
@@ -601,7 +600,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_incoming)
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- uint8_t tt_num_changes;
+ uint16_t tvlv_len;
if (batadv_ogm_packet->header.ttl <= 1) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
@@ -621,7 +620,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
return;
}
- tt_num_changes = batadv_ogm_packet->tt_num_changes;
+ tvlv_len = ntohs(batadv_ogm_packet->tvlv_len);
batadv_ogm_packet->header.ttl--;
memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
@@ -642,7 +641,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node,
batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK;
batadv_iv_ogm_queue_add(bat_priv, (unsigned char *)batadv_ogm_packet,
- BATADV_OGM_HLEN + batadv_tt_len(tt_num_changes),
+ BATADV_OGM_HLEN + tvlv_len,
if_incoming, 0, batadv_iv_ogm_fwd_send_time());
}
@@ -688,43 +687,29 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *primary_if;
int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len;
- int vis_server, tt_num_changes = 0;
uint32_t seqno;
- uint8_t bandwidth;
+ uint16_t tvlv_len = 0;
- vis_server = atomic_read(&bat_priv->vis_mode);
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (hard_iface == primary_if)
- tt_num_changes = batadv_tt_append_diff(bat_priv, ogm_buff,
- ogm_buff_len,
- BATADV_OGM_HLEN);
+ if (hard_iface == primary_if) {
+ /* tt changes have to be committed before the tvlv data is
+ * appended as it may alter the tt tvlv container
+ */
+ batadv_tt_local_commit_changes(bat_priv);
+ tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff,
+ ogm_buff_len,
+ BATADV_OGM_HLEN);
+ }
batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff);
+ batadv_ogm_packet->tvlv_len = htons(tvlv_len);
/* change sequence number to network order */
seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno);
batadv_ogm_packet->seqno = htonl(seqno);
atomic_inc(&hard_iface->bat_iv.ogm_seqno);
- batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn);
- batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc);
- if (tt_num_changes >= 0)
- batadv_ogm_packet->tt_num_changes = tt_num_changes;
-
- if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC)
- batadv_ogm_packet->flags |= BATADV_VIS_SERVER;
- else
- batadv_ogm_packet->flags &= ~BATADV_VIS_SERVER;
-
- if (hard_iface == primary_if &&
- atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER) {
- bandwidth = (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
- batadv_ogm_packet->gw_flags = bandwidth;
- } else {
- batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS;
- }
-
batadv_iv_ogm_slide_own_bcast_window(hard_iface);
batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff,
hard_iface->bat_iv.ogm_buff_len, hard_iface, 1,
@@ -798,7 +783,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
rcu_read_unlock();
- orig_node->flags = batadv_ogm_packet->flags;
neigh_node->last_seen = jiffies;
spin_lock_bh(&neigh_node->lq_update_lock);
@@ -820,11 +804,11 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
*/
router = batadv_orig_node_get_router(orig_node);
if (router == neigh_node)
- goto update_tt;
+ goto out;
/* if this neighbor does not offer a better TQ we won't consider it */
if (router && (router->tq_avg > neigh_node->tq_avg))
- goto update_tt;
+ goto out;
/* if the TQ is the same and the link not more symmetric we
* won't consider it either
@@ -843,35 +827,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
if (sum_orig >= sum_neigh)
- goto update_tt;
+ goto out;
}
batadv_update_route(bat_priv, orig_node, neigh_node);
-
-update_tt:
- /* I have to check for transtable changes only if the OGM has been
- * sent through a primary interface
- */
- if (((batadv_ogm_packet->orig != ethhdr->h_source) &&
- (batadv_ogm_packet->header.ttl > 2)) ||
- (batadv_ogm_packet->flags & BATADV_PRIMARIES_FIRST_HOP))
- batadv_tt_update_orig(bat_priv, orig_node, tt_buff,
- batadv_ogm_packet->tt_num_changes,
- batadv_ogm_packet->ttvn,
- ntohs(batadv_ogm_packet->tt_crc));
-
- if (orig_node->gw_flags != batadv_ogm_packet->gw_flags)
- batadv_gw_node_update(bat_priv, orig_node,
- batadv_ogm_packet->gw_flags);
-
- orig_node->gw_flags = batadv_ogm_packet->gw_flags;
-
- /* restart gateway selection if fast or late switching was enabled */
- if ((orig_node->gw_flags) &&
- (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) &&
- (atomic_read(&bat_priv->gw_sel_class) > 2))
- batadv_gw_check_election(bat_priv, orig_node);
-
goto out;
unlock:
@@ -1122,13 +1081,11 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
is_single_hop_neigh = true;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %#.4x, changes %u, tq %d, TTL %d, V %d, IDF %d)\n",
+ "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n",
ethhdr->h_source, if_incoming->net_dev->name,
if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig,
batadv_ogm_packet->prev_sender,
- ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->ttvn,
- ntohs(batadv_ogm_packet->tt_crc),
- batadv_ogm_packet->tt_num_changes, batadv_ogm_packet->tq,
+ ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq,
batadv_ogm_packet->header.ttl,
batadv_ogm_packet->header.version, has_directlink_flag);
@@ -1254,6 +1211,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
goto out;
}
+ batadv_tvlv_ogm_receive(bat_priv, batadv_ogm_packet, orig_node);
+
/* if sender is a direct neighbor the sender mac equals
* originator mac
*/
@@ -1350,9 +1309,9 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
struct batadv_ogm_packet *batadv_ogm_packet;
struct ethhdr *ethhdr;
int buff_pos = 0, packet_len;
- unsigned char *tt_buff, *packet_buff;
- bool ret;
+ unsigned char *tvlv_buff, *packet_buff;
uint8_t *packet_pos;
+ bool ret;
ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
if (!ret)
@@ -1375,14 +1334,14 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
/* unpack the aggregated packets and process them one by one */
while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len,
- batadv_ogm_packet->tt_num_changes)) {
- tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN;
+ batadv_ogm_packet->tvlv_len)) {
+ tvlv_buff = packet_buff + buff_pos + BATADV_OGM_HLEN;
- batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff,
- if_incoming);
+ batadv_iv_ogm_process(ethhdr, batadv_ogm_packet,
+ tvlv_buff, if_incoming);
buff_pos += BATADV_OGM_HLEN;
- buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes);
+ buff_pos += ntohs(batadv_ogm_packet->tvlv_len);
packet_pos = packet_buff + buff_pos;
batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index f186a55..049a7a2 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -28,7 +28,6 @@
#include "gateway_common.h"
#include "gateway_client.h"
#include "soft-interface.h"
-#include "vis.h"
#include "icmp_socket.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
@@ -300,12 +299,6 @@ static int batadv_transtable_local_open(struct inode *inode, struct file *file)
return single_open(file, batadv_tt_local_seq_print_text, net_dev);
}
-static int batadv_vis_data_open(struct inode *inode, struct file *file)
-{
- struct net_device *net_dev = (struct net_device *)inode->i_private;
- return single_open(file, batadv_vis_seq_print_text, net_dev);
-}
-
struct batadv_debuginfo {
struct attribute attr;
const struct file_operations fops;
@@ -356,7 +349,6 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
#endif
static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
batadv_transtable_local_open);
-static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
#ifdef CONFIG_BATMAN_ADV_NC
static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
#endif
@@ -373,7 +365,6 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
&batadv_debuginfo_dat_cache,
#endif
&batadv_debuginfo_transtable_local,
- &batadv_debuginfo_vis_data,
#ifdef CONFIG_BATMAN_ADV_NC
&batadv_debuginfo_nc_nodes,
#endif
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 06345d4..f07ec32 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -419,6 +419,10 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
bool ret = false;
int j;
+ /* check if orig node candidate is running DAT */
+ if (!(candidate->capabilities & BATADV_ORIG_CAPA_HAS_DAT))
+ goto out;
+
/* Check if this node has already been selected... */
for (j = 0; j < select; j++)
if (res[j].orig_node == candidate)
@@ -626,6 +630,59 @@ out:
}
/**
+ * batadv_dat_tvlv_container_update - update the dat tvlv container after dat
+ * setting change
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_dat_tvlv_container_update(struct batadv_priv *bat_priv)
+{
+ char dat_mode;
+
+ dat_mode = atomic_read(&bat_priv->distributed_arp_table);
+
+ switch (dat_mode) {
+ case 0:
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_DAT, 1);
+ break;
+ case 1:
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_DAT, 1,
+ NULL, 0);
+ break;
+ }
+}
+
+/**
+ * batadv_dat_status_update - update the dat tvlv container after dat
+ * setting change
+ * @net_dev: the soft interface net device
+ */
+void batadv_dat_status_update(struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ batadv_dat_tvlv_container_update(bat_priv);
+}
+
+/**
+ * batadv_gw_tvlv_ogm_handler_v1 - process incoming dat tvlv container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node of the ogm
+ * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
+ * @tvlv_value: tvlv buffer containing the gateway data
+ * @tvlv_value_len: tvlv buffer length
+ */
+static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value,
+ uint16_t tvlv_value_len)
+{
+ if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
+ orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_DAT;
+ else
+ orig->capabilities |= BATADV_ORIG_CAPA_HAS_DAT;
+}
+
+/**
* batadv_dat_hash_free - free the local DAT hash table
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -657,6 +714,10 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
batadv_dat_start_timer(bat_priv);
+ batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
+ NULL, BATADV_TVLV_DAT, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+ batadv_dat_tvlv_container_update(bat_priv);
return 0;
}
@@ -666,6 +727,9 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
*/
void batadv_dat_free(struct batadv_priv *bat_priv)
{
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_DAT, 1);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_DAT, 1);
+
cancel_delayed_work_sync(&bat_priv->dat.work);
batadv_dat_hash_free(bat_priv);
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 125c8c6..60d853b 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -29,6 +29,7 @@
#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0)
+void batadv_dat_status_update(struct net_device *net_dev);
bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb);
bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
@@ -98,6 +99,10 @@ static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
#else
+static inline void batadv_dat_status_update(struct net_device *net_dev)
+{
+}
+
static inline bool
batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
struct sk_buff *skb)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 1ce4b87..1bce63aa 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -118,7 +118,6 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
uint32_t gw_divisor;
uint8_t max_tq = 0;
- int down, up;
uint8_t tq_avg;
struct batadv_orig_node *orig_node;
@@ -142,10 +141,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
switch (atomic_read(&bat_priv->gw_sel_class)) {
case 1: /* fast connection */
- batadv_gw_bandwidth_to_kbit(orig_node->gw_flags,
- &down, &up);
-
- tmp_gw_factor = tq_avg * tq_avg * down * 100 * 100;
+ tmp_gw_factor = tq_avg * tq_avg;
+ tmp_gw_factor *= gw_node->bandwidth_down;
+ tmp_gw_factor *= 100 * 100;
tmp_gw_factor /= gw_divisor;
if ((tmp_gw_factor > max_gw_factor) ||
@@ -258,16 +256,22 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
NULL);
} else if ((!curr_gw) && (next_gw)) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Adding route to gateway %pM (gw_flags: %i, tq: %i)\n",
+ "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
next_gw->orig_node->orig,
- next_gw->orig_node->gw_flags, router->tq_avg);
+ next_gw->bandwidth_down / 10,
+ next_gw->bandwidth_down % 10,
+ next_gw->bandwidth_up / 10,
+ next_gw->bandwidth_up % 10, router->tq_avg);
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_ADD,
gw_addr);
} else {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Changing route to gateway %pM (gw_flags: %i, tq: %i)\n",
+ "Changing route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
next_gw->orig_node->orig,
- next_gw->orig_node->gw_flags, router->tq_avg);
+ next_gw->bandwidth_down / 10,
+ next_gw->bandwidth_down % 10,
+ next_gw->bandwidth_up / 10,
+ next_gw->bandwidth_up % 10, router->tq_avg);
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_CHANGE,
gw_addr);
}
@@ -337,12 +341,20 @@ out:
return;
}
+/**
+ * batadv_gw_node_add - add gateway node to list of available gateways
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: originator announcing gateway capabilities
+ * @gateway: announced bandwidth information
+ */
static void batadv_gw_node_add(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- uint8_t new_gwflags)
+ struct batadv_tvlv_gateway_data *gateway)
{
struct batadv_gw_node *gw_node;
- int down, up;
+
+ if (gateway->bandwidth_down == 0)
+ return;
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
if (!gw_node)
@@ -356,73 +368,116 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
spin_unlock_bh(&bat_priv->gw.list_lock);
- batadv_gw_bandwidth_to_kbit(new_gwflags, &down, &up);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Found new gateway %pM -> gw_class: %i - %i%s/%i%s\n",
- orig_node->orig, new_gwflags,
- (down > 2048 ? down / 1024 : down),
- (down > 2048 ? "MBit" : "KBit"),
- (up > 2048 ? up / 1024 : up),
- (up > 2048 ? "MBit" : "KBit"));
+ "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n",
+ orig_node->orig,
+ ntohl(gateway->bandwidth_down) / 10,
+ ntohl(gateway->bandwidth_down) % 10,
+ ntohl(gateway->bandwidth_up) / 10,
+ ntohl(gateway->bandwidth_up) % 10);
}
-void batadv_gw_node_update(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- uint8_t new_gwflags)
+/**
+ * batadv_gw_node_get - retrieve gateway node from list of available gateways
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: originator announcing gateway capabilities
+ *
+ * Returns gateway node if found or NULL otherwise.
+ */
+static struct batadv_gw_node *
+batadv_gw_node_get(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node)
{
- struct batadv_gw_node *gw_node, *curr_gw;
-
- /* Note: We don't need a NULL check here, since curr_gw never gets
- * dereferenced. If curr_gw is NULL we also should not exit as we may
- * have this gateway in our list (duplication check!) even though we
- * have no currently selected gateway.
- */
- curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
- if (gw_node->orig_node != orig_node)
+ hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.list, list) {
+ if (gw_node_tmp->orig_node != orig_node)
continue;
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Gateway class of originator %pM changed from %i to %i\n",
- orig_node->orig, gw_node->orig_node->gw_flags,
- new_gwflags);
+ if (gw_node_tmp->deleted)
+ continue;
- gw_node->deleted = 0;
+ if (!atomic_inc_not_zero(&gw_node_tmp->refcount))
+ continue;
- if (new_gwflags == BATADV_NO_FLAGS) {
- gw_node->deleted = jiffies;
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Gateway %pM removed from gateway list\n",
- orig_node->orig);
+ gw_node = gw_node_tmp;
+ break;
+ }
+ rcu_read_unlock();
- if (gw_node == curr_gw)
- goto deselect;
- }
+ return gw_node;
+}
- goto unlock;
+/**
+ * batadv_gw_node_update - update list of available gateways with changed
+ * bandwidth information
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: originator announcing gateway capabilities
+ * @gateway: announced bandwidth information
+ */
+void batadv_gw_node_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ struct batadv_tvlv_gateway_data *gateway)
+{
+ struct batadv_gw_node *gw_node, *curr_gw = NULL;
+
+ gw_node = batadv_gw_node_get(bat_priv, orig_node);
+ if (!gw_node) {
+ batadv_gw_node_add(bat_priv, orig_node, gateway);
+ goto out;
}
- if (new_gwflags == BATADV_NO_FLAGS)
- goto unlock;
+ if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
+ (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+ goto out;
- batadv_gw_node_add(bat_priv, orig_node, new_gwflags);
- goto unlock;
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Gateway bandwidth of originator %pM changed from %u.%u/%u.%u MBit to %u.%u/%u.%u MBit\n",
+ orig_node->orig,
+ gw_node->bandwidth_down / 10,
+ gw_node->bandwidth_down % 10,
+ gw_node->bandwidth_up / 10,
+ gw_node->bandwidth_up % 10,
+ ntohl(gateway->bandwidth_down) / 10,
+ ntohl(gateway->bandwidth_down) % 10,
+ ntohl(gateway->bandwidth_up) / 10,
+ ntohl(gateway->bandwidth_up) % 10);
+
+ gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
+ gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
+
+ gw_node->deleted = 0;
+ if (ntohl(gateway->bandwidth_down) == 0) {
+ gw_node->deleted = jiffies;
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Gateway %pM removed from gateway list\n",
+ orig_node->orig);
-deselect:
- batadv_gw_deselect(bat_priv);
-unlock:
- rcu_read_unlock();
+ /* Note: We don't need a NULL check here, since curr_gw never
+ * gets dereferenced.
+ */
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ if (gw_node == curr_gw)
+ batadv_gw_deselect(bat_priv);
+ }
+out:
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
+ if (gw_node)
+ batadv_gw_node_free_ref(gw_node);
}
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
- batadv_gw_node_update(bat_priv, orig_node, 0);
+ struct batadv_tvlv_gateway_data gateway;
+
+ gateway.bandwidth_down = 0;
+ gateway.bandwidth_up = 0;
+
+ batadv_gw_node_update(bat_priv, orig_node, &gateway);
}
void batadv_gw_node_purge(struct batadv_priv *bat_priv)
@@ -467,9 +522,7 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
{
struct batadv_gw_node *curr_gw;
struct batadv_neigh_node *router;
- int down, up, ret = -1;
-
- batadv_gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up);
+ int ret = -1;
router = batadv_orig_node_get_router(gw_node->orig_node);
if (!router)
@@ -477,16 +530,15 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n",
+ ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
(curr_gw == gw_node ? "=>" : " "),
gw_node->orig_node->orig,
router->tq_avg, router->addr,
router->if_incoming->net_dev->name,
- gw_node->orig_node->gw_flags,
- (down > 2048 ? down / 1024 : down),
- (down > 2048 ? "MBit" : "KBit"),
- (up > 2048 ? up / 1024 : up),
- (up > 2048 ? "MBit" : "KBit"));
+ gw_node->bandwidth_down / 10,
+ gw_node->bandwidth_down % 10,
+ gw_node->bandwidth_up / 10,
+ gw_node->bandwidth_up % 10);
batadv_neigh_node_free_ref(router);
if (curr_gw)
@@ -508,7 +560,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
goto out;
seq_printf(seq,
- " %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
+ " %-12s (%s/%i) %17s [%10s]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
"Gateway", "#", BATADV_TQ_MAX_VALUE, "Nexthop", "outgoingIF",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
primary_if->net_dev->dev_addr, net_dev->name);
@@ -675,7 +727,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
{
struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL;
struct batadv_orig_node *orig_dst_node = NULL;
- struct batadv_gw_node *curr_gw = NULL;
+ struct batadv_gw_node *gw_node = NULL, *curr_gw = NULL;
struct ethhdr *ethhdr;
bool ret, out_of_range = false;
unsigned int header_len = 0;
@@ -691,7 +743,8 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
if (!orig_dst_node)
goto out;
- if (!orig_dst_node->gw_flags)
+ gw_node = batadv_gw_node_get(bat_priv, orig_dst_node);
+ if (!gw_node->bandwidth_down == 0)
goto out;
ret = batadv_is_type_dhcprequest(skb, header_len);
@@ -742,6 +795,8 @@ out:
batadv_orig_node_free_ref(orig_dst_node);
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
+ if (gw_node)
+ batadv_gw_node_free_ref(gw_node);
if (neigh_old)
batadv_neigh_node_free_ref(neigh_old);
if (neigh_curr)
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index ceef4eb..d95c2d2 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -29,7 +29,7 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
void batadv_gw_node_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- uint8_t new_gwflags);
+ struct batadv_tvlv_gateway_data *gateway);
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
void batadv_gw_node_purge(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 84bb2b1..b211b0f 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -21,64 +21,23 @@
#include "gateway_common.h"
#include "gateway_client.h"
-/* calculates the gateway class from kbit */
-static void batadv_kbit_to_gw_bandwidth(int down, int up, long *gw_srv_class)
-{
- int mdown = 0, tdown, tup, difference;
- uint8_t sbit, part;
-
- *gw_srv_class = 0;
- difference = 0x0FFFFFFF;
-
- /* test all downspeeds */
- for (sbit = 0; sbit < 2; sbit++) {
- for (part = 0; part < 16; part++) {
- tdown = 32 * (sbit + 2) * (1 << part);
-
- if (abs(tdown - down) < difference) {
- *gw_srv_class = (sbit << 7) + (part << 3);
- difference = abs(tdown - down);
- mdown = tdown;
- }
- }
- }
-
- /* test all upspeeds */
- difference = 0x0FFFFFFF;
-
- for (part = 0; part < 8; part++) {
- tup = ((part + 1) * (mdown)) / 8;
-
- if (abs(tup - up) < difference) {
- *gw_srv_class = (*gw_srv_class & 0xF8) | part;
- difference = abs(tup - up);
- }
- }
-}
-
-/* returns the up and downspeeds in kbit, calculated from the class */
-void batadv_gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up)
-{
- int sbit = (gw_srv_class & 0x80) >> 7;
- int dpart = (gw_srv_class & 0x78) >> 3;
- int upart = (gw_srv_class & 0x07);
-
- if (!gw_srv_class) {
- *down = 0;
- *up = 0;
- return;
- }
-
- *down = 32 * (sbit + 2) * (1 << dpart);
- *up = ((upart + 1) * (*down)) / 8;
-}
-
+/**
+ * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download
+ * and upload bandwidth information
+ * @net_dev: the soft interface net device
+ * @buff: string buffer to parse
+ * @down: pointer holding the returned download bandwidth information
+ * @up: pointer holding the returned upload bandwidth information
+ *
+ * Returns false on parse error and true otherwise.
+ */
static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
- int *up, int *down)
+ uint32_t *down, uint32_t *up)
{
- int ret, multi = 1;
+ enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
char *slash_ptr, *tmp_ptr;
long ldown, lup;
+ int ret;
slash_ptr = strchr(buff, '/');
if (slash_ptr)
@@ -88,10 +47,10 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
tmp_ptr = buff + strlen(buff) - 4;
if (strnicmp(tmp_ptr, "mbit", 4) == 0)
- multi = 1024;
+ bw_unit_type = BATADV_BW_UNIT_MBIT;
if ((strnicmp(tmp_ptr, "kbit", 4) == 0) ||
- (multi > 1))
+ (bw_unit_type == BATADV_BW_UNIT_MBIT))
*tmp_ptr = '\0';
}
@@ -103,20 +62,28 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
return false;
}
- *down = ldown * multi;
+ switch (bw_unit_type) {
+ case BATADV_BW_UNIT_MBIT:
+ *down = ldown * 10;
+ break;
+ case BATADV_BW_UNIT_KBIT:
+ default:
+ *down = ldown / 100;
+ break;
+ }
/* we also got some upload info */
if (slash_ptr) {
- multi = 1;
+ bw_unit_type = BATADV_BW_UNIT_KBIT;
if (strlen(slash_ptr + 1) > 4) {
tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1);
if (strnicmp(tmp_ptr, "mbit", 4) == 0)
- multi = 1024;
+ bw_unit_type = BATADV_BW_UNIT_MBIT;
if ((strnicmp(tmp_ptr, "kbit", 4) == 0) ||
- (multi > 1))
+ (bw_unit_type == BATADV_BW_UNIT_MBIT))
*tmp_ptr = '\0';
}
@@ -128,52 +95,149 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
return false;
}
- *up = lup * multi;
+ switch (bw_unit_type) {
+ case BATADV_BW_UNIT_MBIT:
+ *up = lup * 10;
+ break;
+ case BATADV_BW_UNIT_KBIT:
+ default:
+ *up = lup / 100;
+ break;
+ }
}
return true;
}
+/**
+ * batadv_gw_tvlv_container_update - update the gw tvlv container after gateway
+ * setting change
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
+{
+ struct batadv_tvlv_gateway_data gw;
+ uint32_t down, up;
+ char gw_mode;
+
+ gw_mode = atomic_read(&bat_priv->gw_mode);
+
+ switch (gw_mode) {
+ case BATADV_GW_MODE_OFF:
+ case BATADV_GW_MODE_CLIENT:
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_GW, 1);
+ break;
+ case BATADV_GW_MODE_SERVER:
+ down = atomic_read(&bat_priv->gw.bandwidth_down);
+ up = atomic_read(&bat_priv->gw.bandwidth_up);
+ gw.bandwidth_down = htonl(down);
+ gw.bandwidth_up = htonl(up);
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_GW, 1,
+ &gw, sizeof(gw));
+ break;
+ }
+}
+
ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
size_t count)
{
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- long gw_bandwidth_tmp = 0;
- int up = 0, down = 0;
+ uint32_t down_curr, up_curr, down_new = 0, up_new = 0;
bool ret;
- ret = batadv_parse_gw_bandwidth(net_dev, buff, &up, &down);
+ down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down);
+ up_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_up);
+
+ ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new);
if (!ret)
goto end;
- if ((!down) || (down < 256))
- down = 2000;
-
- if (!up)
- up = down / 5;
+ if (!down_new)
+ down_new = 1;
- batadv_kbit_to_gw_bandwidth(down, up, &gw_bandwidth_tmp);
+ if (!up_new)
+ up_new = down_new / 5;
- /* the gw bandwidth we guessed above might not match the given
- * speeds, hence we need to calculate it back to show the number
- * that is going to be propagated
- */
- batadv_gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, &down, &up);
+ if (!up_new)
+ up_new = 1;
- if (atomic_read(&bat_priv->gw_bandwidth) == gw_bandwidth_tmp)
+ if ((down_curr == down_new) && (up_curr == up_new))
return count;
batadv_gw_deselect(bat_priv);
batadv_info(net_dev,
- "Changing gateway bandwidth from: '%i' to: '%ld' (propagating: %d%s/%d%s)\n",
- atomic_read(&bat_priv->gw_bandwidth), gw_bandwidth_tmp,
- (down > 2048 ? down / 1024 : down),
- (down > 2048 ? "MBit" : "KBit"),
- (up > 2048 ? up / 1024 : up),
- (up > 2048 ? "MBit" : "KBit"));
+ "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n",
+ down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10,
+ down_new / 10, down_new % 10, up_new / 10, up_new % 10);
- atomic_set(&bat_priv->gw_bandwidth, gw_bandwidth_tmp);
+ atomic_set(&bat_priv->gw.bandwidth_down, down_new);
+ atomic_set(&bat_priv->gw.bandwidth_up, up_new);
+ batadv_gw_tvlv_container_update(bat_priv);
end:
return count;
}
+
+/**
+ * batadv_gw_tvlv_ogm_handler_v1 - process incoming gateway tvlv container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node of the ogm
+ * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
+ * @tvlv_value: tvlv buffer containing the gateway data
+ * @tvlv_value_len: tvlv buffer length
+ */
+static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value,
+ uint16_t tvlv_value_len)
+{
+ struct batadv_tvlv_gateway_data gateway, *gateway_ptr;
+
+ /* only fetch the tvlv value if the handler wasn't called via the
+ * CIFNOTFND flag and if there is data to fetch
+ */
+ if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
+ (tvlv_value_len < sizeof(gateway))) {
+ gateway.bandwidth_down = 0;
+ gateway.bandwidth_up = 0;
+ } else {
+ gateway_ptr = tvlv_value;
+ gateway.bandwidth_down = gateway_ptr->bandwidth_down;
+ gateway.bandwidth_up = gateway_ptr->bandwidth_up;
+ if ((gateway.bandwidth_down == 0) ||
+ (gateway.bandwidth_up == 0)) {
+ gateway.bandwidth_down = 0;
+ gateway.bandwidth_up = 0;
+ }
+ }
+
+ batadv_gw_node_update(bat_priv, orig, &gateway);
+
+ /* restart gateway selection if fast or late switching was enabled */
+ if ((gateway.bandwidth_down != 0) &&
+ (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) &&
+ (atomic_read(&bat_priv->gw_sel_class) > 2))
+ batadv_gw_check_election(bat_priv, orig);
+}
+
+/**
+ * batadv_gw_init - initialise the gateway handling internals
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_gw_init(struct batadv_priv *bat_priv)
+{
+ batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
+ NULL, BATADV_TVLV_GW, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+}
+
+/**
+ * batadv_gw_free - free the gateway handling internals
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_gw_free(struct batadv_priv *bat_priv)
+{
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_GW, 1);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_GW, 1);
+}
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 509b2bf..56384a4c 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -26,12 +26,24 @@ enum batadv_gw_modes {
BATADV_GW_MODE_SERVER,
};
+/**
+ * enum batadv_bandwidth_units - bandwidth unit types
+ * @BATADV_BW_UNIT_KBIT: unit type kbit
+ * @BATADV_BW_UNIT_MBIT: unit type mbit
+ */
+enum batadv_bandwidth_units {
+ BATADV_BW_UNIT_KBIT,
+ BATADV_BW_UNIT_MBIT,
+};
+
#define BATADV_GW_MODE_OFF_NAME "off"
#define BATADV_GW_MODE_CLIENT_NAME "client"
#define BATADV_GW_MODE_SERVER_NAME "server"
-void batadv_gw_bandwidth_to_kbit(uint8_t gw_class, int *down, int *up);
ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
size_t count);
+void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv);
+void batadv_gw_init(struct batadv_priv *bat_priv);
+void batadv_gw_free(struct batadv_priv *bat_priv);
#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c478e6b..eeb6671 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -194,22 +194,13 @@ out:
static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
struct batadv_hard_iface *oldif)
{
- struct batadv_vis_packet *vis_packet;
struct batadv_hard_iface *primary_if;
- struct sk_buff *skb;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
batadv_dat_init_own_addr(bat_priv, primary_if);
-
- skb = bat_priv->vis.my_info->skb_packet;
- vis_packet = (struct batadv_vis_packet *)skb->data;
- memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
- memcpy(vis_packet->sender_orig,
- primary_if->net_dev->dev_addr, ETH_ALEN);
-
batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
out:
if (primary_if)
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index c72d1bc..8b195e6 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -36,7 +36,8 @@
#include "gateway_client.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
-#include "vis.h"
+#include "unicast.h"
+#include "gateway_common.h"
#include "hash.h"
#include "bat_algo.h"
#include "network-coding.h"
@@ -65,6 +66,7 @@ static int __init batadv_init(void)
batadv_recv_handler_init();
batadv_iv_init();
+ batadv_nc_init();
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
@@ -109,8 +111,8 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->tt.roam_list_lock);
spin_lock_init(&bat_priv->tt.last_changeset_lock);
spin_lock_init(&bat_priv->gw.list_lock);
- spin_lock_init(&bat_priv->vis.hash_lock);
- spin_lock_init(&bat_priv->vis.list_lock);
+ spin_lock_init(&bat_priv->tvlv.container_list_lock);
+ spin_lock_init(&bat_priv->tvlv.handler_list_lock);
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
@@ -118,6 +120,8 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_LIST_HEAD(&bat_priv->tt.changes_list);
INIT_LIST_HEAD(&bat_priv->tt.req_list);
INIT_LIST_HEAD(&bat_priv->tt.roam_list);
+ INIT_HLIST_HEAD(&bat_priv->tvlv.container_list);
+ INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
ret = batadv_originator_init(bat_priv);
if (ret < 0)
@@ -130,10 +134,6 @@ int batadv_mesh_init(struct net_device *soft_iface)
batadv_tt_local_add(soft_iface, soft_iface->dev_addr,
BATADV_NULL_IFINDEX);
- ret = batadv_vis_init(bat_priv);
- if (ret < 0)
- goto err;
-
ret = batadv_bla_init(bat_priv);
if (ret < 0)
goto err;
@@ -142,10 +142,12 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
- ret = batadv_nc_init(bat_priv);
+ ret = batadv_nc_mesh_init(bat_priv);
if (ret < 0)
goto err;
+ batadv_gw_init(bat_priv);
+
atomic_set(&bat_priv->gw.reselect, 0);
atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
@@ -164,10 +166,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_purge_outstanding_packets(bat_priv, NULL);
- batadv_vis_quit(bat_priv);
-
batadv_gw_node_purge(bat_priv);
- batadv_nc_free(bat_priv);
+ batadv_nc_mesh_free(bat_priv);
batadv_dat_free(bat_priv);
batadv_bla_free(bat_priv);
@@ -184,6 +184,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
*/
batadv_originator_free(bat_priv);
+ batadv_gw_free(bat_priv);
+
free_percpu(bat_priv->bat_counters);
bat_priv->bat_counters = NULL;
@@ -391,22 +393,31 @@ static void batadv_recv_handler_init(void)
for (i = 0; i < ARRAY_SIZE(batadv_rx_handler); i++)
batadv_rx_handler[i] = batadv_recv_unhandled_packet;
- /* batman icmp packet */
- batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet;
+ for (i = BATADV_UNICAST_MIN; i <= BATADV_UNICAST_MAX; i++)
+ batadv_rx_handler[i] = batadv_recv_unhandled_unicast_packet;
+
+ /* compile time checks for struct member offsets */
+ BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10);
+ BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4);
+ BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4);
+
+ /* broadcast packet */
+ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
+
+ /* unicast packets ... */
/* unicast with 4 addresses packet */
batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet;
/* unicast packet */
batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet;
/* fragmented unicast packet */
batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_ucast_frag_packet;
- /* broadcast packet */
- batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
- /* vis packet */
- batadv_rx_handler[BATADV_VIS] = batadv_recv_vis_packet;
- /* Translation table query (request or response) */
- batadv_rx_handler[BATADV_TT_QUERY] = batadv_recv_tt_query;
- /* Roaming advertisement */
- batadv_rx_handler[BATADV_ROAM_ADV] = batadv_recv_roam_adv;
+ /* unicast tvlv packet */
+ batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv;
+ /* batman icmp packet */
+ batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet;
}
int
@@ -414,7 +425,12 @@ batadv_recv_handler_register(uint8_t packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *))
{
- if (batadv_rx_handler[packet_type] != &batadv_recv_unhandled_packet)
+ int (*curr)(struct sk_buff *,
+ struct batadv_hard_iface *);
+ curr = batadv_rx_handler[packet_type];
+
+ if ((curr != batadv_recv_unhandled_packet) &&
+ (curr != batadv_recv_unhandled_unicast_packet))
return -EBUSY;
batadv_rx_handler[packet_type] = recv_handler;
@@ -535,6 +551,574 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
return htonl(crc);
}
+/**
+ * batadv_tvlv_handler_free_ref - decrement the tvlv handler refcounter and
+ * possibly free it
+ * @tvlv_handler: the tvlv handler to free
+ */
+static void
+batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
+{
+ if (atomic_dec_and_test(&tvlv_handler->refcount))
+ kfree_rcu(tvlv_handler, rcu);
+}
+
+/**
+ * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ * based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to look for
+ * @version: tvlv handler version to look for
+ *
+ * Returns tvlv handler if found or NULL otherwise.
+ */
+static struct batadv_tvlv_handler
+*batadv_tvlv_handler_get(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version)
+{
+ struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler_tmp,
+ &bat_priv->tvlv.handler_list, list) {
+ if (tvlv_handler_tmp->type != type)
+ continue;
+
+ if (tvlv_handler_tmp->version != version)
+ continue;
+
+ if (!atomic_inc_not_zero(&tvlv_handler_tmp->refcount))
+ continue;
+
+ tvlv_handler = tvlv_handler_tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tvlv_handler;
+}
+
+/**
+ * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and
+ * possibly free it
+ * @tvlv_handler: the tvlv container to free
+ */
+static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv)
+{
+ if (atomic_dec_and_test(&tvlv->refcount))
+ kfree(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ * list based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to look for
+ * @version: tvlv container version to look for
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Returns tvlv container if found or NULL otherwise.
+ */
+static struct batadv_tvlv_container
+*batadv_tvlv_container_get(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version)
+{
+ struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
+
+ hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
+ if (tvlv_tmp->tvlv_hdr.type != type)
+ continue;
+
+ if (tvlv_tmp->tvlv_hdr.version != version)
+ continue;
+
+ if (!atomic_inc_not_zero(&tvlv_tmp->refcount))
+ continue;
+
+ tvlv = tvlv_tmp;
+ break;
+ }
+
+ return tvlv;
+}
+
+/**
+ * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ * list entries
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Returns size of all currently registered tvlv containers in bytes.
+ */
+static uint16_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+{
+ struct batadv_tvlv_container *tvlv;
+ uint16_t tvlv_len = 0;
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_len += sizeof(struct batadv_tvlv_hdr);
+ tvlv_len += ntohs(tvlv->tvlv_hdr.len);
+ }
+
+ return tvlv_len;
+}
+
+/**
+ * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
+ * list
+ * @tvlv: the to be removed tvlv container
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ */
+static void batadv_tvlv_container_remove(struct batadv_tvlv_container *tvlv)
+{
+ if (!tvlv)
+ return;
+
+ hlist_del(&tvlv->list);
+
+ /* first call to decrement the counter, second call to free */
+ batadv_tvlv_container_free_ref(tvlv);
+ batadv_tvlv_container_free_ref(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to unregister
+ * @version: tvlv container type to unregister
+ */
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(tvlv);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_container_register - register tvlv type, version and content
+ * to be propagated with each (primary interface) OGM
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type
+ * @version: tvlv container version
+ * @tvlv_value: tvlv container content
+ * @tvlv_value_len: tvlv container content length
+ *
+ * If a container of the same type and version was already registered the new
+ * content is going to replace the old one.
+ */
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version,
+ void *tvlv_value, uint16_t tvlv_value_len)
+{
+ struct batadv_tvlv_container *tvlv_old, *tvlv_new;
+
+ if (!tvlv_value)
+ tvlv_value_len = 0;
+
+ tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
+ if (!tvlv_new)
+ return;
+
+ tvlv_new->tvlv_hdr.version = version;
+ tvlv_new->tvlv_hdr.type = type;
+ tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
+
+ memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
+ INIT_HLIST_NODE(&tvlv_new->list);
+ atomic_set(&tvlv_new->refcount, 1);
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(tvlv_old);
+ hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accomodate
+ * requested packet size
+ * @packet_buff: packet buffer
+ * @packet_buff_len: packet buffer size
+ * @packet_min_len: requested packet minimum size
+ * @additional_packet_len: requested additional packet size on top of minimum
+ * size
+ *
+ * Returns true of the packet buffer could be changed to the requested size,
+ * false otherwise.
+ */
+static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
+ int *packet_buff_len,
+ int min_packet_len,
+ int additional_packet_len)
+{
+ unsigned char *new_buff;
+
+ new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
+
+ /* keep old buffer if kmalloc should fail */
+ if (new_buff) {
+ memcpy(new_buff, *packet_buff, min_packet_len);
+ kfree(*packet_buff);
+ *packet_buff = new_buff;
+ *packet_buff_len = min_packet_len + additional_packet_len;
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ * OGM packet buffer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_buff: ogm packet buffer
+ * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
+ * content
+ * @packet_min_len: ogm header size to be preserved for the OGM itself
+ *
+ * The ogm packet might be enlarged or shrunk depending on the current size
+ * and the size of the to-be-appended tvlv containers.
+ *
+ * Returns size of all appended tvlv containers in bytes.
+ */
+uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len,
+ int packet_min_len)
+{
+ struct batadv_tvlv_container *tvlv;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ uint16_t tvlv_value_len;
+ void *tvlv_value;
+ bool ret;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
+
+ ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
+ packet_min_len, tvlv_value_len);
+
+ if (!ret)
+ goto end;
+
+ if (!tvlv_value_len)
+ goto end;
+
+ tvlv_value = (*packet_buff) + packet_min_len;
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_hdr = tvlv_value;
+ tvlv_hdr->type = tvlv->tvlv_hdr.type;
+ tvlv_hdr->version = tvlv->tvlv_hdr.version;
+ tvlv_hdr->len = tvlv->tvlv_hdr.len;
+ tvlv_value = tvlv_hdr + 1;
+ memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
+ tvlv_value = (uint8_t *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
+ }
+
+end:
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+ return tvlv_value_len;
+}
+
+/**
+ * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
+ * appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv_handler: tvlv callback function handling the tvlv content
+ * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Returns success if handler was not found or the return value of the handler
+ * callback.
+ */
+static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_handler *tvlv_handler,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value, uint16_t tvlv_value_len)
+{
+ if (!tvlv_handler)
+ return NET_RX_SUCCESS;
+
+ if (ogm_source) {
+ if (!tvlv_handler->ogm_handler)
+ return NET_RX_SUCCESS;
+
+ if (!orig_node)
+ return NET_RX_SUCCESS;
+
+ tvlv_handler->ogm_handler(bat_priv, orig_node,
+ BATADV_NO_FLAGS,
+ tvlv_value, tvlv_value_len);
+ tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
+ } else {
+ if (!src)
+ return NET_RX_SUCCESS;
+
+ if (!dst)
+ return NET_RX_SUCCESS;
+
+ if (!tvlv_handler->unicast_handler)
+ return NET_RX_SUCCESS;
+
+ return tvlv_handler->unicast_handler(bat_priv, src,
+ dst, tvlv_value,
+ tvlv_value_len);
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
+ * appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Returns success when processing an OGM or the return value of all called
+ * handler callbacks.
+ */
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value, uint16_t tvlv_value_len)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ uint16_t tvlv_value_cont_len;
+ uint8_t cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
+ int ret = NET_RX_SUCCESS;
+
+ while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
+ tvlv_hdr = tvlv_value;
+ tvlv_value_cont_len = ntohs(tvlv_hdr->len);
+ tvlv_value = tvlv_hdr + 1;
+ tvlv_value_len -= sizeof(*tvlv_hdr);
+
+ if (tvlv_value_cont_len > tvlv_value_len)
+ break;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv,
+ tvlv_hdr->type,
+ tvlv_hdr->version);
+
+ ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
+ ogm_source, orig_node,
+ src, dst, tvlv_value,
+ tvlv_value_cont_len);
+ if (tvlv_handler)
+ batadv_tvlv_handler_free_ref(tvlv_handler);
+ tvlv_value = (uint8_t *)tvlv_value + tvlv_value_cont_len;
+ tvlv_value_len -= tvlv_value_cont_len;
+ }
+
+ if (!ogm_source)
+ return ret;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler,
+ &bat_priv->tvlv.handler_list, list) {
+ if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
+ !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
+ tvlv_handler->ogm_handler(bat_priv, orig_node,
+ cifnotfound, NULL, 0);
+
+ tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
+ }
+ rcu_read_unlock();
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
+ * handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @batadv_ogm_packet: ogm packet containing the tvlv containers
+ * @orig_node: orig node emitting the ogm packet
+ */
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_orig_node *orig_node)
+{
+ void *tvlv_value;
+ uint16_t tvlv_value_len;
+
+ if (!batadv_ogm_packet)
+ return;
+
+ tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
+ if (!tvlv_value_len)
+ return;
+
+ tvlv_value = batadv_ogm_packet + 1;
+
+ batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
+ tvlv_value, tvlv_value_len);
+}
+
+/**
+ * batadv_tvlv_handler_register - register tvlv handler based on the provided
+ * type and version (both need to match) for ogm tvlv payload and/or unicast
+ * payload
+ * @bat_priv: the bat priv with all the soft interface information
+ * @optr: ogm tvlv handler callback function. This function receives the orig
+ * node, flags and the tvlv content as argument to process.
+ * @uptr: unicast tvlv handler callback function. This function receives the
+ * source & destination of the unicast packet as well as the tvlv content
+ * to process.
+ * @type: tvlv handler type to be registered
+ * @version: tvlv handler version to be registered
+ * @flags: flags to enable or disable TVLV API behavior
+ */
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+ void (*optr)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value,
+ uint16_t tvlv_value_len),
+ int (*uptr)(struct batadv_priv *bat_priv,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value,
+ uint16_t tvlv_value_len),
+ uint8_t type, uint8_t version, uint8_t flags)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+ if (tvlv_handler) {
+ batadv_tvlv_handler_free_ref(tvlv_handler);
+ return;
+ }
+
+ tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
+ if (!tvlv_handler)
+ return;
+
+ tvlv_handler->ogm_handler = optr;
+ tvlv_handler->unicast_handler = uptr;
+ tvlv_handler->type = type;
+ tvlv_handler->version = version;
+ tvlv_handler->flags = flags;
+ atomic_set(&tvlv_handler->refcount, 1);
+ INIT_HLIST_NODE(&tvlv_handler->list);
+
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+}
+
+/**
+ * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to be unregistered
+ * @version: tvlv handler version to be unregistered
+ */
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+ if (!tvlv_handler)
+ return;
+
+ batadv_tvlv_handler_free_ref(tvlv_handler);
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ hlist_del_rcu(&tvlv_handler->list);
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+ batadv_tvlv_handler_free_ref(tvlv_handler);
+}
+
+/**
+ * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
+ * specified host
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @type: tvlv type
+ * @version: tvlv version
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ */
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src,
+ uint8_t *dst, uint8_t type, uint8_t version,
+ void *tvlv_value, uint16_t tvlv_value_len)
+{
+ struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ struct batadv_orig_node *orig_node;
+ struct sk_buff *skb = NULL;
+ unsigned char *tvlv_buff;
+ unsigned int tvlv_len;
+ ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
+ bool ret = false;
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (!orig_node)
+ goto out;
+
+ tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
+
+ skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
+ if (!skb)
+ goto out;
+
+ skb->priority = TC_PRIO_CONTROL;
+ skb_reserve(skb, ETH_HLEN);
+ tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
+ unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
+ unicast_tvlv_packet->header.packet_type = BATADV_UNICAST_TVLV;
+ unicast_tvlv_packet->header.version = BATADV_COMPAT_VERSION;
+ unicast_tvlv_packet->header.ttl = BATADV_TTL;
+ unicast_tvlv_packet->reserved = 0;
+ unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
+ unicast_tvlv_packet->align = 0;
+ memcpy(unicast_tvlv_packet->src, src, ETH_ALEN);
+ memcpy(unicast_tvlv_packet->dst, dst, ETH_ALEN);
+
+ tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
+ tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
+ tvlv_hdr->version = version;
+ tvlv_hdr->type = type;
+ tvlv_hdr->len = htons(tvlv_value_len);
+ tvlv_buff += sizeof(*tvlv_hdr);
+ memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
+
+ if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+ ret = true;
+
+out:
+ if (skb && !ret)
+ kfree_skb(skb);
+ if (orig_node)
+ batadv_orig_node_free_ref(orig_node);
+}
+
static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
{
struct batadv_algo_ops *bat_algo_ops;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2467552..e11c2ec 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -86,8 +86,6 @@
/* numbers of originator to contact for any PUT/GET DHT operation */
#define BATADV_DAT_CANDIDATES_NUM 3
-#define BATADV_VIS_INTERVAL 5000 /* 5 seconds */
-
/* how much worse secondary interfaces may be to be considered as bonding
* candidates
*/
@@ -326,4 +324,39 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
*/
#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version,
+ void *tvlv_value, uint16_t tvlv_value_len);
+uint16_t batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len,
+ int packet_min_len);
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_orig_node *orig_node);
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version);
+
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+ void (*optr)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value,
+ uint16_t tvlv_value_len),
+ int (*uptr)(struct batadv_priv *bat_priv,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value,
+ uint16_t tvlv_value_len),
+ uint8_t type, uint8_t version, uint8_t flags);
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+ uint8_t type, uint8_t version);
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_buff, uint16_t tvlv_buff_len);
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src,
+ uint8_t *dst, uint8_t type, uint8_t version,
+ void *tvlv_value, uint16_t tvlv_value_len);
+
#endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index a487d46..23f611b 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
/**
+ * batadv_nc_init - one-time initialization for network coding
+ */
+int __init batadv_nc_init(void)
+{
+ int ret;
+
+ /* Register our packet type */
+ ret = batadv_recv_handler_register(BATADV_CODED,
+ batadv_nc_recv_coded_packet);
+
+ return ret;
+}
+
+/**
* batadv_nc_start_timer - initialise the nc periodic worker
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -45,10 +59,63 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_init - initialise coding hash table and start house keeping
+ * batadv_nc_tvlv_container_update - update the network coding tvlv container
+ * after network coding setting change
* @bat_priv: the bat priv with all the soft interface information
*/
-int batadv_nc_init(struct batadv_priv *bat_priv)
+static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv)
+{
+ char nc_mode;
+
+ nc_mode = atomic_read(&bat_priv->network_coding);
+
+ switch (nc_mode) {
+ case 0:
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_NC, 1);
+ break;
+ case 1:
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_NC, 1,
+ NULL, 0);
+ break;
+ }
+}
+
+/**
+ * batadv_nc_status_update - update the network coding tvlv container after
+ * network coding setting change
+ * @net_dev: the soft interface net device
+ */
+void batadv_nc_status_update(struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ batadv_nc_tvlv_container_update(bat_priv);
+}
+
+/**
+ * batadv_nc_tvlv_ogm_handler_v1 - process incoming nc tvlv container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node of the ogm
+ * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
+ * @tvlv_value: tvlv buffer containing the gateway data
+ * @tvlv_value_len: tvlv buffer length
+ */
+static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value,
+ uint16_t tvlv_value_len)
+{
+ if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
+ orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_NC;
+ else
+ orig->capabilities |= BATADV_ORIG_CAPA_HAS_NC;
+}
+
+/**
+ * batadv_nc_mesh_init - initialise coding hash table and start house keeping
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
{
bat_priv->nc.timestamp_fwd_flush = jiffies;
bat_priv->nc.timestamp_sniffed_purge = jiffies;
@@ -70,14 +137,13 @@ int batadv_nc_init(struct batadv_priv *bat_priv)
batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
&batadv_nc_decoding_hash_lock_class_key);
- /* Register our packet type */
- if (batadv_recv_handler_register(BATADV_CODED,
- batadv_nc_recv_coded_packet) < 0)
- goto err;
-
INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
batadv_nc_start_timer(bat_priv);
+ batadv_tvlv_handler_register(bat_priv, batadv_nc_tvlv_ogm_handler_v1,
+ NULL, BATADV_TVLV_NC, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+ batadv_nc_tvlv_container_update(bat_priv);
return 0;
err:
@@ -793,6 +859,10 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
if (!atomic_read(&bat_priv->network_coding))
goto out;
+ /* check if orig node is network coding enabled */
+ if (!(orig_node->capabilities & BATADV_ORIG_CAPA_HAS_NC))
+ goto out;
+
/* accept ogms from 'good' neighbors and single hop neighbors */
if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) &&
!is_single_hop_neigh)
@@ -1721,12 +1791,13 @@ free_nc_packet:
}
/**
- * batadv_nc_free - clean up network coding memory
+ * batadv_nc_mesh_free - clean up network coding memory
* @bat_priv: the bat priv with all the soft interface information
*/
-void batadv_nc_free(struct batadv_priv *bat_priv)
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
- batadv_recv_handler_unregister(BATADV_CODED);
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_NC, 1);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_NC, 1);
cancel_delayed_work_sync(&bat_priv->nc.work);
batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 85a4ec8..d4fd315 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -22,8 +22,10 @@
#ifdef CONFIG_BATMAN_ADV_NC
-int batadv_nc_init(struct batadv_priv *bat_priv);
-void batadv_nc_free(struct batadv_priv *bat_priv);
+void batadv_nc_status_update(struct net_device *net_dev);
+int batadv_nc_init(void);
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv);
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv);
void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh_node,
@@ -46,12 +48,21 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
#else /* ifdef CONFIG_BATMAN_ADV_NC */
-static inline int batadv_nc_init(struct batadv_priv *bat_priv)
+static inline void batadv_nc_status_update(struct net_device *net_dev)
+{
+}
+
+static inline int batadv_nc_init(void)
+{
+ return 0;
+}
+
+static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
{
return 0;
}
-static inline void batadv_nc_free(struct batadv_priv *bat_priv)
+static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
return;
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index f50553a..5d53d2f 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -388,9 +388,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
hlist_for_each_entry_safe(orig_node, node_tmp,
head, hash_entry) {
if (batadv_purge_orig_node(bat_priv, orig_node)) {
- if (orig_node->gw_flags)
- batadv_gw_node_delete(bat_priv,
- orig_node);
+ batadv_gw_node_delete(bat_priv, orig_node);
hlist_del_rcu(&orig_node->hash_entry);
batadv_orig_node_free_ref(orig_node);
continue;
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index a51ccfc..4361bae 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -20,17 +20,34 @@
#ifndef _NET_BATMAN_ADV_PACKET_H_
#define _NET_BATMAN_ADV_PACKET_H_
+/**
+ * enum batadv_packettype - types for batman-adv encapsulated packets
+ * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
+ * @BATADV_BCAST: broadcast packets carrying broadcast payload
+ * @BATADV_CODED: network coded packets
+ *
+ * @BATADV_UNICAST: unicast packets carrying unicast payload traffic
+ * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original
+ * payload packet
+ * @BATADV_UNICAST_4ADDR: unicast packet including the originator address of
+ * the sender
+ * @BATADV_ICMP: unicast packet like IP ICMP used for ping or traceroute
+ * @BATADV_UNICAST_TVLV: unicast packet carrying TVLV containers
+ */
enum batadv_packettype {
- BATADV_IV_OGM = 0x01,
- BATADV_ICMP = 0x02,
- BATADV_UNICAST = 0x03,
- BATADV_BCAST = 0x04,
- BATADV_VIS = 0x05,
- BATADV_UNICAST_FRAG = 0x06,
- BATADV_TT_QUERY = 0x07,
- BATADV_ROAM_ADV = 0x08,
- BATADV_UNICAST_4ADDR = 0x09,
- BATADV_CODED = 0x0a,
+ /* 0x00 - 0x3f: local packets or special rules for handling */
+ BATADV_IV_OGM = 0x00,
+ BATADV_BCAST = 0x01,
+ BATADV_CODED = 0x02,
+ /* 0x40 - 0x7f: unicast */
+#define BATADV_UNICAST_MIN 0x40
+ BATADV_UNICAST = 0x40,
+ BATADV_UNICAST_FRAG = 0x41,
+ BATADV_UNICAST_4ADDR = 0x42,
+ BATADV_ICMP = 0x43,
+ BATADV_UNICAST_TVLV = 0x44,
+#define BATADV_UNICAST_MAX 0x7f
+ /* 0x80 - 0xff: reserved */
};
/**
@@ -48,13 +65,21 @@ enum batadv_subtype {
};
/* this file is included by batctl which needs these defines */
-#define BATADV_COMPAT_VERSION 14
+#define BATADV_COMPAT_VERSION 15
+/**
+ * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets
+ * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was
+ * previously received from someone else than the best neighbor.
+ * @BATADV_PRIMARIES_FIRST_HOP: flag is set when the primary interface address
+ * is used, and the packet travels its first hop.
+ * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a
+ * one hop neighbor on the interface where it was originally received.
+ */
enum batadv_iv_flags {
- BATADV_NOT_BEST_NEXT_HOP = BIT(3),
- BATADV_PRIMARIES_FIRST_HOP = BIT(4),
- BATADV_VIS_SERVER = BIT(5),
- BATADV_DIRECTLINK = BIT(6),
+ BATADV_NOT_BEST_NEXT_HOP = BIT(0),
+ BATADV_PRIMARIES_FIRST_HOP = BIT(1),
+ BATADV_DIRECTLINK = BIT(2),
};
/* ICMP message types */
@@ -66,29 +91,27 @@ enum batadv_icmp_packettype {
BATADV_PARAMETER_PROBLEM = 12,
};
-/* vis defines */
-enum batadv_vis_packettype {
- BATADV_VIS_TYPE_SERVER_SYNC = 0,
- BATADV_VIS_TYPE_CLIENT_UPDATE = 1,
-};
-
/* fragmentation defines */
enum batadv_unicast_frag_flags {
BATADV_UNI_FRAG_HEAD = BIT(0),
BATADV_UNI_FRAG_LARGETAIL = BIT(1),
};
-/* TT_QUERY subtypes */
-#define BATADV_TT_QUERY_TYPE_MASK 0x3
+/* tt data subtypes */
+#define BATADV_TT_DATA_TYPE_MASK 0x0F
-enum batadv_tt_query_packettype {
- BATADV_TT_REQUEST = 0,
- BATADV_TT_RESPONSE = 1,
-};
-
-/* TT_QUERY flags */
-enum batadv_tt_query_flags {
- BATADV_TT_FULL_TABLE = BIT(2),
+/**
+ * enum batadv_tt_data_flags - flags for tt data tvlv
+ * @BATADV_TT_OGM_DIFF: TT diff propagated through OGM
+ * @BATADV_TT_REQUEST: TT request message
+ * @BATADV_TT_RESPONSE: TT response message
+ * @BATADV_TT_FULL_TABLE: contains full table to replace existing table
+ */
+enum batadv_tt_data_flags {
+ BATADV_TT_OGM_DIFF = BIT(0),
+ BATADV_TT_REQUEST = BIT(1),
+ BATADV_TT_RESPONSE = BIT(2),
+ BATADV_TT_FULL_TABLE = BIT(4),
};
/* BATADV_TT_CLIENT flags.
@@ -99,10 +122,10 @@ enum batadv_tt_client_flags {
BATADV_TT_CLIENT_DEL = BIT(0),
BATADV_TT_CLIENT_ROAM = BIT(1),
BATADV_TT_CLIENT_WIFI = BIT(2),
- BATADV_TT_CLIENT_TEMP = BIT(3),
BATADV_TT_CLIENT_NOPURGE = BIT(8),
BATADV_TT_CLIENT_NEW = BIT(9),
BATADV_TT_CLIENT_PENDING = BIT(10),
+ BATADV_TT_CLIENT_TEMP = BIT(11),
};
/* claim frame types for the bridge loop avoidance */
@@ -113,6 +136,22 @@ enum batadv_bla_claimframe {
BATADV_CLAIM_TYPE_REQUEST = 0x03,
};
+/**
+ * enum batadv_tvlv_type - tvlv type definitions
+ * @BATADV_TVLV_GW: gateway tvlv
+ * @BATADV_TVLV_DAT: distributed arp table tvlv
+ * @BATADV_TVLV_NC: network coding tvlv
+ * @BATADV_TVLV_TT: translation table tvlv
+ * @BATADV_TVLV_ROAM: roaming advertisement tvlv
+ */
+enum batadv_tvlv_type {
+ BATADV_TVLV_GW = 0x01,
+ BATADV_TVLV_DAT = 0x02,
+ BATADV_TVLV_NC = 0x03,
+ BATADV_TVLV_TT = 0x04,
+ BATADV_TVLV_ROAM = 0x05,
+};
+
/* the destination hardware field in the ARP frame is used to
* transport the claim type and the group id
*/
@@ -131,18 +170,25 @@ struct batadv_header {
*/
};
+/**
+ * struct batadv_ogm_packet - ogm (routing protocol) packet
+ * @header: common batman packet header
+ * @flags: contains routing relevant flags - see enum batadv_iv_flags
+ * @tvlv_len: length of tvlv data following the ogm header
+ */
struct batadv_ogm_packet {
struct batadv_header header;
- uint8_t flags; /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */
+ uint8_t flags;
__be32 seqno;
uint8_t orig[ETH_ALEN];
uint8_t prev_sender[ETH_ALEN];
- uint8_t gw_flags; /* flags related to gateway class */
+ uint8_t reserved;
uint8_t tq;
- uint8_t tt_num_changes;
- uint8_t ttvn; /* translation table version number */
- __be16 tt_crc;
-} __packed;
+ __be16 tvlv_len;
+ /* __packed is not needed as the struct size is divisible by 4,
+ * and the largest data type in this struct has a size of 4.
+ */
+};
#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
@@ -231,54 +277,6 @@ struct batadv_bcast_packet {
#pragma pack()
-struct batadv_vis_packet {
- struct batadv_header header;
- uint8_t vis_type; /* which type of vis-participant sent this? */
- __be32 seqno; /* sequence number */
- uint8_t entries; /* number of entries behind this struct */
- uint8_t reserved;
- uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */
- uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */
- uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */
-};
-
-struct batadv_tt_query_packet {
- struct batadv_header header;
- /* the flag field is a combination of:
- * - TT_REQUEST or TT_RESPONSE
- * - TT_FULL_TABLE
- */
- uint8_t flags;
- uint8_t dst[ETH_ALEN];
- uint8_t src[ETH_ALEN];
- /* the ttvn field is:
- * if TT_REQUEST: ttvn that triggered the
- * request
- * if TT_RESPONSE: new ttvn for the src
- * orig_node
- */
- uint8_t ttvn;
- /* tt_data field is:
- * if TT_REQUEST: crc associated with the
- * ttvn
- * if TT_RESPONSE: table_size
- */
- __be16 tt_data;
-} __packed;
-
-struct batadv_roam_adv_packet {
- struct batadv_header header;
- uint8_t reserved;
- uint8_t dst[ETH_ALEN];
- uint8_t src[ETH_ALEN];
- uint8_t client[ETH_ALEN];
-} __packed;
-
-struct batadv_tt_change {
- uint8_t flags;
- uint8_t addr[ETH_ALEN];
-} __packed;
-
/**
* struct batadv_coded_packet - network coded packet
* @header: common batman packet header and ttl of first included packet
@@ -311,4 +309,82 @@ struct batadv_coded_packet {
__be16 coded_len;
};
+/**
+ * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload
+ * @header: common batman packet header
+ * @reserved: reserved field (for packet alignment)
+ * @src: address of the source
+ * @dst: address of the destination
+ * @tvlv_len: length of tvlv data following the unicast tvlv header
+ * @align: 2 bytes to align the header to a 4 byte boundry
+ */
+struct batadv_unicast_tvlv_packet {
+ struct batadv_header header;
+ uint8_t reserved;
+ uint8_t dst[ETH_ALEN];
+ uint8_t src[ETH_ALEN];
+ __be16 tvlv_len;
+ uint16_t align;
+};
+
+/**
+ * struct batadv_tvlv_hdr - base tvlv header struct
+ * @type: tvlv container type (see batadv_tvlv_type)
+ * @version: tvlv container version
+ * @len: tvlv container length
+ */
+struct batadv_tvlv_hdr {
+ uint8_t type;
+ uint8_t version;
+ __be16 len;
+};
+
+/**
+ * struct batadv_tvlv_gateway_data - gateway data propagated through gw tvlv
+ * container
+ * @bandwidth_down: advertised uplink download bandwidth
+ * @bandwidth_up: advertised uplink upload bandwidth
+ */
+struct batadv_tvlv_gateway_data {
+ __be32 bandwidth_down;
+ __be32 bandwidth_up;
+};
+
+/**
+ * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
+ * @flags: translation table flags (see batadv_tt_data_flags)
+ * @ttvn: translation table version number
+ * @reserved: field reserved for future use
+ * @crc: crc32 checksum of the local translation table
+ */
+struct batadv_tvlv_tt_data {
+ uint8_t flags;
+ uint8_t ttvn;
+ uint16_t reserved;
+ __be32 crc;
+};
+
+/**
+ * struct batadv_tvlv_tt_change - translation table diff data
+ * @flags: status indicators concerning the non-mesh client (see
+ * batadv_tt_client_flags)
+ * @reserved: reserved field
+ * @addr: mac address of non-mesh client that triggered this tt change
+ */
+struct batadv_tvlv_tt_change {
+ uint8_t flags;
+ uint8_t reserved;
+ uint8_t addr[ETH_ALEN];
+};
+
+/**
+ * struct batadv_tvlv_roam_adv - roaming advertisement
+ * @client: mac address of roaming client
+ * @reserved: field reserved for future use
+ */
+struct batadv_tvlv_roam_adv {
+ uint8_t client[ETH_ALEN];
+ uint16_t reserved;
+};
+
#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 0439395..457dfef 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -25,7 +25,6 @@
#include "icmp_socket.h"
#include "translation-table.h"
#include "originator.h"
-#include "vis.h"
#include "unicast.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
@@ -557,126 +556,6 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
return 0;
}
-int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
-{
- struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_tt_query_packet *tt_query;
- uint16_t tt_size;
- int hdr_size = sizeof(*tt_query);
- char tt_flag;
- size_t packet_size;
-
- if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
- return NET_RX_DROP;
-
- /* I could need to modify it */
- if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0)
- goto out;
-
- tt_query = (struct batadv_tt_query_packet *)skb->data;
-
- switch (tt_query->flags & BATADV_TT_QUERY_TYPE_MASK) {
- case BATADV_TT_REQUEST:
- batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_RX);
-
- /* If we cannot provide an answer the tt_request is
- * forwarded
- */
- if (!batadv_send_tt_response(bat_priv, tt_query)) {
- if (tt_query->flags & BATADV_TT_FULL_TABLE)
- tt_flag = 'F';
- else
- tt_flag = '.';
-
- batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Routing TT_REQUEST to %pM [%c]\n",
- tt_query->dst,
- tt_flag);
- return batadv_route_unicast_packet(skb, recv_if);
- }
- break;
- case BATADV_TT_RESPONSE:
- batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX);
-
- if (batadv_is_my_mac(bat_priv, tt_query->dst)) {
- /* packet needs to be linearized to access the TT
- * changes
- */
- if (skb_linearize(skb) < 0)
- goto out;
- /* skb_linearize() possibly changed skb->data */
- tt_query = (struct batadv_tt_query_packet *)skb->data;
-
- tt_size = batadv_tt_len(ntohs(tt_query->tt_data));
-
- /* Ensure we have all the claimed data */
- packet_size = sizeof(struct batadv_tt_query_packet);
- packet_size += tt_size;
- if (unlikely(skb_headlen(skb) < packet_size))
- goto out;
-
- batadv_handle_tt_response(bat_priv, tt_query);
- } else {
- if (tt_query->flags & BATADV_TT_FULL_TABLE)
- tt_flag = 'F';
- else
- tt_flag = '.';
- batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Routing TT_RESPONSE to %pM [%c]\n",
- tt_query->dst,
- tt_flag);
- return batadv_route_unicast_packet(skb, recv_if);
- }
- break;
- }
-
-out:
- /* returning NET_RX_DROP will make the caller function kfree the skb */
- return NET_RX_DROP;
-}
-
-int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
-{
- struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_roam_adv_packet *roam_adv_packet;
- struct batadv_orig_node *orig_node;
-
- if (batadv_check_unicast_packet(bat_priv, skb,
- sizeof(*roam_adv_packet)) < 0)
- goto out;
-
- batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
-
- roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data;
-
- if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst))
- return batadv_route_unicast_packet(skb, recv_if);
-
- /* check if it is a backbone gateway. we don't accept
- * roaming advertisement from it, as it has the same
- * entries as we have.
- */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, roam_adv_packet->src))
- goto out;
-
- orig_node = batadv_orig_hash_find(bat_priv, roam_adv_packet->src);
- if (!orig_node)
- goto out;
-
- batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Received ROAMING_ADV from %pM (client %pM)\n",
- roam_adv_packet->src, roam_adv_packet->client);
-
- batadv_tt_global_add(bat_priv, orig_node, roam_adv_packet->client,
- BATADV_TT_CLIENT_ROAM,
- atomic_read(&orig_node->last_ttvn) + 1);
-
- batadv_orig_node_free_ref(orig_node);
-out:
- /* returning NET_RX_DROP will make the caller function kfree the skb */
- return NET_RX_DROP;
-}
-
/* find a suitable router for this originator, and use
* bonding if possible. increases the found neighbors
* refcount.
@@ -1032,6 +911,34 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
return 1;
}
+/**
+ * batadv_recv_unhandled_unicast_packet - receive and process packets which
+ * are in the unicast number space but not yet known to the implementation
+ * @skb: unicast tvlv packet to process
+ * @recv_if: pointer to interface this packet was received on
+ *
+ * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * otherwise.
+ */
+int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_unicast_packet *unicast_packet;
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ int check, hdr_size = sizeof(*unicast_packet);
+
+ check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
+ if (check < 0)
+ return NET_RX_DROP;
+
+ /* we don't know about this type, drop it. */
+ unicast_packet = (struct batadv_unicast_packet *)skb->data;
+ if (batadv_is_my_mac(bat_priv, unicast_packet->dest))
+ return NET_RX_DROP;
+
+ return batadv_route_unicast_packet(skb, recv_if);
+}
+
int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
@@ -1139,6 +1046,54 @@ rx_success:
return batadv_route_unicast_packet(skb, recv_if);
}
+/**
+ * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
+ * @skb: unicast tvlv packet to process
+ * @recv_if: pointer to interface this packet was received on
+ * @dst_addr: the payload destination
+ *
+ * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * otherwise.
+ */
+int batadv_recv_unicast_tvlv(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
+ unsigned char *tvlv_buff;
+ uint16_t tvlv_buff_len;
+ int hdr_size = sizeof(*unicast_tvlv_packet);
+ int ret = NET_RX_DROP;
+
+ if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
+ return NET_RX_DROP;
+
+ /* the header is likely to be modified while forwarding */
+ if (skb_cow(skb, hdr_size) < 0)
+ return NET_RX_DROP;
+
+ /* packet needs to be linearized to access the tvlv content */
+ if (skb_linearize(skb) < 0)
+ return NET_RX_DROP;
+
+ unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data;
+
+ tvlv_buff = (unsigned char *)(skb->data + hdr_size);
+ tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len);
+
+ if (tvlv_buff_len > skb->len - hdr_size)
+ return NET_RX_DROP;
+
+ ret = batadv_tvlv_containers_process(bat_priv, false, NULL,
+ unicast_tvlv_packet->src,
+ unicast_tvlv_packet->dst,
+ tvlv_buff, tvlv_buff_len);
+
+ if (ret != NET_RX_SUCCESS)
+ ret = batadv_route_unicast_packet(skb, recv_if);
+
+ return ret;
+}
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
@@ -1240,53 +1195,3 @@ out:
batadv_orig_node_free_ref(orig_node);
return ret;
}
-
-int batadv_recv_vis_packet(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if)
-{
- struct batadv_vis_packet *vis_packet;
- struct ethhdr *ethhdr;
- struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- int hdr_size = sizeof(*vis_packet);
-
- /* keep skb linear */
- if (skb_linearize(skb) < 0)
- return NET_RX_DROP;
-
- if (unlikely(!pskb_may_pull(skb, hdr_size)))
- return NET_RX_DROP;
-
- vis_packet = (struct batadv_vis_packet *)skb->data;
- ethhdr = eth_hdr(skb);
-
- /* not for me */
- if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
- return NET_RX_DROP;
-
- /* ignore own packets */
- if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig))
- return NET_RX_DROP;
-
- if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig))
- return NET_RX_DROP;
-
- switch (vis_packet->vis_type) {
- case BATADV_VIS_TYPE_SERVER_SYNC:
- batadv_receive_server_sync_packet(bat_priv, vis_packet,
- skb_headlen(skb));
- break;
-
- case BATADV_VIS_TYPE_CLIENT_UPDATE:
- batadv_receive_client_update_packet(bat_priv, vis_packet,
- skb_headlen(skb));
- break;
-
- default: /* ignore unknown packet */
- break;
- }
-
- /* We take a copy of the data in the packet, so we should
- * always free the skbuf.
- */
- return NET_RX_DROP;
-}
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 72a29bd..ea15fa6 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -34,12 +34,14 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
-int batadv_recv_vis_packet(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
int batadv_recv_tt_query(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_roam_adv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
+int batadv_recv_unicast_tvlv(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
+int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
struct batadv_neigh_node *
batadv_find_router(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 0266edd..81d69fb 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -24,7 +24,6 @@
#include "translation-table.h"
#include "soft-interface.h"
#include "hard-interface.h"
-#include "vis.h"
#include "gateway_common.h"
#include "originator.h"
#include "network-coding.h"
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 813db4e..25e6004 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -469,10 +469,10 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->distributed_arp_table, 1);
#endif
atomic_set(&bat_priv->ap_isolation, 0);
- atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE);
atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw_sel_class, 20);
- atomic_set(&bat_priv->gw_bandwidth, 41);
+ atomic_set(&bat_priv->gw.bandwidth_down, 100);
+ atomic_set(&bat_priv->gw.bandwidth_up, 20);
atomic_set(&bat_priv->orig_interval, 1000);
atomic_set(&bat_priv->hop_penalty, 30);
#ifdef CONFIG_BATMAN_ADV_DEBUG
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 4114b96..869eb46 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -21,11 +21,11 @@
#include "sysfs.h"
#include "translation-table.h"
#include "distributed-arp-table.h"
+#include "network-coding.h"
#include "originator.h"
#include "hard-interface.h"
#include "gateway_common.h"
#include "gateway_client.h"
-#include "vis.h"
static struct net_device *batadv_kobj_to_netdev(struct kobject *obj)
{
@@ -230,74 +230,6 @@ __batadv_store_uint_attr(const char *buff, size_t count,
return ret;
}
-static ssize_t batadv_show_vis_mode(struct kobject *kobj,
- struct attribute *attr, char *buff)
-{
- struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
- int vis_mode = atomic_read(&bat_priv->vis_mode);
- const char *mode;
-
- if (vis_mode == BATADV_VIS_TYPE_CLIENT_UPDATE)
- mode = "client";
- else
- mode = "server";
-
- return sprintf(buff, "%s\n", mode);
-}
-
-static ssize_t batadv_store_vis_mode(struct kobject *kobj,
- struct attribute *attr, char *buff,
- size_t count)
-{
- struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
- struct batadv_priv *bat_priv = netdev_priv(net_dev);
- unsigned long val;
- int ret, vis_mode_tmp = -1;
- const char *old_mode, *new_mode;
-
- ret = kstrtoul(buff, 10, &val);
-
- if (((count == 2) && (!ret) &&
- (val == BATADV_VIS_TYPE_CLIENT_UPDATE)) ||
- (strncmp(buff, "client", 6) == 0) ||
- (strncmp(buff, "off", 3) == 0))
- vis_mode_tmp = BATADV_VIS_TYPE_CLIENT_UPDATE;
-
- if (((count == 2) && (!ret) &&
- (val == BATADV_VIS_TYPE_SERVER_SYNC)) ||
- (strncmp(buff, "server", 6) == 0))
- vis_mode_tmp = BATADV_VIS_TYPE_SERVER_SYNC;
-
- if (vis_mode_tmp < 0) {
- if (buff[count - 1] == '\n')
- buff[count - 1] = '\0';
-
- batadv_info(net_dev,
- "Invalid parameter for 'vis mode' setting received: %s\n",
- buff);
- return -EINVAL;
- }
-
- if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp)
- return count;
-
- if (atomic_read(&bat_priv->vis_mode) == BATADV_VIS_TYPE_CLIENT_UPDATE)
- old_mode = "client";
- else
- old_mode = "server";
-
- if (vis_mode_tmp == BATADV_VIS_TYPE_CLIENT_UPDATE)
- new_mode = "client";
- else
- new_mode = "server";
-
- batadv_info(net_dev, "Changing vis mode from: %s to: %s\n", old_mode,
- new_mode);
-
- atomic_set(&bat_priv->vis_mode, (unsigned int)vis_mode_tmp);
- return count;
-}
-
static ssize_t batadv_show_bat_algo(struct kobject *kobj,
struct attribute *attr, char *buff)
{
@@ -390,6 +322,7 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
*/
batadv_gw_check_client_stop(bat_priv);
atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
+ batadv_gw_tvlv_container_update(bat_priv);
return count;
}
@@ -397,15 +330,13 @@ static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
struct attribute *attr, char *buff)
{
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
- int down, up;
- int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth);
-
- batadv_gw_bandwidth_to_kbit(gw_bandwidth, &down, &up);
- return sprintf(buff, "%i%s/%i%s\n",
- (down > 2048 ? down / 1024 : down),
- (down > 2048 ? "MBit" : "KBit"),
- (up > 2048 ? up / 1024 : up),
- (up > 2048 ? "MBit" : "KBit"));
+ uint32_t down, up;
+
+ down = atomic_read(&bat_priv->gw.bandwidth_down);
+ up = atomic_read(&bat_priv->gw.bandwidth_up);
+
+ return sprintf(buff, "%u.%u/%u.%u MBit\n", down / 10,
+ down % 10, up / 10, up % 10);
}
static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
@@ -426,12 +357,11 @@ BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL);
#endif
#ifdef CONFIG_BATMAN_ADV_DAT
-BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR,
+ batadv_dat_status_update);
#endif
BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
-static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode,
- batadv_store_vis_mode);
static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL);
static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode,
batadv_store_gw_mode);
@@ -447,7 +377,8 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
#endif
#ifdef CONFIG_BATMAN_ADV_NC
-BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR,
+ batadv_nc_status_update);
#endif
static struct batadv_attribute *batadv_mesh_attrs[] = {
@@ -461,7 +392,6 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
#endif
&batadv_attr_fragmentation,
&batadv_attr_ap_isolation,
- &batadv_attr_vis_mode,
&batadv_attr_routing_algo,
&batadv_attr_gw_mode,
&batadv_attr_orig_interval,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 34510f3..c741694 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -27,7 +27,7 @@
#include "routing.h"
#include "bridge_loop_avoidance.h"
-#include <linux/crc16.h>
+#include <linux/crc32c.h>
/* hash class keys */
static struct lock_class_key batadv_tt_local_hash_lock_class_key;
@@ -180,11 +180,11 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
bool del_op_requested, del_op_entry;
tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC);
-
if (!tt_change_node)
return;
tt_change_node->change.flags = flags;
+ tt_change_node->change.reserved = 0;
memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN);
del_op_requested = flags & BATADV_TT_CLIENT_DEL;
@@ -229,9 +229,15 @@ unlock:
atomic_inc(&bat_priv->tt.local_changes);
}
-int batadv_tt_len(int changes_num)
+/**
+ * batadv_tt_len - compute length in bytes of given number of tt changes
+ * @changes_num: number of tt changes
+ *
+ * Returns computed length in bytes.
+ */
+static int batadv_tt_len(int changes_num)
{
- return changes_num * sizeof(struct batadv_tt_change);
+ return changes_num * sizeof(struct batadv_tvlv_tt_change);
}
static int batadv_tt_local_init(struct batadv_priv *bat_priv)
@@ -376,71 +382,52 @@ out:
batadv_tt_global_entry_free_ref(tt_global);
}
-static void batadv_tt_realloc_packet_buff(unsigned char **packet_buff,
- int *packet_buff_len,
- int min_packet_len,
- int new_packet_len)
-{
- unsigned char *new_buff;
-
- new_buff = kmalloc(new_packet_len, GFP_ATOMIC);
-
- /* keep old buffer if kmalloc should fail */
- if (new_buff) {
- memcpy(new_buff, *packet_buff, min_packet_len);
- kfree(*packet_buff);
- *packet_buff = new_buff;
- *packet_buff_len = new_packet_len;
- }
-}
-
-static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len,
- int min_packet_len)
+/**
+ * batadv_tt_tvlv_container_update - update the translation table tvlv container
+ * after local tt changes have been committed
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
{
- int req_len;
+ struct batadv_tt_change_node *entry, *safe;
+ struct batadv_tvlv_tt_data *tt_data;
+ struct batadv_tvlv_tt_change *tt_change;
+ int tt_diff_len = 0, tt_change_len = 0;
+ int tt_diff_entries_num = 0, tt_diff_entries_count = 0;
- req_len = min_packet_len;
- req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
+ tt_diff_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
/* if we have too many changes for one packet don't send any
* and wait for the tt table request which will be fragmented
*/
- if (req_len > bat_priv->soft_iface->mtu)
- req_len = min_packet_len;
+ if (tt_diff_len > bat_priv->soft_iface->mtu)
+ tt_diff_len = 0;
- batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
- min_packet_len, req_len);
-}
-
-static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len,
- int min_packet_len)
-{
- struct batadv_tt_change_node *entry, *safe;
- int count = 0, tot_changes = 0, new_len;
- unsigned char *tt_buff;
+ tt_data = kzalloc(sizeof(*tt_data) + tt_diff_len, GFP_ATOMIC);
+ if (!tt_data)
+ return;
- batadv_tt_prepare_packet_buff(bat_priv, packet_buff,
- packet_buff_len, min_packet_len);
+ tt_data->flags = BATADV_TT_OGM_DIFF;
+ tt_data->ttvn = atomic_read(&bat_priv->tt.vn);
+ tt_data->crc = htonl(bat_priv->tt.local_crc);
- new_len = *packet_buff_len - min_packet_len;
- tt_buff = *packet_buff + min_packet_len;
+ if (tt_diff_len == 0)
+ goto container_register;
- if (new_len > 0)
- tot_changes = new_len / batadv_tt_len(1);
+ tt_diff_entries_num = tt_diff_len / batadv_tt_len(1);
spin_lock_bh(&bat_priv->tt.changes_list_lock);
atomic_set(&bat_priv->tt.local_changes, 0);
+ tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1);
+
list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
- if (count < tot_changes) {
- memcpy(tt_buff + batadv_tt_len(count),
- &entry->change, sizeof(struct batadv_tt_change));
- count++;
+ if (tt_diff_entries_count < tt_diff_entries_num) {
+ memcpy(tt_change + tt_diff_entries_count,
+ &entry->change,
+ sizeof(struct batadv_tvlv_tt_change));
+ tt_diff_entries_count++;
}
list_del(&entry->list);
kfree(entry);
@@ -452,20 +439,25 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
kfree(bat_priv->tt.last_changeset);
bat_priv->tt.last_changeset_len = 0;
bat_priv->tt.last_changeset = NULL;
+ tt_change_len = batadv_tt_len(tt_diff_entries_count);
/* check whether this new OGM has no changes due to size problems */
- if (new_len > 0) {
+ if (tt_diff_entries_count > 0) {
/* if kmalloc() fails we will reply with the full table
* instead of providing the diff
*/
- bat_priv->tt.last_changeset = kmalloc(new_len, GFP_ATOMIC);
+ bat_priv->tt.last_changeset = kzalloc(tt_diff_len, GFP_ATOMIC);
if (bat_priv->tt.last_changeset) {
- memcpy(bat_priv->tt.last_changeset, tt_buff, new_len);
- bat_priv->tt.last_changeset_len = new_len;
+ memcpy(bat_priv->tt.last_changeset,
+ tt_change, tt_change_len);
+ bat_priv->tt.last_changeset_len = tt_diff_len;
}
}
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
- return count;
+container_register:
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data,
+ sizeof(*tt_data) + tt_change_len);
+ kfree(tt_data);
}
int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
@@ -489,7 +481,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
goto out;
seq_printf(seq,
- "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.4x):\n",
+ "Locally retrieved addresses (from %s) announced via TT (TTVN: %u CRC: %#.8x):\n",
net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn),
bat_priv->tt.local_crc);
seq_printf(seq, " %-13s %-7s %-10s\n", "Client", "Flags",
@@ -1001,7 +993,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
if (best_entry) {
last_ttvn = atomic_read(&best_entry->orig_node->last_ttvn);
seq_printf(seq,
- " %c %pM (%3u) via %pM (%3u) (%#.4x) [%c%c%c]\n",
+ " %c %pM (%3u) via %pM (%3u) (%#.8x) [%c%c%c]\n",
'*', tt_global_entry->common.addr,
best_entry->ttvn, best_entry->orig_node->orig,
last_ttvn, best_entry->orig_node->tt_crc,
@@ -1045,7 +1037,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Globally announced TT entries received via the mesh %s\n",
net_dev->name);
- seq_printf(seq, " %-13s %s %-15s %s (%-6s) %s\n",
+ seq_printf(seq, " %-13s %s %-15s %s (%-10s) %s\n",
"Client", "(TTVN)", "Originator", "(Curr TTVN)", "CRC",
"Flags");
@@ -1402,17 +1394,19 @@ out:
return orig_node;
}
-/* Calculates the checksum of the local table of a given orig_node */
-static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
+/**
+ * batadv_tt_global_crc - calculates the checksum of the local table belonging
+ * to the given orig_node
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
- uint16_t total = 0, total_one;
struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
struct hlist_head *head;
- uint32_t i;
- int j;
+ uint32_t i, crc = 0;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1443,27 +1437,24 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
orig_node))
continue;
- total_one = 0;
- for (j = 0; j < ETH_ALEN; j++)
- total_one = crc16_byte(total_one,
- tt_common->addr[j]);
- total ^= total_one;
+ crc ^= crc32c(0, tt_common->addr, ETH_ALEN);
}
rcu_read_unlock();
}
- return total;
+ return crc;
}
-/* Calculates the checksum of the local table */
-static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
+/**
+ * batadv_tt_local_crc - calculates the checksum of the local table
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
{
- uint16_t total = 0, total_one;
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct hlist_head *head;
- uint32_t i;
- int j;
+ uint32_t i, crc = 0;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1475,16 +1466,13 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
*/
if (tt_common->flags & BATADV_TT_CLIENT_NEW)
continue;
- total_one = 0;
- for (j = 0; j < ETH_ALEN; j++)
- total_one = crc16_byte(total_one,
- tt_common->addr[j]);
- total ^= total_one;
+
+ crc ^= crc32c(0, tt_common->addr, ETH_ALEN);
}
rcu_read_unlock();
}
- return total;
+ return crc;
}
static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
@@ -1504,7 +1492,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
const unsigned char *tt_buff,
- uint8_t tt_num_changes)
+ uint16_t tt_num_changes)
{
uint16_t tt_buff_len = batadv_tt_len(tt_num_changes);
@@ -1569,9 +1557,14 @@ unlock:
return tt_req_node;
}
-/* data_ptr is useless here, but has to be kept to respect the prototype */
-static int batadv_tt_local_valid_entry(const void *entry_ptr,
- const void *data_ptr)
+/**
+ * batadv_tt_local_valid - verify that given tt entry is a valid one
+ * @entry_ptr: to be checked local tt entry
+ * @data_ptr: not used but definition required to satisfy the callback prototype
+ *
+ * Returns 1 if the entry is a valid, 0 otherwise.
+ */
+static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
{
const struct batadv_tt_common_entry *tt_common_entry = entry_ptr;
@@ -1598,41 +1591,45 @@ static int batadv_tt_global_valid(const void *entry_ptr,
return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node);
}
-static struct sk_buff *
-batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
- struct batadv_hashtable *hash,
- struct batadv_priv *bat_priv,
- int (*valid_cb)(const void *, const void *),
- void *cb_data)
+/**
+ * batadv_tt_tvlv_generate - creates tvlv tt data buffer to fill it with the
+ * tt entries from the specified tt hash
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: hash table containing the tt entries
+ * @tt_len: expected tvlv tt data buffer length in number of bytes
+ * @valid_cb: function to filter tt change entries
+ * @cb_data: data passed to the filter function as argument
+ *
+ * Returns pointer to allocated tvlv tt data buffer if operation was
+ * successful or NULL otherwise.
+ */
+static struct batadv_tvlv_tt_data *
+batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
+ struct batadv_hashtable *hash, uint16_t tt_len,
+ int (*valid_cb)(const void *, const void *),
+ void *cb_data)
{
struct batadv_tt_common_entry *tt_common_entry;
- struct batadv_tt_query_packet *tt_response;
- struct batadv_tt_change *tt_change;
+ struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
+ struct batadv_tvlv_tt_change *tt_change;
struct hlist_head *head;
- struct sk_buff *skb = NULL;
- uint16_t tt_tot, tt_count;
- ssize_t tt_query_size = sizeof(struct batadv_tt_query_packet);
+ uint16_t tt_tot, tt_num_entries = 0;
+ ssize_t tvlv_tt_size = sizeof(struct batadv_tvlv_tt_data);
uint32_t i;
- size_t len;
- if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) {
- tt_len = bat_priv->soft_iface->mtu - tt_query_size;
- tt_len -= tt_len % sizeof(struct batadv_tt_change);
+ if (tvlv_tt_size + tt_len > bat_priv->soft_iface->mtu) {
+ tt_len = bat_priv->soft_iface->mtu - tvlv_tt_size;
+ tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change);
}
- tt_tot = tt_len / sizeof(struct batadv_tt_change);
- len = tt_query_size + tt_len;
- skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
- if (!skb)
- goto out;
+ tt_tot = tt_len / sizeof(struct batadv_tvlv_tt_change);
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
- tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
- tt_response->ttvn = ttvn;
+ tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
+ GFP_ATOMIC);
+ if (!tvlv_tt_data)
+ goto out;
- tt_change = (struct batadv_tt_change *)(skb->data + tt_query_size);
- tt_count = 0;
+ tt_change = (struct batadv_tvlv_tt_change *)(tvlv_tt_data + 1);
rcu_read_lock();
for (i = 0; i < hash->size; i++) {
@@ -1640,7 +1637,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
hlist_for_each_entry_rcu(tt_common_entry,
head, hash_entry) {
- if (tt_count == tt_tot)
+ if (tt_tot == tt_num_entries)
break;
if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data)))
@@ -1649,33 +1646,36 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
memcpy(tt_change->addr, tt_common_entry->addr,
ETH_ALEN);
tt_change->flags = tt_common_entry->flags;
+ tt_change->reserved = 0;
- tt_count++;
+ tt_num_entries++;
tt_change++;
}
}
rcu_read_unlock();
- /* store in the message the number of entries we have successfully
- * copied
- */
- tt_response->tt_data = htons(tt_count);
-
out:
- return skb;
+ return tvlv_tt_data;
}
+/**
+ * batadv_send_tt_request - send a TT Request message to a given node
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst_orig_node: the destination of the message
+ * @ttvn: the version number that the source of the message is looking for
+ * @tt_crc: the CRC associated with the version number
+ * @full_table: ask for the entire translation table if true, while only for the
+ * last TT diff otherwise
+ */
static int batadv_send_tt_request(struct batadv_priv *bat_priv,
struct batadv_orig_node *dst_orig_node,
- uint8_t ttvn, uint16_t tt_crc,
+ uint8_t ttvn, uint32_t tt_crc,
bool full_table)
{
- struct sk_buff *skb = NULL;
- struct batadv_tt_query_packet *tt_request;
+ struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_hard_iface *primary_if;
struct batadv_tt_req_node *tt_req_node = NULL;
- int ret = 1;
- size_t tt_req_len;
+ bool ret = false;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
@@ -1688,157 +1688,136 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!tt_req_node)
goto out;
- skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN);
- if (!skb)
+ tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data), GFP_ATOMIC);
+ if (!tvlv_tt_data)
goto out;
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
-
- tt_req_len = sizeof(*tt_request);
- tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len);
-
- tt_request->header.packet_type = BATADV_TT_QUERY;
- tt_request->header.version = BATADV_COMPAT_VERSION;
- memcpy(tt_request->src, primary_if->net_dev->dev_addr, ETH_ALEN);
- memcpy(tt_request->dst, dst_orig_node->orig, ETH_ALEN);
- tt_request->header.ttl = BATADV_TTL;
- tt_request->ttvn = ttvn;
- tt_request->tt_data = htons(tt_crc);
- tt_request->flags = BATADV_TT_REQUEST;
+ tvlv_tt_data->flags = BATADV_TT_REQUEST;
+ tvlv_tt_data->ttvn = ttvn;
+ tvlv_tt_data->crc = htonl(tt_crc);
if (full_table)
- tt_request->flags |= BATADV_TT_FULL_TABLE;
+ tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE;
batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_REQUEST to %pM [%c]\n",
- dst_orig_node->orig, (full_table ? 'F' : '.'));
+ dst_orig_node->orig, full_table ? 'F' : '.');
batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX);
-
- if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP)
- ret = 0;
+ batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr,
+ dst_orig_node->orig, BATADV_TVLV_TT, 1,
+ tvlv_tt_data, sizeof(*tvlv_tt_data));
+ ret = true;
out:
if (primary_if)
batadv_hardif_free_ref(primary_if);
- if (ret)
- kfree_skb(skb);
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
list_del(&tt_req_node->list);
spin_unlock_bh(&bat_priv->tt.req_list_lock);
kfree(tt_req_node);
}
+ kfree(tvlv_tt_data);
return ret;
}
-static bool
-batadv_send_other_tt_response(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_request)
+/**
+ * batadv_send_other_tt_response - send reply to tt request concerning another
+ * node's translation table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_data: tt data containing the tt request information
+ * @req_src: mac address of tt request sender
+ * @req_dst: mac address of tt request recipient
+ *
+ * Returns true if tt request reply was sent, false otherwise.
+ */
+static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_tt_data *tt_data,
+ uint8_t *req_src, uint8_t *req_dst)
{
struct batadv_orig_node *req_dst_orig_node;
struct batadv_orig_node *res_dst_orig_node = NULL;
- uint8_t orig_ttvn, req_ttvn, ttvn;
- int res, ret = false;
- unsigned char *tt_buff;
- bool full_table;
- uint16_t tt_len, tt_tot;
- struct sk_buff *skb = NULL;
- struct batadv_tt_query_packet *tt_response;
- uint8_t *packet_pos;
- size_t len;
+ struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
+ uint8_t orig_ttvn, req_ttvn;
+ uint16_t tt_len;
+ bool ret = false, full_table;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n",
- tt_request->src, tt_request->ttvn, tt_request->dst,
- (tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ req_src, tt_data->ttvn, req_dst,
+ (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
/* Let's get the orig node of the REAL destination */
- req_dst_orig_node = batadv_orig_hash_find(bat_priv, tt_request->dst);
+ req_dst_orig_node = batadv_orig_hash_find(bat_priv, req_dst);
if (!req_dst_orig_node)
goto out;
- res_dst_orig_node = batadv_orig_hash_find(bat_priv, tt_request->src);
+ res_dst_orig_node = batadv_orig_hash_find(bat_priv, req_src);
if (!res_dst_orig_node)
goto out;
orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
- req_ttvn = tt_request->ttvn;
+ req_ttvn = tt_data->ttvn;
- /* I don't have the requested data */
+ /* this node doesn't have the requested data */
if (orig_ttvn != req_ttvn ||
- tt_request->tt_data != htons(req_dst_orig_node->tt_crc))
+ tt_data->crc != htonl(req_dst_orig_node->tt_crc))
goto out;
/* If the full table has been explicitly requested */
- if (tt_request->flags & BATADV_TT_FULL_TABLE ||
+ if (tt_data->flags & BATADV_TT_FULL_TABLE ||
!req_dst_orig_node->tt_buff)
full_table = true;
else
full_table = false;
- /* In this version, fragmentation is not implemented, then
- * I'll send only one packet with as much TT entries as I can
+ /* TT fragmentation hasn't been implemented yet, so send as many
+ * TT entries fit a single packet as possible only
*/
if (!full_table) {
spin_lock_bh(&req_dst_orig_node->tt_buff_lock);
tt_len = req_dst_orig_node->tt_buff_len;
- tt_tot = tt_len / sizeof(struct batadv_tt_change);
- len = sizeof(*tt_response) + tt_len;
- skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
- if (!skb)
+ tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
+ GFP_ATOMIC);
+ if (!tvlv_tt_data)
goto unlock;
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
- packet_pos = skb_put(skb, len);
- tt_response = (struct batadv_tt_query_packet *)packet_pos;
- tt_response->ttvn = req_ttvn;
- tt_response->tt_data = htons(tt_tot);
-
- tt_buff = skb->data + sizeof(*tt_response);
/* Copy the last orig_node's OGM buffer */
- memcpy(tt_buff, req_dst_orig_node->tt_buff,
+ memcpy(tvlv_tt_data + 1, req_dst_orig_node->tt_buff,
req_dst_orig_node->tt_buff_len);
-
spin_unlock_bh(&req_dst_orig_node->tt_buff_lock);
} else {
tt_len = (uint16_t)atomic_read(&req_dst_orig_node->tt_size);
- tt_len *= sizeof(struct batadv_tt_change);
- ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
-
- skb = batadv_tt_response_fill_table(tt_len, ttvn,
- bat_priv->tt.global_hash,
- bat_priv,
- batadv_tt_global_valid,
- req_dst_orig_node);
- if (!skb)
+ tt_len = batadv_tt_len(tt_len);
+
+ tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv,
+ bat_priv->tt.global_hash,
+ tt_len,
+ batadv_tt_global_valid,
+ req_dst_orig_node);
+ if (!tvlv_tt_data)
goto out;
-
- tt_response = (struct batadv_tt_query_packet *)skb->data;
}
- tt_response->header.packet_type = BATADV_TT_QUERY;
- tt_response->header.version = BATADV_COMPAT_VERSION;
- tt_response->header.ttl = BATADV_TTL;
- memcpy(tt_response->src, req_dst_orig_node->orig, ETH_ALEN);
- memcpy(tt_response->dst, tt_request->src, ETH_ALEN);
- tt_response->flags = BATADV_TT_RESPONSE;
+ tvlv_tt_data->flags = BATADV_TT_RESPONSE;
+ tvlv_tt_data->ttvn = req_ttvn;
if (full_table)
- tt_response->flags |= BATADV_TT_FULL_TABLE;
+ tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending TT_RESPONSE %pM for %pM (ttvn: %u)\n",
- res_dst_orig_node->orig, req_dst_orig_node->orig, req_ttvn);
+ "Sending TT_RESPONSE %pM for %pM [%c] (ttvn: %u)\n",
+ res_dst_orig_node->orig, req_dst_orig_node->orig,
+ full_table ? 'F' : '.', req_ttvn);
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
- res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL);
- if (res != NET_XMIT_DROP)
- ret = true;
+ batadv_tvlv_unicast_send(bat_priv, req_dst_orig_node->orig,
+ req_src, BATADV_TVLV_TT, 1,
+ tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len);
+ ret = true;
goto out;
unlock:
@@ -1849,37 +1828,40 @@ out:
batadv_orig_node_free_ref(res_dst_orig_node);
if (req_dst_orig_node)
batadv_orig_node_free_ref(req_dst_orig_node);
- if (!ret)
- kfree_skb(skb);
+ kfree(tvlv_tt_data);
return ret;
}
-static bool
-batadv_send_my_tt_response(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_request)
+/**
+ * batadv_send_my_tt_response - send reply to tt request concerning this node's
+ * translation table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_data: tt data containing the tt request information
+ * @req_src: mac address of tt request sender
+ *
+ * Returns true if tt request reply was sent, false otherwise.
+ */
+static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_tt_data *tt_data,
+ uint8_t *req_src)
{
+ struct batadv_tvlv_tt_data *tvlv_tt_data = NULL;
struct batadv_orig_node *orig_node;
struct batadv_hard_iface *primary_if = NULL;
- uint8_t my_ttvn, req_ttvn, ttvn;
- int ret = false;
- unsigned char *tt_buff;
+ uint8_t my_ttvn, req_ttvn;
bool full_table;
- uint16_t tt_len, tt_tot;
- struct sk_buff *skb = NULL;
- struct batadv_tt_query_packet *tt_response;
- uint8_t *packet_pos;
- size_t len;
+ uint16_t tt_len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n",
- tt_request->src, tt_request->ttvn,
- (tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ req_src, tt_data->ttvn,
+ (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
- req_ttvn = tt_request->ttvn;
+ req_ttvn = tt_data->ttvn;
- orig_node = batadv_orig_hash_find(bat_priv, tt_request->src);
+ orig_node = batadv_orig_hash_find(bat_priv, req_src);
if (!orig_node)
goto out;
@@ -1890,71 +1872,58 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
/* If the full table has been explicitly requested or the gap
* is too big send the whole local translation table
*/
- if (tt_request->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn ||
+ if (tt_data->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn ||
!bat_priv->tt.last_changeset)
full_table = true;
else
full_table = false;
- /* In this version, fragmentation is not implemented, then
- * I'll send only one packet with as much TT entries as I can
+ /* TT fragmentation hasn't been implemented yet, so send as many
+ * TT entries fit a single packet as possible only
*/
if (!full_table) {
spin_lock_bh(&bat_priv->tt.last_changeset_lock);
tt_len = bat_priv->tt.last_changeset_len;
- tt_tot = tt_len / sizeof(struct batadv_tt_change);
- len = sizeof(*tt_response) + tt_len;
- skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
- if (!skb)
+ tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
+ GFP_ATOMIC);
+ if (!tvlv_tt_data)
goto unlock;
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
- packet_pos = skb_put(skb, len);
- tt_response = (struct batadv_tt_query_packet *)packet_pos;
- tt_response->ttvn = req_ttvn;
- tt_response->tt_data = htons(tt_tot);
-
- tt_buff = skb->data + sizeof(*tt_response);
- memcpy(tt_buff, bat_priv->tt.last_changeset,
+ /* Copy the last orig_node's OGM buffer */
+ memcpy(tvlv_tt_data + 1, bat_priv->tt.last_changeset,
bat_priv->tt.last_changeset_len);
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
} else {
tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num);
- tt_len *= sizeof(struct batadv_tt_change);
- ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
-
- skb = batadv_tt_response_fill_table(tt_len, ttvn,
- bat_priv->tt.local_hash,
- bat_priv,
- batadv_tt_local_valid_entry,
- NULL);
- if (!skb)
+ tt_len = batadv_tt_len(tt_len);
+ req_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
+
+ tvlv_tt_data = batadv_tt_tvlv_generate(bat_priv,
+ bat_priv->tt.local_hash,
+ tt_len,
+ batadv_tt_local_valid,
+ NULL);
+ if (!tvlv_tt_data)
goto out;
-
- tt_response = (struct batadv_tt_query_packet *)skb->data;
}
- tt_response->header.packet_type = BATADV_TT_QUERY;
- tt_response->header.version = BATADV_COMPAT_VERSION;
- tt_response->header.ttl = BATADV_TTL;
- memcpy(tt_response->src, primary_if->net_dev->dev_addr, ETH_ALEN);
- memcpy(tt_response->dst, tt_request->src, ETH_ALEN);
- tt_response->flags = BATADV_TT_RESPONSE;
+ tvlv_tt_data->flags = BATADV_TT_RESPONSE;
+ tvlv_tt_data->ttvn = req_ttvn;
if (full_table)
- tt_response->flags |= BATADV_TT_FULL_TABLE;
+ tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE;
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Sending TT_RESPONSE to %pM [%c]\n",
- orig_node->orig,
- (tt_response->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ "Sending TT_RESPONSE to %pM [%c] (ttvn: %u)\n",
+ orig_node->orig, full_table ? 'F' : '.', req_ttvn);
batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX);
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
- ret = true;
+ batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr,
+ req_src, BATADV_TVLV_TT, 1,
+ tvlv_tt_data, sizeof(*tvlv_tt_data) + tt_len);
+
goto out;
unlock:
@@ -1964,29 +1933,39 @@ out:
batadv_orig_node_free_ref(orig_node);
if (primary_if)
batadv_hardif_free_ref(primary_if);
- if (!ret)
- kfree_skb(skb);
- /* This packet was for me, so it doesn't need to be re-routed */
+ kfree(tvlv_tt_data);
+ /* The packet was for this host, so it doesn't need to be re-routed */
return true;
}
-bool batadv_send_tt_response(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_request)
+/**
+ * batadv_send_tt_response - send reply to tt request
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_data: tt data containing the tt request information
+ * @req_src: mac address of tt request sender
+ * @req_dst: mac address of tt request recipient
+ *
+ * Returns true if tt request reply was sent, false otherwise.
+ */
+static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_tt_data *tt_data,
+ uint8_t *req_src, uint8_t *req_dst)
{
- if (batadv_is_my_mac(bat_priv, tt_request->dst)) {
+ if (batadv_is_my_mac(bat_priv, req_dst)) {
/* don't answer backbone gws! */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src))
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, req_src))
return true;
- return batadv_send_my_tt_response(bat_priv, tt_request);
+ return batadv_send_my_tt_response(bat_priv, tt_data, req_src);
} else {
- return batadv_send_other_tt_response(bat_priv, tt_request);
+ return batadv_send_other_tt_response(bat_priv, tt_data,
+ req_src, req_dst);
}
}
static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
- struct batadv_tt_change *tt_change,
+ struct batadv_tvlv_tt_change *tt_change,
uint16_t tt_num_changes, uint8_t ttvn)
{
int i;
@@ -2016,11 +1995,12 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
}
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_response)
+ struct batadv_tvlv_tt_data *tt_data,
+ uint8_t *resp_src, uint16_t num_entries)
{
struct batadv_orig_node *orig_node;
- orig_node = batadv_orig_hash_find(bat_priv, tt_response->src);
+ orig_node = batadv_orig_hash_find(bat_priv, resp_src);
if (!orig_node)
goto out;
@@ -2028,9 +2008,8 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
batadv_tt_global_del_orig(bat_priv, orig_node, "Received full table");
_batadv_tt_update_changes(bat_priv, orig_node,
- (struct batadv_tt_change *)(tt_response + 1),
- ntohs(tt_response->tt_data),
- tt_response->ttvn);
+ (struct batadv_tvlv_tt_change *)(tt_data + 1),
+ num_entries, tt_data->ttvn);
spin_lock_bh(&orig_node->tt_buff_lock);
kfree(orig_node->tt_buff);
@@ -2038,7 +2017,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
orig_node->tt_buff = NULL;
spin_unlock_bh(&orig_node->tt_buff_lock);
- atomic_set(&orig_node->last_ttvn, tt_response->ttvn);
+ atomic_set(&orig_node->last_ttvn, tt_data->ttvn);
out:
if (orig_node)
@@ -2048,7 +2027,7 @@ out:
static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
uint16_t tt_num_changes, uint8_t ttvn,
- struct batadv_tt_change *tt_change)
+ struct batadv_tvlv_tt_change *tt_change)
{
_batadv_tt_update_changes(bat_priv, orig_node, tt_change,
tt_num_changes, ttvn);
@@ -2079,40 +2058,46 @@ out:
return ret;
}
-void batadv_handle_tt_response(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_response)
+/**
+ * batadv_handle_tt_response - process incoming tt reply
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tt_data: tt data containing the tt request information
+ * @resp_src: mac address of tt reply sender
+ * @num_entries: number of tt change entries appended to the tt data
+ */
+static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_tt_data *tt_data,
+ uint8_t *resp_src, uint16_t num_entries)
{
struct batadv_tt_req_node *node, *safe;
struct batadv_orig_node *orig_node = NULL;
- struct batadv_tt_change *tt_change;
+ struct batadv_tvlv_tt_change *tt_change;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n",
- tt_response->src, tt_response->ttvn,
- ntohs(tt_response->tt_data),
- (tt_response->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
+ resp_src, tt_data->ttvn, num_entries,
+ (tt_data->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
/* we should have never asked a backbone gw */
- if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_response->src))
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, resp_src))
goto out;
- orig_node = batadv_orig_hash_find(bat_priv, tt_response->src);
+ orig_node = batadv_orig_hash_find(bat_priv, resp_src);
if (!orig_node)
goto out;
- if (tt_response->flags & BATADV_TT_FULL_TABLE) {
- batadv_tt_fill_gtable(bat_priv, tt_response);
+ if (tt_data->flags & BATADV_TT_FULL_TABLE) {
+ batadv_tt_fill_gtable(bat_priv, tt_data, resp_src, num_entries);
} else {
- tt_change = (struct batadv_tt_change *)(tt_response + 1);
- batadv_tt_update_changes(bat_priv, orig_node,
- ntohs(tt_response->tt_data),
- tt_response->ttvn, tt_change);
+ tt_change = (struct batadv_tvlv_tt_change *)(tt_data + 1);
+ batadv_tt_update_changes(bat_priv, orig_node, num_entries,
+ tt_data->ttvn, tt_change);
}
/* Delete the tt_req_node from pending tt_requests list */
spin_lock_bh(&bat_priv->tt.req_list_lock);
list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
- if (!batadv_compare_eth(node->addr, tt_response->src))
+ if (!batadv_compare_eth(node->addr, resp_src))
continue;
list_del(&node->list);
kfree(node);
@@ -2126,25 +2111,6 @@ out:
batadv_orig_node_free_ref(orig_node);
}
-int batadv_tt_init(struct batadv_priv *bat_priv)
-{
- int ret;
-
- ret = batadv_tt_local_init(bat_priv);
- if (ret < 0)
- return ret;
-
- ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
- return ret;
-
- INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
- queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work,
- msecs_to_jiffies(BATADV_TT_WORK_PERIOD));
-
- return 1;
-}
-
static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
{
struct batadv_tt_roam_node *node, *safe;
@@ -2228,11 +2194,12 @@ unlock:
static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
struct batadv_orig_node *orig_node)
{
- struct sk_buff *skb = NULL;
- struct batadv_roam_adv_packet *roam_adv_packet;
- int ret = 1;
struct batadv_hard_iface *primary_if;
- size_t len = sizeof(*roam_adv_packet);
+ struct batadv_tvlv_roam_adv tvlv_roam;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
/* before going on we have to check whether the client has
* already roamed to us too many times
@@ -2240,40 +2207,22 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
if (!batadv_tt_check_roam_count(bat_priv, client))
goto out;
- skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
- if (!skb)
- goto out;
-
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
-
- roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
-
- roam_adv_packet->header.packet_type = BATADV_ROAM_ADV;
- roam_adv_packet->header.version = BATADV_COMPAT_VERSION;
- roam_adv_packet->header.ttl = BATADV_TTL;
- roam_adv_packet->reserved = 0;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
- memcpy(roam_adv_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN);
- batadv_hardif_free_ref(primary_if);
- memcpy(roam_adv_packet->dst, orig_node->orig, ETH_ALEN);
- memcpy(roam_adv_packet->client, client, ETH_ALEN);
-
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Sending ROAMING_ADV to %pM (client %pM)\n",
orig_node->orig, client);
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
- ret = 0;
+ memcpy(tvlv_roam.client, client, sizeof(tvlv_roam.client));
+ tvlv_roam.reserved = 0;
+
+ batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr,
+ orig_node->orig, BATADV_TVLV_ROAM, 1,
+ &tvlv_roam, sizeof(tvlv_roam));
out:
- if (ret && skb)
- kfree_skb(skb);
- return;
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
}
static void batadv_tt_purge(struct work_struct *work)
@@ -2297,6 +2246,9 @@ static void batadv_tt_purge(struct work_struct *work)
void batadv_tt_free(struct batadv_priv *bat_priv)
{
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_TT, 1);
+
cancel_delayed_work_sync(&bat_priv->tt.work);
batadv_tt_local_table_free(bat_priv);
@@ -2384,14 +2336,20 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
}
}
-static int batadv_tt_commit_changes(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len, int packet_min_len)
+/**
+ * batadv_tt_local_commit_changes - commit all pending local tt changes which
+ * have been queued in the time since the last commit
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
{
uint16_t changed_num = 0;
- if (atomic_read(&bat_priv->tt.local_changes) < 1)
- return -ENOENT;
+ if (atomic_read(&bat_priv->tt.local_changes) < 1) {
+ if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
+ batadv_tt_tvlv_container_update(bat_priv);
+ return;
+ }
changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash,
BATADV_TT_CLIENT_NEW, false);
@@ -2409,32 +2367,7 @@ static int batadv_tt_commit_changes(struct batadv_priv *bat_priv,
/* reset the sending counter */
atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX);
-
- return batadv_tt_changes_fill_buff(bat_priv, packet_buff,
- packet_buff_len, packet_min_len);
-}
-
-/* when calling this function (hard_iface == primary_if) has to be true */
-int batadv_tt_append_diff(struct batadv_priv *bat_priv,
- unsigned char **packet_buff, int *packet_buff_len,
- int packet_min_len)
-{
- int tt_num_changes;
-
- /* if at least one change happened */
- tt_num_changes = batadv_tt_commit_changes(bat_priv, packet_buff,
- packet_buff_len,
- packet_min_len);
-
- /* if the changes have been sent often enough */
- if ((tt_num_changes < 0) &&
- (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))) {
- batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
- packet_min_len, packet_min_len);
- tt_num_changes = 0;
- }
-
- return tt_num_changes;
+ batadv_tt_tvlv_container_update(bat_priv);
}
bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
@@ -2468,14 +2401,25 @@ out:
return ret;
}
-void batadv_tt_update_orig(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- const unsigned char *tt_buff, uint8_t tt_num_changes,
- uint8_t ttvn, uint16_t tt_crc)
+/**
+ * batadv_tt_update_orig - update global translation table with new tt
+ * information received via ogms
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node of the ogm
+ * @tt_buff: buffer holding the tt information
+ * @tt_num_changes: number of tt changes inside the tt buffer
+ * @ttvn: translation table version number of this changeset
+ * @tt_crc: crc32 checksum of orig node's translation table
+ */
+static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ const unsigned char *tt_buff,
+ uint16_t tt_num_changes, uint8_t ttvn,
+ uint32_t tt_crc)
{
uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
bool full_table = true;
- struct batadv_tt_change *tt_change;
+ struct batadv_tvlv_tt_change *tt_change;
/* don't care about a backbone gateways updates. */
if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig))
@@ -2496,7 +2440,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv,
goto request_table;
}
- tt_change = (struct batadv_tt_change *)tt_buff;
+ tt_change = (struct batadv_tvlv_tt_change *)tt_buff;
batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
ttvn, tt_change);
@@ -2525,7 +2469,7 @@ void batadv_tt_update_orig(struct batadv_priv *bat_priv,
orig_node->tt_crc != tt_crc) {
request_table:
batadv_dbg(BATADV_DBG_TT, bat_priv,
- "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.4x last_crc: %#.4x num_changes: %u)\n",
+ "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %#.8x last_crc: %#.8x num_changes: %u)\n",
orig_node->orig, ttvn, orig_ttvn, tt_crc,
orig_node->tt_crc, tt_num_changes);
batadv_send_tt_request(bat_priv, orig_node, ttvn,
@@ -2605,3 +2549,198 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
out:
return ret;
}
+
+/**
+ * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node of the ogm
+ * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
+ * @tvlv_value: tvlv buffer containing the gateway data
+ * @tvlv_value_len: tvlv buffer length
+ */
+static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value,
+ uint16_t tvlv_value_len)
+{
+ struct batadv_tvlv_tt_data *tt_data;
+ uint16_t num_entries;
+
+ if (tvlv_value_len < sizeof(*tt_data))
+ return;
+
+ tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tvlv_value_len -= sizeof(*tt_data);
+
+ num_entries = tvlv_value_len / batadv_tt_len(1);
+
+ batadv_tt_update_orig(bat_priv, orig,
+ (unsigned char *)(tt_data + 1),
+ num_entries, tt_data->ttvn, ntohl(tt_data->crc));
+}
+
+/**
+ * batadv_tt_tvlv_unicast_handler_v1 - process incoming (unicast) tt tvlv
+ * container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: mac address of tt tvlv sender
+ * @dst: mac address of tt tvlv recipient
+ * @tvlv_value: tvlv buffer containing the tt data
+ * @tvlv_value_len: tvlv buffer length
+ *
+ * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
+ * otherwise.
+ */
+static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value,
+ uint16_t tvlv_value_len)
+{
+ struct batadv_tvlv_tt_data *tt_data;
+ uint16_t num_entries;
+ char tt_flag;
+ bool ret;
+
+ if (tvlv_value_len < sizeof(*tt_data))
+ return NET_RX_SUCCESS;
+
+ tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tvlv_value_len -= sizeof(*tt_data);
+
+ num_entries = tvlv_value_len / batadv_tt_len(1);
+
+ switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) {
+ case BATADV_TT_REQUEST:
+ batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_RX);
+
+ /* If this node cannot provide a TT response the tt_request is
+ * forwarded
+ */
+ ret = batadv_send_tt_response(bat_priv, tt_data, src, dst);
+ if (!ret) {
+ if (tt_data->flags & BATADV_TT_FULL_TABLE)
+ tt_flag = 'F';
+ else
+ tt_flag = '.';
+
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Routing TT_REQUEST to %pM [%c]\n",
+ dst, tt_flag);
+ /* tvlv API will re-route the packet */
+ return NET_RX_DROP;
+ }
+ break;
+ case BATADV_TT_RESPONSE:
+ batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX);
+
+ if (batadv_is_my_mac(bat_priv, dst)) {
+ batadv_handle_tt_response(bat_priv, tt_data,
+ src, num_entries);
+ return NET_RX_SUCCESS;
+ }
+
+ if (tt_data->flags & BATADV_TT_FULL_TABLE)
+ tt_flag = 'F';
+ else
+ tt_flag = '.';
+
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Routing TT_RESPONSE to %pM [%c]\n", dst, tt_flag);
+
+ /* tvlv API will re-route the packet */
+ return NET_RX_DROP;
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_roam_tvlv_unicast_handler_v1 - process incoming tt roam tvlv container
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: mac address of tt tvlv sender
+ * @dst: mac address of tt tvlv recipient
+ * @tvlv_value: tvlv buffer containing the tt data
+ * @tvlv_value_len: tvlv buffer length
+ *
+ * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
+ * otherwise.
+ */
+static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value,
+ uint16_t tvlv_value_len)
+{
+ struct batadv_tvlv_roam_adv *roaming_adv;
+ struct batadv_orig_node *orig_node = NULL;
+
+ /* If this node is not the intended recipient of the
+ * roaming advertisement the packet is forwarded
+ * (the tvlv API will re-route the packet).
+ */
+ if (!batadv_is_my_mac(bat_priv, dst))
+ return NET_RX_DROP;
+
+ /* check if it is a backbone gateway. we don't accept
+ * roaming advertisement from it, as it has the same
+ * entries as we have.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, src))
+ goto out;
+
+ if (tvlv_value_len < sizeof(*roaming_adv))
+ goto out;
+
+ orig_node = batadv_orig_hash_find(bat_priv, src);
+ if (!orig_node)
+ goto out;
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
+ roaming_adv = (struct batadv_tvlv_roam_adv *)tvlv_value;
+
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Received ROAMING_ADV from %pM (client %pM)\n",
+ src, roaming_adv->client);
+
+ batadv_tt_global_add(bat_priv, orig_node, roaming_adv->client,
+ BATADV_TT_CLIENT_ROAM,
+ atomic_read(&orig_node->last_ttvn) + 1);
+
+out:
+ if (orig_node)
+ batadv_orig_node_free_ref(orig_node);
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tt_init - initialise the translation table internals
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return 0 on success or negative error number in case of failure.
+ */
+int batadv_tt_init(struct batadv_priv *bat_priv)
+{
+ int ret;
+
+ ret = batadv_tt_local_init(bat_priv);
+ if (ret < 0)
+ return ret;
+
+ ret = batadv_tt_global_init(bat_priv);
+ if (ret < 0)
+ return ret;
+
+ batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
+ batadv_tt_tvlv_unicast_handler_v1,
+ BATADV_TVLV_TT, 1, BATADV_NO_FLAGS);
+
+ batadv_tvlv_handler_register(bat_priv, NULL,
+ batadv_roam_tvlv_unicast_handler_v1,
+ BATADV_TVLV_ROAM, 1, BATADV_NO_FLAGS);
+
+ INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work,
+ msecs_to_jiffies(BATADV_TT_WORK_PERIOD));
+
+ return 1;
+}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 659a3bb..b4b6dea 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -20,7 +20,6 @@
#ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
-int batadv_tt_len(int changes_num);
int batadv_tt_init(struct batadv_priv *bat_priv);
void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
int ifindex);
@@ -43,20 +42,10 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
const uint8_t *src,
const uint8_t *addr);
void batadv_tt_free(struct batadv_priv *bat_priv);
-bool batadv_send_tt_response(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_request);
bool batadv_is_my_client(struct batadv_priv *bat_priv, const uint8_t *addr);
-void batadv_handle_tt_response(struct batadv_priv *bat_priv,
- struct batadv_tt_query_packet *tt_response);
bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, uint8_t *src,
uint8_t *dst);
-void batadv_tt_update_orig(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig_node,
- const unsigned char *tt_buff, uint8_t tt_num_changes,
- uint8_t ttvn, uint16_t tt_crc);
-int batadv_tt_append_diff(struct batadv_priv *bat_priv,
- unsigned char **packet_buff, int *packet_buff_len,
- int packet_min_len);
+void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv);
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
uint8_t *addr);
bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index b2c94e1..8fbd89d 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -99,8 +99,7 @@ struct batadv_hard_iface {
* @last_seen: time when last packet from this node was received
* @bcast_seqno_reset: time when the broadcast seqno window was reset
* @batman_seqno_reset: time when the batman seqno window was reset
- * @gw_flags: flags related to gateway class
- * @flags: for now only VIS_SERVER flag
+ * @capabilities: announced capabilities of this originator
* @last_ttvn: last seen translation table version number
* @tt_crc: CRC of the translation table
* @tt_buff: last tt changeset this node received from the orig node
@@ -147,10 +146,9 @@ struct batadv_orig_node {
unsigned long last_seen;
unsigned long bcast_seqno_reset;
unsigned long batman_seqno_reset;
- uint8_t gw_flags;
- uint8_t flags;
+ uint8_t capabilities;
atomic_t last_ttvn;
- uint16_t tt_crc;
+ uint32_t tt_crc;
unsigned char *tt_buff;
int16_t tt_buff_len;
spinlock_t tt_buff_lock; /* protects tt_buff & tt_buff_len */
@@ -186,9 +184,21 @@ struct batadv_orig_node {
};
/**
+ * enum batadv_orig_capabilities - orig node capabilities
+ * @BATADV_ORIG_CAPA_HAS_DAT: orig node has distributed arp table enabled
+ * @BATADV_ORIG_CAPA_HAS_NC: orig node has network coding enabled
+ */
+enum batadv_orig_capabilities {
+ BATADV_ORIG_CAPA_HAS_DAT = BIT(0),
+ BATADV_ORIG_CAPA_HAS_NC = BIT(1),
+};
+
+/**
* struct batadv_gw_node - structure for orig nodes announcing gw capabilities
* @list: list node for batadv_priv_gw::list
* @orig_node: pointer to corresponding orig node
+ * @bandwidth_down: advertised uplink download bandwidth
+ * @bandwidth_up: advertised uplink upload bandwidth
* @deleted: this struct is scheduled for deletion
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
@@ -196,6 +206,8 @@ struct batadv_orig_node {
struct batadv_gw_node {
struct hlist_node list;
struct batadv_orig_node *orig_node;
+ uint32_t bandwidth_down;
+ uint32_t bandwidth_up;
unsigned long deleted;
atomic_t refcount;
struct rcu_head rcu;
@@ -363,7 +375,7 @@ struct batadv_priv_tt {
spinlock_t req_list_lock; /* protects req_list */
spinlock_t roam_list_lock; /* protects roam_list */
atomic_t local_entry_num;
- uint16_t local_crc;
+ uint32_t local_crc;
unsigned char *last_changeset;
int16_t last_changeset_len;
/* protects last_changeset & last_changeset_len */
@@ -420,31 +432,31 @@ struct batadv_priv_debug_log {
* @list: list of available gateway nodes
* @list_lock: lock protecting gw_list & curr_gw
* @curr_gw: pointer to currently selected gateway node
+ * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server)
+ * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
* @reselect: bool indicating a gateway re-selection is in progress
*/
struct batadv_priv_gw {
struct hlist_head list;
spinlock_t list_lock; /* protects gw_list & curr_gw */
struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
+ atomic_t bandwidth_down;
+ atomic_t bandwidth_up;
atomic_t reselect;
};
/**
- * struct batadv_priv_vis - per mesh interface vis data
- * @send_list: list of batadv_vis_info packets to sent
- * @hash: hash table containing vis data from other nodes in the network
- * @hash_lock: lock protecting the hash table
- * @list_lock: lock protecting my_info::recv_list
- * @work: work queue callback item for vis packet sending
- * @my_info: holds this node's vis data sent on a regular basis
+ * struct batadv_priv_tvlv - per mesh interface tvlv data
+ * @container_list: list of registered tvlv containers to be sent with each OGM
+ * @handler_list: list of the various tvlv content handlers
+ * @container_list_lock: protects tvlv container list access
+ * @handler_list_lock: protects handler list access
*/
-struct batadv_priv_vis {
- struct list_head send_list;
- struct batadv_hashtable *hash;
- spinlock_t hash_lock; /* protects hash */
- spinlock_t list_lock; /* protects my_info::recv_list */
- struct delayed_work work;
- struct batadv_vis_info *my_info;
+struct batadv_priv_tvlv {
+ struct hlist_head container_list;
+ struct hlist_head handler_list;
+ spinlock_t container_list_lock; /* protects container_list */
+ spinlock_t handler_list_lock; /* protects handler_list */
};
/**
@@ -504,10 +516,8 @@ struct batadv_priv_nc {
* enabled
* @distributed_arp_table: bool indicating whether distributed ARP table is
* enabled
- * @vis_mode: vis operation: client or server (see batadv_vis_packettype)
* @gw_mode: gateway operation: off, client or server (see batadv_gw_modes)
* @gw_sel_class: gateway selection class (applies if gw_mode client)
- * @gw_bandwidth: gateway announced bandwidth (applies if gw_mode server)
* @orig_interval: OGM broadcast interval in milliseconds
* @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
* @log_level: configured log level (see batadv_dbg_level)
@@ -531,7 +541,7 @@ struct batadv_priv_nc {
* @debug_log: holding debug logging relevant data
* @gw: gateway data
* @tt: translation table data
- * @vis: vis data
+ * @tvlv: type-version-length-value data
* @dat: distributed arp table data
* @network_coding: bool indicating whether network coding is enabled
* @batadv_priv_nc: network coding data
@@ -551,10 +561,8 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_DAT
atomic_t distributed_arp_table;
#endif
- atomic_t vis_mode;
atomic_t gw_mode;
atomic_t gw_sel_class;
- atomic_t gw_bandwidth;
atomic_t orig_interval;
atomic_t hop_penalty;
#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -583,7 +591,7 @@ struct batadv_priv {
#endif
struct batadv_priv_gw gw;
struct batadv_priv_tt tt;
- struct batadv_priv_vis vis;
+ struct batadv_priv_tvlv tvlv;
#ifdef CONFIG_BATMAN_ADV_DAT
struct batadv_priv_dat dat;
#endif
@@ -740,7 +748,7 @@ struct batadv_tt_orig_list_entry {
*/
struct batadv_tt_change_node {
struct list_head list;
- struct batadv_tt_change change;
+ struct batadv_tvlv_tt_change change;
};
/**
@@ -878,66 +886,6 @@ struct batadv_frag_packet_list_entry {
};
/**
- * struct batadv_vis_info - local data for vis information
- * @first_seen: timestamp used for purging stale vis info entries
- * @recv_list: List of server-neighbors we have received this packet from. This
- * packet should not be re-forward to them again. List elements are struct
- * batadv_vis_recvlist_node
- * @send_list: list of packets to be forwarded
- * @refcount: number of contexts the object is used
- * @hash_entry: hlist node for batadv_priv_vis::hash
- * @bat_priv: pointer to soft_iface this orig node belongs to
- * @skb_packet: contains the vis packet
- */
-struct batadv_vis_info {
- unsigned long first_seen;
- struct list_head recv_list;
- struct list_head send_list;
- struct kref refcount;
- struct hlist_node hash_entry;
- struct batadv_priv *bat_priv;
- struct sk_buff *skb_packet;
-} __packed;
-
-/**
- * struct batadv_vis_info_entry - contains link information for vis
- * @src: source MAC of the link, all zero for local TT entry
- * @dst: destination MAC of the link, client mac address for local TT entry
- * @quality: transmission quality of the link, or 0 for local TT entry
- */
-struct batadv_vis_info_entry {
- uint8_t src[ETH_ALEN];
- uint8_t dest[ETH_ALEN];
- uint8_t quality;
-} __packed;
-
-/**
- * struct batadv_vis_recvlist_node - list entry for batadv_vis_info::recv_list
- * @list: list node for batadv_vis_info::recv_list
- * @mac: MAC address of the originator from where the vis_info was received
- */
-struct batadv_vis_recvlist_node {
- struct list_head list;
- uint8_t mac[ETH_ALEN];
-};
-
-/**
- * struct batadv_vis_if_list_entry - auxiliary data for vis data generation
- * @addr: MAC address of the interface
- * @primary: true if this interface is the primary interface
- * @list: list node the interface list
- *
- * While scanning for vis-entries of a particular vis-originator
- * this list collects its interfaces to create a subgraph/cluster
- * out of them later
- */
-struct batadv_vis_if_list_entry {
- uint8_t addr[ETH_ALEN];
- bool primary;
- struct hlist_node list;
-};
-
-/**
* struct batadv_algo_ops - mesh algorithm callbacks
* @list: list node for the batadv_algo_list
* @name: name of the algorithm
@@ -992,4 +940,60 @@ struct batadv_dat_candidate {
struct batadv_orig_node *orig_node;
};
+/**
+ * struct batadv_tvlv_container - container for tvlv appended to OGMs
+ * @list: hlist node for batadv_priv_tvlv::container_list
+ * @tvlv_hdr: tvlv header information needed to construct the tvlv
+ * @value_len: length of the buffer following this struct which contains
+ * the actual tvlv payload
+ * @refcount: number of contexts the object is used
+ */
+struct batadv_tvlv_container {
+ struct hlist_node list;
+ struct batadv_tvlv_hdr tvlv_hdr;
+ atomic_t refcount;
+};
+
+/**
+ * struct batadv_tvlv_handler - handler for specific tvlv type and version
+ * @list: hlist node for batadv_priv_tvlv::handler_list
+ * @ogm_handler: handler callback which is given the tvlv payload to process on
+ * incoming OGM packets
+ * @unicast_handler: handler callback which is given the tvlv payload to process
+ * on incoming unicast tvlv packets
+ * @type: tvlv type this handler feels responsible for
+ * @version: tvlv version this handler feels responsible for
+ * @flags: tvlv handler flags
+ * @refcount: number of contexts the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_tvlv_handler {
+ struct hlist_node list;
+ void (*ogm_handler)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ uint8_t flags,
+ void *tvlv_value, uint16_t tvlv_value_len);
+ int (*unicast_handler)(struct batadv_priv *bat_priv,
+ uint8_t *src, uint8_t *dst,
+ void *tvlv_value, uint16_t tvlv_value_len);
+ uint8_t type;
+ uint8_t version;
+ uint8_t flags;
+ atomic_t refcount;
+ struct rcu_head rcu;
+};
+
+/**
+ * enum batadv_tvlv_handler_flags - tvlv handler flags definitions
+ * @BATADV_TVLV_HANDLER_OGM_CIFNOTFND: tvlv ogm processing function will call
+ * this handler even if its type was not found (with no data)
+ * @BATADV_TVLV_HANDLER_OGM_CALLED: interval tvlv handling flag - the API marks
+ * a handler as being called, so it won't be called if the
+ * BATADV_TVLV_HANDLER_OGM_CIFNOTFND flag was set
+ */
+enum batadv_tvlv_handler_flags {
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND = BIT(1),
+ BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
+};
+
#endif /* _NET_BATMAN_ADV_TYPES_H_ */
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
deleted file mode 100644
index d8ea31a..0000000
--- a/net/batman-adv/vis.c
+++ /dev/null
@@ -1,938 +0,0 @@
-/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors:
- *
- * Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#include "main.h"
-#include "send.h"
-#include "translation-table.h"
-#include "vis.h"
-#include "soft-interface.h"
-#include "hard-interface.h"
-#include "hash.h"
-#include "originator.h"
-
-#define BATADV_MAX_VIS_PACKET_SIZE 1000
-
-/* hash class keys */
-static struct lock_class_key batadv_vis_hash_lock_class_key;
-
-/* free the info */
-static void batadv_free_info(struct kref *ref)
-{
- struct batadv_vis_info *info;
- struct batadv_priv *bat_priv;
- struct batadv_vis_recvlist_node *entry, *tmp;
-
- info = container_of(ref, struct batadv_vis_info, refcount);
- bat_priv = info->bat_priv;
-
- list_del_init(&info->send_list);
- spin_lock_bh(&bat_priv->vis.list_lock);
- list_for_each_entry_safe(entry, tmp, &info->recv_list, list) {
- list_del(&entry->list);
- kfree(entry);
- }
-
- spin_unlock_bh(&bat_priv->vis.list_lock);
- kfree_skb(info->skb_packet);
- kfree(info);
-}
-
-/* Compare two vis packets, used by the hashing algorithm */
-static int batadv_vis_info_cmp(const struct hlist_node *node, const void *data2)
-{
- const struct batadv_vis_info *d1, *d2;
- const struct batadv_vis_packet *p1, *p2;
-
- d1 = container_of(node, struct batadv_vis_info, hash_entry);
- d2 = data2;
- p1 = (struct batadv_vis_packet *)d1->skb_packet->data;
- p2 = (struct batadv_vis_packet *)d2->skb_packet->data;
- return batadv_compare_eth(p1->vis_orig, p2->vis_orig);
-}
-
-/* hash function to choose an entry in a hash table of given size
- * hash algorithm from http://en.wikipedia.org/wiki/Hash_table
- */
-static uint32_t batadv_vis_info_choose(const void *data, uint32_t size)
-{
- const struct batadv_vis_info *vis_info = data;
- const struct batadv_vis_packet *packet;
- const unsigned char *key;
- uint32_t hash = 0;
- size_t i;
-
- packet = (struct batadv_vis_packet *)vis_info->skb_packet->data;
- key = packet->vis_orig;
- for (i = 0; i < ETH_ALEN; i++) {
- hash += key[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
-
- return hash % size;
-}
-
-static struct batadv_vis_info *
-batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data)
-{
- struct batadv_hashtable *hash = bat_priv->vis.hash;
- struct hlist_head *head;
- struct batadv_vis_info *vis_info, *vis_info_tmp = NULL;
- uint32_t index;
-
- if (!hash)
- return NULL;
-
- index = batadv_vis_info_choose(data, hash->size);
- head = &hash->table[index];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(vis_info, head, hash_entry) {
- if (!batadv_vis_info_cmp(&vis_info->hash_entry, data))
- continue;
-
- vis_info_tmp = vis_info;
- break;
- }
- rcu_read_unlock();
-
- return vis_info_tmp;
-}
-
-/* insert interface to the list of interfaces of one originator, if it
- * does not already exist in the list
- */
-static void batadv_vis_data_insert_interface(const uint8_t *interface,
- struct hlist_head *if_list,
- bool primary)
-{
- struct batadv_vis_if_list_entry *entry;
-
- hlist_for_each_entry(entry, if_list, list) {
- if (batadv_compare_eth(entry->addr, interface))
- return;
- }
-
- /* it's a new address, add it to the list */
- entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry)
- return;
- memcpy(entry->addr, interface, ETH_ALEN);
- entry->primary = primary;
- hlist_add_head(&entry->list, if_list);
-}
-
-static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
- const struct hlist_head *if_list)
-{
- struct batadv_vis_if_list_entry *entry;
-
- hlist_for_each_entry(entry, if_list, list) {
- if (entry->primary)
- seq_puts(seq, "PRIMARY, ");
- else
- seq_printf(seq, "SEC %pM, ", entry->addr);
- }
-}
-
-/* read an entry */
-static ssize_t
-batadv_vis_data_read_entry(struct seq_file *seq,
- const struct batadv_vis_info_entry *entry,
- const uint8_t *src, bool primary)
-{
- if (primary && entry->quality == 0)
- return seq_printf(seq, "TT %pM, ", entry->dest);
- else if (batadv_compare_eth(entry->src, src))
- return seq_printf(seq, "TQ %pM %d, ", entry->dest,
- entry->quality);
-
- return 0;
-}
-
-static void
-batadv_vis_data_insert_interfaces(struct hlist_head *list,
- struct batadv_vis_packet *packet,
- struct batadv_vis_info_entry *entries)
-{
- int i;
-
- for (i = 0; i < packet->entries; i++) {
- if (entries[i].quality == 0)
- continue;
-
- if (batadv_compare_eth(entries[i].src, packet->vis_orig))
- continue;
-
- batadv_vis_data_insert_interface(entries[i].src, list, false);
- }
-}
-
-static void batadv_vis_data_read_entries(struct seq_file *seq,
- struct hlist_head *list,
- struct batadv_vis_packet *packet,
- struct batadv_vis_info_entry *entries)
-{
- int i;
- struct batadv_vis_if_list_entry *entry;
-
- hlist_for_each_entry(entry, list, list) {
- seq_printf(seq, "%pM,", entry->addr);
-
- for (i = 0; i < packet->entries; i++)
- batadv_vis_data_read_entry(seq, &entries[i],
- entry->addr, entry->primary);
-
- /* add primary/secondary records */
- if (batadv_compare_eth(entry->addr, packet->vis_orig))
- batadv_vis_data_read_prim_sec(seq, list);
-
- seq_puts(seq, "\n");
- }
-}
-
-static void batadv_vis_seq_print_text_bucket(struct seq_file *seq,
- const struct hlist_head *head)
-{
- struct batadv_vis_info *info;
- struct batadv_vis_packet *packet;
- uint8_t *entries_pos;
- struct batadv_vis_info_entry *entries;
- struct batadv_vis_if_list_entry *entry;
- struct hlist_node *n;
-
- HLIST_HEAD(vis_if_list);
-
- hlist_for_each_entry_rcu(info, head, hash_entry) {
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
- entries_pos = (uint8_t *)packet + sizeof(*packet);
- entries = (struct batadv_vis_info_entry *)entries_pos;
-
- batadv_vis_data_insert_interface(packet->vis_orig, &vis_if_list,
- true);
- batadv_vis_data_insert_interfaces(&vis_if_list, packet,
- entries);
- batadv_vis_data_read_entries(seq, &vis_if_list, packet,
- entries);
-
- hlist_for_each_entry_safe(entry, n, &vis_if_list, list) {
- hlist_del(&entry->list);
- kfree(entry);
- }
- }
-}
-
-int batadv_vis_seq_print_text(struct seq_file *seq, void *offset)
-{
- struct batadv_hard_iface *primary_if;
- struct hlist_head *head;
- struct net_device *net_dev = (struct net_device *)seq->private;
- struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hashtable *hash = bat_priv->vis.hash;
- uint32_t i;
- int ret = 0;
- int vis_server = atomic_read(&bat_priv->vis_mode);
-
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- if (vis_server == BATADV_VIS_TYPE_CLIENT_UPDATE)
- goto out;
-
- spin_lock_bh(&bat_priv->vis.hash_lock);
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
- batadv_vis_seq_print_text_bucket(seq, head);
- }
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-
-out:
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
- return ret;
-}
-
-/* add the info packet to the send list, if it was not
- * already linked in.
- */
-static void batadv_send_list_add(struct batadv_priv *bat_priv,
- struct batadv_vis_info *info)
-{
- if (list_empty(&info->send_list)) {
- kref_get(&info->refcount);
- list_add_tail(&info->send_list, &bat_priv->vis.send_list);
- }
-}
-
-/* delete the info packet from the send list, if it was
- * linked in.
- */
-static void batadv_send_list_del(struct batadv_vis_info *info)
-{
- if (!list_empty(&info->send_list)) {
- list_del_init(&info->send_list);
- kref_put(&info->refcount, batadv_free_info);
- }
-}
-
-/* tries to add one entry to the receive list. */
-static void batadv_recv_list_add(struct batadv_priv *bat_priv,
- struct list_head *recv_list, const char *mac)
-{
- struct batadv_vis_recvlist_node *entry;
-
- entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry)
- return;
-
- memcpy(entry->mac, mac, ETH_ALEN);
- spin_lock_bh(&bat_priv->vis.list_lock);
- list_add_tail(&entry->list, recv_list);
- spin_unlock_bh(&bat_priv->vis.list_lock);
-}
-
-/* returns 1 if this mac is in the recv_list */
-static int batadv_recv_list_is_in(struct batadv_priv *bat_priv,
- const struct list_head *recv_list,
- const char *mac)
-{
- const struct batadv_vis_recvlist_node *entry;
-
- spin_lock_bh(&bat_priv->vis.list_lock);
- list_for_each_entry(entry, recv_list, list) {
- if (batadv_compare_eth(entry->mac, mac)) {
- spin_unlock_bh(&bat_priv->vis.list_lock);
- return 1;
- }
- }
- spin_unlock_bh(&bat_priv->vis.list_lock);
- return 0;
-}
-
-/* try to add the packet to the vis_hash. return NULL if invalid (e.g. too old,
- * broken.. ). vis hash must be locked outside. is_new is set when the packet
- * is newer than old entries in the hash.
- */
-static struct batadv_vis_info *
-batadv_add_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_packet *vis_packet, int vis_info_len,
- int *is_new, int make_broadcast)
-{
- struct batadv_vis_info *info, *old_info;
- struct batadv_vis_packet *search_packet, *old_packet;
- struct batadv_vis_info search_elem;
- struct batadv_vis_packet *packet;
- struct sk_buff *tmp_skb;
- int hash_added;
- size_t len;
- size_t max_entries;
-
- *is_new = 0;
- /* sanity check */
- if (!bat_priv->vis.hash)
- return NULL;
-
- /* see if the packet is already in vis_hash */
- search_elem.skb_packet = dev_alloc_skb(sizeof(*search_packet));
- if (!search_elem.skb_packet)
- return NULL;
- len = sizeof(*search_packet);
- tmp_skb = search_elem.skb_packet;
- search_packet = (struct batadv_vis_packet *)skb_put(tmp_skb, len);
-
- memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN);
- old_info = batadv_vis_hash_find(bat_priv, &search_elem);
- kfree_skb(search_elem.skb_packet);
-
- if (old_info) {
- tmp_skb = old_info->skb_packet;
- old_packet = (struct batadv_vis_packet *)tmp_skb->data;
- if (!batadv_seq_after(ntohl(vis_packet->seqno),
- ntohl(old_packet->seqno))) {
- if (old_packet->seqno == vis_packet->seqno) {
- batadv_recv_list_add(bat_priv,
- &old_info->recv_list,
- vis_packet->sender_orig);
- return old_info;
- } else {
- /* newer packet is already in hash. */
- return NULL;
- }
- }
- /* remove old entry */
- batadv_hash_remove(bat_priv->vis.hash, batadv_vis_info_cmp,
- batadv_vis_info_choose, old_info);
- batadv_send_list_del(old_info);
- kref_put(&old_info->refcount, batadv_free_info);
- }
-
- info = kmalloc(sizeof(*info), GFP_ATOMIC);
- if (!info)
- return NULL;
-
- len = sizeof(*packet) + vis_info_len;
- info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
- if (!info->skb_packet) {
- kfree(info);
- return NULL;
- }
- info->skb_packet->priority = TC_PRIO_CONTROL;
- skb_reserve(info->skb_packet, ETH_HLEN);
- packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
-
- kref_init(&info->refcount);
- INIT_LIST_HEAD(&info->send_list);
- INIT_LIST_HEAD(&info->recv_list);
- info->first_seen = jiffies;
- info->bat_priv = bat_priv;
- memcpy(packet, vis_packet, len);
-
- /* initialize and add new packet. */
- *is_new = 1;
-
- /* Make it a broadcast packet, if required */
- if (make_broadcast)
- memcpy(packet->target_orig, batadv_broadcast_addr, ETH_ALEN);
-
- /* repair if entries is longer than packet. */
- max_entries = vis_info_len / sizeof(struct batadv_vis_info_entry);
- if (packet->entries > max_entries)
- packet->entries = max_entries;
-
- batadv_recv_list_add(bat_priv, &info->recv_list, packet->sender_orig);
-
- /* try to add it */
- hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp,
- batadv_vis_info_choose, info,
- &info->hash_entry);
- if (hash_added != 0) {
- /* did not work (for some reason) */
- kref_put(&info->refcount, batadv_free_info);
- info = NULL;
- }
-
- return info;
-}
-
-/* handle the server sync packet, forward if needed. */
-void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_packet *vis_packet,
- int vis_info_len)
-{
- struct batadv_vis_info *info;
- int is_new, make_broadcast;
- int vis_server = atomic_read(&bat_priv->vis_mode);
-
- make_broadcast = (vis_server == BATADV_VIS_TYPE_SERVER_SYNC);
-
- spin_lock_bh(&bat_priv->vis.hash_lock);
- info = batadv_add_packet(bat_priv, vis_packet, vis_info_len,
- &is_new, make_broadcast);
- if (!info)
- goto end;
-
- /* only if we are server ourselves and packet is newer than the one in
- * hash.
- */
- if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && is_new)
- batadv_send_list_add(bat_priv, info);
-end:
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-}
-
-/* handle an incoming client update packet and schedule forward if needed. */
-void batadv_receive_client_update_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_packet *vis_packet,
- int vis_info_len)
-{
- struct batadv_vis_info *info;
- struct batadv_vis_packet *packet;
- int is_new;
- int vis_server = atomic_read(&bat_priv->vis_mode);
- int are_target = 0;
-
- /* clients shall not broadcast. */
- if (is_broadcast_ether_addr(vis_packet->target_orig))
- return;
-
- /* Are we the target for this VIS packet? */
- if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC &&
- batadv_is_my_mac(bat_priv, vis_packet->target_orig))
- are_target = 1;
-
- spin_lock_bh(&bat_priv->vis.hash_lock);
- info = batadv_add_packet(bat_priv, vis_packet, vis_info_len,
- &is_new, are_target);
-
- if (!info)
- goto end;
- /* note that outdated packets will be dropped at this point. */
-
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
-
- /* send only if we're the target server or ... */
- if (are_target && is_new) {
- packet->vis_type = BATADV_VIS_TYPE_SERVER_SYNC; /* upgrade! */
- batadv_send_list_add(bat_priv, info);
-
- /* ... we're not the recipient (and thus need to forward). */
- } else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) {
- batadv_send_list_add(bat_priv, info);
- }
-
-end:
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-}
-
-/* Walk the originators and find the VIS server with the best tq. Set the packet
- * address to its address and return the best_tq.
- *
- * Must be called with the originator hash locked
- */
-static int batadv_find_best_vis_server(struct batadv_priv *bat_priv,
- struct batadv_vis_info *info)
-{
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct batadv_neigh_node *router;
- struct hlist_head *head;
- struct batadv_orig_node *orig_node;
- struct batadv_vis_packet *packet;
- int best_tq = -1;
- uint32_t i;
-
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
- continue;
-
- if ((orig_node->flags & BATADV_VIS_SERVER) &&
- (router->tq_avg > best_tq)) {
- best_tq = router->tq_avg;
- memcpy(packet->target_orig, orig_node->orig,
- ETH_ALEN);
- }
- batadv_neigh_node_free_ref(router);
- }
- rcu_read_unlock();
- }
-
- return best_tq;
-}
-
-/* Return true if the vis packet is full. */
-static bool batadv_vis_packet_full(const struct batadv_vis_info *info)
-{
- const struct batadv_vis_packet *packet;
- size_t num;
-
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
- num = BATADV_MAX_VIS_PACKET_SIZE / sizeof(struct batadv_vis_info_entry);
-
- if (num < packet->entries + 1)
- return true;
- return false;
-}
-
-/* generates a packet of own vis data,
- * returns 0 on success, -1 if no packet could be generated
- */
-static int batadv_generate_vis_packet(struct batadv_priv *bat_priv)
-{
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
- struct batadv_orig_node *orig_node;
- struct batadv_neigh_node *router;
- struct batadv_vis_info *info = bat_priv->vis.my_info;
- struct batadv_vis_packet *packet;
- struct batadv_vis_info_entry *entry;
- struct batadv_tt_common_entry *tt_common_entry;
- uint8_t *packet_pos;
- int best_tq = -1;
- uint32_t i;
-
- info->first_seen = jiffies;
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
- packet->vis_type = atomic_read(&bat_priv->vis_mode);
-
- memcpy(packet->target_orig, batadv_broadcast_addr, ETH_ALEN);
- packet->header.ttl = BATADV_TTL;
- packet->seqno = htonl(ntohl(packet->seqno) + 1);
- packet->entries = 0;
- packet->reserved = 0;
- skb_trim(info->skb_packet, sizeof(*packet));
-
- if (packet->vis_type == BATADV_VIS_TYPE_CLIENT_UPDATE) {
- best_tq = batadv_find_best_vis_server(bat_priv, info);
-
- if (best_tq < 0)
- return best_tq;
- }
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- router = batadv_orig_node_get_router(orig_node);
- if (!router)
- continue;
-
- if (!batadv_compare_eth(router->addr, orig_node->orig))
- goto next;
-
- if (router->if_incoming->if_status != BATADV_IF_ACTIVE)
- goto next;
-
- if (router->tq_avg < 1)
- goto next;
-
- /* fill one entry into buffer. */
- packet_pos = skb_put(info->skb_packet, sizeof(*entry));
- entry = (struct batadv_vis_info_entry *)packet_pos;
- memcpy(entry->src,
- router->if_incoming->net_dev->dev_addr,
- ETH_ALEN);
- memcpy(entry->dest, orig_node->orig, ETH_ALEN);
- entry->quality = router->tq_avg;
- packet->entries++;
-
-next:
- batadv_neigh_node_free_ref(router);
-
- if (batadv_vis_packet_full(info))
- goto unlock;
- }
- rcu_read_unlock();
- }
-
- hash = bat_priv->tt.local_hash;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tt_common_entry, head,
- hash_entry) {
- packet_pos = skb_put(info->skb_packet, sizeof(*entry));
- entry = (struct batadv_vis_info_entry *)packet_pos;
- memset(entry->src, 0, ETH_ALEN);
- memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN);
- entry->quality = 0; /* 0 means TT */
- packet->entries++;
-
- if (batadv_vis_packet_full(info))
- goto unlock;
- }
- rcu_read_unlock();
- }
-
- return 0;
-
-unlock:
- rcu_read_unlock();
- return 0;
-}
-
-/* free old vis packets. Must be called with this vis_hash_lock
- * held
- */
-static void batadv_purge_vis_packets(struct batadv_priv *bat_priv)
-{
- uint32_t i;
- struct batadv_hashtable *hash = bat_priv->vis.hash;
- struct hlist_node *node_tmp;
- struct hlist_head *head;
- struct batadv_vis_info *info;
-
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- hlist_for_each_entry_safe(info, node_tmp,
- head, hash_entry) {
- /* never purge own data. */
- if (info == bat_priv->vis.my_info)
- continue;
-
- if (batadv_has_timed_out(info->first_seen,
- BATADV_VIS_TIMEOUT)) {
- hlist_del(&info->hash_entry);
- batadv_send_list_del(info);
- kref_put(&info->refcount, batadv_free_info);
- }
- }
- }
-}
-
-static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_info *info)
-{
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
- struct batadv_orig_node *orig_node;
- struct batadv_vis_packet *packet;
- struct sk_buff *skb;
- uint32_t i, res;
-
-
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
-
- /* send to all routers in range. */
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- /* if it's a vis server and reachable, send it. */
- if (!(orig_node->flags & BATADV_VIS_SERVER))
- continue;
-
- /* don't send it if we already received the packet from
- * this node.
- */
- if (batadv_recv_list_is_in(bat_priv, &info->recv_list,
- orig_node->orig))
- continue;
-
- memcpy(packet->target_orig, orig_node->orig, ETH_ALEN);
- skb = skb_clone(info->skb_packet, GFP_ATOMIC);
- if (!skb)
- continue;
-
- res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res == NET_XMIT_DROP)
- kfree_skb(skb);
- }
- rcu_read_unlock();
- }
-}
-
-static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_info *info)
-{
- struct batadv_orig_node *orig_node;
- struct sk_buff *skb;
- struct batadv_vis_packet *packet;
-
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
-
- orig_node = batadv_orig_hash_find(bat_priv, packet->target_orig);
- if (!orig_node)
- goto out;
-
- skb = skb_clone(info->skb_packet, GFP_ATOMIC);
- if (!skb)
- goto out;
-
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
- kfree_skb(skb);
-
-out:
- if (orig_node)
- batadv_orig_node_free_ref(orig_node);
-}
-
-/* only send one vis packet. called from batadv_send_vis_packets() */
-static void batadv_send_vis_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_info *info)
-{
- struct batadv_hard_iface *primary_if;
- struct batadv_vis_packet *packet;
-
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- packet = (struct batadv_vis_packet *)info->skb_packet->data;
- if (packet->header.ttl < 2) {
- pr_debug("Error - can't send vis packet: ttl exceeded\n");
- goto out;
- }
-
- memcpy(packet->sender_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
- packet->header.ttl--;
-
- if (is_broadcast_ether_addr(packet->target_orig))
- batadv_broadcast_vis_packet(bat_priv, info);
- else
- batadv_unicast_vis_packet(bat_priv, info);
- packet->header.ttl++; /* restore TTL */
-
-out:
- if (primary_if)
- batadv_hardif_free_ref(primary_if);
-}
-
-/* called from timer; send (and maybe generate) vis packet. */
-static void batadv_send_vis_packets(struct work_struct *work)
-{
- struct delayed_work *delayed_work;
- struct batadv_priv *bat_priv;
- struct batadv_priv_vis *priv_vis;
- struct batadv_vis_info *info;
-
- delayed_work = container_of(work, struct delayed_work, work);
- priv_vis = container_of(delayed_work, struct batadv_priv_vis, work);
- bat_priv = container_of(priv_vis, struct batadv_priv, vis);
- spin_lock_bh(&bat_priv->vis.hash_lock);
- batadv_purge_vis_packets(bat_priv);
-
- if (batadv_generate_vis_packet(bat_priv) == 0) {
- /* schedule if generation was successful */
- batadv_send_list_add(bat_priv, bat_priv->vis.my_info);
- }
-
- while (!list_empty(&bat_priv->vis.send_list)) {
- info = list_first_entry(&bat_priv->vis.send_list,
- typeof(*info), send_list);
-
- kref_get(&info->refcount);
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-
- batadv_send_vis_packet(bat_priv, info);
-
- spin_lock_bh(&bat_priv->vis.hash_lock);
- batadv_send_list_del(info);
- kref_put(&info->refcount, batadv_free_info);
- }
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-
- queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work,
- msecs_to_jiffies(BATADV_VIS_INTERVAL));
-}
-
-/* init the vis server. this may only be called when if_list is already
- * initialized (e.g. bat0 is initialized, interfaces have been added)
- */
-int batadv_vis_init(struct batadv_priv *bat_priv)
-{
- struct batadv_vis_packet *packet;
- int hash_added;
- unsigned int len;
- unsigned long first_seen;
- struct sk_buff *tmp_skb;
-
- if (bat_priv->vis.hash)
- return 0;
-
- spin_lock_bh(&bat_priv->vis.hash_lock);
-
- bat_priv->vis.hash = batadv_hash_new(256);
- if (!bat_priv->vis.hash) {
- pr_err("Can't initialize vis_hash\n");
- goto err;
- }
-
- batadv_hash_set_lock_class(bat_priv->vis.hash,
- &batadv_vis_hash_lock_class_key);
-
- bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
- if (!bat_priv->vis.my_info)
- goto err;
-
- len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN;
- bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL,
- len);
- if (!bat_priv->vis.my_info->skb_packet)
- goto free_info;
-
- bat_priv->vis.my_info->skb_packet->priority = TC_PRIO_CONTROL;
- skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
- tmp_skb = bat_priv->vis.my_info->skb_packet;
- packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
-
- /* prefill the vis info */
- first_seen = jiffies - msecs_to_jiffies(BATADV_VIS_INTERVAL);
- bat_priv->vis.my_info->first_seen = first_seen;
- INIT_LIST_HEAD(&bat_priv->vis.my_info->recv_list);
- INIT_LIST_HEAD(&bat_priv->vis.my_info->send_list);
- kref_init(&bat_priv->vis.my_info->refcount);
- bat_priv->vis.my_info->bat_priv = bat_priv;
- packet->header.version = BATADV_COMPAT_VERSION;
- packet->header.packet_type = BATADV_VIS;
- packet->header.ttl = BATADV_TTL;
- packet->seqno = 0;
- packet->reserved = 0;
- packet->entries = 0;
-
- INIT_LIST_HEAD(&bat_priv->vis.send_list);
-
- hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp,
- batadv_vis_info_choose,
- bat_priv->vis.my_info,
- &bat_priv->vis.my_info->hash_entry);
- if (hash_added != 0) {
- pr_err("Can't add own vis packet into hash\n");
- /* not in hash, need to remove it manually. */
- kref_put(&bat_priv->vis.my_info->refcount, batadv_free_info);
- goto err;
- }
-
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-
- INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets);
- queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work,
- msecs_to_jiffies(BATADV_VIS_INTERVAL));
-
- return 0;
-
-free_info:
- kfree(bat_priv->vis.my_info);
- bat_priv->vis.my_info = NULL;
-err:
- spin_unlock_bh(&bat_priv->vis.hash_lock);
- batadv_vis_quit(bat_priv);
- return -ENOMEM;
-}
-
-/* Decrease the reference count on a hash item info */
-static void batadv_free_info_ref(struct hlist_node *node, void *arg)
-{
- struct batadv_vis_info *info;
-
- info = container_of(node, struct batadv_vis_info, hash_entry);
- batadv_send_list_del(info);
- kref_put(&info->refcount, batadv_free_info);
-}
-
-/* shutdown vis-server */
-void batadv_vis_quit(struct batadv_priv *bat_priv)
-{
- if (!bat_priv->vis.hash)
- return;
-
- cancel_delayed_work_sync(&bat_priv->vis.work);
-
- spin_lock_bh(&bat_priv->vis.hash_lock);
- /* properly remove, kill timers ... */
- batadv_hash_delete(bat_priv->vis.hash, batadv_free_info_ref, NULL);
- bat_priv->vis.hash = NULL;
- bat_priv->vis.my_info = NULL;
- spin_unlock_bh(&bat_priv->vis.hash_lock);
-}
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
deleted file mode 100644
index ad92b0e..0000000
--- a/net/batman-adv/vis.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright (C) 2008-2013 B.A.T.M.A.N. contributors:
- *
- * Simon Wunderlich, Marek Lindner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- */
-
-#ifndef _NET_BATMAN_ADV_VIS_H_
-#define _NET_BATMAN_ADV_VIS_H_
-
-/* timeout of vis packets in milliseconds */
-#define BATADV_VIS_TIMEOUT 200000
-
-int batadv_vis_seq_print_text(struct seq_file *seq, void *offset);
-void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_packet *vis_packet,
- int vis_info_len);
-void batadv_receive_client_update_packet(struct batadv_priv *bat_priv,
- struct batadv_vis_packet *vis_packet,
- int vis_info_len);
-int batadv_vis_init(struct batadv_priv *bat_priv);
-void batadv_vis_quit(struct batadv_priv *bat_priv);
-
-#endif /* _NET_BATMAN_ADV_VIS_H_ */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d1c5786..005d876 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -363,7 +363,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- memcpy(eth->h_source, br->dev->dev_addr, 6);
+ memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN);
eth->h_dest[0] = 1;
eth->h_dest[1] = 0;
eth->h_dest[2] = 0x5e;
@@ -433,7 +433,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- memcpy(eth->h_source, br->dev->dev_addr, 6);
+ memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 8b84c58..3fb3c84 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -28,7 +28,7 @@ static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh,
uint32_t cmp[2] = { 0, 0 };
int key = ((const unsigned char *)mac)[5];
- memcpy(((char *) cmp) + 2, mac, 6);
+ memcpy(((char *) cmp) + 2, mac, ETH_ALEN);
start = wh->table[key];
limit = wh->table[key + 1];
if (ip) {
diff --git a/net/compat.c b/net/compat.c
index f0a1ba6..8903258 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg)
__get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
__get_user(kmsg->msg_flags, &umsg->msg_flags))
return -EFAULT;
+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+ return -EINVAL;
kmsg->msg_name = compat_ptr(tmp1);
kmsg->msg_iov = compat_ptr(tmp2);
kmsg->msg_control = compat_ptr(tmp3);
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2..1b6eadf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head)
ASSERT_RTNL();
might_sleep();
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
clear_bit(__LINK_STATE_START, &dev->state);
@@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head)
dev_deactivate_many(head);
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
const struct net_device_ops *ops = dev->netdev_ops;
/*
@@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev)
/* Temporarily disable netpoll until the interface is down */
netpoll_rx_disable(dev);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
retval = __dev_close_many(&single);
list_del(&single);
@@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev)
static int dev_close_many(struct list_head *head)
{
struct net_device *dev, *tmp;
- LIST_HEAD(tmp_list);
- list_for_each_entry_safe(dev, tmp, head, unreg_list)
+ /* Remove the devices that don't need to be closed */
+ list_for_each_entry_safe(dev, tmp, head, close_list)
if (!(dev->flags & IFF_UP))
- list_move(&dev->unreg_list, &tmp_list);
+ list_del_init(&dev->close_list);
__dev_close_many(head);
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry_safe(dev, tmp, head, close_list) {
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
call_netdevice_notifiers(NETDEV_DOWN, dev);
+ list_del_init(&dev->close_list);
}
- /* rollback_registered_many needs the complete original list */
- list_splice(&tmp_list, head);
return 0;
}
@@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev)
/* Block netpoll rx while the interface is going down */
netpoll_rx_disable(dev);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
dev_close_many(&single);
list_del(&single);
@@ -1917,7 +1916,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return new_map;
}
-int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+ u16 index)
{
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
struct xps_map *map, *new_map;
@@ -4373,42 +4373,40 @@ struct netdev_adjacent {
/* upper master flag, there can only be one master device per list */
bool master;
- /* indicates that this dev is our first-level lower/upper device */
- bool neighbour;
-
/* counter for the number of times this device was added to us */
u16 ref_nr;
+ /* private field for the users */
+ void *private;
+
struct list_head list;
struct rcu_head rcu;
};
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
- struct net_device *adj_dev,
- bool upper)
+static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev,
+ struct net_device *adj_dev,
+ struct list_head *adj_list)
{
struct netdev_adjacent *adj;
- struct list_head *dev_list;
-
- dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
- list_for_each_entry(adj, dev_list, list) {
+ list_for_each_entry_rcu(adj, adj_list, list) {
if (adj->dev == adj_dev)
return adj;
}
return NULL;
}
-static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
- struct net_device *udev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+ struct net_device *adj_dev,
+ struct list_head *adj_list)
{
- return __netdev_find_adj(dev, udev, true);
-}
+ struct netdev_adjacent *adj;
-static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
- struct net_device *ldev)
-{
- return __netdev_find_adj(dev, ldev, false);
+ list_for_each_entry(adj, adj_list, list) {
+ if (adj->dev == adj_dev)
+ return adj;
+ }
+ return NULL;
}
/**
@@ -4425,7 +4423,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_upper(dev, upper_dev);
+ return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -4440,7 +4438,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
- return !list_empty(&dev->upper_dev_list);
+ return !list_empty(&dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_any_upper_dev);
@@ -4457,10 +4455,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
ASSERT_RTNL();
- if (list_empty(&dev->upper_dev_list))
+ if (list_empty(&dev->adj_list.upper))
return NULL;
- upper = list_first_entry(&dev->upper_dev_list,
+ upper = list_first_entry(&dev->adj_list.upper,
struct netdev_adjacent, list);
if (likely(upper->master))
return upper->dev;
@@ -4468,15 +4466,26 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
-/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+void *netdev_adjacent_get_private(struct list_head *adj_list)
+{
+ struct netdev_adjacent *adj;
+
+ adj = list_entry(adj_list, struct netdev_adjacent, list);
+
+ return adj->private;
+}
+EXPORT_SYMBOL(netdev_adjacent_get_private);
+
+/**
+ * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
* @dev: device
* @iter: list_head ** of the current position
*
* Gets the next device from the dev's upper list, starting from iter
* position. The caller must hold RCU read lock.
*/
-struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *upper;
@@ -4484,14 +4493,71 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
- if (&upper->list == &dev->upper_dev_list)
+ if (&upper->list == &dev->all_adj_list.upper)
return NULL;
*iter = &upper->list;
return upper->dev;
}
-EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
+
+/**
+ * netdev_lower_get_next_private - Get the next ->private from the
+ * lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold either hold the
+ * RTNL lock or its own locking that guarantees that the neighbour lower
+ * list will remain unchainged.
+ */
+void *netdev_lower_get_next_private(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry(*iter, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ if (iter)
+ *iter = lower->list.next;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private);
+
+/**
+ * netdev_lower_get_next_private_rcu - Get the next ->private from the
+ * lower neighbour list, RCU
+ * variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent->private from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ if (iter)
+ *iter = &lower->list;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
@@ -4504,7 +4570,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
struct netdev_adjacent *upper;
- upper = list_first_or_null_rcu(&dev->upper_dev_list,
+ upper = list_first_or_null_rcu(&dev->adj_list.upper,
struct netdev_adjacent, list);
if (upper && likely(upper->master))
return upper->dev;
@@ -4514,15 +4580,16 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
- bool neighbour, bool master,
- bool upper)
+ struct list_head *dev_list,
+ void *private, bool master)
{
struct netdev_adjacent *adj;
+ char linkname[IFNAMSIZ+7];
+ int ret;
- adj = __netdev_find_adj(dev, adj_dev, upper);
+ adj = __netdev_find_adj(dev, adj_dev, dev_list);
if (adj) {
- BUG_ON(neighbour);
adj->ref_nr++;
return 0;
}
@@ -4533,124 +4600,178 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->dev = adj_dev;
adj->master = master;
- adj->neighbour = neighbour;
adj->ref_nr = 1;
-
+ adj->private = private;
dev_hold(adj_dev);
- pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
- adj_dev->name, upper ? "upper" : "lower", dev->name,
- adj_dev->name);
- if (!upper) {
- list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
- return 0;
+ pr_debug("dev_hold for %s, because of link added from %s to %s\n",
+ adj_dev->name, dev->name, adj_dev->name);
+
+ if (dev_list == &dev->adj_list.lower) {
+ sprintf(linkname, "lower_%s", adj_dev->name);
+ ret = sysfs_create_link(&(dev->dev.kobj),
+ &(adj_dev->dev.kobj), linkname);
+ if (ret)
+ goto free_adj;
+ } else if (dev_list == &dev->adj_list.upper) {
+ sprintf(linkname, "upper_%s", adj_dev->name);
+ ret = sysfs_create_link(&(dev->dev.kobj),
+ &(adj_dev->dev.kobj), linkname);
+ if (ret)
+ goto free_adj;
}
- /* Ensure that master upper link is always the first item in list. */
- if (master)
- list_add_rcu(&adj->list, &dev->upper_dev_list);
- else
- list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+ /* Ensure that master link is always the first item in list. */
+ if (master) {
+ ret = sysfs_create_link(&(dev->dev.kobj),
+ &(adj_dev->dev.kobj), "master");
+ if (ret)
+ goto remove_symlinks;
+
+ list_add_rcu(&adj->list, dev_list);
+ } else {
+ list_add_tail_rcu(&adj->list, dev_list);
+ }
return 0;
-}
-static inline int __netdev_upper_dev_insert(struct net_device *dev,
- struct net_device *udev,
- bool master, bool neighbour)
-{
- return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
- true);
-}
+remove_symlinks:
+ if (dev_list == &dev->adj_list.lower) {
+ sprintf(linkname, "lower_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ } else if (dev_list == &dev->adj_list.upper) {
+ sprintf(linkname, "upper_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ }
-static inline int __netdev_lower_dev_insert(struct net_device *dev,
- struct net_device *ldev,
- bool neighbour)
-{
- return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
- false);
+free_adj:
+ kfree(adj);
+
+ return ret;
}
void __netdev_adjacent_dev_remove(struct net_device *dev,
- struct net_device *adj_dev, bool upper)
+ struct net_device *adj_dev,
+ struct list_head *dev_list)
{
struct netdev_adjacent *adj;
+ char linkname[IFNAMSIZ+7];
- if (upper)
- adj = __netdev_find_upper(dev, adj_dev);
- else
- adj = __netdev_find_lower(dev, adj_dev);
+ adj = __netdev_find_adj(dev, adj_dev, dev_list);
- if (!adj)
+ if (!adj) {
+ pr_err("tried to remove device %s from %s\n",
+ dev->name, adj_dev->name);
BUG();
+ }
if (adj->ref_nr > 1) {
+ pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
+ adj->ref_nr-1);
adj->ref_nr--;
return;
}
+ if (adj->master)
+ sysfs_remove_link(&(dev->dev.kobj), "master");
+
+ if (dev_list == &dev->adj_list.lower) {
+ sprintf(linkname, "lower_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ } else if (dev_list == &dev->adj_list.upper) {
+ sprintf(linkname, "upper_%s", adj_dev->name);
+ sysfs_remove_link(&(dev->dev.kobj), linkname);
+ }
+
list_del_rcu(&adj->list);
- pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
- adj_dev->name, upper ? "upper" : "lower", dev->name,
- adj_dev->name);
+ pr_debug("dev_put for %s, because link removed from %s to %s\n",
+ adj_dev->name, dev->name, adj_dev->name);
dev_put(adj_dev);
kfree_rcu(adj, rcu);
}
-static inline void __netdev_upper_dev_remove(struct net_device *dev,
- struct net_device *udev)
-{
- return __netdev_adjacent_dev_remove(dev, udev, true);
-}
-
-static inline void __netdev_lower_dev_remove(struct net_device *dev,
- struct net_device *ldev)
-{
- return __netdev_adjacent_dev_remove(dev, ldev, false);
-}
-
-int __netdev_adjacent_dev_insert_link(struct net_device *dev,
- struct net_device *upper_dev,
- bool master, bool neighbour)
+int __netdev_adjacent_dev_link_lists(struct net_device *dev,
+ struct net_device *upper_dev,
+ struct list_head *up_list,
+ struct list_head *down_list,
+ void *private, bool master)
{
int ret;
- ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+ ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
+ master);
if (ret)
return ret;
- ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+ ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
+ false);
if (ret) {
- __netdev_upper_dev_remove(dev, upper_dev);
+ __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
return ret;
}
return 0;
}
-static inline int __netdev_adjacent_dev_link(struct net_device *dev,
- struct net_device *udev)
+int __netdev_adjacent_dev_link(struct net_device *dev,
+ struct net_device *upper_dev)
{
- return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+ return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ &dev->all_adj_list.upper,
+ &upper_dev->all_adj_list.lower,
+ NULL, false);
}
-static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
- struct net_device *udev,
- bool master)
+void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
+ struct net_device *upper_dev,
+ struct list_head *up_list,
+ struct list_head *down_list)
{
- return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+ __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+ __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
}
void __netdev_adjacent_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_upper_dev_remove(dev, upper_dev);
- __netdev_lower_dev_remove(upper_dev, dev);
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ &dev->all_adj_list.upper,
+ &upper_dev->all_adj_list.lower);
+}
+
+int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+ struct net_device *upper_dev,
+ void *private, bool master)
+{
+ int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+
+ if (ret)
+ return ret;
+
+ ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ &dev->adj_list.upper,
+ &upper_dev->adj_list.lower,
+ private, master);
+ if (ret) {
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+ return ret;
+ }
+
+ return 0;
}
+void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ &dev->adj_list.upper,
+ &upper_dev->adj_list.lower);
+}
static int __netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev, bool master)
+ struct net_device *upper_dev, bool master,
+ void *private)
{
struct netdev_adjacent *i, *j, *to_i, *to_j;
int ret = 0;
@@ -4661,26 +4782,29 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_upper(upper_dev, dev))
+ if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_upper(dev, upper_dev))
+ if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+ master);
if (ret)
return ret;
/* Now that we linked these devs, make all the upper_dev's
- * upper_dev_list visible to every dev's lower_dev_list and vice
+ * all_adj_list.upper visible to every dev's all_adj_list.lower an
* versa, and don't forget the devices itself. All of these
* links are non-neighbours.
*/
- list_for_each_entry(i, &dev->lower_dev_list, list) {
- list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+ list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
+ pr_debug("Interlinking %s with %s, non-neighbour\n",
+ i->dev->name, j->dev->name);
ret = __netdev_adjacent_dev_link(i->dev, j->dev);
if (ret)
goto rollback_mesh;
@@ -4688,14 +4812,18 @@ static int __netdev_upper_dev_link(struct net_device *dev,
}
/* add dev to every upper_dev's upper device */
- list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
+ pr_debug("linking %s's upper device %s with %s\n",
+ upper_dev->name, i->dev->name, dev->name);
ret = __netdev_adjacent_dev_link(dev, i->dev);
if (ret)
goto rollback_upper_mesh;
}
/* add upper_dev to every dev's lower device */
- list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+ pr_debug("linking %s's lower device %s with %s\n", dev->name,
+ i->dev->name, upper_dev->name);
ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
if (ret)
goto rollback_lower_mesh;
@@ -4706,7 +4834,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
rollback_lower_mesh:
to_i = i;
- list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
if (i == to_i)
break;
__netdev_adjacent_dev_unlink(i->dev, upper_dev);
@@ -4716,7 +4844,7 @@ rollback_lower_mesh:
rollback_upper_mesh:
to_i = i;
- list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
if (i == to_i)
break;
__netdev_adjacent_dev_unlink(dev, i->dev);
@@ -4727,8 +4855,8 @@ rollback_upper_mesh:
rollback_mesh:
to_i = i;
to_j = j;
- list_for_each_entry(i, &dev->lower_dev_list, list) {
- list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(i, &dev->all_adj_list.lower, list) {
+ list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
if (i == to_i && j == to_j)
break;
__netdev_adjacent_dev_unlink(i->dev, j->dev);
@@ -4737,7 +4865,7 @@ rollback_mesh:
break;
}
- __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
return ret;
}
@@ -4755,7 +4883,7 @@ rollback_mesh:
int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, false);
+ return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -4773,10 +4901,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, true);
+ return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+ struct net_device *upper_dev,
+ void *private)
+{
+ return __netdev_upper_dev_link(dev, upper_dev, true, private);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
+
/**
* netdev_upper_dev_unlink - Removes a link to upper device
* @dev: device
@@ -4791,29 +4927,59 @@ void netdev_upper_dev_unlink(struct net_device *dev,
struct netdev_adjacent *i, *j;
ASSERT_RTNL();
- __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
* devices from all upper_dev's upper devices and vice
* versa, to maintain the graph relationship.
*/
- list_for_each_entry(i, &dev->lower_dev_list, list)
- list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+ list_for_each_entry(i, &dev->all_adj_list.lower, list)
+ list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
__netdev_adjacent_dev_unlink(i->dev, j->dev);
/* remove also the devices itself from lower/upper device
* list
*/
- list_for_each_entry(i, &dev->lower_dev_list, list)
+ list_for_each_entry(i, &dev->all_adj_list.lower, list)
__netdev_adjacent_dev_unlink(i->dev, upper_dev);
- list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+ list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
__netdev_adjacent_dev_unlink(dev, i->dev);
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+ struct net_device *lower_dev)
+{
+ struct netdev_adjacent *lower;
+
+ if (!lower_dev)
+ return NULL;
+ lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower);
+ if (!lower)
+ return NULL;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu);
+
+void *netdev_lower_dev_get_private(struct net_device *dev,
+ struct net_device *lower_dev)
+{
+ struct netdev_adjacent *lower;
+
+ if (!lower_dev)
+ return NULL;
+ lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+ if (!lower)
+ return NULL;
+
+ return lower->private;
+}
+EXPORT_SYMBOL(netdev_lower_dev_get_private);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -4822,7 +4988,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
ops->ndo_change_rx_flags(dev, flags);
}
-static int __dev_set_promiscuity(struct net_device *dev, int inc)
+static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
{
unsigned int old_flags = dev->flags;
kuid_t uid;
@@ -4865,6 +5031,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
dev_change_rx_flags(dev, IFF_PROMISC);
}
+ if (notify)
+ __dev_notify_flags(dev, old_flags, IFF_PROMISC);
return 0;
}
@@ -4884,7 +5052,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
unsigned int old_flags = dev->flags;
int err;
- err = __dev_set_promiscuity(dev, inc);
+ err = __dev_set_promiscuity(dev, inc, true);
if (err < 0)
return err;
if (dev->flags != old_flags)
@@ -4893,22 +5061,9 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
}
EXPORT_SYMBOL(dev_set_promiscuity);
-/**
- * dev_set_allmulti - update allmulti count on a device
- * @dev: device
- * @inc: modifier
- *
- * Add or remove reception of all multicast frames to a device. While the
- * count in the device remains above zero the interface remains listening
- * to all interfaces. Once it hits zero the device reverts back to normal
- * filtering operation. A negative @inc value is used to drop the counter
- * when releasing a resource needing all multicasts.
- * Return 0 if successful or a negative errno code on error.
- */
-
-int dev_set_allmulti(struct net_device *dev, int inc)
+static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
{
- unsigned int old_flags = dev->flags;
+ unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
ASSERT_RTNL();
@@ -4931,9 +5086,30 @@ int dev_set_allmulti(struct net_device *dev, int inc)
if (dev->flags ^ old_flags) {
dev_change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
+ if (notify)
+ __dev_notify_flags(dev, old_flags,
+ dev->gflags ^ old_gflags);
}
return 0;
}
+
+/**
+ * dev_set_allmulti - update allmulti count on a device
+ * @dev: device
+ * @inc: modifier
+ *
+ * Add or remove reception of all multicast frames to a device. While the
+ * count in the device remains above zero the interface remains listening
+ * to all interfaces. Once it hits zero the device reverts back to normal
+ * filtering operation. A negative @inc value is used to drop the counter
+ * when releasing a resource needing all multicasts.
+ * Return 0 if successful or a negative errno code on error.
+ */
+
+int dev_set_allmulti(struct net_device *dev, int inc)
+{
+ return __dev_set_allmulti(dev, inc, true);
+}
EXPORT_SYMBOL(dev_set_allmulti);
/*
@@ -4958,10 +5134,10 @@ void __dev_set_rx_mode(struct net_device *dev)
* therefore calling __dev_set_promiscuity here is safe.
*/
if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
- __dev_set_promiscuity(dev, 1);
+ __dev_set_promiscuity(dev, 1, false);
dev->uc_promisc = true;
} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
- __dev_set_promiscuity(dev, -1);
+ __dev_set_promiscuity(dev, -1, false);
dev->uc_promisc = false;
}
}
@@ -5050,9 +5226,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
+ unsigned int old_flags = dev->flags;
dev->gflags ^= IFF_PROMISC;
- dev_set_promiscuity(dev, inc);
+
+ if (__dev_set_promiscuity(dev, inc, false) >= 0)
+ if (dev->flags != old_flags)
+ dev_set_rx_mode(dev);
}
/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
@@ -5063,16 +5243,20 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
dev->gflags ^= IFF_ALLMULTI;
- dev_set_allmulti(dev, inc);
+ __dev_set_allmulti(dev, inc, false);
}
return ret;
}
-void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
+ unsigned int gchanges)
{
unsigned int changes = dev->flags ^ old_flags;
+ if (gchanges)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges);
+
if (changes & IFF_UP) {
if (dev->flags & IFF_UP)
call_netdevice_notifiers(NETDEV_UP, dev);
@@ -5101,17 +5285,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
int dev_change_flags(struct net_device *dev, unsigned int flags)
{
int ret;
- unsigned int changes, old_flags = dev->flags;
+ unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
ret = __dev_change_flags(dev, flags);
if (ret < 0)
return ret;
- changes = old_flags ^ dev->flags;
- if (changes)
- rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
-
- __dev_notify_flags(dev, old_flags);
+ changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
+ __dev_notify_flags(dev, old_flags, changes);
return ret;
}
EXPORT_SYMBOL(dev_change_flags);
@@ -5247,15 +5428,18 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
+ dev_net(dev)->dev_unreg_count++;
}
static void rollback_registered_many(struct list_head *head)
{
struct net_device *dev, *tmp;
+ LIST_HEAD(close_head);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -5278,7 +5462,9 @@ static void rollback_registered_many(struct list_head *head)
}
/* If device is running, close it first. */
- dev_close_many(head);
+ list_for_each_entry(dev, head, unreg_list)
+ list_add_tail(&dev->close_list, &close_head);
+ dev_close_many(&close_head);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
@@ -5918,6 +6104,12 @@ void netdev_run_todo(void)
if (dev->destructor)
dev->destructor(dev);
+ /* Report a network device has been unregistered */
+ rtnl_lock();
+ dev_net(dev)->dev_unreg_count--;
+ __rtnl_unlock();
+ wake_up(&netdev_unregistering_wq);
+
/* Free network device */
kobject_put(&dev->dev.kobj);
}
@@ -6068,9 +6260,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
+ INIT_LIST_HEAD(&dev->close_list);
INIT_LIST_HEAD(&dev->link_watch_list);
- INIT_LIST_HEAD(&dev->upper_dev_list);
- INIT_LIST_HEAD(&dev->lower_dev_list);
+ INIT_LIST_HEAD(&dev->adj_list.upper);
+ INIT_LIST_HEAD(&dev->adj_list.lower);
+ INIT_LIST_HEAD(&dev->all_adj_list.upper);
+ INIT_LIST_HEAD(&dev->all_adj_list.lower);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
@@ -6603,6 +6798,34 @@ static void __net_exit default_device_exit(struct net *net)
rtnl_unlock();
}
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+ /* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace in net_list.
+ */
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
@@ -6614,7 +6837,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- rtnl_lock();
+ /* To prevent network device cleanup code from dereferencing
+ * loopback devices or network devices that have been freed
+ * wait here for all pending unregistrations to complete,
+ * before unregistring the loopback device and allowing the
+ * network namespace be freed.
+ *
+ * The netdev todo list containing all network devices
+ * unregistrations that happen in default_device_exit_batch
+ * will run in the rtnl_unlock() at the end of
+ * default_device_exit_batch.
+ */
+ rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops)
diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29..01b7808 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
bpf_jit_free(fp);
- kfree(fp);
}
EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
if (fprog->filter == NULL)
return -EINVAL;
- fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
+ fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
if (!fp)
return -ENOMEM;
memcpy(fp->insns, fprog->filter, fsize);
@@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+ unsigned int sk_fsize = sk_filter_size(fprog->len);
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (fprog->filter == NULL)
return -EINVAL;
- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+ fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
if (!fp)
return -ENOMEM;
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
- sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+ sock_kfree_s(sk, fp, sk_fsize);
return -EFAULT;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1929af8..f8e25ac 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,9 +25,35 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i
memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}
+/**
+ * skb_flow_get_ports - extract the upper layer ports and return them
+ * @skb: buffer to extract the ports from
+ * @thoff: transport header offset
+ * @ip_proto: protocol for which to get port offset
+ *
+ * The function will try to retrieve the ports at offset thoff + poff where poff
+ * is the protocol port offset returned from proto_ports_offset
+ */
+__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto)
+{
+ int poff = proto_ports_offset(ip_proto);
+
+ if (poff >= 0) {
+ __be32 *ports, _ports;
+
+ ports = skb_header_pointer(skb, thoff + poff,
+ sizeof(_ports), &_ports);
+ if (ports)
+ return *ports;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(skb_flow_get_ports);
+
bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
{
- int poff, nhoff = skb_network_offset(skb);
+ int nhoff = skb_network_offset(skb);
u8 ip_proto;
__be16 proto = skb->protocol;
@@ -150,16 +176,7 @@ ipv6:
}
flow->ip_proto = ip_proto;
- poff = proto_ports_offset(ip_proto);
- if (poff >= 0) {
- __be32 *ports, _ports;
-
- nhoff += poff;
- ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
- if (ports)
- flow->ports = *ports;
- }
-
+ flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
flow->thoff = (u16) nhoff;
return true;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6072610..ca15f32 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -867,7 +867,7 @@ static void neigh_invalidate(struct neighbour *neigh)
static void neigh_probe(struct neighbour *neigh)
__releases(neigh->lock)
{
- struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+ struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
skb = skb_copy(skb, GFP_ATOMIC);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index d9cd627..9b7cf6c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -222,11 +222,10 @@ static void net_prio_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *p;
- void *v;
+ void *v = (void *)(unsigned long)css->cgroup->id;
cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
- v = (void *)(unsigned long)task_netprioidx(p);
iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2a0e21d..4aedf03 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
}
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
- rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
- __dev_notify_flags(dev, old_flags);
+ __dev_notify_flags(dev, old_flags, ~0U);
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13c..3f1ec15 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
#include <net/secure_seq.h>
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
{
- get_random_bytes(net_secret, sizeof(net_secret));
+ u32 tmp;
+ int i;
+
+ if (likely(net_secret[0]))
+ return;
+
+ for (i = NET_SECRET_SIZE; i > 0;) {
+ do {
+ get_random_bytes(&tmp, sizeof(tmp));
+ } while (!tmp);
+ cmpxchg(&net_secret[--i], 0, tmp);
+ }
}
#ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force __u32) daddr;
hash[1] = net_secret[13];
hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
{
__u32 hash[4];
+ net_secret_init();
memcpy(hash, daddr, 16);
md5_transform(hash, net_secret);
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
u64 seq;
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d81cff1..8ead744 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2936,32 +2936,30 @@ EXPORT_SYMBOL_GPL(skb_segment);
int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
- struct sk_buff *p = *head;
- struct sk_buff *nskb;
- struct skb_shared_info *skbinfo = skb_shinfo(skb);
- struct skb_shared_info *pinfo = skb_shinfo(p);
- unsigned int headroom;
- unsigned int len = skb_gro_len(skb);
+ struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
unsigned int offset = skb_gro_offset(skb);
unsigned int headlen = skb_headlen(skb);
+ struct sk_buff *nskb, *lp, *p = *head;
+ unsigned int len = skb_gro_len(skb);
unsigned int delta_truesize;
+ unsigned int headroom;
- if (p->len + len >= 65536)
+ if (unlikely(p->len + len >= 65536))
return -E2BIG;
- if (pinfo->frag_list)
- goto merge;
- else if (headlen <= offset) {
+ lp = NAPI_GRO_CB(p)->last ?: p;
+ pinfo = skb_shinfo(lp);
+
+ if (headlen <= offset) {
skb_frag_t *frag;
skb_frag_t *frag2;
int i = skbinfo->nr_frags;
int nr_frags = pinfo->nr_frags + i;
- offset -= headlen;
-
if (nr_frags > MAX_SKB_FRAGS)
- return -E2BIG;
+ goto merge;
+ offset -= headlen;
pinfo->nr_frags = nr_frags;
skbinfo->nr_frags = 0;
@@ -2992,7 +2990,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
unsigned int first_offset;
if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
- return -E2BIG;
+ goto merge;
first_offset = skb->data -
(unsigned char *)page_address(page) +
@@ -3010,7 +3008,10 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
- } else if (skb_gro_len(p) != pinfo->gso_size)
+ }
+ if (pinfo->frag_list)
+ goto merge;
+ if (skb_gro_len(p) != pinfo->gso_size)
return -E2BIG;
headroom = skb_headroom(p);
@@ -3062,16 +3063,24 @@ merge:
__skb_pull(skb, offset);
- NAPI_GRO_CB(p)->last->next = skb;
+ if (!NAPI_GRO_CB(p)->last)
+ skb_shinfo(p)->frag_list = skb;
+ else
+ NAPI_GRO_CB(p)->last->next = skb;
NAPI_GRO_CB(p)->last = skb;
skb_header_release(skb);
+ lp = p;
done:
NAPI_GRO_CB(p)->count++;
p->data_len += len;
p->truesize += delta_truesize;
p->len += len;
-
+ if (lp != p) {
+ lp->data_len += len;
+ lp->truesize += delta_truesize;
+ lp->len += len;
+ }
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 5b6beba..fd6afa2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -914,6 +914,13 @@ set_rcvbuf:
}
break;
#endif
+
+ case SO_MAX_PACING_RATE:
+ sk->sk_max_pacing_rate = val;
+ sk->sk_pacing_rate = min(sk->sk_pacing_rate,
+ sk->sk_max_pacing_rate);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1177,6 +1184,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
#endif
+ case SO_MAX_PACING_RATE:
+ v.val = sk->sk_max_pacing_rate;
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -2319,6 +2330,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_ll_usec = sysctl_net_busy_read;
#endif
+ sk->sk_max_pacing_rate = ~0U;
+ sk->sk_pacing_rate = ~0U;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ebc54fe..720c362 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -409,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- newinet->inet_daddr = ireq->rmt_addr;
- newinet->inet_rcv_saddr = ireq->loc_addr;
- newinet->inet_saddr = ireq->loc_addr;
+ newinet->inet_daddr = ireq->ir_rmt_addr;
+ newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+ newinet->inet_saddr = ireq->ir_loc_addr;
newinet->inet_opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
@@ -516,10 +516,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
const struct inet_request_sock *ireq = inet_rsk(req);
struct dccp_hdr *dh = dccp_hdr(skb);
- dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
- ireq->rmt_addr);
- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
- ireq->rmt_addr,
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr);
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
}
@@ -641,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq = inet_rsk(req);
- ireq->loc_addr = ip_hdr(skb)->daddr;
- ireq->rmt_addr = ip_hdr(skb)->saddr;
+ ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
/*
* Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6cf9f77..4ac71ff 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -67,7 +67,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
struct dccp_hdr *dh = dccp_hdr(skb);
dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
}
static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
@@ -216,7 +216,7 @@ out:
static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
{
- struct inet6_request_sock *ireq6 = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
struct in6_addr *final_p, final;
@@ -226,12 +226,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = ireq6->rmt_addr;
- fl6.saddr = ireq6->loc_addr;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
+ fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowlabel = 0;
- fl6.flowi6_oif = ireq6->iif;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
+ fl6.flowi6_oif = ireq->ir_iif;
+ fl6.fl6_dport = ireq->ir_rmt_port;
+ fl6.fl6_sport = htons(ireq->ir_num);
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
@@ -249,9 +249,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
struct dccp_hdr *dh = dccp_hdr(skb);
dh->dccph_checksum = dccp_v6_csum_finish(skb,
- &ireq6->loc_addr,
- &ireq6->rmt_addr);
- fl6.daddr = ireq6->rmt_addr;
+ &ireq->ir_v6_loc_addr,
+ &ireq->ir_v6_rmt_addr);
+ fl6.daddr = ireq->ir_v6_rmt_addr;
err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
@@ -264,8 +264,7 @@ done:
static void dccp_v6_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- if (inet6_rsk(req)->pktopts != NULL)
- kfree_skb(inet6_rsk(req)->pktopts);
+ kfree_skb(inet_rsk(req)->pktopts);
}
static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
@@ -359,7 +358,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct request_sock *req;
struct dccp_request_sock *dreq;
- struct inet6_request_sock *ireq6;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *np = inet6_sk(sk);
const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -398,22 +397,22 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
- ireq6 = inet6_rsk(req);
- ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
- ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+ ireq = inet_rsk(req);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
- ireq6->pktopts = skb;
+ ireq->pktopts = skb;
}
- ireq6->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq6->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
/*
* Step 3: Process LISTEN state
@@ -446,7 +445,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
struct request_sock *req,
struct dst_entry *dst)
{
- struct inet6_request_sock *ireq6 = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
@@ -467,11 +466,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- newnp->rcv_saddr = newnp->saddr;
+ newsk->sk_v6_rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -505,12 +504,12 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = ireq6->rmt_addr;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
final_p = fl6_update_dst(&fl6, np->opt, &final);
- fl6.saddr = ireq6->loc_addr;
+ fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
- fl6.fl6_sport = inet_rsk(req)->loc_port;
+ fl6.fl6_dport = ireq->ir_rmt_port;
+ fl6.fl6_sport = htons(ireq->ir_num);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
@@ -538,10 +537,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- newnp->daddr = ireq6->rmt_addr;
- newnp->saddr = ireq6->loc_addr;
- newnp->rcv_saddr = ireq6->loc_addr;
- newsk->sk_bound_dev_if = ireq6->iif;
+ newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+ newnp->saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
@@ -554,10 +553,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (ireq6->pktopts != NULL) {
- newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC);
- consume_skb(ireq6->pktopts);
- ireq6->pktopts = NULL;
+ if (ireq->pktopts != NULL) {
+ newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
if (newnp->pktoptions)
skb_set_owner_r(newnp->pktoptions, newsk);
}
@@ -885,7 +884,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}
- np->daddr = usin->sin6_addr;
+ sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
/*
@@ -915,16 +914,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto failure;
}
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr);
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);
return err;
}
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ saddr = &sk->sk_v6_rcv_saddr;
fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = usin->sin6_port;
@@ -941,7 +940,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (saddr == NULL) {
saddr = &fl6.saddr;
- np->rcv_saddr = *saddr;
+ sk->sk_v6_rcv_saddr = *saddr;
}
/* set the source address */
@@ -963,7 +962,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto late_failure;
dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
err = dccp_connect(sk);
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 6eef81f..af259e1 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -25,12 +25,10 @@ struct dccp6_sock {
struct dccp6_request_sock {
struct dccp_request_sock dccp;
- struct inet6_request_sock inet6;
};
struct dccp6_timewait_sock {
struct inet_timewait_sock inet;
- struct inet6_timewait_sock tw6;
};
#endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 662071b..9e2f78b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -56,12 +56,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_timewait_sock *tw6;
- tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
- tw6 = inet6_twsk((struct sock *)tw);
- tw6->tw_v6_daddr = np->daddr;
- tw6->tw_v6_rcv_saddr = np->rcv_saddr;
+ tw->tw_v6_daddr = sk->sk_v6_daddr;
+ tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_ipv6only = np->ipv6only;
}
#endif
@@ -269,10 +266,10 @@ int dccp_reqsk_init(struct request_sock *req,
{
struct dccp_request_sock *dreq = dccp_rsk(req);
- inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport;
- inet_rsk(req)->acked = 0;
- dreq->dreq_timestamp_echo = 0;
+ inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
+ inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
+ inet_rsk(req)->acked = 0;
+ dreq->dreq_timestamp_echo = 0;
/* inherit feature negotiation options from listening socket */
return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d17fc90..8876078 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -424,8 +424,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
/* Build and checksum header */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
- dh->dccph_sport = inet_rsk(req)->loc_port;
- dh->dccph_dport = inet_rsk(req)->rmt_port;
+ dh->dccph_sport = htons(inet_rsk(req)->ir_num);
+ dh->dccph_dport = inet_rsk(req)->ir_rmt_port;
dh->dccph_doff = (dccp_header_size +
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ba64750..eb892b4 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1158,10 +1158,8 @@ static int __init dccp_init(void)
goto out_free_bind_bucket_cachep;
}
- for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
+ for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
- INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
- }
if (inet_ehash_locks_alloc(&dccp_hashinfo))
goto out_free_dccp_ehash;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index be1f64d..8f032ba 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,7 +58,7 @@
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/dsa.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
__setup("ether=", netdev_boot_setup);
@@ -133,7 +133,7 @@ int eth_rebuild_header(struct sk_buff *skb)
return arp_find(eth->h_dest, skb);
#endif
default:
- printk(KERN_DEBUG
+ netdev_dbg(dev,
"%s: unable to resolve type %X addresses.\n",
dev->name, ntohs(eth->h_proto));
@@ -169,20 +169,9 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
else
skb->pkt_type = PACKET_MULTICAST;
}
-
- /*
- * This ALLMULTI check should be redundant by 1.4
- * so don't forget to remove it.
- *
- * Seems, you forgot to remove it. All silly devices
- * seems to set IFF_PROMISC.
- */
-
- else if (1 /*dev->flags&IFF_PROMISC */ ) {
- if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- dev->dev_addr)))
- skb->pkt_type = PACKET_OTHERHOST;
- }
+ else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
+ dev->dev_addr)))
+ skb->pkt_type = PACKET_OTHERHOST;
/*
* Some variants of DSA tagging don't have an ethertype field
@@ -190,12 +179,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
* variants has been configured on the receiving interface,
* and if so, set skb->protocol without looking at the packet.
*/
- if (netdev_uses_dsa_tags(dev))
+ if (unlikely(netdev_uses_dsa_tags(dev)))
return htons(ETH_P_DSA);
- if (netdev_uses_trailer_tags(dev))
+
+ if (unlikely(netdev_uses_trailer_tags(dev)))
return htons(ETH_P_TRAILER);
- if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
+ if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
return eth->h_proto;
/*
@@ -204,7 +194,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
* layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
* won't work for fault tolerant netware but does for the rest.
*/
- if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)
+ if (unlikely(skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF))
return htons(ETH_P_802_3);
/*
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index c85e71e..ff41b4d 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
if (!real_dev)
return -ENODEV;
+ if (real_dev->type != ARPHRD_IEEE802154)
+ return -EINVAL;
lowpan_dev_info(dev)->real_dev = real_dev;
lowpan_dev_info(dev)->fragment_tag = 0;
@@ -1386,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
entry->ldev = dev;
+ /* Set the lowpan harware address to the wpan hardware address. */
+ memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
+
mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
INIT_LIST_HEAD(&entry->list);
list_add_tail(&entry->list, &lowpan_devices);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b..35913fb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+ if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
- net_secret_init();
- }
}
EXPORT_SYMBOL(build_ehash_secret);
@@ -1548,6 +1546,7 @@ static const struct net_protocol tcp_protocol = {
};
static const struct net_protocol udp_protocol = {
+ .early_demux = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3df6d3e..ec9a9ef 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -762,12 +762,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
if (IS_LEAF(node) || ((struct tnode *) node)->pos >
tn->pos + tn->bits - 1) {
- if (tkey_extract_bits(node->key,
- oldtnode->pos + oldtnode->bits,
- 1) == 0)
- put_child(tn, 2*i, node);
- else
- put_child(tn, 2*i+1, node);
+ put_child(tn,
+ tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1),
+ node);
continue;
}
@@ -1120,12 +1117,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
* first tnode need some special handling
*/
- if (tp)
- pos = tp->pos+tp->bits;
- else
- pos = 0;
-
if (n) {
+ pos = tp ? tp->pos+tp->bits : 0;
newpos = tkey_mismatch(key, pos, n->key);
tn = tnode_new(n->key, newpos, 1);
} else {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5f7d11a..5c0e8bc 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -353,6 +353,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
saddr = fib_compute_spec_dst(skb);
ipc.opt = NULL;
ipc.tx_flags = 0;
+ ipc.ttl = 0;
+ ipc.tos = -1;
+
if (icmp_param->replyopts.opt.opt.optlen) {
ipc.opt = &icmp_param->replyopts.opt;
if (ipc.opt->opt.srr)
@@ -608,6 +611,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
ipc.addr = iph->saddr;
ipc.opt = &icmp_param->replyopts.opt;
ipc.tx_flags = 0;
+ ipc.ttl = 0;
+ ipc.tos = -1;
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
type, code, icmp_param);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index dace87f..7defdc9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
in_dev->mr_gq_running = 0;
igmpv3_send_report(in_dev, NULL);
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_event(struct in_device *in_dev)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6acb541..fc0e649 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -29,27 +29,19 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
EXPORT_SYMBOL(inet_csk_timer_bug_msg);
#endif
-/*
- * This struct holds the first and last local port number.
- */
-struct local_ports sysctl_local_ports __read_mostly = {
- .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock),
- .range = { 32768, 61000 },
-};
-
unsigned long *sysctl_local_reserved_ports;
EXPORT_SYMBOL(sysctl_local_reserved_ports);
-void inet_get_local_port_range(int *low, int *high)
+void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
do {
- seq = read_seqbegin(&sysctl_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
- *low = sysctl_local_ports.range[0];
- *high = sysctl_local_ports.range[1];
- } while (read_seqretry(&sysctl_local_ports.lock, seq));
+ *low = net->ipv4.sysctl_local_ports.range[0];
+ *high = net->ipv4.sysctl_local_ports.range[1];
+ } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
}
EXPORT_SYMBOL(inet_get_local_port_range);
@@ -79,17 +71,16 @@ int inet_csk_bind_conflict(const struct sock *sk,
(!reuseport || !sk2->sk_reuseport ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
- const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
- if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
- sk2_rcv_saddr == sk_rcv_saddr(sk))
+
+ if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
+ sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
break;
}
if (!relax && reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
- const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
- if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
- sk2_rcv_saddr == sk_rcv_saddr(sk))
+ if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
+ sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
break;
}
}
@@ -116,7 +107,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
int remaining, rover, low, high;
again:
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
smallest_rover = rover = net_random() % remaining + low;
@@ -421,8 +412,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol,
flags,
- (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
- ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+ (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
+ ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -457,8 +448,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
- (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
- ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+ (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
+ ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -504,9 +495,9 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
prev = &req->dl_next) {
const struct inet_request_sock *ireq = inet_rsk(req);
- if (ireq->rmt_port == rport &&
- ireq->rmt_addr == raddr &&
- ireq->loc_addr == laddr &&
+ if (ireq->ir_rmt_port == rport &&
+ ireq->ir_rmt_addr == raddr &&
+ ireq->ir_loc_addr == laddr &&
AF_INET_FAMILY(req->rsk_ops->family)) {
WARN_ON(req->sk);
*prevp = prev;
@@ -523,7 +514,8 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
+ const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
+ inet_rsk(req)->ir_rmt_port,
lopt->hash_rnd, lopt->nr_table_entries);
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
@@ -683,9 +675,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
newsk->sk_state = TCP_SYN_RECV;
newicsk->icsk_bind_hash = NULL;
- inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
- inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
- inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
+ inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
+ inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
+ inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
newsk->sk_write_space = sk_stream_write_space;
newicsk->icsk_retransmits = 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5f64875..41e1c3e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -121,13 +121,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
- *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
- *(struct in6_addr *)r->id.idiag_dst = np->daddr;
+ *(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
if (ext & (1 << (INET_DIAG_TCLASS - 1)))
- if (nla_put_u8(skb, INET_DIAG_TCLASS, np->tclass) < 0)
+ if (nla_put_u8(skb, INET_DIAG_TCLASS,
+ inet6_sk(sk)->tclass) < 0)
goto errout;
}
#endif
@@ -222,7 +222,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
- long tmo;
+ s32 tmo;
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
@@ -234,7 +234,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_ttd - jiffies;
+ tmo = tw->tw_ttd - inet_tw_time_stamp();
if (tmo < 0)
tmo = 0;
@@ -248,18 +248,15 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->id.idiag_dst[0] = tw->tw_daddr;
r->idiag_state = tw->tw_substate;
r->idiag_timer = 3;
- r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ);
+ r->idiag_expires = jiffies_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
r->idiag_inode = 0;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
- const struct inet6_timewait_sock *tw6 =
- inet6_twsk((struct sock *)tw);
-
- *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
- *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
+ *(struct in6_addr *)r->id.idiag_src = tw->tw_v6_rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = tw->tw_v6_daddr;
}
#endif
@@ -273,10 +270,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
- return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
- skb, r, portid, seq, nlmsg_flags,
- unlh);
- return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh);
+ return inet_twsk_diag_fill(inet_twsk(sk), skb, r, portid, seq,
+ nlmsg_flags, unlh);
+
+ return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
+ nlmsg_flags, unlh);
}
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
@@ -489,10 +487,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.family = sk->sk_family;
#if IS_ENABLED(CONFIG_IPV6)
if (entry.family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
- entry.saddr = np->rcv_saddr.s6_addr32;
- entry.daddr = np->daddr.s6_addr32;
+ entry.saddr = sk->sk_v6_rcv_saddr.s6_addr32;
+ entry.daddr = sk->sk_v6_daddr.s6_addr32;
} else
#endif
{
@@ -635,22 +632,22 @@ static int inet_csk_diag_dump(struct sock *sk,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
-static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
+static int inet_twsk_diag_dump(struct sock *sk,
struct sk_buff *skb,
struct netlink_callback *cb,
struct inet_diag_req_v2 *r,
const struct nlattr *bc)
{
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+
if (bc != NULL) {
struct inet_diag_entry entry;
entry.family = tw->tw_family;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
- struct inet6_timewait_sock *tw6 =
- inet6_twsk((struct sock *)tw);
- entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
- entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+ entry.saddr = tw->tw_v6_rcv_saddr.s6_addr32;
+ entry.daddr = tw->tw_v6_daddr.s6_addr32;
} else
#endif
{
@@ -682,12 +679,12 @@ static inline void inet_diag_req_addrs(const struct sock *sk,
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6) {
if (req->rsk_ops->family == AF_INET6) {
- entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32;
- entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32;
+ entry->saddr = ireq->ir_v6_loc_addr.s6_addr32;
+ entry->daddr = ireq->ir_v6_rmt_addr.s6_addr32;
} else if (req->rsk_ops->family == AF_INET) {
- ipv6_addr_set_v4mapped(ireq->loc_addr,
+ ipv6_addr_set_v4mapped(ireq->ir_loc_addr,
&entry->saddr_storage);
- ipv6_addr_set_v4mapped(ireq->rmt_addr,
+ ipv6_addr_set_v4mapped(ireq->ir_rmt_addr,
&entry->daddr_storage);
entry->saddr = entry->saddr_storage.s6_addr32;
entry->daddr = entry->daddr_storage.s6_addr32;
@@ -695,8 +692,8 @@ static inline void inet_diag_req_addrs(const struct sock *sk,
} else
#endif
{
- entry->saddr = &ireq->loc_addr;
- entry->daddr = &ireq->rmt_addr;
+ entry->saddr = &ireq->ir_loc_addr;
+ entry->daddr = &ireq->ir_rmt_addr;
}
}
@@ -731,9 +728,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
tmo = 0;
r->id.idiag_sport = inet->inet_sport;
- r->id.idiag_dport = ireq->rmt_port;
- r->id.idiag_src[0] = ireq->loc_addr;
- r->id.idiag_dst[0] = ireq->rmt_addr;
+ r->id.idiag_dport = ireq->ir_rmt_port;
+ r->id.idiag_src[0] = ireq->ir_loc_addr;
+ r->id.idiag_dst[0] = ireq->ir_rmt_addr;
r->idiag_expires = jiffies_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
@@ -792,13 +789,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (reqnum < s_reqnum)
continue;
- if (r->id.idiag_dport != ireq->rmt_port &&
+ if (r->id.idiag_dport != ireq->ir_rmt_port &&
r->id.idiag_dport)
continue;
if (bc) {
inet_diag_req_addrs(sk, req, &entry);
- entry.dport = ntohs(ireq->rmt_port);
+ entry.dport = ntohs(ireq->ir_rmt_port);
if (!inet_diag_bc_run(bc, &entry))
continue;
@@ -911,8 +908,7 @@ skip_listen_ht:
num = 0;
- if (hlist_nulls_empty(&head->chain) &&
- hlist_nulls_empty(&head->twchain))
+ if (hlist_nulls_empty(&head->chain))
continue;
if (i > s_i)
@@ -920,7 +916,7 @@ skip_listen_ht:
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &head->chain) {
- struct inet_sock *inet = inet_sk(sk);
+ int res;
if (!net_eq(sock_net(sk), net))
continue;
@@ -929,15 +925,19 @@ skip_listen_ht:
if (!(r->idiag_states & (1 << sk->sk_state)))
goto next_normal;
if (r->sdiag_family != AF_UNSPEC &&
- sk->sk_family != r->sdiag_family)
+ sk->sk_family != r->sdiag_family)
goto next_normal;
- if (r->id.idiag_sport != inet->inet_sport &&
+ if (r->id.idiag_sport != htons(sk->sk_num) &&
r->id.idiag_sport)
goto next_normal;
- if (r->id.idiag_dport != inet->inet_dport &&
+ if (r->id.idiag_dport != sk->sk_dport &&
r->id.idiag_dport)
goto next_normal;
- if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
+ if (sk->sk_state == TCP_TIME_WAIT)
+ res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
+ else
+ res = inet_csk_diag_dump(sk, skb, cb, r, bc);
+ if (res < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -945,33 +945,6 @@ next_normal:
++num;
}
- if (r->idiag_states & TCPF_TIME_WAIT) {
- struct inet_timewait_sock *tw;
-
- inet_twsk_for_each(tw, node,
- &head->twchain) {
- if (!net_eq(twsk_net(tw), net))
- continue;
-
- if (num < s_num)
- goto next_dying;
- if (r->sdiag_family != AF_UNSPEC &&
- tw->tw_family != r->sdiag_family)
- goto next_dying;
- if (r->id.idiag_sport != tw->tw_sport &&
- r->id.idiag_sport)
- goto next_dying;
- if (r->id.idiag_dport != tw->tw_dport &&
- r->id.idiag_dport)
- goto next_dying;
- if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
- spin_unlock_bh(lock);
- goto done;
- }
-next_dying:
- ++num;
- }
- }
spin_unlock_bh(lock);
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7bd8983..a4b66bb 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -230,6 +230,19 @@ begin:
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+/* All sockets share common refcount, but have different destructors */
+void sock_gen_put(struct sock *sk)
+{
+ if (!atomic_dec_and_test(&sk->sk_refcnt))
+ return;
+
+ if (sk->sk_state == TCP_TIME_WAIT)
+ inet_twsk_free(inet_twsk(sk));
+ else
+ sk_free(sk);
+}
+EXPORT_SYMBOL_GPL(sock_gen_put);
+
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
@@ -255,13 +268,13 @@ begin:
if (likely(INET_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
- goto begintw;
+ goto out;
if (unlikely(!INET_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) {
- sock_put(sk);
+ sock_gen_put(sk);
goto begin;
}
- goto out;
+ goto found;
}
}
/*
@@ -271,37 +284,9 @@ begin:
*/
if (get_nulls_value(node) != slot)
goto begin;
-
-begintw:
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_nulls_for_each_rcu(sk, node, &head->twchain) {
- if (sk->sk_hash != hash)
- continue;
- if (likely(INET_TW_MATCH(sk, net, acookie,
- saddr, daddr, ports,
- dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
- sk = NULL;
- goto out;
- }
- if (unlikely(!INET_TW_MATCH(sk, net, acookie,
- saddr, daddr, ports,
- dif))) {
- sock_put(sk);
- goto begintw;
- }
- goto out;
- }
- }
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begintw;
- sk = NULL;
out:
+ sk = NULL;
+found:
rcu_read_unlock();
return sk;
}
@@ -326,39 +311,29 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
+ struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock);
- /* Check TIME-WAIT sockets first. */
- sk_nulls_for_each(sk2, node, &head->twchain) {
- if (sk2->sk_hash != hash)
- continue;
-
- if (likely(INET_TW_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif))) {
- tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
- goto not_unique;
- }
- }
- tw = NULL;
-
- /* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) {
if (sk2->sk_hash != hash)
continue;
+
if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif)))
+ saddr, daddr, ports, dif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+ break;
+ }
goto not_unique;
+ }
}
-unique:
/* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
+ * in hash table socket with a funny identity.
+ */
inet->inet_num = lport;
inet->inet_sport = htons(lport);
sk->sk_hash = hash;
@@ -494,7 +469,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
u32 offset = hint + port_offset;
struct inet_timewait_sock *tw = NULL;
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
local_bh_disable();
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 1f27c9f..6d592f8 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -87,19 +87,11 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
refcnt += inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock);
-#ifdef SOCK_REFCNT_DEBUG
- if (atomic_read(&tw->tw_refcnt) != 1) {
- pr_debug("%s timewait_sock %p refcnt=%d\n",
- tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
- }
-#endif
- while (refcnt) {
- inet_twsk_put(tw);
- refcnt--;
- }
+ BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
+ atomic_sub(refcnt, &tw->tw_refcnt);
}
-static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
+void inet_twsk_free(struct inet_timewait_sock *tw)
{
struct module *owner = tw->tw_prot->owner;
twsk_destructor((struct sock *)tw);
@@ -118,6 +110,18 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
+static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
+{
+ hlist_nulls_add_head_rcu(&tw->tw_node, list);
+}
+
+static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
+ struct hlist_head *list)
+{
+ hlist_add_head(&tw->tw_bind_node, list);
+}
+
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -146,26 +150,21 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
/*
- * Step 2: Hash TW into TIMEWAIT chain.
- * Should be done before removing sk from established chain
- * because readers are lockless and search established first.
+ * Step 2: Hash TW into tcp ehash chain.
+ * Notes :
+ * - tw_refcnt is set to 3 because :
+ * - We have one reference from bhash chain.
+ * - We have one reference from ehash chain.
+ * We can use atomic_set() because prior spin_lock()/spin_unlock()
+ * committed into memory all tw fields.
*/
- inet_twsk_add_node_rcu(tw, &ehead->twchain);
+ atomic_set(&tw->tw_refcnt, 1 + 1 + 1);
+ inet_twsk_add_node_rcu(tw, &ehead->chain);
- /* Step 3: Remove SK from established hash. */
+ /* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- /*
- * Notes :
- * - We initially set tw_refcnt to 0 in inet_twsk_alloc()
- * - We add one reference for the bhash link
- * - We add one reference for the ehash link
- * - We want this refcnt update done before allowing other
- * threads to find this tw in ehash chain.
- */
- atomic_add(1 + 1 + 1, &tw->tw_refcnt);
-
spin_unlock(lock);
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -387,11 +386,11 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
if (slot >= INET_TWDR_TWKILL_SLOTS)
slot = INET_TWDR_TWKILL_SLOTS - 1;
}
- tw->tw_ttd = jiffies + timeo;
+ tw->tw_ttd = inet_tw_time_stamp() + timeo;
slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
list = &twdr->cells[slot];
} else {
- tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
+ tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK);
if (twdr->twcal_hand < 0) {
twdr->twcal_hand = 0;
@@ -490,7 +489,9 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo,
restart_rcu:
rcu_read_lock();
restart:
- sk_nulls_for_each_rcu(sk, node, &head->twchain) {
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_state != TCP_TIME_WAIT)
+ continue;
tw = inet_twsk(sk);
if ((tw->tw_family != family) ||
atomic_read(&twsk_net(tw)->count))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a04d872..7d8357b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1060,6 +1060,9 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
rt->dst.dev->mtu : dst_mtu(&rt->dst);
cork->dst = &rt->dst;
cork->length = 0;
+ cork->ttl = ipc->ttl;
+ cork->tos = ipc->tos;
+ cork->priority = ipc->priority;
cork->tx_flags = ipc->tx_flags;
return 0;
@@ -1311,7 +1314,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
if (cork->flags & IPCORK_OPT)
opt = cork->opt;
- if (rt->rt_type == RTN_MULTICAST)
+ if (cork->ttl != 0)
+ ttl = cork->ttl;
+ else if (rt->rt_type == RTN_MULTICAST)
ttl = inet->mc_ttl;
else
ttl = ip_select_ttl(inet, &rt->dst);
@@ -1319,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
- iph->tos = inet->tos;
+ iph->tos = (cork->tos != -1) ? cork->tos : inet->tos;
iph->frag_off = df;
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
@@ -1331,7 +1336,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_options_build(skb, opt, cork->addr, rt, 0);
}
- skb->priority = sk->sk_priority;
+ skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
skb->mark = sk->sk_mark;
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
@@ -1481,6 +1486,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
ipc.addr = daddr;
ipc.opt = NULL;
ipc.tx_flags = 0;
+ ipc.ttl = 0;
+ ipc.tos = -1;
if (replyopts.opt.opt.optlen) {
ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d9c4f11..0626f2c 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(ip_cmsg_recv);
int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
{
- int err;
+ int err, val;
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
@@ -215,6 +215,24 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
ipc->addr = info->ipi_spec_dst.s_addr;
break;
}
+ case IP_TTL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->ttl = val;
+ break;
+ case IP_TOS:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 0 || val > 255)
+ return -EINVAL;
+ ipc->tos = val;
+ ipc->priority = rt_tos2priority(ipc->tos);
+ break;
+
default:
return -EINVAL;
}
@@ -1034,11 +1052,12 @@ e_inval:
* destination in skb->cb[] before dst drop.
* This way, receiver doesnt make cache line misses to read rtable.
*/
-void ipv4_pktinfo_prepare(struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
- if (skb_rtable(skb)) {
+ if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) &&
+ skb_rtable(skb)) {
pktinfo->ipi_ifindex = inet_iif(skb);
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
} else {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ac9fabe0..63a6d6d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_count = 0;
}
+ tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = tnl_params->ttl;
if (ttl == 0) {
if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len;
- if (max_headroom > dev->needed_headroom) {
+ if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return;
- }
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return;
}
err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
- ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
- !net_eq(tunnel->net, dev_net(dev)));
+ tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- if (!IS_ERR(itn->fb_tunnel_dev))
+ if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ }
rtnl_unlock();
return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, head);
}
- if (itn->fb_tunnel_dev)
- unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d6c856b..c31e3ad 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* Push down and install the IP header. */
- __skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e805e7b..91f69bc 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -49,70 +49,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
static int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
-static int vti_err(struct sk_buff *skb, u32 info)
-{
-
- /* All the routers (except for Linux) return only
- * 8 bytes of packet payload. It means, that precise relaying of
- * ICMP in the real Internet is absolutely infeasible.
- */
- struct net *net = dev_net(skb->dev);
- struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
- struct iphdr *iph = (struct iphdr *)skb->data;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- struct ip_tunnel *t;
- int err;
-
- switch (type) {
- default:
- case ICMP_PARAMETERPROB:
- return 0;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return 0;
- default:
- /* All others are translated to HOST_UNREACH. */
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return 0;
- break;
- }
-
- err = -ENOENT;
-
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->daddr, iph->saddr, 0);
- if (t == NULL)
- goto out;
-
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPIP, 0);
- err = 0;
- goto out;
- }
-
- err = 0;
- if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- goto out;
-
- if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
- t->err_count++;
- else
- t->err_count = 1;
- t->err_time = jiffies;
-out:
- return err;
-}
-
/* We dont digest the packet therefore let the packet pass */
static int vti_rcv(struct sk_buff *skb)
{
@@ -296,9 +232,8 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
}
-static struct xfrm_tunnel vti_handler __read_mostly = {
+static struct xfrm_tunnel_notifier vti_handler __read_mostly = {
.handler = vti_rcv,
- .err_handler = vti_err,
.priority = 1,
};
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 67e17dc..b6346bf 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
if (th == NULL)
return NF_DROP;
- synproxy_parse_options(skb, par->thoff, th, &opts);
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
if (th->syn && !(th->ack || th->fin || th->rst)) {
/* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
/* fall through */
case TCP_CONNTRACK_SYN_SENT:
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
if (!th->syn && th->ack &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
if (!th->syn || !th->ack)
break;
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->tsoff = opts.tsval - synproxy->its;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index d7d9882..9afbdb1 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -202,15 +202,14 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6) &&
sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
(int) isk->inet_num,
- &inet6_sk(sk)->rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
sk->sk_bound_dev_if);
- if (!ipv6_addr_any(&np->rcv_saddr) &&
- !ipv6_addr_equal(&np->rcv_saddr,
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr,
&ipv6_hdr(skb)->daddr))
continue;
#endif
@@ -237,11 +236,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
unsigned int seq;
do {
- seq = read_seqbegin(&sysctl_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
*low = data[0];
*high = data[1];
- } while (read_seqretry(&sysctl_local_ports.lock, seq));
+ } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
}
@@ -362,7 +361,7 @@ static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
} else if (saddr->sa_family == AF_INET6) {
struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
struct ipv6_pinfo *np = inet6_sk(sk);
- np->rcv_saddr = np->saddr = addr->sin6_addr;
+ sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr;
#endif
}
}
@@ -376,7 +375,7 @@ static void ping_clear_saddr(struct sock *sk, int dif)
#if IS_ENABLED(CONFIG_IPV6)
} else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
- memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+ memset(&sk->sk_v6_rcv_saddr, 0, sizeof(sk->sk_v6_rcv_saddr));
memset(&np->saddr, 0, sizeof(np->saddr));
#endif
}
@@ -416,10 +415,12 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
(int)sk->sk_bound_dev_if);
err = 0;
- if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
- (sk->sk_family == AF_INET6 &&
- !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
+ if (sk->sk_family == AF_INET && isk->inet_rcv_saddr)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 && !ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+#endif
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
@@ -429,7 +430,7 @@ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+ memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
#endif
sk_dst_reset(sk);
@@ -713,6 +714,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.opt = NULL;
ipc.oif = sk->sk_bound_dev_if;
ipc.tx_flags = 0;
+ ipc.ttl = 0;
+ ipc.tos = -1;
sock_tx_timestamp(sk, &ipc.tx_flags);
@@ -744,7 +747,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
return -EINVAL;
faddr = ipc.opt->opt.faddr;
}
- tos = RT_TOS(inet->tos);
+ tos = get_rttos(&ipc, inet);
if (sock_flag(sk, SOCK_LOCALROUTE) ||
(msg->msg_flags & MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->opt.is_strictroute)) {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bfec521..41e1d28 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
- else if (type == ICMP_REDIRECT)
+ else if (type == ICMP_REDIRECT) {
ipv4_sk_redirect(skb, sk);
+ return;
+ }
/* Report error on raw socket, if:
1. User requested ip_recverr.
@@ -297,7 +299,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
/* Charge it to the socket. */
- ipv4_pktinfo_prepare(skb);
+ ipv4_pktinfo_prepare(sk, skb);
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
@@ -517,6 +519,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->inet_saddr;
ipc.opt = NULL;
ipc.tx_flags = 0;
+ ipc.ttl = 0;
+ ipc.tos = -1;
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
@@ -556,7 +560,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
daddr = ipc.opt->opt.faddr;
}
}
- tos = RT_CONN_FLAGS(sk);
+ tos = get_rtconn_flags(&ipc, sk);
if (msg->msg_flags & MSG_DONTROUTE)
tos |= RTO_ONLINK;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 727f436..6011615 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2072,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
RT_SCOPE_LINK);
goto make_route;
}
- if (fl4->saddr) {
+ if (!fl4->saddr) {
if (ipv4_is_multicast(fl4->daddr))
fl4->saddr = inet_select_addr(dev_out, 0,
fl4->flowi4_scope);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 14a15c4..3b64c59 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -89,8 +89,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
- __be16 dport, __u32 sseq, __u32 count,
- __u32 data)
+ __be16 dport, __u32 sseq, __u32 data)
{
/*
* Compute the secure sequence number.
@@ -102,7 +101,7 @@ static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
* As an extra hack, we add a small "data" value that encodes the
* MSS into the second hash value.
*/
-
+ u32 count = tcp_cookie_time();
return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
sseq + (count << COOKIEBITS) +
((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
@@ -114,22 +113,21 @@ static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
* If the syncookie is bad, the data returned will be out of
* range. This must be checked by the caller.
*
- * The count value used to generate the cookie must be within
- * "maxdiff" if the current (passed-in) "count". The return value
- * is (__u32)-1 if this test fails.
+ * The count value used to generate the cookie must be less than
+ * MAX_SYNCOOKIE_AGE minutes in the past.
+ * The return value (__u32)-1 if this test fails.
*/
static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport, __u32 sseq,
- __u32 count, __u32 maxdiff)
+ __be16 sport, __be16 dport, __u32 sseq)
{
- __u32 diff;
+ u32 diff, count = tcp_cookie_time();
/* Strip away the layers from the cookie */
cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
/* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */
diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS);
- if (diff >= maxdiff)
+ if (diff >= MAX_SYNCOOKIE_AGE)
return (__u32)-1;
return (cookie -
@@ -138,22 +136,22 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
}
/*
- * MSS Values are taken from the 2009 paper
- * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson:
- * - values 1440 to 1460 accounted for 80% of observed mss values
- * - values outside the 536-1460 range are rare (<0.2%).
+ * MSS Values are chosen based on the 2011 paper
+ * 'An Analysis of TCP Maximum Segement Sizes' by S. Alcock and R. Nelson.
+ * Values ..
+ * .. lower than 536 are rare (< 0.2%)
+ * .. between 537 and 1299 account for less than < 1.5% of observed values
+ * .. in the 1300-1349 range account for about 15 to 20% of observed mss values
+ * .. exceeding 1460 are very rare (< 0.04%)
*
- * Table must be sorted.
+ * 1460 is the single most frequently announced mss value (30 to 46% depending
+ * on monitor location). Table must be sorted.
*/
static __u16 const msstab[] = {
- 64,
- 512,
536,
- 1024,
- 1440,
+ 1300,
+ 1440, /* 1440, 1452: PPPoE */
1460,
- 4312,
- 8960,
};
/*
@@ -173,7 +171,7 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
th->source, th->dest, ntohl(th->seq),
- jiffies / (HZ * 60), mssind);
+ mssind);
}
EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
@@ -189,13 +187,6 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
}
/*
- * This (misnamed) value is the age of syncookie which is permitted.
- * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
- * sysctl_tcp_retries1. It's a rather complicated formula (exponential
- * backoff) to compute at runtime so it's currently hardcoded here.
- */
-#define COUNTER_TRIES 4
-/*
* Check if a ack sequence number is a valid syncookie.
* Return the decoded mss if it is, or 0 if not.
*/
@@ -204,9 +195,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
- th->source, th->dest, seq,
- jiffies / (HZ * 60),
- COUNTER_TRIES);
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
@@ -315,10 +304,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
req->mss = mss;
- ireq->loc_port = th->dest;
- ireq->rmt_port = th->source;
- ireq->loc_addr = ip_hdr(skb)->daddr;
- ireq->rmt_addr = ip_hdr(skb)->saddr;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
@@ -358,8 +347,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
- (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
- ireq->loc_addr, th->source, th->dest);
+ (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
+ ireq->ir_loc_addr, th->source, th->dest);
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 540279f..c08f096 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -43,12 +43,12 @@ static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
/* Update system visible IP port range */
-static void set_local_port_range(int range[2])
+static void set_local_port_range(struct net *net, int range[2])
{
- write_seqlock(&sysctl_local_ports.lock);
- sysctl_local_ports.range[0] = range[0];
- sysctl_local_ports.range[1] = range[1];
- write_sequnlock(&sysctl_local_ports.lock);
+ write_seqlock(&net->ipv4.sysctl_local_ports.lock);
+ net->ipv4.sysctl_local_ports.range[0] = range[0];
+ net->ipv4.sysctl_local_ports.range[1] = range[1];
+ write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
}
/* Validate changes from /proc interface. */
@@ -56,6 +56,8 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net =
+ container_of(table->data, struct net, ipv4.sysctl_local_ports.range);
int ret;
int range[2];
struct ctl_table tmp = {
@@ -66,14 +68,15 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
.extra2 = &ip_local_port_range_max,
};
- inet_get_local_port_range(range, range + 1);
+ inet_get_local_port_range(net, &range[0], &range[1]);
+
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
if (range[1] < range[0])
ret = -EINVAL;
else
- set_local_port_range(range);
+ set_local_port_range(net, range);
}
return ret;
@@ -83,23 +86,27 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
kgid_t *data = table->data;
+ struct net *net =
+ container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
unsigned int seq;
do {
- seq = read_seqbegin(&sysctl_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
*low = data[0];
*high = data[1];
- } while (read_seqretry(&sysctl_local_ports.lock, seq));
+ } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
}
/* Update system visible IP port range */
static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
{
kgid_t *data = table->data;
- write_seqlock(&sysctl_local_ports.lock);
+ struct net *net =
+ container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
+ write_seqlock(&net->ipv4.sysctl_local_ports.lock);
data[0] = low;
data[1] = high;
- write_sequnlock(&sysctl_local_ports.lock);
+ write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
}
/* Validate changes from /proc interface. */
@@ -475,13 +482,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "ip_local_port_range",
- .data = &sysctl_local_ports.range,
- .maxlen = sizeof(sysctl_local_ports.range),
- .mode = 0644,
- .proc_handler = ipv4_local_port_range,
- },
- {
.procname = "ip_local_reserved_ports",
.data = NULL, /* initialized in sysctl_ipv4_init */
.maxlen = 65536,
@@ -854,6 +854,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "ip_local_port_range",
+ .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range),
+ .data = &init_net.ipv4.sysctl_local_ports.range,
+ .mode = 0644,
+ .proc_handler = ipv4_local_port_range,
+ },
+ {
.procname = "tcp_mem",
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
.mode = 0644,
@@ -888,6 +895,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
&net->ipv4.sysctl_ping_group_range;
table[7].data =
&net->ipv4.sysctl_tcp_ecn;
+ table[8].data =
+ &net->ipv4.sysctl_local_ports.range;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -901,6 +910,13 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
+ /*
+ * Set defaults for local port range
+ */
+ seqlock_init(&net->ipv4.sysctl_local_ports.lock);
+ net->ipv4.sysctl_local_ports.range[0] = 32768;
+ net->ipv4.sysctl_local_ports.range[1] = 61000;
+
tcp_init_mem(net);
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6e5617b..be4b161 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3137,10 +3137,9 @@ void __init tcp_init(void)
&tcp_hashinfo.ehash_mask,
0,
thash_entries ? 0 : 512 * 1024);
- for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
+ for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
- INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
- }
+
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 25a89ea..eb651a0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
*/
-static void tcp_fixup_sndbuf(struct sock *sk)
+static void tcp_sndbuf_expand(struct sock *sk)
{
- int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
+ const struct tcp_sock *tp = tcp_sk(sk);
+ int sndmem, per_mss;
+ u32 nr_segs;
+
+ /* Worst case is non GSO/TSO : each frame consumes one skb
+ * and skb->head is kmalloced using power of two area of memory
+ */
+ per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
+ MAX_TCP_HEADER +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ per_mss = roundup_pow_of_two(per_mss) +
+ SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+ nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+ nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+
+ /* Fast Recovery (RFC 5681 3.2) :
+ * Cubic needs 1.7 factor, rounded to 2 to include
+ * extra cushion (application might react slowly to POLLOUT)
+ */
+ sndmem = 2 * nr_segs * per_mss;
- sndmem *= TCP_INIT_CWND;
if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}
@@ -355,6 +375,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
tcp_default_init_rwnd(mss);
+ /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
+ * Allow enough cushion so that sender is not limited by our window
+ */
+ if (sysctl_tcp_moderate_rcvbuf)
+ rcvmem <<= 2;
+
if (sk->sk_rcvbuf < rcvmem)
sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
}
@@ -370,9 +396,11 @@ void tcp_init_buffer_space(struct sock *sk)
if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
tcp_fixup_rcvbuf(sk);
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
- tcp_fixup_sndbuf(sk);
+ tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
+ tp->rcvq_space.time = tcp_time_stamp;
+ tp->rcvq_space.seq = tp->copied_seq;
maxwin = tcp_full_space(sk);
@@ -512,48 +540,62 @@ void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
int time;
- int space;
-
- if (tp->rcvq_space.time == 0)
- goto new_measure;
+ int copied;
time = tcp_time_stamp - tp->rcvq_space.time;
if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
return;
- space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
+ /* Number of bytes copied to user in last RTT */
+ copied = tp->copied_seq - tp->rcvq_space.seq;
+ if (copied <= tp->rcvq_space.space)
+ goto new_measure;
- space = max(tp->rcvq_space.space, space);
+ /* A bit of theory :
+ * copied = bytes received in previous RTT, our base window
+ * To cope with packet losses, we need a 2x factor
+ * To cope with slow start, and sender growing its cwin by 100 %
+ * every RTT, we need a 4x factor, because the ACK we are sending
+ * now is for the next RTT, not the current one :
+ * <prev RTT . ><current RTT .. ><next RTT .... >
+ */
- if (tp->rcvq_space.space != space) {
- int rcvmem;
+ if (sysctl_tcp_moderate_rcvbuf &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvwin, rcvmem, rcvbuf;
- tp->rcvq_space.space = space;
+ /* minimal window to cope with packet losses, assuming
+ * steady state. Add some cushion because of small variations.
+ */
+ rcvwin = (copied << 1) + 16 * tp->advmss;
- if (sysctl_tcp_moderate_rcvbuf &&
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int new_clamp = space;
+ /* If rate increased by 25%,
+ * assume slow start, rcvwin = 3 * copied
+ * If rate increased by 50%,
+ * assume sender can use 2x growth, rcvwin = 4 * copied
+ */
+ if (copied >=
+ tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
+ if (copied >=
+ tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
+ rcvwin <<= 1;
+ else
+ rcvwin += (rcvwin >> 1);
+ }
- /* Receive space grows, normalize in order to
- * take into account packet headers and sk_buff
- * structure overhead.
- */
- space /= tp->advmss;
- if (!space)
- space = 1;
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < tp->advmss)
- rcvmem += 128;
- space *= rcvmem;
- space = min(space, sysctl_tcp_rmem[2]);
- if (space > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = space;
-
- /* Make the window clamp follow along. */
- tp->window_clamp = new_clamp;
- }
+ rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
+ while (tcp_win_from_space(rcvmem) < tp->advmss)
+ rcvmem += 128;
+
+ rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+ if (rcvbuf > sk->sk_rcvbuf) {
+ sk->sk_rcvbuf = rcvbuf;
+
+ /* Make the window clamp follow along. */
+ tp->window_clamp = rcvwin;
}
}
+ tp->rcvq_space.space = copied;
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
@@ -713,7 +755,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
if (tp->srtt > 8 + 2)
do_div(rate, tp->srtt);
- sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+ /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+ * without any lock. We want to make sure compiler wont store
+ * intermediate values in this location.
+ */
+ ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
+ sk->sk_max_pacing_rate);
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -1284,7 +1331,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
}
- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
+ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ TCP_SKB_CB(prev)->end_seq++;
+
if (skb == tcp_highest_sack(sk))
tcp_advance_highest_sack(sk, skb);
@@ -2970,7 +3020,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
const struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb;
u32 now = tcp_time_stamp;
- int fully_acked = true;
+ bool fully_acked = true;
int flag = 0;
u32 pkts_acked = 0;
u32 reord = tp->packets_out;
@@ -4701,15 +4751,7 @@ static void tcp_new_space(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_should_expand_sndbuf(sk)) {
- int sndmem = SKB_TRUESIZE(max_t(u32,
- tp->rx_opt.mss_clamp,
- tp->mss_cache) +
- MAX_TCP_HEADER);
- int demanded = max_t(unsigned int, tp->snd_cwnd,
- tp->reordering + 1);
- sndmem *= 2 * demanded;
- if (sndmem > sk->sk_sndbuf)
- sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+ tcp_sndbuf_expand(sk);
tp->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -5674,8 +5716,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tcp_init_congestion_control(sk);
tcp_mtup_init(sk);
- tcp_init_buffer_space(sk);
tp->copied_seq = tp->rcv_nxt;
+ tcp_init_buffer_space(sk);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b14266b..114d1b74 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -835,11 +835,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
skb = tcp_make_synack(sk, dst, req, NULL);
if (skb) {
- __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
+ __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
skb_set_queue_mapping(skb, queue_mapping);
- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
- ireq->rmt_addr,
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
if (!tcp_rsk(req)->snt_synack && !err)
@@ -972,7 +972,7 @@ static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
{
union tcp_md5_addr *addr;
- addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
+ addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
return tcp_md5_do_lookup(sk, addr, AF_INET);
}
@@ -1149,8 +1149,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
saddr = inet_sk(sk)->inet_saddr;
daddr = inet_sk(sk)->inet_daddr;
} else if (req) {
- saddr = inet_rsk(req)->loc_addr;
- daddr = inet_rsk(req)->rmt_addr;
+ saddr = inet_rsk(req)->ir_loc_addr;
+ daddr = inet_rsk(req)->ir_rmt_addr;
} else {
const struct iphdr *iph = ip_hdr(skb);
saddr = iph->saddr;
@@ -1366,8 +1366,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
kfree_skb(skb_synack);
return -1;
}
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
- ireq->rmt_addr, ireq->opt);
+ err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr, ireq->opt);
err = net_xmit_eval(err);
if (!err)
tcp_rsk(req)->snt_synack = tcp_time_stamp;
@@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
inet_csk(child)->icsk_af_ops->rebuild_header(child);
tcp_init_congestion_control(child);
tcp_mtup_init(child);
- tcp_init_buffer_space(child);
tcp_init_metrics(child);
+ tcp_init_buffer_space(child);
/* Queue the data carried in the SYN packet. We need to first
* bump skb's refcnt because the caller will attempt to free it.
@@ -1502,8 +1502,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_openreq_init(req, &tmp_opt, skb);
ireq = inet_rsk(req);
- ireq->loc_addr = daddr;
- ireq->rmt_addr = saddr;
+ ireq->ir_loc_addr = daddr;
+ ireq->ir_rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
ireq->opt = tcp_v4_save_options(skb);
@@ -1578,15 +1578,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
if (skb_synack) {
- __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+ __tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
} else
goto drop_and_free;
if (likely(!do_fastopen)) {
int err;
- err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
- ireq->rmt_addr, ireq->opt);
+ err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr, ireq->opt);
err = net_xmit_eval(err);
if (err || want_cookie)
goto drop_and_free;
@@ -1644,9 +1644,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- newinet->inet_daddr = ireq->rmt_addr;
- newinet->inet_rcv_saddr = ireq->loc_addr;
- newinet->inet_saddr = ireq->loc_addr;
+ newinet->inet_daddr = ireq->ir_rmt_addr;
+ newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+ newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = ireq->opt;
rcu_assign_pointer(newinet->inet_opt, inet_opt);
ireq->opt = NULL;
@@ -2194,18 +2194,6 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
-static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
-{
- return hlist_nulls_empty(head) ? NULL :
- list_entry(head->first, struct inet_timewait_sock, tw_node);
-}
-
-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
-{
- return !is_a_nulls(tw->tw_node.next) ?
- hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
-}
-
/*
* Get next listener socket follow cur. If cur is NULL, get first socket
* starting from bucket given in st->bucket; when st->bucket is zero the
@@ -2309,10 +2297,9 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
return rc;
}
-static inline bool empty_bucket(struct tcp_iter_state *st)
+static inline bool empty_bucket(const struct tcp_iter_state *st)
{
- return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
- hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+ return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}
/*
@@ -2329,7 +2316,6 @@ static void *established_get_first(struct seq_file *seq)
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk;
struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
/* Lockless fast path for the common case of empty buckets */
@@ -2345,18 +2331,7 @@ static void *established_get_first(struct seq_file *seq)
rc = sk;
goto out;
}
- st->state = TCP_SEQ_STATE_TIME_WAIT;
- inet_twsk_for_each(tw, node,
- &tcp_hashinfo.ehash[st->bucket].twchain) {
- if (tw->tw_family != st->family ||
- !net_eq(twsk_net(tw), net)) {
- continue;
- }
- rc = tw;
- goto out;
- }
spin_unlock_bh(lock);
- st->state = TCP_SEQ_STATE_ESTABLISHED;
}
out:
return rc;
@@ -2365,7 +2340,6 @@ out:
static void *established_get_next(struct seq_file *seq, void *cur)
{
struct sock *sk = cur;
- struct inet_timewait_sock *tw;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
@@ -2373,45 +2347,16 @@ static void *established_get_next(struct seq_file *seq, void *cur)
++st->num;
++st->offset;
- if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
- tw = cur;
- tw = tw_next(tw);
-get_tw:
- while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
- tw = tw_next(tw);
- }
- if (tw) {
- cur = tw;
- goto out;
- }
- spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
- st->state = TCP_SEQ_STATE_ESTABLISHED;
-
- /* Look for next non empty bucket */
- st->offset = 0;
- while (++st->bucket <= tcp_hashinfo.ehash_mask &&
- empty_bucket(st))
- ;
- if (st->bucket > tcp_hashinfo.ehash_mask)
- return NULL;
-
- spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
- sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
- } else
- sk = sk_nulls_next(sk);
+ sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
- goto found;
+ return sk;
}
- st->state = TCP_SEQ_STATE_TIME_WAIT;
- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
- goto get_tw;
-found:
- cur = sk;
-out:
- return cur;
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+ ++st->bucket;
+ return established_get_first(seq);
}
static void *established_get_idx(struct seq_file *seq, loff_t pos)
@@ -2464,10 +2409,9 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
if (rc)
break;
st->bucket = 0;
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
/* Fallthrough */
case TCP_SEQ_STATE_ESTABLISHED:
- case TCP_SEQ_STATE_TIME_WAIT:
- st->state = TCP_SEQ_STATE_ESTABLISHED;
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
@@ -2524,7 +2468,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
break;
case TCP_SEQ_STATE_ESTABLISHED:
- case TCP_SEQ_STATE_TIME_WAIT:
rc = established_get_next(seq, v);
break;
}
@@ -2548,7 +2491,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
if (v != SEQ_START_TOKEN)
spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
@@ -2606,10 +2548,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n",
i,
- ireq->loc_addr,
+ ireq->ir_loc_addr,
ntohs(inet_sk(sk)->inet_sport),
- ireq->rmt_addr,
- ntohs(ireq->rmt_port),
+ ireq->ir_rmt_addr,
+ ntohs(ireq->ir_rmt_port),
TCP_SYN_RECV,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
@@ -2707,6 +2649,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
+ struct sock *sk = v;
int len;
if (v == SEQ_START_TOKEN) {
@@ -2721,14 +2664,14 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
case TCP_SEQ_STATE_ESTABLISHED:
- get_tcp4_sock(v, seq, st->num, &len);
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait4_sock(v, seq, st->num, &len);
+ else
+ get_tcp4_sock(v, seq, st->num, &len);
break;
case TCP_SEQ_STATE_OPENREQ:
get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
- get_timewait4_sock(v, seq, st->num, &len);
- break;
}
seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 52f3c6b..4a2a841 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -215,13 +215,15 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
addr.family = req->rsk_ops->family;
switch (addr.family) {
case AF_INET:
- addr.addr.a4 = inet_rsk(req)->rmt_addr;
+ addr.addr.a4 = inet_rsk(req)->ir_rmt_addr;
hash = (__force unsigned int) addr.addr.a4;
break;
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr;
- hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr);
+ *(struct in6_addr *)addr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr;
+ hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
break;
+#endif
default:
return NULL;
}
@@ -240,7 +242,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
{
- struct inet6_timewait_sock *tw6;
struct tcp_metrics_block *tm;
struct inetpeer_addr addr;
unsigned int hash;
@@ -252,11 +253,12 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
addr.addr.a4 = tw->tw_daddr;
hash = (__force unsigned int) addr.addr.a4;
break;
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- tw6 = inet6_twsk((struct sock *)tw);
- *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
- hash = ipv6_addr_hash(&tw6->tw_v6_daddr);
+ *(struct in6_addr *)addr.addr.a6 = tw->tw_v6_daddr;
+ hash = ipv6_addr_hash(&tw->tw_v6_daddr);
break;
+#endif
default:
return NULL;
}
@@ -288,10 +290,12 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
addr.addr.a4 = inet_sk(sk)->inet_daddr;
hash = (__force unsigned int) addr.addr.a4;
break;
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr;
- hash = ipv6_addr_hash(&inet6_sk(sk)->daddr);
+ *(struct in6_addr *)addr.addr.a6 = sk->sk_v6_daddr;
+ hash = ipv6_addr_hash(&sk->sk_v6_daddr);
break;
+#endif
default:
return NULL;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 58a3e69..97b6841 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -293,12 +293,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_timewait_sock *tw6;
- tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
- tw6 = inet6_twsk((struct sock *)tw);
- tw6->tw_v6_daddr = np->daddr;
- tw6->tw_v6_rcv_saddr = np->rcv_saddr;
+ tw->tw_v6_daddr = sk->sk_v6_daddr;
+ tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_ipv6only = np->ipv6only;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8..e5ce0e1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
unsigned int size = 0;
unsigned int eff_sacks;
+ opts->options = 0;
+
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (unlikely(*md5)) {
@@ -848,15 +850,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
- /* If congestion control is doing timestamping, we must
- * take such a timestamp before we potentially clone/copy.
- */
- if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
- __net_timestamp(skb);
-
- if (likely(clone_it)) {
+ if (clone_it) {
const struct sk_buff *fclone = skb + 1;
+ /* If congestion control is doing timestamping, we must
+ * take such a timestamp before we potentially clone/copy.
+ */
+ if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
+ __net_timestamp(skb);
+
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
fclone->fclone == SKB_FCLONE_CLONE))
NET_INC_STATS_BH(sock_net(sk),
@@ -895,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
- tcp_wfree : sock_wfree;
+ skb->destructor = tcp_wfree;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
@@ -1840,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
-
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
@@ -1869,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- /* TSQ : sk_wmem_alloc accounts skb truesize,
- * including skb overhead. But thats OK.
+ /* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
*/
- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+ limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
break;
}
+
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
@@ -2727,8 +2734,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->syn = 1;
th->ack = 1;
TCP_ECN_make_synack(req, th);
- th->source = ireq->loc_port;
- th->dest = ireq->rmt_port;
+ th->source = htons(ireq->ir_num);
+ th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
* not even correctly set)
*/
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 611beab..8b97d71e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -101,22 +101,6 @@ static inline int tcp_probe_avail(void)
si4.sin_addr.s_addr = inet->inet_##mem##addr; \
} while (0) \
-#if IS_ENABLED(CONFIG_IPV6)
-#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \
- do { \
- struct ipv6_pinfo *pi6 = inet->pinet6; \
- si6.sin6_family = AF_INET6; \
- si6.sin6_port = inet->inet_##mem##port; \
- si6.sin6_addr = pi6->mem##addr; \
- si6.sin6_flowinfo = 0; /* No need here. */ \
- si6.sin6_scope_id = 0; /* No need here. */ \
- } while (0)
-#else
-#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \
- do { \
- memset(&si6, 0, sizeof(si6)); \
- } while (0)
-#endif
/*
* Hook inserted to be called before each receive packet.
@@ -147,8 +131,17 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
break;
case AF_INET6:
- tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
- tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+ memset(&p->src.v6, 0, sizeof(p->src.v6));
+ memset(&p->dst.v6, 0, sizeof(p->dst.v6));
+#if IS_ENABLED(CONFIG_IPV6)
+ p->src.v6.sin6_family = AF_INET6;
+ p->src.v6.sin6_port = inet->inet_sport;
+ p->src.v6.sin6_addr = inet6_sk(sk)->saddr;
+
+ p->dst.v6.sin6_family = AF_INET6;
+ p->dst.v6.sin6_port = inet->inet_dport;
+ p->dst.v6.sin6_addr = sk->sk_v6_daddr;
+#endif
break;
default:
BUG();
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4b85e6f..af07b5b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -374,9 +374,8 @@ void tcp_retransmit_timer(struct sock *sk)
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
- &np->daddr,
+ &sk->sk_v6_daddr,
ntohs(inet->inet_dport), inet->inet_num,
tp->snd_una, tp->snd_nxt);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95..9f27bb8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -103,6 +103,7 @@
#include <linux/seq_file.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
+#include <net/inet_hashtables.h>
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
@@ -219,7 +220,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
unsigned short first, last;
DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
rand = net_random();
@@ -565,6 +566,26 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
+static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
+ __be16 loc_port, __be32 loc_addr,
+ __be16 rmt_port, __be32 rmt_addr,
+ int dif, unsigned short hnum)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net) ||
+ udp_sk(sk)->udp_port_hash != hnum ||
+ (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
+ (inet->inet_dport != rmt_port && inet->inet_dport) ||
+ (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
+ ipv6_only_sock(sk) ||
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ return false;
+ if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
+ return false;
+ return true;
+}
+
static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
@@ -575,20 +596,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
unsigned short hnum = ntohs(loc_port);
sk_nulls_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
-
- if (!net_eq(sock_net(s), net) ||
- udp_sk(s)->udp_port_hash != hnum ||
- (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
- (inet->inet_dport != rmt_port && inet->inet_dport) ||
- (inet->inet_rcv_saddr &&
- inet->inet_rcv_saddr != loc_addr) ||
- ipv6_only_sock(s) ||
- (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
- continue;
- if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
- continue;
- goto found;
+ if (__udp_is_mcast_sock(net, s,
+ loc_port, loc_addr,
+ rmt_port, rmt_addr,
+ dif, hnum))
+ goto found;
}
s = NULL;
found:
@@ -658,7 +670,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
break;
case ICMP_REDIRECT:
ipv4_sk_redirect(skb, sk);
- break;
+ goto out;
}
/*
@@ -855,6 +867,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.opt = NULL;
ipc.tx_flags = 0;
+ ipc.ttl = 0;
+ ipc.tos = -1;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
@@ -938,7 +952,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
faddr = ipc.opt->opt.faddr;
connected = 0;
}
- tos = RT_TOS(inet->tos);
+ tos = get_rttos(&ipc, inet);
if (sock_flag(sk, SOCK_LOCALROUTE) ||
(msg->msg_flags & MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->opt.is_strictroute)) {
@@ -1403,8 +1417,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
- if (inet_sk(sk)->inet_daddr)
+ if (inet_sk(sk)->inet_daddr) {
sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
+ }
rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
@@ -1528,7 +1544,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = 0;
- ipv4_pktinfo_prepare(skb);
+ ipv4_pktinfo_prepare(sk, skb);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
rc = __udp_queue_rcv_skb(sk, skb);
@@ -1577,6 +1593,14 @@ static void flush_stack(struct sock **stack, unsigned int count,
kfree_skb(skb1);
}
+static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+}
+
/*
* Multicasts and broadcasts go to each listener.
*
@@ -1705,16 +1729,32 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
- if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(net, skb, uh,
- saddr, daddr, udptable);
+ if (skb->sk) {
+ int ret;
+ sk = skb->sk;
+
+ if (unlikely(sk->sk_rx_dst == NULL))
+ udp_sk_rx_dst_set(sk, skb);
- sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+ ret = udp_queue_rcv_skb(sk, skb);
+
+ /* a return value > 0 means to resubmit the input, but
+ * it wants the return to be -protocol, or 0
+ */
+ if (ret > 0)
+ return -ret;
+ return 0;
+ } else {
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable);
+
+ sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+ }
if (sk != NULL) {
int ret;
- sk_mark_napi_id(sk, skb);
ret = udp_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -1768,6 +1808,135 @@ drop:
return 0;
}
+/* We can only early demux multicast if there is a single matching socket.
+ * If more than one socket found returns NULL
+ */
+static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
+ __be16 loc_port, __be32 loc_addr,
+ __be16 rmt_port, __be32 rmt_addr,
+ int dif)
+{
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(loc_port);
+ unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
+ struct udp_hslot *hslot = &udp_table.hash[slot];
+
+ rcu_read_lock();
+begin:
+ count = 0;
+ result = NULL;
+ sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ if (__udp_is_mcast_sock(net, sk,
+ loc_port, loc_addr,
+ rmt_port, rmt_addr,
+ dif, hnum)) {
+ result = sk;
+ ++count;
+ }
+ }
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ if (result) {
+ if (count != 1 ||
+ unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
+ result = NULL;
+ else if (unlikely(!__udp_is_mcast_sock(net, result,
+ loc_port, loc_addr,
+ rmt_port, rmt_addr,
+ dif, hnum))) {
+ sock_put(result);
+ result = NULL;
+ }
+ }
+ rcu_read_unlock();
+ return result;
+}
+
+/* For unicast we should only early demux connected sockets or we can
+ * break forwarding setups. The chains here can be long so only check
+ * if the first socket is an exact match and if not move on.
+ */
+static struct sock *__udp4_lib_demux_lookup(struct net *net,
+ __be16 loc_port, __be32 loc_addr,
+ __be16 rmt_port, __be32 rmt_addr,
+ int dif)
+{
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
+ unsigned short hnum = ntohs(loc_port);
+ unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+ unsigned int slot2 = hash2 & udp_table.mask;
+ struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
+ INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr)
+ const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+
+ rcu_read_lock();
+ result = NULL;
+ udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ if (INET_MATCH(sk, net, acookie,
+ rmt_addr, loc_addr, ports, dif))
+ result = sk;
+ /* Only check first socket in chain */
+ break;
+ }
+
+ if (result) {
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
+ result = NULL;
+ else if (unlikely(!INET_MATCH(sk, net, acookie,
+ rmt_addr, loc_addr,
+ ports, dif))) {
+ sock_put(result);
+ result = NULL;
+ }
+ }
+ rcu_read_unlock();
+ return result;
+}
+
+void udp_v4_early_demux(struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *uh = udp_hdr(skb);
+ struct sock *sk;
+ struct dst_entry *dst;
+ struct net *net = dev_net(skb->dev);
+ int dif = skb->dev->ifindex;
+
+ /* validate the packet */
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
+ return;
+
+ if (skb->pkt_type == PACKET_BROADCAST ||
+ skb->pkt_type == PACKET_MULTICAST)
+ sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
+ uh->source, iph->saddr, dif);
+ else if (skb->pkt_type == PACKET_HOST)
+ sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
+ uh->source, iph->saddr, dif);
+ else
+ return;
+
+ if (!sk)
+ return;
+
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ dst = sk->sk_rx_dst;
+
+ if (dst)
+ dst = dst_check(dst, 0);
+ if (dst)
+ skb_dst_set_noref(skb, dst);
+}
+
int udp_rcv(struct sk_buff *skb)
{
return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index b5663c3..31b1815 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,13 +16,13 @@
#include <net/xfrm.h>
/* Informational hook. The decap is still done here. */
-static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly;
+static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly;
static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex);
-int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler)
+int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler)
{
- struct xfrm_tunnel __rcu **pprev;
- struct xfrm_tunnel *t;
+ struct xfrm_tunnel_notifier __rcu **pprev;
+ struct xfrm_tunnel_notifier *t;
int ret = -EEXIST;
int priority = handler->priority;
@@ -50,10 +50,10 @@ err:
}
EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register);
-int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler)
+int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler)
{
- struct xfrm_tunnel __rcu **pprev;
- struct xfrm_tunnel *t;
+ struct xfrm_tunnel_notifier __rcu **pprev;
+ struct xfrm_tunnel_notifier *t;
int ret = -ENOENT;
mutex_lock(&xfrm4_mode_tunnel_input_mutex);
@@ -134,7 +134,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_tunnel *handler;
+ struct xfrm_tunnel_notifier *handler;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff126..cd3fb30 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+ const unsigned int prefix_len, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool ret = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+ if (ret)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
else
stored_lft = 0;
if (!update_lft && !create && stored_lft) {
- if (valid_lft > MIN_VALID_LIFETIME ||
- valid_lft > stored_lft)
- update_lft = 1;
- else if (stored_lft <= MIN_VALID_LIFETIME) {
- /* valid_lft <= stored_lft is always true */
- /*
- * RFC 4862 Section 5.5.3e:
- * "Note that the preferred lifetime of
- * the corresponding address is always
- * reset to the Preferred Lifetime in
- * the received Prefix Information
- * option, regardless of whether the
- * valid lifetime is also reset or
- * ignored."
- *
- * So if the preferred lifetime in
- * this advertisement is different
- * than what we have stored, but the
- * valid lifetime is invalid, just
- * reset prefered_lft.
- *
- * We must set the valid lifetime
- * to the stored lifetime since we'll
- * be updating the timestamp below,
- * else we'll set it back to the
- * minimum.
- */
- if (prefered_lft != ifp->prefered_lft) {
- valid_lft = stored_lft;
- update_lft = 1;
- }
- } else {
- valid_lft = MIN_VALID_LIFETIME;
- if (valid_lft < prefered_lft)
- prefered_lft = valid_lft;
- update_lft = 1;
- }
+ const u32 minimum_lft = min(
+ stored_lft, (u32)MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
}
if (update_lft) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7c96100..a2cb07c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -364,7 +364,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_rcv_saddr = v4addr;
inet->inet_saddr = v4addr;
- np->rcv_saddr = addr->sin6_addr;
+ sk->sk_v6_rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
@@ -461,14 +461,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
peer == 1)
return -ENOTCONN;
sin->sin6_port = inet->inet_dport;
- sin->sin6_addr = np->daddr;
+ sin->sin6_addr = sk->sk_v6_daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
} else {
- if (ipv6_addr_any(&np->rcv_saddr))
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
sin->sin6_addr = np->saddr;
else
- sin->sin6_addr = np->rcv_saddr;
+ sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
}
@@ -655,7 +655,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = np->saddr;
fl6.flowlabel = np->flow_label;
fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -1028,52 +1028,4 @@ out_unregister_tcp_proto:
}
module_init(inet6_init);
-static void __exit inet6_exit(void)
-{
- if (disable_ipv6_mod)
- return;
-
- /* First of all disallow new sockets creation. */
- sock_unregister(PF_INET6);
- /* Disallow any further netlink messages */
- rtnl_unregister_all(PF_INET6);
-
- udpv6_exit();
- udplitev6_exit();
- tcpv6_exit();
-
- /* Cleanup code parts. */
- ipv6_packet_cleanup();
- ipv6_frag_exit();
- ipv6_exthdrs_exit();
- addrconf_cleanup();
- ip6_flowlabel_cleanup();
- ndisc_late_cleanup();
- ip6_route_cleanup();
-#ifdef CONFIG_PROC_FS
-
- /* Cleanup code parts. */
- if6_proc_exit();
- ipv6_misc_proc_exit();
- udplite6_proc_exit();
- raw6_proc_exit();
-#endif
- ipv6_netfilter_fini();
- ipv6_stub = NULL;
- igmp6_cleanup();
- ndisc_cleanup();
- ip6_mr_cleanup();
- icmpv6_cleanup();
- rawv6_exit();
-
- unregister_pernet_subsys(&inet6_net_ops);
- proto_unregister(&rawv6_prot);
- proto_unregister(&udplitev6_prot);
- proto_unregister(&udpv6_prot);
- proto_unregister(&tcpv6_prot);
-
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
-}
-module_exit(inet6_exit);
-
MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 48b6bd2..a454b0f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -107,16 +107,16 @@ ipv4_connected:
if (err)
goto out;
- ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
+ ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);
if (ipv6_addr_any(&np->saddr) ||
ipv6_mapped_addr_any(&np->saddr))
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- if (ipv6_addr_any(&np->rcv_saddr) ||
- ipv6_mapped_addr_any(&np->rcv_saddr)) {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) ||
+ ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &np->rcv_saddr);
+ &sk->sk_v6_rcv_saddr);
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
@@ -145,7 +145,7 @@ ipv4_connected:
}
}
- np->daddr = *daddr;
+ sk->sk_v6_daddr = *daddr;
np->flow_label = fl6.flowlabel;
inet->inet_dport = usin->sin6_port;
@@ -156,7 +156,7 @@ ipv4_connected:
*/
fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
@@ -183,16 +183,16 @@ ipv4_connected:
if (ipv6_addr_any(&np->saddr))
np->saddr = fl6.saddr;
- if (ipv6_addr_any(&np->rcv_saddr)) {
- np->rcv_saddr = fl6.saddr;
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ sk->sk_v6_rcv_saddr = fl6.saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
- &np->daddr : NULL,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
@@ -883,11 +883,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
__u16 srcp, __u16 destp, int bucket)
{
- struct ipv6_pinfo *np = inet6_sk(sp);
const struct in6_addr *dest, *src;
- dest = &np->daddr;
- src = &np->rcv_saddr;
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
seq_printf(seq,
"%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e4311cb..77bb8af 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -70,20 +70,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
struct flowi6 *fl6,
const struct request_sock *req)
{
- struct inet6_request_sock *treq = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = IPPROTO_TCP;
- fl6->daddr = treq->rmt_addr;
+ fl6->daddr = ireq->ir_v6_rmt_addr;
final_p = fl6_update_dst(fl6, np->opt, &final);
- fl6->saddr = treq->loc_addr;
- fl6->flowi6_oif = treq->iif;
+ fl6->saddr = ireq->ir_v6_loc_addr;
+ fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = sk->sk_mark;
- fl6->fl6_dport = inet_rsk(req)->rmt_port;
- fl6->fl6_sport = inet_rsk(req)->loc_port;
+ fl6->fl6_dport = ireq->ir_rmt_port;
+ fl6->fl6_sport = htons(ireq->ir_num);
security_req_classify_flow(req, flowi6_to_flowi(fl6));
dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
@@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
lopt->nr_table_entries)];
(req = *prev) != NULL;
prev = &req->dl_next) {
- const struct inet6_request_sock *treq = inet6_rsk(req);
+ const struct inet_request_sock *ireq = inet_rsk(req);
- if (inet_rsk(req)->rmt_port == rport &&
+ if (ireq->ir_rmt_port == rport &&
req->rsk_ops->family == AF_INET6 &&
- ipv6_addr_equal(&treq->rmt_addr, raddr) &&
- ipv6_addr_equal(&treq->loc_addr, laddr) &&
- (!treq->iif || treq->iif == iif)) {
+ ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
+ ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
+ (!ireq->ir_iif || ireq->ir_iif == iif)) {
WARN_ON(req->sk != NULL);
*prevp = prev;
return req;
@@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
- inet_rsk(req)->rmt_port,
+ const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
+ inet_rsk(req)->ir_rmt_port,
lopt->hash_rnd, lopt->nr_table_entries);
reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
@@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
sin6->sin6_family = AF_INET6;
- sin6->sin6_addr = np->daddr;
+ sin6->sin6_addr = sk->sk_v6_daddr;
sin6->sin6_port = inet_sk(sk)->inet_dport;
/* We do not store received flowlabel for TCP */
sin6->sin6_flowinfo = 0;
@@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = sk->sk_protocol;
- fl6->daddr = np->daddr;
+ fl6->daddr = sk->sk_v6_daddr;
fl6->saddr = np->saddr;
fl6->flowlabel = np->flow_label;
IP6_ECN_flow_xmit(sk, fl6->flowlabel);
@@ -245,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
skb_dst_set_noref(skb, dst);
/* Restore final destination back after routing done */
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
rcu_read_unlock();
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32b4a16..842d833 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -89,43 +89,22 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
- goto begintw;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- sock_put(sk);
- goto begin;
- }
- goto out;
- }
- }
- if (get_nulls_value(node) != slot)
- goto begin;
-
-begintw:
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_nulls_for_each_rcu(sk, node, &head->twchain) {
- if (sk->sk_hash != hash)
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
continue;
- if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
- sk = NULL;
- goto out;
- }
- if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- sock_put(sk);
- goto begintw;
- }
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
goto out;
+
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ sock_gen_put(sk);
+ goto begin;
}
+ goto found;
}
if (get_nulls_value(node) != slot)
- goto begintw;
- sk = NULL;
+ goto begin;
out:
+ sk = NULL;
+found:
rcu_read_unlock();
return sk;
}
@@ -140,11 +119,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
score = 1;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
@@ -236,9 +214,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct in6_addr *daddr = &np->rcv_saddr;
- const struct in6_addr *saddr = &np->daddr;
+ const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
@@ -248,38 +225,28 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
+ struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock);
- /* Check TIME-WAIT sockets first. */
- sk_nulls_for_each(sk2, node, &head->twchain) {
- if (sk2->sk_hash != hash)
- continue;
-
- if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
- ports, dif))) {
- tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
- goto not_unique;
- }
- }
- tw = NULL;
-
- /* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) {
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
+
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+ break;
+ }
goto not_unique;
+ }
}
-unique:
/* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
+ * in hash table socket with a funny identity.
+ */
inet->inet_num = lport;
inet->inet_sport = htons(lport);
sk->sk_hash = hash;
@@ -312,9 +279,9 @@ not_unique:
static inline u32 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
- np->daddr.s6_addr32,
+
+ return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_dport);
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5bec666..5550a81 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1529,25 +1529,6 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
fib6_walk(&c.w);
}
-void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg),
- int prune, void *arg)
-{
- struct fib6_table *table;
- struct hlist_head *head;
- unsigned int h;
-
- rcu_read_lock();
- for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- read_lock_bh(&table->tb6_lock);
- fib6_clean_tree(net, &table->tb6_root,
- func, prune, arg);
- read_unlock_bh(&table->tb6_lock);
- }
- }
- rcu_read_unlock();
-}
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
int prune, void *arg)
{
@@ -1782,3 +1763,189 @@ void fib6_gc_cleanup(void)
unregister_pernet_subsys(&fib6_net_ops);
kmem_cache_destroy(fib6_node_kmem);
}
+
+#ifdef CONFIG_PROC_FS
+
+struct ipv6_route_iter {
+ struct seq_net_private p;
+ struct fib6_walker_t w;
+ loff_t skip;
+ struct fib6_table *tbl;
+ __u32 sernum;
+};
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+ struct rt6_info *rt = v;
+ struct ipv6_route_iter *iter = seq->private;
+
+ seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
+#else
+ seq_puts(seq, "00000000000000000000000000000000 00 ");
+#endif
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ seq_printf(seq, "%pi6", &rt->rt6i_gateway);
+ else
+ seq_puts(seq, "00000000000000000000000000000000");
+
+ seq_printf(seq, " %08x %08x %08x %08x %8s\n",
+ rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+ rt->dst.__use, rt->rt6i_flags,
+ rt->dst.dev ? rt->dst.dev->name : "");
+ iter->w.leaf = NULL;
+ return 0;
+}
+
+static int ipv6_route_yield(struct fib6_walker_t *w)
+{
+ struct ipv6_route_iter *iter = w->args;
+
+ if (!iter->skip)
+ return 1;
+
+ do {
+ iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->skip--;
+ if (!iter->skip && iter->w.leaf)
+ return 1;
+ } while (iter->w.leaf);
+
+ return 0;
+}
+
+static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
+{
+ memset(&iter->w, 0, sizeof(iter->w));
+ iter->w.func = ipv6_route_yield;
+ iter->w.root = &iter->tbl->tb6_root;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ iter->w.args = iter;
+ iter->sernum = iter->w.root->fn_sernum;
+ INIT_LIST_HEAD(&iter->w.lh);
+ fib6_walker_link(&iter->w);
+}
+
+static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+ struct net *net)
+{
+ unsigned int h;
+ struct hlist_node *node;
+
+ if (tbl) {
+ h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
+ node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+ } else {
+ h = 0;
+ node = NULL;
+ }
+
+ while (!node && h < FIB6_TABLE_HASHSZ) {
+ node = rcu_dereference_bh(
+ hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
+ }
+ return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
+}
+
+static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+{
+ if (iter->sernum != iter->w.root->fn_sernum) {
+ iter->sernum = iter->w.root->fn_sernum;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ WARN_ON(iter->w.skip);
+ iter->w.skip = iter->w.count;
+ }
+}
+
+static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ int r;
+ struct rt6_info *n;
+ struct net *net = seq_file_net(seq);
+ struct ipv6_route_iter *iter = seq->private;
+
+ if (!v)
+ goto iter_table;
+
+ n = ((struct rt6_info *)v)->dst.rt6_next;
+ if (n) {
+ ++*pos;
+ return n;
+ }
+
+iter_table:
+ ipv6_route_check_sernum(iter);
+ read_lock(&iter->tbl->tb6_lock);
+ r = fib6_walk_continue(&iter->w);
+ read_unlock(&iter->tbl->tb6_lock);
+ if (r > 0) {
+ if (v)
+ ++*pos;
+ return iter->w.leaf;
+ } else if (r < 0) {
+ fib6_walker_unlink(&iter->w);
+ return NULL;
+ }
+ fib6_walker_unlink(&iter->w);
+
+ iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
+ if (!iter->tbl)
+ return NULL;
+
+ ipv6_route_seq_setup_walk(iter);
+ goto iter_table;
+}
+
+static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU_BH)
+{
+ struct net *net = seq_file_net(seq);
+ struct ipv6_route_iter *iter = seq->private;
+
+ rcu_read_lock_bh();
+ iter->tbl = ipv6_route_seq_next_table(NULL, net);
+ iter->skip = *pos;
+
+ if (iter->tbl) {
+ ipv6_route_seq_setup_walk(iter);
+ return ipv6_route_seq_next(seq, NULL, pos);
+ } else {
+ return NULL;
+ }
+}
+
+static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
+{
+ struct fib6_walker_t *w = &iter->w;
+ return w->node && !(w->state == FWS_U && w->node == w->root);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU_BH)
+{
+ struct ipv6_route_iter *iter = seq->private;
+
+ if (ipv6_route_iter_active(iter))
+ fib6_walker_unlink(&iter->w);
+
+ rcu_read_unlock_bh();
+}
+
+static const struct seq_operations ipv6_route_seq_ops = {
+ .start = ipv6_route_seq_start,
+ .next = ipv6_route_seq_next,
+ .stop = ipv6_route_seq_stop,
+ .show = ipv6_route_seq_show
+};
+
+int ipv6_route_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter));
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9f..1ef1fa2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct ip6_tnl *tunnel = netdev_priv(dev);
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -1173,9 +1173,8 @@ done:
static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ip6_tnl *tunnel = netdev_priv(dev);
if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+ new_mtu > 0xFFF8 - dev->hard_header_len)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3a692d5..a54c45c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
* udp datagram
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ struct frag_hdr fhdr;
+
skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- }
-
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- struct frag_hdr fhdr;
/* Specify the length of each IPv6 datagram fragment.
* It has to be a multiple of 8.
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
ipv6_select_ident(&fhdr, rt);
skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
}
- /* There is not enough support do UPD LSO,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji
*/
- cork->length += length;
- if (length > mtu) {
- int proto = sk->sk_protocol;
- if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
- ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
- return -EMSGSIZE;
- }
-
- if (proto == IPPROTO_UDP &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
+ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
- err = ip6_ufo_append_data(sk, getfrag, from, length,
- hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
- if (err)
- goto error;
- return 0;
- }
+ skb = skb_peek_tail(&sk->sk_write_queue);
+ cork->length += length;
+ if (((length > mtu) ||
+ (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ if (!skb)
goto alloc_new_skb;
while (length > 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2d8f482..583b77e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static int
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU) {
- return -EINVAL;
+ struct ip6_tnl *tnl = netdev_priv(dev);
+
+ if (tnl->parms.proto == IPPROTO_IPIP) {
+ if (new_mtu < 68)
+ return -EINVAL;
+ } else {
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
}
+ if (new_mtu > 0xFFF8 - dev->hard_header_len)
+ return -EINVAL;
dev->mtu = new_mtu;
return 0;
}
@@ -1731,8 +1739,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
unregister_netdevice_many(&list);
}
@@ -1752,6 +1758,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d1e2e8e..4919a8e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
if (ipv6_only_sock(sk) ||
- !ipv6_addr_v4mapped(&np->daddr)) {
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
retv = -EADDRNOTAVAIL;
break;
}
@@ -1011,7 +1011,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
@@ -1026,7 +1026,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct in6_pktinfo src_info;
src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
+ np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 096cd67..d18f9f9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
if (idev->mc_dad_count)
mld_dad_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
idev->mc_gq_running = 0;
mld_send_report(idev, NULL);
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data)
if (idev->mc_ifc_count)
mld_ifc_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 19cfea8..2748b04 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (th == NULL)
return NF_DROP;
- synproxy_parse_options(skb, par->thoff, th, &opts);
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
if (th->syn && !(th->ack || th->fin || th->rst)) {
/* Initial SYN from client */
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
/* fall through */
case TCP_CONNTRACK_SYN_SENT:
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
if (!th->syn && th->ack &&
CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
if (!th->syn || !th->ack)
break;
- synproxy_parse_options(skb, thoff, th, &opts);
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->tsoff = opts.tsval - synproxy->its;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d6e4dd8..54b75ea 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -297,9 +297,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
struct nf_conn *ct;
- tuple.src.u3.in6 = inet6->rcv_saddr;
+ tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
tuple.src.u.tcp.port = inet->inet_sport;
- tuple.dst.u3.in6 = inet6->daddr;
+ tuple.dst.u3.in6 = sk->sk_v6_daddr;
tuple.dst.u.tcp.port = inet->inet_dport;
tuple.dst.protonum = sk->sk_protocol;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 18f19df..8815e31 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -116,7 +116,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
}
if (!iif)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 58916bb..3c00842 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
sk_for_each_from(sk)
if (inet_sk(sk)->inet_num == num) {
- struct ipv6_pinfo *np = inet6_sk(sk);
if (!net_eq(sock_net(sk), net))
continue;
- if (!ipv6_addr_any(&np->daddr) &&
- !ipv6_addr_equal(&np->daddr, rmt_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
goto found;
if (is_multicast &&
inet6_mc_check(sk, loc_addr, rmt_addr))
@@ -302,7 +301,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
- np->rcv_saddr = addr->sin6_addr;
+ sk->sk_v6_rcv_saddr = addr->sin6_addr;
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
err = 0;
@@ -335,8 +334,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
}
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ return;
+ }
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
@@ -802,8 +803,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
@@ -814,7 +815,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
return -EDESTADDRREQ;
proto = inet->inet_num;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
fl6.flowlabel = np->flow_label;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c979dd9..c3130ff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1137,7 +1137,6 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
- fl6.flowi6_flags = 0;
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
@@ -1236,7 +1235,6 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
- fl6.flowi6_flags = 0;
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
@@ -1258,7 +1256,6 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
fl6.flowi6_mark = mark;
- fl6.flowi6_flags = 0;
fl6.daddr = msg->dest;
fl6.saddr = iph->daddr;
@@ -2800,56 +2797,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
#ifdef CONFIG_PROC_FS
-struct rt6_proc_arg
-{
- char *buffer;
- int offset;
- int length;
- int skip;
- int len;
-};
-
-static int rt6_info_route(struct rt6_info *rt, void *p_arg)
-{
- struct seq_file *m = p_arg;
-
- seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
-
-#ifdef CONFIG_IPV6_SUBTREES
- seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
-#else
- seq_puts(m, "00000000000000000000000000000000 00 ");
-#endif
- if (rt->rt6i_flags & RTF_GATEWAY) {
- seq_printf(m, "%pi6", &rt->rt6i_gateway);
- } else {
- seq_puts(m, "00000000000000000000000000000000");
- }
- seq_printf(m, " %08x %08x %08x %08x %8s\n",
- rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, rt->rt6i_flags,
- rt->dst.dev ? rt->dst.dev->name : "");
- return 0;
-}
-
-static int ipv6_route_show(struct seq_file *m, void *v)
-{
- struct net *net = (struct net *)m->private;
- fib6_clean_all_ro(net, rt6_info_route, 0, m);
- return 0;
-}
-
-static int ipv6_route_open(struct inode *inode, struct file *file)
-{
- return single_open_net(inode, file, ipv6_route_show);
-}
-
static const struct file_operations ipv6_route_proc_fops = {
.owner = THIS_MODULE,
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release_net,
+ .release = seq_release_net,
};
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb9..1926945 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
return false;
}
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb)
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- if (tunnel->dev->priv_flags & IFF_ISATAP) {
- if (!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
- } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
- if (is_spoofed_6rd(tunnel, iph->saddr,
- &ipv6_hdr(skb)->saddr) ||
- is_spoofed_6rd(tunnel, iph->daddr,
- &ipv6_hdr(skb)->daddr)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
}
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
rtnl_lock();
sit_destroy_tunnels(sitn, &list);
- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bf63ac8..d04d3f1d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -24,26 +24,21 @@
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
-/* Table must be sorted. */
+/* RFC 2460, Section 8.3:
+ * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
+ *
+ * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows
+ * using higher values than ipv4 tcp syncookies.
+ * The other values are chosen based on ethernet (1500 and 9k MTU), plus
+ * one that accounts for common encap (PPPoe) overhead. Table must be sorted.
+ */
static __u16 const msstab[] = {
- 64,
- 512,
- 536,
- 1280 - 60,
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
1480 - 60,
1500 - 60,
- 4460 - 60,
9000 - 60,
};
-/*
- * This (misnamed) value is the age of syncookie which is permitted.
- * Its ideal value should be dependent on TCP_TIMEOUT_INIT and
- * sysctl_tcp_retries1. It's a rather complicated formula (exponential
- * backoff) to compute at runtime so it's currently hardcoded here.
- */
-#define COUNTER_TRIES 4
-
static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
@@ -86,8 +81,9 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd
static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__be16 sport, __be16 dport, __u32 sseq,
- __u32 count, __u32 data)
+ __u32 data)
{
+ u32 count = tcp_cookie_time();
return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +
sseq + (count << COOKIEBITS) +
((cookie_hash(saddr, daddr, sport, dport, count, 1) + data)
@@ -96,15 +92,14 @@ static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,
static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
const struct in6_addr *daddr, __be16 sport,
- __be16 dport, __u32 sseq, __u32 count,
- __u32 maxdiff)
+ __be16 dport, __u32 sseq)
{
- __u32 diff;
+ __u32 diff, count = tcp_cookie_time();
cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;
diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS);
- if (diff >= maxdiff)
+ if (diff >= MAX_SYNCOOKIE_AGE)
return (__u32)-1;
return (cookie -
@@ -125,8 +120,7 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
*mssp = msstab[mssind];
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
- th->dest, ntohl(th->seq),
- jiffies / (HZ * 60), mssind);
+ th->dest, ntohl(th->seq), mssind);
}
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
@@ -146,8 +140,7 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq,
- jiffies / (HZ * 60), COUNTER_TRIES);
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
@@ -157,7 +150,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
struct inet_request_sock *ireq;
- struct inet6_request_sock *ireq6;
struct tcp_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -194,7 +186,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ireq = inet_rsk(req);
- ireq6 = inet6_rsk(req);
treq = tcp_rsk(req);
treq->listener = NULL;
@@ -202,22 +193,22 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out_free;
req->mss = mss;
- ireq->rmt_port = th->source;
- ireq->loc_port = th->dest;
- ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
- ireq6->loc_addr = ipv6_hdr(skb)->daddr;
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
- ireq6->pktopts = skb;
+ ireq->pktopts = skb;
}
- ireq6->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq6->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
req->expires = 0UL;
req->num_retrans = 0;
@@ -241,12 +232,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct flowi6 fl6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = ireq6->rmt_addr;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
final_p = fl6_update_dst(&fl6, np->opt, &final);
- fl6.saddr = ireq6->loc_addr;
+ fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet_rsk(req)->rmt_port;
+ fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5c71501..b996ee2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -192,13 +192,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
}
if (tp->rx_opt.ts_recent_stamp &&
- !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
+ !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
tp->write_seq = 0;
}
- np->daddr = usin->sin6_addr;
+ sk->sk_v6_daddr = usin->sin6_addr;
np->flow_label = fl6.flowlabel;
/*
@@ -237,17 +237,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
} else {
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
- &np->rcv_saddr);
+ &sk->sk_v6_rcv_saddr);
}
return err;
}
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ saddr = &sk->sk_v6_rcv_saddr;
fl6.flowi6_proto = IPPROTO_TCP;
- fl6.daddr = np->daddr;
+ fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
@@ -266,7 +266,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (saddr == NULL) {
saddr = &fl6.saddr;
- np->rcv_saddr = *saddr;
+ sk->sk_v6_rcv_saddr = *saddr;
}
/* set the source address */
@@ -279,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
rt = (struct rt6_info *) dst;
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
- ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
@@ -298,7 +298,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
@@ -465,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
u16 queue_mapping)
{
- struct inet6_request_sock *treq = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
int err = -ENOMEM;
@@ -477,9 +477,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
skb = tcp_make_synack(sk, dst, req, NULL);
if (skb) {
- __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
+ __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
+ &ireq->ir_v6_rmt_addr);
- fl6->daddr = treq->rmt_addr;
+ fl6->daddr = ireq->ir_v6_rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
@@ -502,7 +503,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
- kfree_skb(inet6_rsk(req)->pktopts);
+ kfree_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -515,13 +516,13 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
struct sock *addr_sk)
{
- return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
struct request_sock *req)
{
- return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
+ return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
}
static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
@@ -621,10 +622,10 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
if (sk) {
saddr = &inet6_sk(sk)->saddr;
- daddr = &inet6_sk(sk)->daddr;
+ daddr = &sk->sk_v6_daddr;
} else if (req) {
- saddr = &inet6_rsk(req)->loc_addr;
- daddr = &inet6_rsk(req)->rmt_addr;
+ saddr = &inet_rsk(req)->ir_v6_loc_addr;
+ daddr = &inet_rsk(req)->ir_v6_rmt_addr;
} else {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
saddr = &ip6h->saddr;
@@ -949,7 +950,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_options_received tmp_opt;
struct request_sock *req;
- struct inet6_request_sock *treq;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
@@ -994,25 +995,25 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb);
- treq = inet6_rsk(req);
- treq->rmt_addr = ipv6_hdr(skb)->saddr;
- treq->loc_addr = ipv6_hdr(skb)->daddr;
+ ireq = inet_rsk(req);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (!want_cookie || tmp_opt.tstamp_ok)
TCP_ECN_create_request(req, skb, sock_net(sk));
- treq->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- treq->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
if (!isn) {
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
- treq->pktopts = skb;
+ ireq->pktopts = skb;
}
if (want_cookie) {
@@ -1051,7 +1052,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
* to the moment of synflood.
*/
LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
- &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+ &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
goto drop_and_release;
}
@@ -1086,7 +1087,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
- struct inet6_request_sock *treq;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
@@ -1116,11 +1117,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- newnp->rcv_saddr = newnp->saddr;
+ newsk->sk_v6_rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1151,7 +1152,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
}
- treq = inet6_rsk(req);
+ ireq = inet_rsk(req);
if (sk_acceptq_is_full(sk))
goto out_overflow;
@@ -1185,10 +1186,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- newnp->daddr = treq->rmt_addr;
- newnp->saddr = treq->loc_addr;
- newnp->rcv_saddr = treq->loc_addr;
- newsk->sk_bound_dev_if = treq->iif;
+ newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+ newnp->saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
@@ -1203,11 +1204,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts,
+ if (ireq->pktopts != NULL) {
+ newnp->pktoptions = skb_clone(ireq->pktopts,
sk_gfp_atomic(sk, GFP_ATOMIC));
- consume_skb(treq->pktopts);
- treq->pktopts = NULL;
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
if (newnp->pktoptions)
skb_set_owner_r(newnp->pktoptions, newsk);
}
@@ -1244,13 +1245,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
- if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
+ if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
+ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
AF_INET6, key->key, key->keylen,
sk_gfp_atomic(sk, GFP_ATOMIC));
}
@@ -1722,8 +1723,8 @@ static void get_openreq6(struct seq_file *seq,
const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
{
int ttd = req->expires - jiffies;
- const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
- const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
+ const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
+ const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
if (ttd < 0)
ttd = 0;
@@ -1734,10 +1735,10 @@ static void get_openreq6(struct seq_file *seq,
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
- ntohs(inet_rsk(req)->loc_port),
+ inet_rsk(req)->ir_num,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3],
- ntohs(inet_rsk(req)->rmt_port),
+ ntohs(inet_rsk(req)->ir_rmt_port),
TCP_SYN_RECV,
0,0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
@@ -1758,10 +1759,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
- const struct ipv6_pinfo *np = inet6_sk(sp);
- dest = &np->daddr;
- src = &np->rcv_saddr;
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
@@ -1810,11 +1810,10 @@ static void get_timewait6_sock(struct seq_file *seq,
{
const struct in6_addr *dest, *src;
__u16 destp, srcp;
- const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
- long delta = tw->tw_ttd - jiffies;
+ s32 delta = tw->tw_ttd - inet_tw_time_stamp();
- dest = &tw6->tw_v6_daddr;
- src = &tw6->tw_v6_rcv_saddr;
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
destp = ntohs(tw->tw_dport);
srcp = ntohs(tw->tw_sport);
@@ -1834,6 +1833,7 @@ static void get_timewait6_sock(struct seq_file *seq,
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
+ struct sock *sk = v;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
@@ -1849,14 +1849,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
case TCP_SEQ_STATE_ESTABLISHED:
- get_tcp6_sock(seq, v, st->num);
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else
+ get_tcp6_sock(seq, v, st->num);
break;
case TCP_SEQ_STATE_OPENREQ:
get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
- get_timewait6_sock(seq, v, st->num);
- break;
}
out:
return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f405815..b496de1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -55,20 +55,17 @@
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- __be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
- __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
return (!sk2_ipv6only &&
- (!sk1_rcv_saddr || !sk2_rcv_saddr ||
- sk1_rcv_saddr == sk2_rcv_saddr));
+ (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
+ sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
if (addr_type2 == IPV6_ADDR_ANY &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
@@ -79,7 +76,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 1;
if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
return 1;
return 0;
@@ -107,7 +104,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
unsigned int hash2_nulladdr =
udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
unsigned int hash2_partial =
- udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+ udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -117,7 +114,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
static void udp_v6_rehash(struct sock *sk)
{
u16 new_hash = udp6_portaddr_hash(sock_net(sk),
- &inet6_sk(sk)->rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
udp_lib_rehash(sk, new_hash);
@@ -133,7 +130,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
score = 0;
@@ -142,13 +138,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score++;
}
- if (!ipv6_addr_any(&np->daddr)) {
- if (!ipv6_addr_equal(&np->daddr, saddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
@@ -171,10 +167,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
return -1;
score = 0;
if (inet->inet_dport) {
@@ -182,8 +177,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (!ipv6_addr_any(&np->daddr)) {
- if (!ipv6_addr_equal(&np->daddr, saddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
@@ -525,8 +520,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == ICMPV6_PKT_TOOBIG)
ip6_sk_update_pmtu(skb, sk, info);
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ goto out;
+ }
np = inet6_sk(sk);
@@ -549,8 +546,10 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
- if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
sock_rps_save_rxhash(sk, skb);
+ sk_mark_napi_id(sk, skb);
+ }
rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
@@ -688,20 +687,19 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
if (udp_sk(s)->udp_port_hash == num &&
s->sk_family == PF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(s);
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
continue;
}
- if (!ipv6_addr_any(&np->daddr) &&
- !ipv6_addr_equal(&np->daddr, rmt_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_daddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
continue;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))
continue;
}
if (!inet6_mc_check(s, loc_addr, rmt_addr))
@@ -844,7 +842,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk != NULL) {
int ret;
- sk_mark_napi_id(sk, skb);
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
@@ -1062,7 +1059,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
} else if (!up->pending) {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
} else
daddr = NULL;
@@ -1132,8 +1129,8 @@ do_udp_sendmsg:
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
@@ -1144,7 +1141,7 @@ do_udp_sendmsg:
return -EDESTADDRREQ;
fl6.fl6_dport = inet->inet_dport;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
fl6.flowlabel = np->flow_label;
connected = 1;
}
@@ -1260,8 +1257,8 @@ do_append_data:
if (dst) {
if (connected) {
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &np->daddr) ?
- &np->daddr : NULL,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
&np->saddr :
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index feae495a..9af77d9 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -115,6 +115,11 @@ struct l2tp_net {
static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
+{
+ return sk->sk_user_data;
+}
+
static inline struct l2tp_net *l2tp_pernet(struct net *net)
{
BUG_ON(!net);
@@ -504,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
return 0;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6) {
+ if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) {
if (!uh->check) {
LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
return 1;
@@ -1128,7 +1133,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (skb->sk->sk_family == PF_INET6)
+ if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
error = inet6_csk_xmit(skb, NULL);
else
#endif
@@ -1176,7 +1181,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
!(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
__wsum csum = skb_checksum(skb, 0, udp_len, 0);
skb->ip_summed = CHECKSUM_UNNECESSARY;
- uh->check = csum_ipv6_magic(&np->saddr, &np->daddr, udp_len,
+ uh->check = csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr, udp_len,
IPPROTO_UDP, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
@@ -1184,7 +1189,7 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_ipv6_magic(&np->saddr, &np->daddr,
+ uh->check = ~csum_ipv6_magic(&np->saddr, &sk->sk_v6_daddr,
udp_len, IPPROTO_UDP, 0);
}
}
@@ -1255,7 +1260,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
l2tp_xmit_ipv6_csum(sk, skb, udp_len);
else
#endif
@@ -1304,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
*/
static void l2tp_tunnel_destruct(struct sock *sk)
{
- struct l2tp_tunnel *tunnel;
+ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
struct l2tp_net *pn;
- tunnel = sk->sk_user_data;
if (tunnel == NULL)
goto end;
@@ -1675,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
}
/* Check if this socket has already been prepped */
- tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
+ tunnel = l2tp_tunnel(sk);
if (tunnel != NULL) {
/* This socket has already been prepped */
err = -EBUSY;
@@ -1704,6 +1708,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
if (cfg != NULL)
tunnel->debug = cfg->debug;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (ipv6_addr_v4mapped(&np->saddr) &&
+ ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ tunnel->v4mapped = true;
+ inet->inet_saddr = np->saddr.s6_addr32[3];
+ inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
+ inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
+ } else {
+ tunnel->v4mapped = false;
+ }
+ }
+#endif
+
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1712,7 +1734,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
udpv6_encap_enable();
else
#endif
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 66a559b..6f251cb 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -194,6 +194,9 @@ struct l2tp_tunnel {
struct sock *sock; /* Parent socket */
int fd; /* Parent fd, if tunnel socket
* was created by userspace */
+#if IS_ENABLED(CONFIG_IPV6)
+ bool v4mapped;
+#endif
struct work_struct del_work;
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 072d720..2d6760a 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -127,9 +127,10 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
#if IS_ENABLED(CONFIG_IPV6)
if (tunnel->sock->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
+ const struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
+
seq_printf(m, " from %pI6c to %pI6c\n",
- &np->saddr, &np->daddr);
+ &np->saddr, &tunnel->sock->sk_v6_daddr);
} else
#endif
seq_printf(m, " from %pI4 to %pI4\n",
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b8a6039..cfd6530 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -63,7 +63,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
struct sock *sk;
sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
- struct in6_addr *addr = inet6_rcv_saddr(sk);
+ const struct in6_addr *addr = inet6_rcv_saddr(sk);
struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
if (l2tp == NULL)
@@ -331,7 +331,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
rcu_read_unlock();
inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
- np->rcv_saddr = addr->l2tp_addr;
+ sk->sk_v6_rcv_saddr = addr->l2tp_addr;
np->saddr = addr->l2tp_addr;
l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
@@ -421,14 +421,14 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
if (!lsk->peer_conn_id)
return -ENOTCONN;
lsa->l2tp_conn_id = lsk->peer_conn_id;
- lsa->l2tp_addr = np->daddr;
+ lsa->l2tp_addr = sk->sk_v6_daddr;
if (np->sndflow)
lsa->l2tp_flowinfo = np->flow_label;
} else {
- if (ipv6_addr_any(&np->rcv_saddr))
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
lsa->l2tp_addr = np->saddr;
else
- lsa->l2tp_addr = np->rcv_saddr;
+ lsa->l2tp_addr = sk->sk_v6_rcv_saddr;
lsa->l2tp_conn_id = lsk->conn_id;
}
@@ -537,8 +537,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- ipv6_addr_equal(daddr, &np->daddr))
- daddr = &np->daddr;
+ ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
+ daddr = &sk->sk_v6_daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
lsa->l2tp_scope_id &&
@@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
- daddr = &np->daddr;
+ daddr = &sk->sk_v6_daddr;
fl6.flowlabel = np->flow_label;
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 0825ff2..be446d5 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -306,8 +306,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
if (np) {
if (nla_put(skb, L2TP_ATTR_IP6_SADDR, sizeof(np->saddr),
&np->saddr) ||
- nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(np->daddr),
- &np->daddr))
+ nla_put(skb, L2TP_ATTR_IP6_DADDR, sizeof(sk->sk_v6_daddr),
+ &sk->sk_v6_daddr))
goto nla_put_failure;
} else
#endif
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5ebee2d..f0a7ada 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -906,8 +906,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
#if IS_ENABLED(CONFIG_IPV6)
} else if ((tunnel->version == 2) &&
(tunnel->sock->sk_family == AF_INET6)) {
- struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
struct sockaddr_pppol2tpin6 sp;
+
len = sizeof(sp);
memset(&sp, 0, len);
sp.sa_family = AF_PPPOX;
@@ -920,13 +920,13 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
sp.pppol2tp.d_session = session->peer_session_id;
sp.pppol2tp.addr.sin6_family = AF_INET6;
sp.pppol2tp.addr.sin6_port = inet->inet_dport;
- memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr,
- sizeof(np->daddr));
+ memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
+ sizeof(tunnel->sock->sk_v6_daddr));
memcpy(uaddr, &sp, len);
} else if ((tunnel->version == 3) &&
(tunnel->sock->sk_family == AF_INET6)) {
- struct ipv6_pinfo *np = inet6_sk(tunnel->sock);
struct sockaddr_pppol2tpv3in6 sp;
+
len = sizeof(sp);
memset(&sp, 0, len);
sp.sa_family = AF_PPPOX;
@@ -939,8 +939,8 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
sp.pppol2tp.d_session = session->peer_session_id;
sp.pppol2tp.addr.sin6_family = AF_INET6;
sp.pppol2tp.addr.sin6_port = inet->inet_dport;
- memcpy(&sp.pppol2tp.addr.sin6_addr, &np->daddr,
- sizeof(np->daddr));
+ memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr,
+ sizeof(tunnel->sock->sk_v6_daddr));
memcpy(uaddr, &sp, len);
#endif
} else if (tunnel->version == 3) {
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad..355cc3b 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
} else {
lapb->n2count++;
lapb_requeue_frames(lapb);
+ lapb_kick(lapb);
}
break;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f0247a4..0011ac8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3073,6 +3073,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
case NL80211_IFTYPE_ADHOC:
if (!bssid)
return 0;
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ return 0;
if (ieee80211_is_beacon(hdr->frame_control)) {
return 1;
} else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 5d62c58..d4cee98 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -77,13 +77,13 @@ DECLARE_EVENT_CLASS(local_sdata_addr_evt,
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
- __array(char, addr, 6)
+ __array(char, addr, ETH_ALEN)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
- memcpy(__entry->addr, sdata->vif.addr, 6);
+ memcpy(__entry->addr, sdata->vif.addr, ETH_ALEN);
),
TP_printk(
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 550a688..1220f5a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2101,7 +2101,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- int rate, skip, shift;
+ int rate, shift;
u8 i, exrates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
u32 rate_flags;
@@ -2129,14 +2129,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, exrates + 2);
*pos++ = WLAN_EID_EXT_SUPP_RATES;
*pos++ = exrates;
- skip = 0;
for (i = 8; i < sband->n_bitrates; i++) {
u8 basic = 0;
if ((rate_flags & sband->bitrates[i].flags)
!= rate_flags)
continue;
- if (skip++ < 8)
- continue;
if (need_basic && basic_rates & BIT(i))
basic = 0x80;
rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index ba36c28..a2d6263 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -1,7 +1,7 @@
menuconfig IP_SET
tristate "IP set support"
depends on INET && NETFILTER
- depends on NETFILTER_NETLINK
+ select NETFILTER_NETLINK
help
This option adds IP set support to the kernel.
In order to define and use the sets, you need the userspace utility
@@ -90,6 +90,15 @@ config IP_SET_HASH_IPPORTNET
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_NETPORTNET
+ tristate "hash:net,port,net set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net,port,net set type support, by which
+ one can store two IPv4/IPv6 subnets, and a protocol/port in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_NET
tristate "hash:net set support"
depends on IP_SET
@@ -99,6 +108,15 @@ config IP_SET_HASH_NET
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_NETNET
+ tristate "hash:net,net set support"
+ depends on IP_SET
+ help
+ This option adds the hash:net,net set type support, by which
+ one can store IPv4/IPv6 network address/prefix pairs in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_NETPORT
tristate "hash:net,port set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 6e965ec..44b2d38 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
+obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o
+obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o
# list types
obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 2524337..a13e15b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -8,38 +8,32 @@
#ifndef __IP_SET_BITMAP_IP_GEN_H
#define __IP_SET_BITMAP_IP_GEN_H
-#define CONCAT(a, b) a##b
-#define TOKEN(a,b) CONCAT(a, b)
-
-#define mtype_do_test TOKEN(MTYPE, _do_test)
-#define mtype_gc_test TOKEN(MTYPE, _gc_test)
-#define mtype_is_filled TOKEN(MTYPE, _is_filled)
-#define mtype_do_add TOKEN(MTYPE, _do_add)
-#define mtype_do_del TOKEN(MTYPE, _do_del)
-#define mtype_do_list TOKEN(MTYPE, _do_list)
-#define mtype_do_head TOKEN(MTYPE, _do_head)
-#define mtype_adt_elem TOKEN(MTYPE, _adt_elem)
-#define mtype_add_timeout TOKEN(MTYPE, _add_timeout)
-#define mtype_gc_init TOKEN(MTYPE, _gc_init)
-#define mtype_kadt TOKEN(MTYPE, _kadt)
-#define mtype_uadt TOKEN(MTYPE, _uadt)
-#define mtype_destroy TOKEN(MTYPE, _destroy)
-#define mtype_flush TOKEN(MTYPE, _flush)
-#define mtype_head TOKEN(MTYPE, _head)
-#define mtype_same_set TOKEN(MTYPE, _same_set)
-#define mtype_elem TOKEN(MTYPE, _elem)
-#define mtype_test TOKEN(MTYPE, _test)
-#define mtype_add TOKEN(MTYPE, _add)
-#define mtype_del TOKEN(MTYPE, _del)
-#define mtype_list TOKEN(MTYPE, _list)
-#define mtype_gc TOKEN(MTYPE, _gc)
+#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test)
+#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test)
+#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled)
+#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add)
+#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
+#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del)
+#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list)
+#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head)
+#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem)
+#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout)
+#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
+#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
+#define mtype_head IPSET_TOKEN(MTYPE, _head)
+#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_add IPSET_TOKEN(MTYPE, _add)
+#define mtype_del IPSET_TOKEN(MTYPE, _del)
+#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype MTYPE
-#define ext_timeout(e, m) \
- (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
-#define ext_counter(e, m) \
- (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER])
-#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id))
+#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id))
static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -49,11 +43,22 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
init_timer(&map->gc);
map->gc.data = (unsigned long) set;
map->gc.function = gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
}
static void
+mtype_ext_cleanup(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+ u32 id;
+
+ for (id = 0; id < map->elements; id++)
+ if (test_bit(id, map->members))
+ ip_set_ext_destroy(set, get_ext(set, map, id));
+}
+
+static void
mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
@@ -62,8 +67,11 @@ mtype_destroy(struct ip_set *set)
del_timer_sync(&map->gc);
ip_set_free(map->members);
- if (map->dsize)
+ if (set->dsize) {
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
ip_set_free(map->extensions);
+ }
kfree(map);
set->data = NULL;
@@ -74,6 +82,8 @@ mtype_flush(struct ip_set *set)
{
struct mtype *map = set->data;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
memset(map->members, 0, map->memsize);
}
@@ -91,12 +101,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
htonl(sizeof(*map) +
map->memsize +
- map->dsize * map->elements)) ||
- (SET_WITH_TIMEOUT(set) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
- (SET_WITH_COUNTER(set) &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
- htonl(IPSET_FLAG_WITH_COUNTERS))))
+ set->dsize * map->elements)))
+ goto nla_put_failure;
+ if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
@@ -111,16 +118,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct mtype *map = set->data;
const struct mtype_adt_elem *e = value;
- void *x = get_ext(map, e->id);
- int ret = mtype_do_test(e, map);
+ void *x = get_ext(set, map, e->id);
+ int ret = mtype_do_test(e, map, set->dsize);
if (ret <= 0)
return ret;
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, map)))
+ ip_set_timeout_expired(ext_timeout(x, set)))
return 0;
if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(x, map), ext, mext, flags);
+ ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
return 1;
}
@@ -130,26 +137,30 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct mtype *map = set->data;
const struct mtype_adt_elem *e = value;
- void *x = get_ext(map, e->id);
- int ret = mtype_do_add(e, map, flags);
+ void *x = get_ext(set, map, e->id);
+ int ret = mtype_do_add(e, map, flags, set->dsize);
if (ret == IPSET_ADD_FAILED) {
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, map)))
+ ip_set_timeout_expired(ext_timeout(x, set)))
ret = 0;
else if (!(flags & IPSET_FLAG_EXIST))
return -IPSET_ERR_EXIST;
+ /* Element is re-added, cleanup extensions */
+ ip_set_ext_destroy(set, x);
}
if (SET_WITH_TIMEOUT(set))
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
- mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret);
+ mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret);
#else
- ip_set_timeout_set(ext_timeout(x, map), ext->timeout);
+ ip_set_timeout_set(ext_timeout(x, set), ext->timeout);
#endif
if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(x, map), ext);
+ ip_set_init_counter(ext_counter(x, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(ext_comment(x, set), ext);
return 0;
}
@@ -159,16 +170,27 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct mtype *map = set->data;
const struct mtype_adt_elem *e = value;
- const void *x = get_ext(map, e->id);
+ void *x = get_ext(set, map, e->id);
- if (mtype_do_del(e, map) ||
- (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, map))))
+ if (mtype_do_del(e, map))
+ return -IPSET_ERR_EXIST;
+
+ ip_set_ext_destroy(set, x);
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(x, set)))
return -IPSET_ERR_EXIST;
return 0;
}
+#ifndef IP_SET_BITMAP_STORED_TIMEOUT
+static inline bool
+mtype_is_filled(const struct mtype_elem *x)
+{
+ return true;
+}
+#endif
+
static int
mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
@@ -183,13 +205,13 @@ mtype_list(const struct ip_set *set,
return -EMSGSIZE;
for (; cb->args[2] < map->elements; cb->args[2]++) {
id = cb->args[2];
- x = get_ext(map, id);
+ x = get_ext(set, map, id);
if (!test_bit(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
mtype_is_filled((const struct mtype_elem *) x) &&
#endif
- ip_set_timeout_expired(ext_timeout(x, map))))
+ ip_set_timeout_expired(ext_timeout(x, set))))
continue;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested) {
@@ -199,23 +221,10 @@ mtype_list(const struct ip_set *set,
} else
goto nla_put_failure;
}
- if (mtype_do_list(skb, map, id))
+ if (mtype_do_list(skb, map, id, set->dsize))
goto nla_put_failure;
- if (SET_WITH_TIMEOUT(set)) {
-#ifdef IP_SET_BITMAP_STORED_TIMEOUT
- if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_stored(map, id,
- ext_timeout(x, map)))))
- goto nla_put_failure;
-#else
- if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(
- ext_timeout(x, map)))))
- goto nla_put_failure;
-#endif
- }
- if (SET_WITH_COUNTER(set) &&
- ip_set_put_counter(skb, ext_counter(x, map)))
+ if (ip_set_put_extensions(skb, set, x,
+ mtype_is_filled((const struct mtype_elem *) x)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
@@ -228,11 +237,11 @@ mtype_list(const struct ip_set *set,
nla_put_failure:
nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, adt);
if (unlikely(id == first)) {
cb->args[2] = 0;
return -EMSGSIZE;
}
+ ipset_nest_end(skb, adt);
return 0;
}
@@ -241,21 +250,23 @@ mtype_gc(unsigned long ul_set)
{
struct ip_set *set = (struct ip_set *) ul_set;
struct mtype *map = set->data;
- const void *x;
+ void *x;
u32 id;
/* We run parallel with other readers (test element)
* but adding/deleting new entries is locked out */
read_lock_bh(&set->lock);
for (id = 0; id < map->elements; id++)
- if (mtype_gc_test(id, map)) {
- x = get_ext(map, id);
- if (ip_set_timeout_expired(ext_timeout(x, map)))
+ if (mtype_gc_test(id, map, set->dsize)) {
+ x = get_ext(set, map, id);
+ if (ip_set_timeout_expired(ext_timeout(x, set))) {
clear_bit(id, map->members);
+ ip_set_ext_destroy(set, x);
+ }
}
read_unlock_bh(&set->lock);
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index f1a8128..6f1f9f4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -25,12 +25,13 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_bitmap.h>
-#define REVISION_MIN 0
-#define REVISION_MAX 1 /* Counter support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counter support added */
+#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip");
#define MTYPE bitmap_ip
@@ -44,10 +45,7 @@ struct bitmap_ip {
u32 elements; /* number of max elements in the set */
u32 hosts; /* number of hosts in a subnet */
size_t memsize; /* members size */
- size_t dsize; /* extensions struct size */
- size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
u8 netmask; /* subnet netmask */
- u32 timeout; /* timeout parameter */
struct timer_list gc; /* garbage collection */
};
@@ -65,20 +63,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
/* Common functions */
static inline int
-bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
+bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
+ struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
static inline int
-bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)
+bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
static inline int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
- u32 flags)
+ u32 flags, size_t dsize)
{
return !!test_and_set_bit(e->id, map->members);
}
@@ -90,7 +89,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
}
static inline int
-bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)
+bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
+ size_t dsize)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
htonl(map->first_ip + id * map->hosts));
@@ -113,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct bitmap_ip_adt_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
@@ -131,9 +131,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- u32 ip, ip_to;
+ u32 ip = 0, ip_to = 0;
struct bitmap_ip_adt_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -200,7 +200,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
return x->first_ip == y->first_ip &&
x->last_ip == y->last_ip &&
x->netmask == y->netmask &&
- x->timeout == y->timeout &&
+ a->timeout == b->timeout &&
a->extensions == b->extensions;
}
@@ -209,25 +209,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
struct bitmap_ip_elem {
};
-/* Timeout variant */
-
-struct bitmap_ipt_elem {
- unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipc_elem {
- struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipct_elem {
- unsigned long timeout;
- struct ip_set_counter counter;
-};
-
#include "ip_set_bitmap_gen.h"
/* Create bitmap:ip type of sets */
@@ -240,8 +221,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (map->dsize) {
- map->extensions = ip_set_alloc(map->dsize * elements);
+ if (set->dsize) {
+ map->extensions = ip_set_alloc(set->dsize * elements);
if (!map->extensions) {
kfree(map->members);
return false;
@@ -252,7 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->elements = elements;
map->hosts = hosts;
map->netmask = netmask;
- map->timeout = IPSET_NO_TIMEOUT;
+ set->timeout = IPSET_NO_TIMEOUT;
set->data = map;
set->family = NFPROTO_IPV4;
@@ -261,10 +242,11 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
}
static int
-bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
{
struct bitmap_ip *map;
- u32 first_ip, last_ip, hosts, cadt_flags = 0;
+ u32 first_ip = 0, last_ip = 0, hosts;
u64 elements;
u8 netmask = 32;
int ret;
@@ -336,61 +318,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ip;
- if (tb[IPSET_ATTR_CADT_FLAGS])
- cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
- if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
- set->extensions |= IPSET_EXT_COUNTER;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct bitmap_ipct_elem);
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct bitmap_ipct_elem, timeout);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct bitmap_ipct_elem, counter);
-
- if (!init_map_ip(set, map, first_ip, last_ip,
- elements, hosts, netmask)) {
- kfree(map);
- return -ENOMEM;
- }
-
- map->timeout = ip_set_timeout_uget(
- tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
-
- bitmap_ip_gc_init(set, bitmap_ip_gc);
- } else {
- map->dsize = sizeof(struct bitmap_ipc_elem);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct bitmap_ipc_elem, counter);
-
- if (!init_map_ip(set, map, first_ip, last_ip,
- elements, hosts, netmask)) {
- kfree(map);
- return -ENOMEM;
- }
- }
- } else if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct bitmap_ipt_elem);
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct bitmap_ipt_elem, timeout);
-
- if (!init_map_ip(set, map, first_ip, last_ip,
- elements, hosts, netmask)) {
- kfree(map);
- return -ENOMEM;
- }
-
- map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
-
+ set->dsize = ip_set_elem_len(set, tb, 0);
+ if (!init_map_ip(set, map, first_ip, last_ip,
+ elements, hosts, netmask)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
bitmap_ip_gc_init(set, bitmap_ip_gc);
- } else {
- map->dsize = 0;
- if (!init_map_ip(set, map, first_ip, last_ip,
- elements, hosts, netmask)) {
- kfree(map);
- return -ENOMEM;
- }
}
return 0;
}
@@ -401,8 +337,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
.features = IPSET_TYPE_IP,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_IPV4,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = bitmap_ip_create,
.create_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -420,6 +356,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 3b30e0b..740eabe 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -25,12 +25,13 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_bitmap.h>
-#define REVISION_MIN 0
-#define REVISION_MAX 1 /* Counter support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counter support added */
+#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip,mac");
#define MTYPE bitmap_ipmac
@@ -48,11 +49,8 @@ struct bitmap_ipmac {
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
- u32 timeout; /* timeout value */
- struct timer_list gc; /* garbage collector */
size_t memsize; /* members size */
- size_t dsize; /* size of element */
- size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
+ struct timer_list gc; /* garbage collector */
};
/* ADT structure for generic function args */
@@ -82,13 +80,13 @@ get_elem(void *extensions, u16 id, size_t dsize)
static inline int
bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
- const struct bitmap_ipmac *map)
+ const struct bitmap_ipmac *map, size_t dsize)
{
const struct bitmap_ipmac_elem *elem;
if (!test_bit(e->id, map->members))
return 0;
- elem = get_elem(map->extensions, e->id, map->dsize);
+ elem = get_elem(map->extensions, e->id, dsize);
if (elem->filled == MAC_FILLED)
return e->ether == NULL ||
ether_addr_equal(e->ether, elem->ether);
@@ -97,13 +95,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
}
static inline int
-bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)
+bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
{
const struct bitmap_ipmac_elem *elem;
if (!test_bit(id, map->members))
return 0;
- elem = get_elem(map->extensions, id, map->dsize);
+ elem = get_elem(map->extensions, id, dsize);
/* Timer not started for the incomplete elements */
return elem->filled == MAC_FILLED;
}
@@ -117,13 +115,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
static inline int
bitmap_ipmac_add_timeout(unsigned long *timeout,
const struct bitmap_ipmac_adt_elem *e,
- const struct ip_set_ext *ext,
+ const struct ip_set_ext *ext, struct ip_set *set,
struct bitmap_ipmac *map, int mode)
{
u32 t = ext->timeout;
if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
- if (t == map->timeout)
+ if (t == set->timeout)
/* Timeout was not specified, get stored one */
t = *timeout;
ip_set_timeout_set(timeout, t);
@@ -142,11 +140,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
static inline int
bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
- struct bitmap_ipmac *map, u32 flags)
+ struct bitmap_ipmac *map, u32 flags, size_t dsize)
{
struct bitmap_ipmac_elem *elem;
- elem = get_elem(map->extensions, e->id, map->dsize);
+ elem = get_elem(map->extensions, e->id, dsize);
if (test_and_set_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
if (e->ether && (flags & IPSET_FLAG_EXIST))
@@ -178,22 +176,12 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
return !test_and_clear_bit(e->id, map->members);
}
-static inline unsigned long
-ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
-{
- const struct bitmap_ipmac_elem *elem =
- get_elem(map->extensions, id, map->dsize);
-
- return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
- *timeout;
-}
-
static inline int
bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
- u32 id)
+ u32 id, size_t dsize)
{
const struct bitmap_ipmac_elem *elem =
- get_elem(map->extensions, id, map->dsize);
+ get_elem(map->extensions, id, dsize);
return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
htonl(map->first_ip + id)) ||
@@ -216,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct bitmap_ipmac_adt_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
/* MAC can be src only */
@@ -245,8 +233,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
const struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct bitmap_ipmac_adt_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
- u32 ip;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0;
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -285,43 +273,12 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
return x->first_ip == y->first_ip &&
x->last_ip == y->last_ip &&
- x->timeout == y->timeout &&
+ a->timeout == b->timeout &&
a->extensions == b->extensions;
}
/* Plain variant */
-/* Timeout variant */
-
-struct bitmap_ipmact_elem {
- struct {
- unsigned char ether[ETH_ALEN];
- unsigned char filled;
- } __attribute__ ((aligned));
- unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_ipmacc_elem {
- struct {
- unsigned char ether[ETH_ALEN];
- unsigned char filled;
- } __attribute__ ((aligned));
- struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_ipmacct_elem {
- struct {
- unsigned char ether[ETH_ALEN];
- unsigned char filled;
- } __attribute__ ((aligned));
- unsigned long timeout;
- struct ip_set_counter counter;
-};
-
#include "ip_set_bitmap_gen.h"
/* Create bitmap:ip,mac type of sets */
@@ -330,11 +287,11 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
+ map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (map->dsize) {
- map->extensions = ip_set_alloc(map->dsize * elements);
+ if (set->dsize) {
+ map->extensions = ip_set_alloc(set->dsize * elements);
if (!map->extensions) {
kfree(map->members);
return false;
@@ -343,7 +300,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
- map->timeout = IPSET_NO_TIMEOUT;
+ set->timeout = IPSET_NO_TIMEOUT;
set->data = map;
set->family = NFPROTO_IPV4;
@@ -352,10 +309,10 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
}
static int
-bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
+bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
u32 flags)
{
- u32 first_ip, last_ip, cadt_flags = 0;
+ u32 first_ip = 0, last_ip = 0;
u64 elements;
struct bitmap_ipmac *map;
int ret;
@@ -399,57 +356,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ipmac;
- if (tb[IPSET_ATTR_CADT_FLAGS])
- cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
- if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
- set->extensions |= IPSET_EXT_COUNTER;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct bitmap_ipmacct_elem);
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct bitmap_ipmacct_elem, timeout);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct bitmap_ipmacct_elem, counter);
-
- if (!init_map_ipmac(set, map, first_ip, last_ip,
- elements)) {
- kfree(map);
- return -ENOMEM;
- }
- map->timeout = ip_set_timeout_uget(
- tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
- bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
- } else {
- map->dsize = sizeof(struct bitmap_ipmacc_elem);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct bitmap_ipmacc_elem, counter);
-
- if (!init_map_ipmac(set, map, first_ip, last_ip,
- elements)) {
- kfree(map);
- return -ENOMEM;
- }
- }
- } else if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct bitmap_ipmact_elem);
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct bitmap_ipmact_elem, timeout);
-
- if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
- kfree(map);
- return -ENOMEM;
- }
- map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct bitmap_ipmac_elem));
+ if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
- } else {
- map->dsize = sizeof(struct bitmap_ipmac_elem);
-
- if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
- kfree(map);
- return -ENOMEM;
- }
- set->variant = &bitmap_ipmac;
}
return 0;
}
@@ -460,8 +375,8 @@ static struct ip_set_type bitmap_ipmac_type = {
.features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_IPV4,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = bitmap_ipmac_create,
.create_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -478,6 +393,7 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 8207d1f..e7603c5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -20,12 +20,13 @@
#include <linux/netfilter/ipset/ip_set_bitmap.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
-#define REVISION_MIN 0
-#define REVISION_MAX 1 /* Counter support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counter support added */
+#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:port");
#define MTYPE bitmap_port
@@ -38,9 +39,6 @@ struct bitmap_port {
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
- size_t dsize; /* extensions struct size */
- size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
- u32 timeout; /* timeout parameter */
struct timer_list gc; /* garbage collection */
};
@@ -59,20 +57,20 @@ port_to_id(const struct bitmap_port *m, u16 port)
static inline int
bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
- const struct bitmap_port *map)
+ const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
static inline int
-bitmap_port_gc_test(u16 id, const struct bitmap_port *map)
+bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
static inline int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
- struct bitmap_port *map, u32 flags)
+ struct bitmap_port *map, u32 flags, size_t dsize)
{
return !!test_and_set_bit(e->id, map->members);
}
@@ -85,7 +83,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
}
static inline int
-bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)
+bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
+ size_t dsize)
{
return nla_put_net16(skb, IPSET_ATTR_PORT,
htons(map->first_port + id));
@@ -106,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct bitmap_port_adt_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
__be16 __port;
u16 port = 0;
@@ -131,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct bitmap_port_adt_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port; /* wraparound */
u16 port_to;
int ret = 0;
@@ -191,7 +190,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
return x->first_port == y->first_port &&
x->last_port == y->last_port &&
- x->timeout == y->timeout &&
+ a->timeout == b->timeout &&
a->extensions == b->extensions;
}
@@ -200,25 +199,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
struct bitmap_port_elem {
};
-/* Timeout variant */
-
-struct bitmap_portt_elem {
- unsigned long timeout;
-};
-
-/* Plain variant with counter */
-
-struct bitmap_portc_elem {
- struct ip_set_counter counter;
-};
-
-/* Timeout variant with counter */
-
-struct bitmap_portct_elem {
- unsigned long timeout;
- struct ip_set_counter counter;
-};
-
#include "ip_set_bitmap_gen.h"
/* Create bitmap:ip type of sets */
@@ -230,8 +210,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (map->dsize) {
- map->extensions = ip_set_alloc(map->dsize * map->elements);
+ if (set->dsize) {
+ map->extensions = ip_set_alloc(set->dsize * map->elements);
if (!map->extensions) {
kfree(map->members);
return false;
@@ -239,7 +219,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
}
map->first_port = first_port;
map->last_port = last_port;
- map->timeout = IPSET_NO_TIMEOUT;
+ set->timeout = IPSET_NO_TIMEOUT;
set->data = map;
set->family = NFPROTO_UNSPEC;
@@ -248,11 +228,11 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
}
static int
-bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
{
struct bitmap_port *map;
u16 first_port, last_port;
- u32 cadt_flags = 0;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -276,53 +256,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
map->elements = last_port - first_port + 1;
map->memsize = map->elements * sizeof(unsigned long);
set->variant = &bitmap_port;
- if (tb[IPSET_ATTR_CADT_FLAGS])
- cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
- if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
- set->extensions |= IPSET_EXT_COUNTER;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct bitmap_portct_elem);
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct bitmap_portct_elem, timeout);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct bitmap_portct_elem, counter);
- if (!init_map_port(set, map, first_port, last_port)) {
- kfree(map);
- return -ENOMEM;
- }
-
- map->timeout =
- ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
- bitmap_port_gc_init(set, bitmap_port_gc);
- } else {
- map->dsize = sizeof(struct bitmap_portc_elem);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct bitmap_portc_elem, counter);
- if (!init_map_port(set, map, first_port, last_port)) {
- kfree(map);
- return -ENOMEM;
- }
- }
- } else if (tb[IPSET_ATTR_TIMEOUT]) {
- map->dsize = sizeof(struct bitmap_portt_elem);
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct bitmap_portt_elem, timeout);
- if (!init_map_port(set, map, first_port, last_port)) {
- kfree(map);
- return -ENOMEM;
- }
-
- map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
+ set->dsize = ip_set_elem_len(set, tb, 0);
+ if (!init_map_port(set, map, first_port, last_port)) {
+ kfree(map);
+ return -ENOMEM;
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
bitmap_port_gc_init(set, bitmap_port_gc);
- } else {
- map->dsize = 0;
- if (!init_map_port(set, map, first_port, last_port)) {
- kfree(map);
- return -ENOMEM;
- }
-
}
return 0;
}
@@ -333,8 +274,8 @@ static struct ip_set_type bitmap_port_type = {
.features = IPSET_TYPE_PORT,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = bitmap_port_create,
.create_policy = {
[IPSET_ATTR_PORT] = { .type = NLA_U16 },
@@ -349,6 +290,7 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f2e30fb..dc9284b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -17,6 +17,8 @@
#include <linux/spinlock.h>
#include <linux/rculist.h>
#include <net/netlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
@@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */
static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */
static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
-static struct ip_set * __rcu *ip_set_list; /* all individual sets */
-static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
+struct ip_set_net {
+ struct ip_set * __rcu *ip_set_list; /* all individual sets */
+ ip_set_id_t ip_set_max; /* max number of sets */
+ int is_deleted; /* deleted by ip_set_net_exit */
+};
+static int ip_set_net_id __read_mostly;
+
+static inline struct ip_set_net *ip_set_pernet(struct net *net)
+{
+ return net_generic(net, ip_set_net_id);
+}
#define IP_SET_INC 64
#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
@@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
/* When the nfnl mutex is held: */
#define nfnl_dereference(p) \
rcu_dereference_protected(p, 1)
-#define nfnl_set(id) \
- nfnl_dereference(ip_set_list)[id]
+#define nfnl_set(inst, id) \
+ nfnl_dereference((inst)->ip_set_list)[id]
/*
* The set types are implemented in modules and registered set types
@@ -315,6 +326,60 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+typedef void (*destroyer)(void *);
+/* ipset data extension types, in size order */
+
+const struct ip_set_ext_type ip_set_extensions[] = {
+ [IPSET_EXT_ID_COUNTER] = {
+ .type = IPSET_EXT_COUNTER,
+ .flag = IPSET_FLAG_WITH_COUNTERS,
+ .len = sizeof(struct ip_set_counter),
+ .align = __alignof__(struct ip_set_counter),
+ },
+ [IPSET_EXT_ID_TIMEOUT] = {
+ .type = IPSET_EXT_TIMEOUT,
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [IPSET_EXT_ID_COMMENT] = {
+ .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
+ .flag = IPSET_FLAG_WITH_COMMENT,
+ .len = sizeof(struct ip_set_comment),
+ .align = __alignof__(struct ip_set_comment),
+ .destroy = (destroyer) ip_set_comment_free,
+ },
+};
+EXPORT_SYMBOL_GPL(ip_set_extensions);
+
+static inline bool
+add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
+{
+ return ip_set_extensions[id].flag ?
+ (flags & ip_set_extensions[id].flag) :
+ !!tb[IPSET_ATTR_TIMEOUT];
+}
+
+size_t
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+{
+ enum ip_set_ext_id id;
+ size_t offset = 0;
+ u32 cadt_flags = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS])
+ cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
+ if (!add_extension(id, cadt_flags, tb))
+ continue;
+ offset += ALIGN(len + offset, ip_set_extensions[id].align);
+ set->offset[id] = offset;
+ set->extensions |= ip_set_extensions[id].type;
+ offset += ip_set_extensions[id].len;
+ }
+ return len + offset;
+}
+EXPORT_SYMBOL_GPL(ip_set_elem_len);
+
int
ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
@@ -334,6 +399,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
ext->packets = be64_to_cpu(nla_get_be64(
tb[IPSET_ATTR_PACKETS]));
}
+ if (tb[IPSET_ATTR_COMMENT]) {
+ if (!(set->extensions & IPSET_EXT_COMMENT))
+ return -IPSET_ERR_COMMENT;
+ ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
@@ -374,13 +445,14 @@ __ip_set_put(struct ip_set *set)
*/
static inline struct ip_set *
-ip_set_rcu_get(ip_set_id_t index)
+ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
+ struct ip_set_net *inst = ip_set_pernet(net);
rcu_read_lock();
/* ip_set_list itself needs to be protected */
- set = rcu_dereference(ip_set_list)[index];
+ set = rcu_dereference(inst->ip_set_list)[index];
rcu_read_unlock();
return set;
@@ -390,7 +462,8 @@ int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(index);
+ struct ip_set *set = ip_set_rcu_get(
+ dev_net(par->in ? par->in : par->out), index);
int ret = 0;
BUG_ON(set == NULL);
@@ -428,7 +501,8 @@ int
ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(index);
+ struct ip_set *set = ip_set_rcu_get(
+ dev_net(par->in ? par->in : par->out), index);
int ret;
BUG_ON(set == NULL);
@@ -450,7 +524,8 @@ int
ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(index);
+ struct ip_set *set = ip_set_rcu_get(
+ dev_net(par->in ? par->in : par->out), index);
int ret = 0;
BUG_ON(set == NULL);
@@ -474,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del);
*
*/
ip_set_id_t
-ip_set_get_byname(const char *name, struct ip_set **set)
+ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
{
ip_set_id_t i, index = IPSET_INVALID_ID;
struct ip_set *s;
+ struct ip_set_net *inst = ip_set_pernet(net);
rcu_read_lock();
- for (i = 0; i < ip_set_max; i++) {
- s = rcu_dereference(ip_set_list)[i];
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = rcu_dereference(inst->ip_set_list)[i];
if (s != NULL && STREQ(s->name, name)) {
__ip_set_get(s);
index = i;
@@ -501,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
* to be valid, after calling this function.
*
*/
-void
-ip_set_put_byindex(ip_set_id_t index)
+
+static inline void
+__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
{
struct ip_set *set;
rcu_read_lock();
- set = rcu_dereference(ip_set_list)[index];
+ set = rcu_dereference(inst->ip_set_list)[index];
if (set != NULL)
__ip_set_put(set);
rcu_read_unlock();
}
+
+void
+ip_set_put_byindex(struct net *net, ip_set_id_t index)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ __ip_set_put_byindex(inst, index);
+}
EXPORT_SYMBOL_GPL(ip_set_put_byindex);
/*
@@ -522,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
*
*/
const char *
-ip_set_name_byindex(ip_set_id_t index)
+ip_set_name_byindex(struct net *net, ip_set_id_t index)
{
- const struct ip_set *set = ip_set_rcu_get(index);
+ const struct ip_set *set = ip_set_rcu_get(net, index);
BUG_ON(set == NULL);
BUG_ON(set->ref == 0);
@@ -546,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex);
* The nfnl mutex is used in the function.
*/
ip_set_id_t
-ip_set_nfnl_get(const char *name)
+ip_set_nfnl_get(struct net *net, const char *name)
{
ip_set_id_t i, index = IPSET_INVALID_ID;
struct ip_set *s;
+ struct ip_set_net *inst = ip_set_pernet(net);
nfnl_lock(NFNL_SUBSYS_IPSET);
- for (i = 0; i < ip_set_max; i++) {
- s = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = nfnl_set(inst, i);
if (s != NULL && STREQ(s->name, name)) {
__ip_set_get(s);
index = i;
@@ -573,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get);
* The nfnl mutex is used in the function.
*/
ip_set_id_t
-ip_set_nfnl_get_byindex(ip_set_id_t index)
+ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
+ struct ip_set_net *inst = ip_set_pernet(net);
- if (index > ip_set_max)
+ if (index > inst->ip_set_max)
return IPSET_INVALID_ID;
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(index);
+ set = nfnl_set(inst, index);
if (set)
__ip_set_get(set);
else
@@ -600,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
* The nfnl mutex is used in the function.
*/
void
-ip_set_nfnl_put(ip_set_id_t index)
+ip_set_nfnl_put(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
+ struct ip_set_net *inst = ip_set_pernet(net);
+
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(index);
- if (set != NULL)
- __ip_set_put(set);
+ if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
+ set = nfnl_set(inst, index);
+ if (set != NULL)
+ __ip_set_put(set);
+ }
nfnl_unlock(NFNL_SUBSYS_IPSET);
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -664,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = {
};
static struct ip_set *
-find_set_and_id(const char *name, ip_set_id_t *id)
+find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
{
struct ip_set *set = NULL;
ip_set_id_t i;
*id = IPSET_INVALID_ID;
- for (i = 0; i < ip_set_max; i++) {
- set = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = nfnl_set(inst, i);
if (set != NULL && STREQ(set->name, name)) {
*id = i;
break;
@@ -681,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id)
}
static inline struct ip_set *
-find_set(const char *name)
+find_set(struct ip_set_net *inst, const char *name)
{
ip_set_id_t id;
- return find_set_and_id(name, &id);
+ return find_set_and_id(inst, name, &id);
}
static int
-find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
+find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
+ struct ip_set **set)
{
struct ip_set *s;
ip_set_id_t i;
*index = IPSET_INVALID_ID;
- for (i = 0; i < ip_set_max; i++) {
- s = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = nfnl_set(inst, i);
if (s == NULL) {
if (*index == IPSET_INVALID_ID)
*index = i;
@@ -725,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct net *net = sock_net(ctnl);
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
@@ -783,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
goto put_out;
}
- ret = set->type->create(set, tb, flags);
+ ret = set->type->create(net, set, tb, flags);
if (ret != 0)
goto put_out;
@@ -794,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
* by the nfnl mutex. Find the first free index in ip_set_list
* and check clashing.
*/
- ret = find_free_id(set->name, &index, &clash);
+ ret = find_free_id(inst, set->name, &index, &clash);
if (ret == -EEXIST) {
/* If this is the same set and requested, ignore error */
if ((flags & IPSET_FLAG_EXIST) &&
@@ -807,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
goto cleanup;
} else if (ret == -IPSET_ERR_MAX_SETS) {
struct ip_set **list, **tmp;
- ip_set_id_t i = ip_set_max + IP_SET_INC;
+ ip_set_id_t i = inst->ip_set_max + IP_SET_INC;
- if (i < ip_set_max || i == IPSET_INVALID_ID)
+ if (i < inst->ip_set_max || i == IPSET_INVALID_ID)
/* Wraparound */
goto cleanup;
@@ -817,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
- tmp = nfnl_dereference(ip_set_list);
- memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max);
- rcu_assign_pointer(ip_set_list, list);
+ tmp = nfnl_dereference(inst->ip_set_list);
+ memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max);
+ rcu_assign_pointer(inst->ip_set_list, list);
/* Make sure all current packets have passed through */
synchronize_net();
/* Use new list */
- index = ip_set_max;
- ip_set_max = i;
+ index = inst->ip_set_max;
+ inst->ip_set_max = i;
kfree(tmp);
ret = 0;
} else if (ret)
@@ -834,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
* Finally! Add our shiny new set to the list, and be done.
*/
pr_debug("create: '%s' created with index %u!\n", set->name, index);
- nfnl_set(index) = set;
+ nfnl_set(inst, index) = set;
return ret;
@@ -857,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
};
static void
-ip_set_destroy_set(ip_set_id_t index)
+ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
{
- struct ip_set *set = nfnl_set(index);
+ struct ip_set *set = nfnl_set(inst, index);
pr_debug("set: %s\n", set->name);
- nfnl_set(index) = NULL;
+ nfnl_set(inst, index) = NULL;
/* Must call it without holding any lock */
set->variant->destroy(set);
@@ -875,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *s;
ip_set_id_t i;
int ret = 0;
@@ -894,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
*/
read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
- for (i = 0; i < ip_set_max; i++) {
- s = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = nfnl_set(inst, i);
if (s != NULL && s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
}
}
read_unlock_bh(&ip_set_ref_lock);
- for (i = 0; i < ip_set_max; i++) {
- s = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = nfnl_set(inst, i);
if (s != NULL)
- ip_set_destroy_set(i);
+ ip_set_destroy_set(inst, i);
}
} else {
- s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i);
+ s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+ &i);
if (s == NULL) {
ret = -ENOENT;
goto out;
@@ -918,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
}
read_unlock_bh(&ip_set_ref_lock);
- ip_set_destroy_set(i);
+ ip_set_destroy_set(inst, i);
}
return 0;
out:
@@ -943,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *s;
ip_set_id_t i;
@@ -950,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
return -IPSET_ERR_PROTOCOL;
if (!attr[IPSET_ATTR_SETNAME]) {
- for (i = 0; i < ip_set_max; i++) {
- s = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = nfnl_set(inst, i);
if (s != NULL)
ip_set_flush_set(s);
}
} else {
- s = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+ s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
if (s == NULL)
return -ENOENT;
@@ -982,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set, *s;
const char *name2;
ip_set_id_t i;
@@ -992,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
attr[IPSET_ATTR_SETNAME2] == NULL))
return -IPSET_ERR_PROTOCOL;
- set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
if (set == NULL)
return -ENOENT;
@@ -1003,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
}
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
- for (i = 0; i < ip_set_max; i++) {
- s = nfnl_set(i);
+ for (i = 0; i < inst->ip_set_max; i++) {
+ s = nfnl_set(inst, i);
if (s != NULL && STREQ(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
@@ -1031,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *from, *to;
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
@@ -1040,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
attr[IPSET_ATTR_SETNAME2] == NULL))
return -IPSET_ERR_PROTOCOL;
- from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id);
+ from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
+ &from_id);
if (from == NULL)
return -ENOENT;
- to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id);
+ to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
+ &to_id);
if (to == NULL)
return -IPSET_ERR_EXIST_SETNAME2;
@@ -1061,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref);
- nfnl_set(from_id) = to;
- nfnl_set(to_id) = from;
+ nfnl_set(inst, from_id) = to;
+ nfnl_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
return 0;
@@ -1081,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
+ struct ip_set_net *inst = (struct ip_set_net *)cb->data;
if (cb->args[2]) {
- pr_debug("release set %s\n", nfnl_set(cb->args[1])->name);
- ip_set_put_byindex((ip_set_id_t) cb->args[1]);
+ pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name);
+ __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]);
}
return 0;
}
@@ -1109,6 +1211,7 @@ dump_init(struct netlink_callback *cb)
struct nlattr *attr = (void *)nlh + min_len;
u32 dump_type;
ip_set_id_t index;
+ struct ip_set_net *inst = (struct ip_set_net *)cb->data;
/* Second pass, so parser can't fail */
nla_parse(cda, IPSET_ATTR_CMD_MAX,
@@ -1122,7 +1225,7 @@ dump_init(struct netlink_callback *cb)
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
- set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]),
+ set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
&index);
if (set == NULL)
return -ENOENT;
@@ -1150,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
u32 dump_type, dump_flags;
int ret = 0;
+ struct ip_set_net *inst = (struct ip_set_net *)cb->data;
if (!cb->args[0]) {
ret = dump_init(cb);
@@ -1163,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
}
}
- if (cb->args[1] >= ip_set_max)
+ if (cb->args[1] >= inst->ip_set_max)
goto out;
dump_type = DUMP_TYPE(cb->args[0]);
dump_flags = DUMP_FLAGS(cb->args[0]);
- max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+ max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max;
dump_last:
pr_debug("args[0]: %u %u args[1]: %ld\n",
dump_type, dump_flags, cb->args[1]);
for (; cb->args[1] < max; cb->args[1]++) {
index = (ip_set_id_t) cb->args[1];
- set = nfnl_set(index);
+ set = nfnl_set(inst, index);
if (set == NULL) {
if (dump_type == DUMP_ONE) {
ret = -ENOENT;
@@ -1252,8 +1356,8 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[2]) {
- pr_debug("release set %s\n", nfnl_set(index)->name);
- ip_set_put_byindex(index);
+ pr_debug("release set %s\n", nfnl_set(inst, index)->name);
+ __ip_set_put_byindex(inst, index);
cb->args[2] = 0;
}
out:
@@ -1271,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+
if (unlikely(protocol_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1278,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
struct netlink_dump_control c = {
.dump = ip_set_dump_start,
.done = ip_set_dump_done,
+ .data = (void *)inst
};
return netlink_dump_start(ctnl, skb, nlh, &c);
}
@@ -1356,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
const struct nlattr *nla;
@@ -1374,7 +1482,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
attr[IPSET_ATTR_LINENO] == NULL))))
return -IPSET_ERR_PROTOCOL;
- set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
if (set == NULL)
return -ENOENT;
@@ -1410,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
const struct nlattr *nla;
@@ -1428,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
attr[IPSET_ATTR_LINENO] == NULL))))
return -IPSET_ERR_PROTOCOL;
- set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
if (set == NULL)
return -ENOENT;
@@ -1464,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
int ret = 0;
@@ -1474,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
!flag_nested(attr[IPSET_ATTR_DATA])))
return -IPSET_ERR_PROTOCOL;
- set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
if (set == NULL)
return -ENOENT;
@@ -1499,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
+ struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
const struct ip_set *set;
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1508,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
attr[IPSET_ATTR_SETNAME] == NULL))
return -IPSET_ERR_PROTOCOL;
- set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+ set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
if (set == NULL)
return -ENOENT;
@@ -1733,8 +1844,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
unsigned int *op;
void *data;
int copylen = *len, ret = 0;
+ struct net *net = sock_net(sk);
+ struct ip_set_net *inst = ip_set_pernet(net);
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (optval != SO_IP_SET)
return -EBADF;
@@ -1783,22 +1896,39 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
}
req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
nfnl_lock(NFNL_SUBSYS_IPSET);
- find_set_and_id(req_get->set.name, &id);
+ find_set_and_id(inst, req_get->set.name, &id);
req_get->set.index = id;
nfnl_unlock(NFNL_SUBSYS_IPSET);
goto copy;
}
+ case IP_SET_OP_GET_FNAME: {
+ struct ip_set_req_get_set_family *req_get = data;
+ ip_set_id_t id;
+
+ if (*len != sizeof(struct ip_set_req_get_set_family)) {
+ ret = -EINVAL;
+ goto done;
+ }
+ req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ find_set_and_id(inst, req_get->set.name, &id);
+ req_get->set.index = id;
+ if (id != IPSET_INVALID_ID)
+ req_get->family = nfnl_set(inst, id)->family;
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ goto copy;
+ }
case IP_SET_OP_GET_BYINDEX: {
struct ip_set_req_get_set *req_get = data;
struct ip_set *set;
if (*len != sizeof(struct ip_set_req_get_set) ||
- req_get->set.index >= ip_set_max) {
+ req_get->set.index >= inst->ip_set_max) {
ret = -EINVAL;
goto done;
}
nfnl_lock(NFNL_SUBSYS_IPSET);
- set = nfnl_set(req_get->set.index);
+ set = nfnl_set(inst, req_get->set.index);
strncpy(req_get->set.name, set ? set->name : "",
IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET);
@@ -1827,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = {
.owner = THIS_MODULE,
};
-static int __init
-ip_set_init(void)
+static int __net_init
+ip_set_net_init(struct net *net)
{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
struct ip_set **list;
- int ret;
- if (max_sets)
- ip_set_max = max_sets;
- if (ip_set_max >= IPSET_INVALID_ID)
- ip_set_max = IPSET_INVALID_ID - 1;
+ inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX;
+ if (inst->ip_set_max >= IPSET_INVALID_ID)
+ inst->ip_set_max = IPSET_INVALID_ID - 1;
- list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL);
+ list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
if (!list)
return -ENOMEM;
+ inst->is_deleted = 0;
+ rcu_assign_pointer(inst->ip_set_list, list);
+ pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+ return 0;
+}
+
+static void __net_exit
+ip_set_net_exit(struct net *net)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+
+ struct ip_set *set = NULL;
+ ip_set_id_t i;
+
+ inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
+
+ for (i = 0; i < inst->ip_set_max; i++) {
+ set = nfnl_set(inst, i);
+ if (set != NULL)
+ ip_set_destroy_set(inst, i);
+ }
+ kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+}
+
+static struct pernet_operations ip_set_net_ops = {
+ .init = ip_set_net_init,
+ .exit = ip_set_net_exit,
+ .id = &ip_set_net_id,
+ .size = sizeof(struct ip_set_net)
+};
+
- rcu_assign_pointer(ip_set_list, list);
- ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+static int __init
+ip_set_init(void)
+{
+ int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
if (ret != 0) {
pr_err("ip_set: cannot register with nfnetlink.\n");
- kfree(list);
return ret;
}
ret = nf_register_sockopt(&so_set);
if (ret != 0) {
pr_err("SO_SET registry failed: %d\n", ret);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
- kfree(list);
return ret;
}
-
- pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL);
+ ret = register_pernet_subsys(&ip_set_net_ops);
+ if (ret) {
+ pr_err("ip_set: cannot register pernet_subsys.\n");
+ nf_unregister_sockopt(&so_set);
+ nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+ return ret;
+ }
return 0;
}
static void __exit
ip_set_fini(void)
{
- struct ip_set **list = rcu_dereference_protected(ip_set_list, 1);
-
- /* There can't be any existing set */
+ unregister_pernet_subsys(&ip_set_net_ops);
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
- kfree(list);
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index dac156f..29fb01d 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -102,9 +102,25 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
int protocol = iph->protocol;
/* See comments at tcp_match in ip_tables.c */
- if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET))
+ if (protocol <= 0)
return false;
+ if (ntohs(iph->frag_off) & IP_OFFSET)
+ switch (protocol) {
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_ICMP:
+ /* Port info not available for fragment offset > 0 */
+ return false;
+ default:
+ /* Other protocols doesn't have ports,
+ so we can match fragments */
+ *proto = protocol;
+ return true;
+ }
+
return get_port(skb, protocol, protooff, src, port, proto);
}
EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 707bc52..6a80dbd 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -15,8 +15,7 @@
#define rcu_dereference_bh(p) rcu_dereference(p)
#endif
-#define CONCAT(a, b) a##b
-#define TOKEN(a, b) CONCAT(a, b)
+#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
@@ -78,10 +77,14 @@ struct htable {
#define hbucket(h, i) (&((h)->bucket[i]))
+#ifndef IPSET_NET_COUNT
+#define IPSET_NET_COUNT 1
+#endif
+
/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
- u8 cidr; /* the different cidr values in the set */
- u32 nets; /* number of elements per cidr */
+ u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */
+ u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */
};
/* Compute the hash table size */
@@ -114,23 +117,6 @@ htable_bits(u32 hashsize)
return bits;
}
-/* Destroy the hashtable part of the set */
-static void
-ahash_destroy(struct htable *t)
-{
- struct hbucket *n;
- u32 i;
-
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- if (n->size)
- /* FIXME: use slab cache */
- kfree(n->value);
- }
-
- ip_set_free(t);
-}
-
static int
hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
{
@@ -156,30 +142,30 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
}
#ifdef IP_SET_HASH_WITH_NETS
+#if IPSET_NET_COUNT > 1
+#define __CIDR(cidr, i) (cidr[i])
+#else
+#define __CIDR(cidr, i) (cidr)
+#endif
#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
-#define CIDR(cidr) (cidr + 1)
+#define CIDR(cidr, i) (__CIDR(cidr, i) + 1)
#else
-#define CIDR(cidr) (cidr)
+#define CIDR(cidr, i) (__CIDR(cidr, i))
#endif
#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
#ifdef IP_SET_HASH_WITH_MULTI
-#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1)
+#define NLEN(family) (SET_HOST_MASK(family) + 1)
#else
-#define NETS_LENGTH(family) SET_HOST_MASK(family)
+#define NLEN(family) SET_HOST_MASK(family)
#endif
#else
-#define NETS_LENGTH(family) 0
+#define NLEN(family) 0
#endif /* IP_SET_HASH_WITH_NETS */
-#define ext_timeout(e, h) \
-(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT])
-#define ext_counter(e, h) \
-(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER])
-
#endif /* _IP_SET_HASH_GEN_H */
/* Family dependent templates */
@@ -194,6 +180,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_data_next
#undef mtype_elem
+#undef mtype_ahash_destroy
+#undef mtype_ext_cleanup
#undef mtype_add_cidr
#undef mtype_del_cidr
#undef mtype_ahash_memsize
@@ -220,41 +208,44 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef HKEY
-#define mtype_data_equal TOKEN(MTYPE, _data_equal)
+#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal)
#ifdef IP_SET_HASH_WITH_NETS
-#define mtype_do_data_match TOKEN(MTYPE, _do_data_match)
+#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match)
#else
#define mtype_do_data_match(d) 1
#endif
-#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags)
-#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags)
-#define mtype_data_netmask TOKEN(MTYPE, _data_netmask)
-#define mtype_data_list TOKEN(MTYPE, _data_list)
-#define mtype_data_next TOKEN(MTYPE, _data_next)
-#define mtype_elem TOKEN(MTYPE, _elem)
-#define mtype_add_cidr TOKEN(MTYPE, _add_cidr)
-#define mtype_del_cidr TOKEN(MTYPE, _del_cidr)
-#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize)
-#define mtype_flush TOKEN(MTYPE, _flush)
-#define mtype_destroy TOKEN(MTYPE, _destroy)
-#define mtype_gc_init TOKEN(MTYPE, _gc_init)
-#define mtype_same_set TOKEN(MTYPE, _same_set)
-#define mtype_kadt TOKEN(MTYPE, _kadt)
-#define mtype_uadt TOKEN(MTYPE, _uadt)
+#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags)
+#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem)
+#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags)
+#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask)
+#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list)
+#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next)
+#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy)
+#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
+#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr)
+#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr)
+#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize)
+#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
+#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
+#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
+#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
#define mtype MTYPE
-#define mtype_elem TOKEN(MTYPE, _elem)
-#define mtype_add TOKEN(MTYPE, _add)
-#define mtype_del TOKEN(MTYPE, _del)
-#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs)
-#define mtype_test TOKEN(MTYPE, _test)
-#define mtype_expire TOKEN(MTYPE, _expire)
-#define mtype_resize TOKEN(MTYPE, _resize)
-#define mtype_head TOKEN(MTYPE, _head)
-#define mtype_list TOKEN(MTYPE, _list)
-#define mtype_gc TOKEN(MTYPE, _gc)
-#define mtype_variant TOKEN(MTYPE, _variant)
-#define mtype_data_match TOKEN(MTYPE, _data_match)
+#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+#define mtype_add IPSET_TOKEN(MTYPE, _add)
+#define mtype_del IPSET_TOKEN(MTYPE, _del)
+#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
+#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
+#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
+#define mtype_head IPSET_TOKEN(MTYPE, _head)
+#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
+#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
#ifndef HKEY_DATALEN
#define HKEY_DATALEN sizeof(struct mtype_elem)
@@ -269,13 +260,10 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
/* The generic hash structure */
struct htype {
- struct htable *table; /* the hash table */
+ struct htable __rcu *table; /* the hash table */
u32 maxelem; /* max elements in the hash */
u32 elements; /* current element (vs timeout) */
u32 initval; /* random jhash init value */
- u32 timeout; /* timeout value, if enabled */
- size_t dsize; /* data struct size */
- size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
struct timer_list gc; /* garbage collection when timeout enabled */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
@@ -297,49 +285,49 @@ struct htype {
/* Network cidr size book keeping when the hash stores different
* sized networks */
static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
int i, j;
/* Add in increasing prefix order, so larger cidr first */
- for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
+ for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) {
if (j != -1)
continue;
- else if (h->nets[i].cidr < cidr)
+ else if (h->nets[i].cidr[n] < cidr)
j = i;
- else if (h->nets[i].cidr == cidr) {
- h->nets[i].nets++;
+ else if (h->nets[i].cidr[n] == cidr) {
+ h->nets[i].nets[n]++;
return;
}
}
if (j != -1) {
for (; i > j; i--) {
- h->nets[i].cidr = h->nets[i - 1].cidr;
- h->nets[i].nets = h->nets[i - 1].nets;
+ h->nets[i].cidr[n] = h->nets[i - 1].cidr[n];
+ h->nets[i].nets[n] = h->nets[i - 1].nets[n];
}
}
- h->nets[i].cidr = cidr;
- h->nets[i].nets = 1;
+ h->nets[i].cidr[n] = cidr;
+ h->nets[i].nets[n] = 1;
}
static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
+mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
u8 i, j, net_end = nets_length - 1;
for (i = 0; i < nets_length; i++) {
- if (h->nets[i].cidr != cidr)
+ if (h->nets[i].cidr[n] != cidr)
continue;
- if (h->nets[i].nets > 1 || i == net_end ||
- h->nets[i + 1].nets == 0) {
- h->nets[i].nets--;
+ if (h->nets[i].nets[n] > 1 || i == net_end ||
+ h->nets[i + 1].nets[n] == 0) {
+ h->nets[i].nets[n]--;
return;
}
- for (j = i; j < net_end && h->nets[j].nets; j++) {
- h->nets[j].cidr = h->nets[j + 1].cidr;
- h->nets[j].nets = h->nets[j + 1].nets;
+ for (j = i; j < net_end && h->nets[j].nets[n]; j++) {
+ h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+ h->nets[j].nets[n] = h->nets[j + 1].nets[n];
}
- h->nets[j].nets = 0;
+ h->nets[j].nets[n] = 0;
return;
}
}
@@ -347,10 +335,10 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
/* Calculate the actual memory size of the set data */
static size_t
-mtype_ahash_memsize(const struct htype *h, u8 nets_length)
+mtype_ahash_memsize(const struct htype *h, const struct htable *t,
+ u8 nets_length, size_t dsize)
{
u32 i;
- struct htable *t = h->table;
size_t memsize = sizeof(*h)
+ sizeof(*t)
#ifdef IP_SET_HASH_WITH_NETS
@@ -359,35 +347,70 @@ mtype_ahash_memsize(const struct htype *h, u8 nets_length)
+ jhash_size(t->htable_bits) * sizeof(struct hbucket);
for (i = 0; i < jhash_size(t->htable_bits); i++)
- memsize += t->bucket[i].size * h->dsize;
+ memsize += t->bucket[i].size * dsize;
return memsize;
}
+/* Get the ith element from the array block n */
+#define ahash_data(n, i, dsize) \
+ ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
+
+static void
+mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
+{
+ int i;
+
+ for (i = 0; i < n->pos; i++)
+ ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+}
+
/* Flush a hash type of set: destroy all elements */
static void
mtype_flush(struct ip_set *set)
{
struct htype *h = set->data;
- struct htable *t = h->table;
+ struct htable *t;
struct hbucket *n;
u32 i;
+ t = rcu_dereference_bh_nfnl(h->table);
for (i = 0; i < jhash_size(t->htable_bits); i++) {
n = hbucket(t, i);
if (n->size) {
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
n->size = n->pos = 0;
/* FIXME: use slab cache */
kfree(n->value);
}
}
#ifdef IP_SET_HASH_WITH_NETS
- memset(h->nets, 0, sizeof(struct net_prefixes)
- * NETS_LENGTH(set->family));
+ memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
#endif
h->elements = 0;
}
+/* Destroy the hashtable part of the set */
+static void
+mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
+{
+ struct hbucket *n;
+ u32 i;
+
+ for (i = 0; i < jhash_size(t->htable_bits); i++) {
+ n = hbucket(t, i);
+ if (n->size) {
+ if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ kfree(n->value);
+ }
+ }
+
+ ip_set_free(t);
+}
+
/* Destroy a hash type of set */
static void
mtype_destroy(struct ip_set *set)
@@ -397,7 +420,7 @@ mtype_destroy(struct ip_set *set)
if (set->extensions & IPSET_EXT_TIMEOUT)
del_timer_sync(&h->gc);
- ahash_destroy(h->table);
+ mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true);
#ifdef IP_SET_HASH_WITH_RBTREE
rbtree_destroy(&h->rbtree);
#endif
@@ -414,10 +437,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
init_timer(&h->gc);
h->gc.data = (unsigned long) set;
h->gc.function = gc;
- h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+ h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&h->gc);
pr_debug("gc initialized, run in every %u\n",
- IPSET_GC_PERIOD(h->timeout));
+ IPSET_GC_PERIOD(set->timeout));
}
static bool
@@ -428,37 +451,40 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
/* Resizing changes htable_bits, so we ignore it */
return x->maxelem == y->maxelem &&
- x->timeout == y->timeout &&
+ a->timeout == b->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
x->netmask == y->netmask &&
#endif
a->extensions == b->extensions;
}
-/* Get the ith element from the array block n */
-#define ahash_data(n, i, dsize) \
- ((struct mtype_elem *)((n)->value + ((i) * (dsize))))
-
/* Delete expired elements from the hashtable */
static void
-mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
+mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
{
- struct htable *t = h->table;
+ struct htable *t;
struct hbucket *n;
struct mtype_elem *data;
u32 i;
int j;
+#ifdef IP_SET_HASH_WITH_NETS
+ u8 k;
+#endif
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
for (i = 0; i < jhash_size(t->htable_bits); i++) {
n = hbucket(t, i);
for (j = 0; j < n->pos; j++) {
data = ahash_data(n, j, dsize);
- if (ip_set_timeout_expired(ext_timeout(data, h))) {
+ if (ip_set_timeout_expired(ext_timeout(data, set))) {
pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
- mtype_del_cidr(h, CIDR(data->cidr),
- nets_length);
+ for (k = 0; k < IPSET_NET_COUNT; k++)
+ mtype_del_cidr(h, CIDR(data->cidr, k),
+ nets_length, k);
#endif
+ ip_set_ext_destroy(set, data);
if (j != n->pos - 1)
/* Not last one */
memcpy(data,
@@ -481,6 +507,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
n->value = tmp;
}
}
+ rcu_read_unlock_bh();
}
static void
@@ -491,10 +518,10 @@ mtype_gc(unsigned long ul_set)
pr_debug("called\n");
write_lock_bh(&set->lock);
- mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+ mtype_expire(set, h, NLEN(set->family), set->dsize);
write_unlock_bh(&set->lock);
- h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
+ h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&h->gc);
}
@@ -505,7 +532,7 @@ static int
mtype_resize(struct ip_set *set, bool retried)
{
struct htype *h = set->data;
- struct htable *t, *orig = h->table;
+ struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table);
u8 htable_bits = orig->htable_bits;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
@@ -520,8 +547,7 @@ mtype_resize(struct ip_set *set, bool retried)
if (SET_WITH_TIMEOUT(set) && !retried) {
i = h->elements;
write_lock_bh(&set->lock);
- mtype_expire(set->data, NETS_LENGTH(set->family),
- h->dsize);
+ mtype_expire(set, set->data, NLEN(set->family), set->dsize);
write_unlock_bh(&set->lock);
if (h->elements < i)
return 0;
@@ -548,25 +574,25 @@ retry:
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
n = hbucket(orig, i);
for (j = 0; j < n->pos; j++) {
- data = ahash_data(n, j, h->dsize);
+ data = ahash_data(n, j, set->dsize);
#ifdef IP_SET_HASH_WITH_NETS
flags = 0;
mtype_data_reset_flags(data, &flags);
#endif
m = hbucket(t, HKEY(data, h->initval, htable_bits));
- ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize);
+ ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize);
if (ret < 0) {
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_reset_flags(data, &flags);
#endif
read_unlock_bh(&set->lock);
- ahash_destroy(t);
+ mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
goto retry;
return ret;
}
- d = ahash_data(m, m->pos++, h->dsize);
- memcpy(d, data, h->dsize);
+ d = ahash_data(m, m->pos++, set->dsize);
+ memcpy(d, data, set->dsize);
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_reset_flags(d, &flags);
#endif
@@ -581,7 +607,7 @@ retry:
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- ahash_destroy(orig);
+ mtype_ahash_destroy(set, orig, false);
return 0;
}
@@ -604,7 +630,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
/* FIXME: when set is full, we slow down here */
- mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+ mtype_expire(set, h, NLEN(set->family), set->dsize);
if (h->elements >= h->maxelem) {
if (net_ratelimit())
@@ -618,11 +644,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
key = HKEY(value, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
- data = ahash_data(n, i, h->dsize);
+ data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
if (flag_exist ||
(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, h)))) {
+ ip_set_timeout_expired(ext_timeout(data, set)))) {
/* Just the extensions could be overwritten */
j = i;
goto reuse_slot;
@@ -633,30 +659,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
/* Reuse first timed out entry */
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, h)) &&
+ ip_set_timeout_expired(ext_timeout(data, set)) &&
j != AHASH_MAX(h) + 1)
j = i;
}
reuse_slot:
if (j != AHASH_MAX(h) + 1) {
/* Fill out reused slot */
- data = ahash_data(n, j, h->dsize);
+ data = ahash_data(n, j, set->dsize);
#ifdef IP_SET_HASH_WITH_NETS
- mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
- mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+ for (i = 0; i < IPSET_NET_COUNT; i++) {
+ mtype_del_cidr(h, CIDR(data->cidr, i),
+ NLEN(set->family), i);
+ mtype_add_cidr(h, CIDR(d->cidr, i),
+ NLEN(set->family), i);
+ }
#endif
+ ip_set_ext_destroy(set, data);
} else {
/* Use/create a new slot */
TUNE_AHASH_MAX(h, multi);
- ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
+ ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize);
if (ret != 0) {
if (ret == -EAGAIN)
mtype_data_next(&h->next, d);
goto out;
}
- data = ahash_data(n, n->pos++, h->dsize);
+ data = ahash_data(n, n->pos++, set->dsize);
#ifdef IP_SET_HASH_WITH_NETS
- mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family),
+ i);
#endif
h->elements++;
}
@@ -665,9 +698,11 @@ reuse_slot:
mtype_data_set_flags(data, flags);
#endif
if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
+ ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(data, h), ext);
+ ip_set_init_counter(ext_counter(data, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(ext_comment(data, set), ext);
out:
rcu_read_unlock_bh();
@@ -682,47 +717,60 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
{
struct htype *h = set->data;
- struct htable *t = h->table;
+ struct htable *t;
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i;
+ int i, ret = -IPSET_ERR_EXIST;
+#ifdef IP_SET_HASH_WITH_NETS
+ u8 j;
+#endif
u32 key, multi = 0;
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
- data = ahash_data(n, i, h->dsize);
+ data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, h)))
- return -IPSET_ERR_EXIST;
+ ip_set_timeout_expired(ext_timeout(data, set)))
+ goto out;
if (i != n->pos - 1)
/* Not last one */
- memcpy(data, ahash_data(n, n->pos - 1, h->dsize),
- h->dsize);
+ memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
+ set->dsize);
n->pos--;
h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
- mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+ for (j = 0; j < IPSET_NET_COUNT; j++)
+ mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family),
+ j);
#endif
+ ip_set_ext_destroy(set, data);
if (n->pos + AHASH_INIT_SIZE < n->size) {
void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
- * h->dsize,
+ * set->dsize,
GFP_ATOMIC);
- if (!tmp)
- return 0;
+ if (!tmp) {
+ ret = 0;
+ goto out;
+ }
n->size -= AHASH_INIT_SIZE;
- memcpy(tmp, n->value, n->size * h->dsize);
+ memcpy(tmp, n->value, n->size * set->dsize);
kfree(n->value);
n->value = tmp;
}
- return 0;
+ ret = 0;
+ goto out;
}
- return -IPSET_ERR_EXIST;
+out:
+ rcu_read_unlock_bh();
+ return ret;
}
static inline int
@@ -730,8 +778,7 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(data,
- (struct htype *)(set->data)),
+ ip_set_update_counter(ext_counter(data, set),
ext, mext, flags);
return mtype_do_data_match(data);
}
@@ -745,25 +792,38 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
struct ip_set_ext *mext, u32 flags)
{
struct htype *h = set->data;
- struct htable *t = h->table;
+ struct htable *t = rcu_dereference_bh(h->table);
struct hbucket *n;
struct mtype_elem *data;
+#if IPSET_NET_COUNT == 2
+ struct mtype_elem orig = *d;
+ int i, j = 0, k;
+#else
int i, j = 0;
+#endif
u32 key, multi = 0;
- u8 nets_length = NETS_LENGTH(set->family);
+ u8 nets_length = NLEN(set->family);
pr_debug("test by nets\n");
- for (; j < nets_length && h->nets[j].nets && !multi; j++) {
- mtype_data_netmask(d, h->nets[j].cidr);
+ for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) {
+#if IPSET_NET_COUNT == 2
+ mtype_data_reset_elem(d, &orig);
+ mtype_data_netmask(d, h->nets[j].cidr[0], false);
+ for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi;
+ k++) {
+ mtype_data_netmask(d, h->nets[k].cidr[1], true);
+#else
+ mtype_data_netmask(d, h->nets[j].cidr[0]);
+#endif
key = HKEY(d, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
- data = ahash_data(n, i, h->dsize);
+ data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
if (SET_WITH_TIMEOUT(set)) {
if (!ip_set_timeout_expired(
- ext_timeout(data, h)))
+ ext_timeout(data, set)))
return mtype_data_match(data, ext,
mext, set,
flags);
@@ -774,6 +834,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
return mtype_data_match(data, ext,
mext, set, flags);
}
+#if IPSET_NET_COUNT == 2
+ }
+#endif
}
return 0;
}
@@ -785,30 +848,41 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
{
struct htype *h = set->data;
- struct htable *t = h->table;
+ struct htable *t;
struct mtype_elem *d = value;
struct hbucket *n;
struct mtype_elem *data;
- int i;
+ int i, ret = 0;
u32 key, multi = 0;
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
* try all possible network sizes */
- if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
- return mtype_test_cidrs(set, d, ext, mext, flags);
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family))
+ break;
+ if (i == IPSET_NET_COUNT) {
+ ret = mtype_test_cidrs(set, d, ext, mext, flags);
+ goto out;
+ }
#endif
key = HKEY(d, h->initval, t->htable_bits);
n = hbucket(t, key);
for (i = 0; i < n->pos; i++) {
- data = ahash_data(n, i, h->dsize);
+ data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi) &&
!(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, h))))
- return mtype_data_match(data, ext, mext, set, flags);
+ ip_set_timeout_expired(ext_timeout(data, set)))) {
+ ret = mtype_data_match(data, ext, mext, set, flags);
+ goto out;
+ }
}
- return 0;
+out:
+ rcu_read_unlock_bh();
+ return ret;
}
/* Reply a HEADER request: fill out the header part of the set */
@@ -816,18 +890,18 @@ static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
const struct htype *h = set->data;
+ const struct htable *t;
struct nlattr *nested;
size_t memsize;
- read_lock_bh(&set->lock);
- memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family));
- read_unlock_bh(&set->lock);
+ t = rcu_dereference_bh_nfnl(h->table);
+ memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
- htonl(jhash_size(h->table->htable_bits))) ||
+ htonl(jhash_size(t->htable_bits))) ||
nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
@@ -836,12 +910,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
goto nla_put_failure;
#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
- ((set->extensions & IPSET_EXT_TIMEOUT) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) ||
- ((set->extensions & IPSET_EXT_COUNTER) &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
- htonl(IPSET_FLAG_WITH_COUNTERS))))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+ goto nla_put_failure;
+ if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
@@ -856,7 +927,7 @@ mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
{
const struct htype *h = set->data;
- const struct htable *t = h->table;
+ const struct htable *t = rcu_dereference_bh_nfnl(h->table);
struct nlattr *atd, *nested;
const struct hbucket *n;
const struct mtype_elem *e;
@@ -874,9 +945,9 @@ mtype_list(const struct ip_set *set,
n = hbucket(t, cb->args[2]);
pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
for (i = 0; i < n->pos; i++) {
- e = ahash_data(n, i, h->dsize);
+ e = ahash_data(n, i, set->dsize);
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, h)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
pr_debug("list hash %lu hbucket %p i %u, data %p\n",
cb->args[2], n, i, e);
@@ -890,13 +961,7 @@ mtype_list(const struct ip_set *set,
}
if (mtype_data_list(skb, e))
goto nla_put_failure;
- if (SET_WITH_TIMEOUT(set) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(
- ext_timeout(e, h)))))
- goto nla_put_failure;
- if (SET_WITH_COUNTER(set) &&
- ip_set_put_counter(skb, ext_counter(e, h)))
+ if (ip_set_put_extensions(skb, set, e, true))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
@@ -909,24 +974,24 @@ mtype_list(const struct ip_set *set,
nla_put_failure:
nlmsg_trim(skb, incomplete);
- ipset_nest_end(skb, atd);
if (unlikely(first == cb->args[2])) {
pr_warning("Can't list set %s: one bucket does not fit into "
"a message. Please report it!\n", set->name);
cb->args[2] = 0;
return -EMSGSIZE;
}
+ ipset_nest_end(skb, atd);
return 0;
}
static int
-TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt);
+IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt);
static int
-TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
static const struct ip_set_type_variant mtype_variant = {
.kadt = mtype_kadt,
@@ -946,16 +1011,17 @@ static const struct ip_set_type_variant mtype_variant = {
#ifdef IP_SET_EMIT_CREATE
static int
-TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
+IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
+ struct nlattr *tb[], u32 flags)
{
u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
- u32 cadt_flags = 0;
u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask;
#endif
size_t hsize;
struct HTYPE *h;
+ struct htable *t;
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
@@ -1005,7 +1071,7 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
h->netmask = netmask;
#endif
get_random_bytes(&h->initval, sizeof(h->initval));
- h->timeout = IPSET_NO_TIMEOUT;
+ set->timeout = IPSET_NO_TIMEOUT;
hbits = htable_bits(hashsize);
hsize = htable_size(hbits);
@@ -1013,91 +1079,37 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
kfree(h);
return -ENOMEM;
}
- h->table = ip_set_alloc(hsize);
- if (!h->table) {
+ t = ip_set_alloc(hsize);
+ if (!t) {
kfree(h);
return -ENOMEM;
}
- h->table->htable_bits = hbits;
+ t->htable_bits = hbits;
+ rcu_assign_pointer(h->table, t);
set->data = h;
- if (set->family == NFPROTO_IPV4)
- set->variant = &TOKEN(HTYPE, 4_variant);
- else
- set->variant = &TOKEN(HTYPE, 6_variant);
-
- if (tb[IPSET_ATTR_CADT_FLAGS])
- cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
- if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
- set->extensions |= IPSET_EXT_COUNTER;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout =
- ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
- if (set->family == NFPROTO_IPV4) {
- h->dsize =
- sizeof(struct TOKEN(HTYPE, 4ct_elem));
- h->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct TOKEN(HTYPE, 4ct_elem),
- timeout);
- h->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct TOKEN(HTYPE, 4ct_elem),
- counter);
- TOKEN(HTYPE, 4_gc_init)(set,
- TOKEN(HTYPE, 4_gc));
- } else {
- h->dsize =
- sizeof(struct TOKEN(HTYPE, 6ct_elem));
- h->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct TOKEN(HTYPE, 6ct_elem),
- timeout);
- h->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct TOKEN(HTYPE, 6ct_elem),
- counter);
- TOKEN(HTYPE, 6_gc_init)(set,
- TOKEN(HTYPE, 6_gc));
- }
- } else {
- if (set->family == NFPROTO_IPV4) {
- h->dsize =
- sizeof(struct TOKEN(HTYPE, 4c_elem));
- h->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct TOKEN(HTYPE, 4c_elem),
- counter);
- } else {
- h->dsize =
- sizeof(struct TOKEN(HTYPE, 6c_elem));
- h->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct TOKEN(HTYPE, 6c_elem),
- counter);
- }
- }
- } else if (tb[IPSET_ATTR_TIMEOUT]) {
- h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
- set->extensions |= IPSET_EXT_TIMEOUT;
- if (set->family == NFPROTO_IPV4) {
- h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem));
- h->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct TOKEN(HTYPE, 4t_elem),
- timeout);
- TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc));
- } else {
- h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem));
- h->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct TOKEN(HTYPE, 6t_elem),
- timeout);
- TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc));
- }
+ if (set->family == NFPROTO_IPV4) {
+ set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
} else {
+ set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+ }
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
if (set->family == NFPROTO_IPV4)
- h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem));
+ IPSET_TOKEN(HTYPE, 4_gc_init)(set,
+ IPSET_TOKEN(HTYPE, 4_gc));
else
- h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem));
+ IPSET_TOKEN(HTYPE, 6_gc_init)(set,
+ IPSET_TOKEN(HTYPE, 6_gc));
}
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
- set->name, jhash_size(h->table->htable_bits),
- h->table->htable_bits, h->maxelem, set->data, h->table);
+ set->name, jhash_size(t->htable_bits),
+ t->htable_bits, h->maxelem, set->data, t);
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index c74e6e1..e65fc24 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -23,19 +23,20 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-#define REVISION_MAX 1 /* Counters support */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counters support */
+#define IPSET_TYPE_REV_MAX 2 /* Comments support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip");
/* Type specific function prefix */
#define HTYPE hash_ip
#define IP_SET_HASH_WITH_NETMASK
-/* IPv4 variants */
+/* IPv4 variant */
/* Member elements */
struct hash_ip4_elem {
@@ -43,22 +44,6 @@ struct hash_ip4_elem {
__be32 ip;
};
-struct hash_ip4t_elem {
- __be32 ip;
- unsigned long timeout;
-};
-
-struct hash_ip4c_elem {
- __be32 ip;
- struct ip_set_counter counter;
-};
-
-struct hash_ip4ct_elem {
- __be32 ip;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -99,7 +84,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
__be32 ip;
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
@@ -118,8 +103,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 ip, ip_to, hosts;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, hosts;
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -178,29 +163,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
/* Member elements */
struct hash_ip6_elem {
union nf_inet_addr ip;
};
-struct hash_ip6t_elem {
- union nf_inet_addr ip;
- unsigned long timeout;
-};
-
-struct hash_ip6c_elem {
- union nf_inet_addr ip;
- struct ip_set_counter counter;
-};
-
-struct hash_ip6ct_elem {
- union nf_inet_addr ip;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -253,7 +222,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip6_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
hash_ip6_netmask(&e.ip, h->netmask);
@@ -270,7 +239,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip6_elem e = {};
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -304,8 +273,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
.features = IPSET_TYPE_IP,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_ip_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -324,6 +293,7 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 7a2d2bd..525a595 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -24,19 +24,20 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-/* 1 SCTP and UDPLITE support added */
-#define REVISION_MAX 2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Counters support added */
+#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port");
/* Type specific function prefix */
#define HTYPE hash_ipport
-/* IPv4 variants */
+/* IPv4 variant */
/* Member elements */
struct hash_ipport4_elem {
@@ -46,31 +47,6 @@ struct hash_ipport4_elem {
u8 padding;
};
-struct hash_ipport4t_elem {
- __be32 ip;
- __be16 port;
- u8 proto;
- u8 padding;
- unsigned long timeout;
-};
-
-struct hash_ipport4c_elem {
- __be32 ip;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
-};
-
-struct hash_ipport4ct_elem {
- __be32 ip;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -116,10 +92,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
@@ -136,8 +111,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 ip, ip_to, p = 0, port, port_to;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0, p = 0, port, port_to;
bool with_ports = false;
int ret;
@@ -222,7 +197,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
struct hash_ipport6_elem {
union nf_inet_addr ip;
@@ -231,31 +206,6 @@ struct hash_ipport6_elem {
u8 padding;
};
-struct hash_ipport6t_elem {
- union nf_inet_addr ip;
- __be16 port;
- u8 proto;
- u8 padding;
- unsigned long timeout;
-};
-
-struct hash_ipport6c_elem {
- union nf_inet_addr ip;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
-};
-
-struct hash_ipport6ct_elem {
- union nf_inet_addr ip;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -306,10 +256,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport6_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
@@ -326,7 +275,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport6_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
int ret;
@@ -396,8 +345,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
.features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_ipport_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -419,6 +368,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 34e8a1a..f563663 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -24,19 +24,20 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-/* 1 SCTP and UDPLITE support added */
-#define REVISION_MAX 2 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Counters support added */
+#define IPSET_TYPE_REV_MAX 3 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");
/* Type specific function prefix */
#define HTYPE hash_ipportip
-/* IPv4 variants */
+/* IPv4 variant */
/* Member elements */
struct hash_ipportip4_elem {
@@ -47,34 +48,6 @@ struct hash_ipportip4_elem {
u8 padding;
};
-struct hash_ipportip4t_elem {
- __be32 ip;
- __be32 ip2;
- __be16 port;
- u8 proto;
- u8 padding;
- unsigned long timeout;
-};
-
-struct hash_ipportip4c_elem {
- __be32 ip;
- __be32 ip2;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
-};
-
-struct hash_ipportip4ct_elem {
- __be32 ip;
- __be32 ip2;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
static inline bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
const struct hash_ipportip4_elem *ip2,
@@ -120,10 +93,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
@@ -141,8 +113,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 ip, ip_to, p = 0, port, port_to;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip, ip_to = 0, p = 0, port, port_to;
bool with_ports = false;
int ret;
@@ -231,7 +203,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
struct hash_ipportip6_elem {
union nf_inet_addr ip;
@@ -241,34 +213,6 @@ struct hash_ipportip6_elem {
u8 padding;
};
-struct hash_ipportip6t_elem {
- union nf_inet_addr ip;
- union nf_inet_addr ip2;
- __be16 port;
- u8 proto;
- u8 padding;
- unsigned long timeout;
-};
-
-struct hash_ipportip6c_elem {
- union nf_inet_addr ip;
- union nf_inet_addr ip2;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
-};
-
-struct hash_ipportip6ct_elem {
- union nf_inet_addr ip;
- union nf_inet_addr ip2;
- __be16 port;
- u8 proto;
- u8 padding;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -319,10 +263,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip6_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
&e.port, &e.proto))
@@ -340,7 +283,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip6_elem e = { };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
int ret;
@@ -414,8 +357,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
.features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
.dimension = IPSET_DIM_THREE,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_ipportip_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -437,6 +380,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index f15f3e2..5d87fe8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -24,15 +24,16 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-/* 1 SCTP and UDPLITE support added */
-/* 2 Range as input support for IPv4 added */
-/* 3 nomatch flag support added */
-#define REVISION_MAX 4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Range as input support for IPv4 added */
+/* 3 nomatch flag support added */
+/* 4 Counters support added */
+#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,net");
/* Type specific function prefix */
@@ -46,7 +47,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net");
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
-/* IPv4 variants */
+/* IPv4 variant */
/* Member elements */
struct hash_ipportnet4_elem {
@@ -58,37 +59,6 @@ struct hash_ipportnet4_elem {
u8 proto;
};
-struct hash_ipportnet4t_elem {
- __be32 ip;
- __be32 ip2;
- __be16 port;
- u8 cidr:7;
- u8 nomatch:1;
- u8 proto;
- unsigned long timeout;
-};
-
-struct hash_ipportnet4c_elem {
- __be32 ip;
- __be32 ip2;
- __be16 port;
- u8 cidr:7;
- u8 nomatch:1;
- u8 proto;
- struct ip_set_counter counter;
-};
-
-struct hash_ipportnet4ct_elem {
- __be32 ip;
- __be32 ip2;
- __be16 port;
- u8 cidr:7;
- u8 nomatch:1;
- u8 proto;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -170,9 +140,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (adt == IPSET_TEST)
e.cidr = HOST_MASK - 1;
@@ -195,9 +165,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 ip, ip_to, p = 0, port, port_to;
- u32 ip2_from, ip2_to, ip2_last, ip2;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+ u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
bool with_ports = false;
u8 cidr;
int ret;
@@ -272,7 +242,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip > ip_to)
swap(ip, ip_to);
} else if (tb[IPSET_ATTR_CIDR]) {
- u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
@@ -306,9 +276,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
: port;
for (; p <= port_to; p++) {
e.port = htons(p);
- ip2 = retried
- && ip == ntohl(h->next.ip)
- && p == ntohs(h->next.port)
+ ip2 = retried &&
+ ip == ntohl(h->next.ip) &&
+ p == ntohs(h->next.port)
? ntohl(h->next.ip2) : ip2_from;
while (!after(ip2, ip2_to)) {
e.ip2 = htonl(ip2);
@@ -328,7 +298,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
struct hash_ipportnet6_elem {
union nf_inet_addr ip;
@@ -339,37 +309,6 @@ struct hash_ipportnet6_elem {
u8 proto;
};
-struct hash_ipportnet6t_elem {
- union nf_inet_addr ip;
- union nf_inet_addr ip2;
- __be16 port;
- u8 cidr:7;
- u8 nomatch:1;
- u8 proto;
- unsigned long timeout;
-};
-
-struct hash_ipportnet6c_elem {
- union nf_inet_addr ip;
- union nf_inet_addr ip2;
- __be16 port;
- u8 cidr:7;
- u8 nomatch:1;
- u8 proto;
- struct ip_set_counter counter;
-};
-
-struct hash_ipportnet6ct_elem {
- union nf_inet_addr ip;
- union nf_inet_addr ip2;
- __be16 port;
- u8 cidr:7;
- u8 nomatch:1;
- u8 proto;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -454,9 +393,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet6_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (adt == IPSET_TEST)
e.cidr = HOST_MASK - 1;
@@ -479,7 +418,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
u8 cidr;
@@ -574,8 +513,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_THREE,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_ipportnet_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -600,6 +539,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 223e9f5..8295cf4 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -22,21 +22,22 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-/* 1 Range as input support for IPv4 added */
-/* 2 nomatch flag support added */
-#define REVISION_MAX 3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Range as input support for IPv4 added */
+/* 2 nomatch flag support added */
+/* 3 Counters support added */
+#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net");
/* Type specific function prefix */
#define HTYPE hash_net
#define IP_SET_HASH_WITH_NETS
-/* IPv4 variants */
+/* IPv4 variant */
/* Member elements */
struct hash_net4_elem {
@@ -46,31 +47,6 @@ struct hash_net4_elem {
u8 cidr;
};
-struct hash_net4t_elem {
- __be32 ip;
- u16 padding0;
- u8 nomatch;
- u8 cidr;
- unsigned long timeout;
-};
-
-struct hash_net4c_elem {
- __be32 ip;
- u16 padding0;
- u8 nomatch;
- u8 cidr;
- struct ip_set_counter counter;
-};
-
-struct hash_net4ct_elem {
- __be32 ip;
- u16 padding0;
- u8 nomatch;
- u8 cidr;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -143,9 +119,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (e.cidr == 0)
return -EINVAL;
@@ -165,8 +141,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 ip = 0, ip_to, last;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, last;
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -228,7 +204,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
struct hash_net6_elem {
union nf_inet_addr ip;
@@ -237,31 +213,6 @@ struct hash_net6_elem {
u8 cidr;
};
-struct hash_net6t_elem {
- union nf_inet_addr ip;
- u16 padding0;
- u8 nomatch;
- u8 cidr;
- unsigned long timeout;
-};
-
-struct hash_net6c_elem {
- union nf_inet_addr ip;
- u16 padding0;
- u8 nomatch;
- u8 cidr;
- struct ip_set_counter counter;
-};
-
-struct hash_net6ct_elem {
- union nf_inet_addr ip;
- u16 padding0;
- u8 nomatch;
- u8 cidr;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -338,9 +289,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net6_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (e.cidr == 0)
return -EINVAL;
@@ -357,10 +308,9 @@ static int
hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net6_elem e = { .cidr = HOST_MASK };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -406,8 +356,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
.features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_net_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -425,6 +375,7 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 7d798d5..3f64a66 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -23,14 +23,15 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-/* 1 nomatch flag support added */
-/* 2 /0 support added */
-#define REVISION_MAX 3 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 nomatch flag support added */
+/* 2 /0 support added */
+/* 3 Counters support added */
+#define IPSET_TYPE_REV_MAX 4 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net,iface");
/* Interface name rbtree */
@@ -134,7 +135,7 @@ iface_add(struct rb_root *root, const char **iface)
#define STREQ(a, b) (strcmp(a, b) == 0)
-/* IPv4 variants */
+/* IPv4 variant */
struct hash_netiface4_elem_hashed {
__be32 ip;
@@ -144,7 +145,7 @@ struct hash_netiface4_elem_hashed {
u8 elem;
};
-/* Member elements without timeout */
+/* Member elements */
struct hash_netiface4_elem {
__be32 ip;
u8 physdev;
@@ -154,37 +155,6 @@ struct hash_netiface4_elem {
const char *iface;
};
-struct hash_netiface4t_elem {
- __be32 ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
- const char *iface;
- unsigned long timeout;
-};
-
-struct hash_netiface4c_elem {
- __be32 ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
- const char *iface;
- struct ip_set_counter counter;
-};
-
-struct hash_netiface4ct_elem {
- __be32 ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
- const char *iface;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -265,10 +235,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
.elem = 1,
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
int ret;
if (e.cidr == 0)
@@ -319,8 +289,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 ip = 0, ip_to, last;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, last;
char iface[IFNAMSIZ];
int ret;
@@ -399,7 +369,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
struct hash_netiface6_elem_hashed {
union nf_inet_addr ip;
@@ -418,37 +388,6 @@ struct hash_netiface6_elem {
const char *iface;
};
-struct hash_netiface6t_elem {
- union nf_inet_addr ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
- const char *iface;
- unsigned long timeout;
-};
-
-struct hash_netiface6c_elem {
- union nf_inet_addr ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
- const char *iface;
- struct ip_set_counter counter;
-};
-
-struct hash_netiface6ct_elem {
- union nf_inet_addr ip;
- u8 physdev;
- u8 cidr;
- u8 nomatch;
- u8 elem;
- const char *iface;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -534,10 +473,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
.elem = 1,
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
int ret;
if (e.cidr == 0)
@@ -584,7 +523,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
char iface[IFNAMSIZ];
int ret;
@@ -645,8 +584,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_netiface_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -668,6 +607,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
new file mode 100644
index 0000000..4260327
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -0,0 +1,483 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
+IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,net");
+
+/* Type specific function prefix */
+#define HTYPE hash_netnet
+#define IP_SET_HASH_WITH_NETS
+#define IPSET_NET_COUNT 2
+
+/* IPv4 variants */
+
+/* Member elements */
+struct hash_netnet4_elem {
+ union {
+ __be32 ip[2];
+ __be64 ipcmp;
+ };
+ u8 nomatch;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
+ const struct hash_netnet4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ipcmp == ip2->ipcmp &&
+ ip2->ccmp == ip2->ccmp;
+}
+
+static inline int
+hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static inline void
+hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
+ struct hash_netnet4_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
+{
+ if (inner) {
+ elem->ip[1] &= ip_set_netmask(cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ elem->ip[0] &= ip_set_netmask(cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netnet4_data_list(struct sk_buff *skb,
+ const struct hash_netnet4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return 1;
+}
+
+static inline void
+hash_netnet4_data_next(struct hash_netnet4_elem *next,
+ const struct hash_netnet4_elem *d)
+{
+ next->ipcmp = d->ipcmp;
+}
+
+#define MTYPE hash_netnet4
+#define PF 4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet4_elem e = {
+ .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK,
+ .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK,
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]);
+ e.ip[0] &= ip_set_netmask(e.cidr[0]);
+ e.ip[1] &= ip_set_netmask(e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_netnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK,
+ .cidr[1] = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, last;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
+ u8 cidr, cidr2;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr[0] = cidr;
+ }
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!cidr2 || cidr2 > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr[1] = cidr2;
+ }
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] &&
+ tb[IPSET_ATTR_IP2_TO])) {
+ e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
+ e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip_to < ip)
+ swap(ip, ip_to);
+ if (ip + UINT_MAX == ip_to)
+ return -IPSET_ERR_HASH_RANGE;
+ }
+
+ ip2_to = ip2_from;
+ if (tb[IPSET_ATTR_IP2_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+ if (ret)
+ return ret;
+ if (ip2_to < ip2_from)
+ swap(ip2_from, ip2_to);
+ if (ip2_from + UINT_MAX == ip2_to)
+ return -IPSET_ERR_HASH_RANGE;
+
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip[0]);
+
+ while (!after(ip, ip_to)) {
+ e.ip[0] = htonl(ip);
+ last = ip_set_range_to_cidr(ip, ip_to, &cidr);
+ e.cidr[0] = cidr;
+ ip2 = (retried &&
+ ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
+ : ip2_from;
+ while (!after(ip2, ip2_to)) {
+ e.ip[1] = htonl(ip2);
+ last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2);
+ e.cidr[1] = cidr2;
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+ ip2 = last2 + 1;
+ }
+ ip = last + 1;
+ }
+ return ret;
+}
+
+/* IPv6 variants */
+
+struct hash_netnet6_elem {
+ union nf_inet_addr ip[2];
+ u8 nomatch;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
+ const struct hash_netnet6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
+ ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
+ ip1->ccmp == ip2->ccmp;
+}
+
+static inline int
+hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
+{
+ elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
+}
+
+static inline void
+hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
+ struct hash_netnet6_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
+{
+ if (inner) {
+ ip6_netmask(&elem->ip[1], cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ ip6_netmask(&elem->ip[0], cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netnet6_data_list(struct sk_buff *skb,
+ const struct hash_netnet6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) ||
+ nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return 1;
+}
+
+static inline void
+hash_netnet6_data_next(struct hash_netnet4_elem *next,
+ const struct hash_netnet6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+
+#define MTYPE hash_netnet6
+#define PF 6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet6_elem e = {
+ .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK,
+ .cidr[1] = h->nets[0].cidr[1] ? h->nets[0].cidr[1] : HOST_MASK
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK,
+ .cidr[1] = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
+ ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR])
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (tb[IPSET_ATTR_CIDR2])
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
+ e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+static struct ip_set_type hash_netnet_type __read_mostly = {
+ .name = "hash:net,net",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_TWO,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_netnet_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_netnet_init(void)
+{
+ return ip_set_type_register(&hash_netnet_type);
+}
+
+static void __exit
+hash_netnet_fini(void)
+{
+ ip_set_type_unregister(&hash_netnet_type);
+}
+
+module_init(hash_netnet_init);
+module_exit(hash_netnet_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09d6690..7097fb0 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -23,15 +23,16 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
-#define REVISION_MIN 0
-/* 1 SCTP and UDPLITE support added */
-/* 2 Range as input support for IPv4 added */
-/* 3 nomatch flag support added */
-#define REVISION_MAX 4 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Range as input support for IPv4 added */
+/* 3 nomatch flag support added */
+/* 4 Counters support added */
+#define IPSET_TYPE_REV_MAX 5 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net,port");
/* Type specific function prefix */
@@ -45,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:net,port");
*/
#define IP_SET_HASH_WITH_NETS_PACKED
-/* IPv4 variants */
+/* IPv4 variant */
/* Member elements */
struct hash_netport4_elem {
@@ -56,34 +57,6 @@ struct hash_netport4_elem {
u8 nomatch:1;
};
-struct hash_netport4t_elem {
- __be32 ip;
- __be16 port;
- u8 proto;
- u8 cidr:7;
- u8 nomatch:1;
- unsigned long timeout;
-};
-
-struct hash_netport4c_elem {
- __be32 ip;
- __be16 port;
- u8 proto;
- u8 cidr:7;
- u8 nomatch:1;
- struct ip_set_counter counter;
-};
-
-struct hash_netport4ct_elem {
- __be32 ip;
- __be16 port;
- u8 proto;
- u8 cidr:7;
- u8 nomatch:1;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -162,9 +135,9 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (adt == IPSET_TEST)
e.cidr = HOST_MASK - 1;
@@ -186,8 +159,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
- u32 port, port_to, p = 0, ip = 0, ip_to, last;
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, last;
bool with_ports = false;
u8 cidr;
int ret;
@@ -287,7 +260,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
}
-/* IPv6 variants */
+/* IPv6 variant */
struct hash_netport6_elem {
union nf_inet_addr ip;
@@ -297,34 +270,6 @@ struct hash_netport6_elem {
u8 nomatch:1;
};
-struct hash_netport6t_elem {
- union nf_inet_addr ip;
- __be16 port;
- u8 proto;
- u8 cidr:7;
- u8 nomatch:1;
- unsigned long timeout;
-};
-
-struct hash_netport6c_elem {
- union nf_inet_addr ip;
- __be16 port;
- u8 proto;
- u8 cidr:7;
- u8 nomatch:1;
- struct ip_set_counter counter;
-};
-
-struct hash_netport6ct_elem {
- union nf_inet_addr ip;
- __be16 port;
- u8 proto;
- u8 cidr:7;
- u8 nomatch:1;
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
/* Common functions */
static inline bool
@@ -407,9 +352,9 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport6_elem e = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1,
+ .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
};
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (adt == IPSET_TEST)
e.cidr = HOST_MASK - 1;
@@ -431,7 +376,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
u8 cidr;
@@ -518,8 +463,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
.features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = hash_netport_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
@@ -542,6 +487,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
new file mode 100644
index 0000000..363fab9
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -0,0 +1,588 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,port,net type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_getport.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0 /* Comments support added */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
+IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:net,port,net");
+
+/* Type specific function prefix */
+#define HTYPE hash_netportnet
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+#define IPSET_NET_COUNT 2
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_netportnet4_elem {
+ union {
+ __be32 ip[2];
+ __be64 ipcmp;
+ };
+ __be16 port;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+ u8 nomatch:1;
+ u8 proto;
+};
+
+/* Common functions */
+
+static inline bool
+hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
+ const struct hash_netportnet4_elem *ip2,
+ u32 *multi)
+{
+ return ip1->ipcmp == ip2->ipcmp &&
+ ip1->ccmp == ip2->ccmp &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static inline int
+hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static inline void
+hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
+ struct hash_netportnet4_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
+ u8 cidr, bool inner)
+{
+ if (inner) {
+ elem->ip[1] &= ip_set_netmask(cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ elem->ip[0] &= ip_set_netmask(cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netportnet4_data_list(struct sk_buff *skb,
+ const struct hash_netportnet4_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) ||
+ nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return 1;
+}
+
+static inline void
+hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
+ const struct hash_netportnet4_elem *d)
+{
+ next->ipcmp = d->ipcmp;
+ next->port = d->port;
+}
+
+#define MTYPE hash_netportnet4
+#define PF 4
+#define HOST_MASK 32
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netportnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet4_elem e = {
+ .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
+
+ if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]);
+ ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]);
+ e.ip[0] &= ip_set_netmask(e.cidr[0]);
+ e.ip[1] &= ip_set_netmask(e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_netportnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK,
+ .cidr[1] = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
+ u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
+ bool with_ports = false;
+ u8 cidr, cidr2;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+ ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr[0] = cidr;
+ }
+
+ if (tb[IPSET_ATTR_CIDR2]) {
+ cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!cidr || cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ e.cidr[1] = cidr;
+ }
+
+ if (tb[IPSET_ATTR_PORT])
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ else
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else
+ return -IPSET_ERR_MISSING_PROTO;
+
+ if (!(with_ports || e.proto == IPPROTO_ICMP))
+ e.port = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
+ if (adt == IPSET_TEST ||
+ !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) {
+ e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
+ e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ ip_to = ip;
+ if (tb[IPSET_ATTR_IP_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+ if (ret)
+ return ret;
+ if (ip > ip_to)
+ swap(ip, ip_to);
+ if (unlikely(ip + UINT_MAX == ip_to))
+ return -IPSET_ERR_HASH_RANGE;
+ }
+
+ port_to = port = ntohs(e.port);
+ if (tb[IPSET_ATTR_PORT_TO]) {
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+ }
+
+ ip2_to = ip2_from;
+ if (tb[IPSET_ATTR_IP2_TO]) {
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+ if (ret)
+ return ret;
+ if (ip2_from > ip2_to)
+ swap(ip2_from, ip2_to);
+ if (unlikely(ip2_from + UINT_MAX == ip2_to))
+ return -IPSET_ERR_HASH_RANGE;
+ }
+
+ if (retried)
+ ip = ntohl(h->next.ip[0]);
+
+ while (!after(ip, ip_to)) {
+ e.ip[0] = htonl(ip);
+ ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr);
+ e.cidr[0] = cidr;
+ p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
+ : port;
+ for (; p <= port_to; p++) {
+ e.port = htons(p);
+ ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
+ p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
+ : ip2_from;
+ while (!after(ip2, ip2_to)) {
+ e.ip[1] = htonl(ip2);
+ ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
+ &cidr2);
+ e.cidr[1] = cidr2;
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+ ip2 = ip2_last + 1;
+ }
+ }
+ ip = ip_last + 1;
+ }
+ return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_netportnet6_elem {
+ union nf_inet_addr ip[2];
+ __be16 port;
+ union {
+ u8 cidr[2];
+ u16 ccmp;
+ };
+ u8 nomatch:1;
+ u8 proto;
+};
+
+/* Common functions */
+
+static inline bool
+hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
+ const struct hash_netportnet6_elem *ip2,
+ u32 *multi)
+{
+ return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
+ ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
+ ip1->ccmp == ip2->ccmp &&
+ ip1->port == ip2->port &&
+ ip1->proto == ip2->proto;
+}
+
+static inline int
+hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
+{
+ return elem->nomatch ? -ENOTEMPTY : 1;
+}
+
+static inline void
+hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
+{
+ elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
+}
+
+static inline void
+hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
+{
+ swap(*flags, elem->nomatch);
+}
+
+static inline void
+hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
+ struct hash_netportnet6_elem *orig)
+{
+ elem->ip[1] = orig->ip[1];
+}
+
+static inline void
+hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
+ u8 cidr, bool inner)
+{
+ if (inner) {
+ ip6_netmask(&elem->ip[1], cidr);
+ elem->cidr[1] = cidr;
+ } else {
+ ip6_netmask(&elem->ip[0], cidr);
+ elem->cidr[0] = cidr;
+ }
+}
+
+static bool
+hash_netportnet6_data_list(struct sk_buff *skb,
+ const struct hash_netportnet6_elem *data)
+{
+ u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
+
+ if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) ||
+ nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) ||
+ nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) ||
+ nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) ||
+ nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
+ (flags &&
+ nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return 1;
+}
+
+static inline void
+hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
+ const struct hash_netportnet6_elem *d)
+{
+ next->port = d->port;
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+
+#define MTYPE hash_netportnet6
+#define PF 6
+#define HOST_MASK 128
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ const struct hash_netportnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet6_elem e = {
+ .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK),
+ };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ if (adt == IPSET_TEST)
+ e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
+
+ if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
+ &e.port, &e.proto))
+ return -EINVAL;
+
+ ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
+ ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6);
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ const struct hash_netportnet *h = set->data;
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK,
+ .cidr[1] = HOST_MASK };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ u32 port, port_to;
+ bool with_ports = false;
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
+ ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
+ ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ if (tb[IPSET_ATTR_CIDR])
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (tb[IPSET_ATTR_CIDR2])
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+
+ if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
+ e.cidr[1] > HOST_MASK))
+ return -IPSET_ERR_INVALID_CIDR;
+
+ ip6_netmask(&e.ip[0], e.cidr[0]);
+ ip6_netmask(&e.ip[1], e.cidr[1]);
+
+ if (tb[IPSET_ATTR_PORT])
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+ else
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_PROTO]) {
+ e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+ with_ports = ip_set_proto_with_ports(e.proto);
+
+ if (e.proto == 0)
+ return -IPSET_ERR_INVALID_PROTO;
+ } else
+ return -IPSET_ERR_MISSING_PROTO;
+
+ if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+ e.port = 0;
+
+ if (tb[IPSET_ATTR_CADT_FLAGS]) {
+ u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+ if (cadt_flags & IPSET_FLAG_NOMATCH)
+ flags |= (IPSET_FLAG_NOMATCH << 16);
+ }
+
+ if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
+ ret = adtfn(set, &e, &ext, &ext, flags);
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
+ ip_set_eexist(ret, flags) ? 0 : ret;
+ }
+
+ port = ntohs(e.port);
+ port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
+ if (port > port_to)
+ swap(port, port_to);
+
+ if (retried)
+ port = ntohs(h->next.port);
+ for (; port <= port_to; port++) {
+ e.port = htons(port);
+ ret = adtfn(set, &e, &ext, &ext, flags);
+
+ if (ret && !ip_set_eexist(ret, flags))
+ return ret;
+ else
+ ret = 0;
+ }
+ return ret;
+}
+
+static struct ip_set_type hash_netportnet_type __read_mostly = {
+ .name = "hash:net,port,net",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
+ IPSET_TYPE_NOMATCH,
+ .dimension = IPSET_DIM_THREE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_netportnet_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_IP] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2] = { .type = NLA_NESTED },
+ [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED },
+ [IPSET_ATTR_PORT] = { .type = NLA_U16 },
+ [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
+ [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
+ [IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
+ [IPSET_ATTR_PROTO] = { .type = NLA_U8 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_netportnet_init(void)
+{
+ return ip_set_type_register(&hash_netportnet_type);
+}
+
+static void __exit
+hash_netportnet_fini(void)
+{
+ ip_set_type_unregister(&hash_netportnet_type);
+}
+
+module_init(hash_netportnet_init);
+module_exit(hash_netportnet_fini);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 979b8c9..ec6f6d1 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -15,12 +15,13 @@
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_list.h>
-#define REVISION_MIN 0
-#define REVISION_MAX 1 /* Counters support added */
+#define IPSET_TYPE_REV_MIN 0
+/* 1 Counters support added */
+#define IPSET_TYPE_REV_MAX 2 /* Comments support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
+IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_list:set");
/* Member elements */
@@ -28,28 +29,6 @@ struct set_elem {
ip_set_id_t id;
};
-struct sett_elem {
- struct {
- ip_set_id_t id;
- } __attribute__ ((aligned));
- unsigned long timeout;
-};
-
-struct setc_elem {
- struct {
- ip_set_id_t id;
- } __attribute__ ((aligned));
- struct ip_set_counter counter;
-};
-
-struct setct_elem {
- struct {
- ip_set_id_t id;
- } __attribute__ ((aligned));
- struct ip_set_counter counter;
- unsigned long timeout;
-};
-
struct set_adt_elem {
ip_set_id_t id;
ip_set_id_t refid;
@@ -58,24 +37,14 @@ struct set_adt_elem {
/* Type structure */
struct list_set {
- size_t dsize; /* element size */
- size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
u32 size; /* size of set list array */
- u32 timeout; /* timeout value */
struct timer_list gc; /* garbage collection */
+ struct net *net; /* namespace */
struct set_elem members[0]; /* the set members */
};
-static inline struct set_elem *
-list_set_elem(const struct list_set *map, u32 id)
-{
- return (struct set_elem *)((void *)map->members + id * map->dsize);
-}
-
-#define ext_timeout(e, m) \
-(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
-#define ext_counter(e, m) \
-(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER])
+#define list_set_elem(set, map, id) \
+ (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
static int
list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
@@ -92,16 +61,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
return 0;
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
ret = ip_set_test(e->id, skb, par, opt);
if (ret > 0) {
if (SET_WITH_COUNTER(set))
- ip_set_update_counter(ext_counter(e, map),
+ ip_set_update_counter(ext_counter(e, set),
ext, &opt->ext,
cmdflags);
return ret;
@@ -121,11 +90,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
int ret;
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
return 0;
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
ret = ip_set_add(e->id, skb, par, opt);
if (ret == 0)
@@ -145,11 +114,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
int ret;
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
return 0;
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
ret = ip_set_del(e->id, skb, par, opt);
if (ret == 0)
@@ -163,8 +132,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
- struct list_set *map = set->data;
- struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
switch (adt) {
case IPSET_TEST:
@@ -188,10 +156,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
if (i >= map->size)
return 0;
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
return !!(e->id == id &&
!(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map))));
+ ip_set_timeout_expired(ext_timeout(e, set))));
}
static int
@@ -199,28 +167,36 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
const struct ip_set_ext *ext)
{
struct list_set *map = set->data;
- struct set_elem *e = list_set_elem(map, i);
+ struct set_elem *e = list_set_elem(set, map, i);
if (e->id != IPSET_INVALID_ID) {
- if (i == map->size - 1)
+ if (i == map->size - 1) {
/* Last element replaced: e.g. add new,before,last */
- ip_set_put_byindex(e->id);
- else {
- struct set_elem *x = list_set_elem(map, map->size - 1);
+ ip_set_put_byindex(map->net, e->id);
+ ip_set_ext_destroy(set, e);
+ } else {
+ struct set_elem *x = list_set_elem(set, map,
+ map->size - 1);
/* Last element pushed off */
- if (x->id != IPSET_INVALID_ID)
- ip_set_put_byindex(x->id);
- memmove(list_set_elem(map, i + 1), e,
- map->dsize * (map->size - (i + 1)));
+ if (x->id != IPSET_INVALID_ID) {
+ ip_set_put_byindex(map->net, x->id);
+ ip_set_ext_destroy(set, x);
+ }
+ memmove(list_set_elem(set, map, i + 1), e,
+ set->dsize * (map->size - (i + 1)));
+ /* Extensions must be initialized to zero */
+ memset(e, 0, set->dsize);
}
}
e->id = d->id;
if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+ ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(e, map), ext);
+ ip_set_init_counter(ext_counter(e, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(ext_comment(e, set), ext);
return 0;
}
@@ -228,16 +204,17 @@ static int
list_set_del(struct ip_set *set, u32 i)
{
struct list_set *map = set->data;
- struct set_elem *e = list_set_elem(map, i);
+ struct set_elem *e = list_set_elem(set, map, i);
- ip_set_put_byindex(e->id);
+ ip_set_put_byindex(map->net, e->id);
+ ip_set_ext_destroy(set, e);
if (i < map->size - 1)
- memmove(e, list_set_elem(map, i + 1),
- map->dsize * (map->size - (i + 1)));
+ memmove(e, list_set_elem(set, map, i + 1),
+ set->dsize * (map->size - (i + 1)));
/* Last element */
- e = list_set_elem(map, map->size - 1);
+ e = list_set_elem(set, map, map->size - 1);
e->id = IPSET_INVALID_ID;
return 0;
}
@@ -247,13 +224,16 @@ set_cleanup_entries(struct ip_set *set)
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i;
+ u32 i = 0;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ while (i < map->size) {
+ e = list_set_elem(set, map, i);
if (e->id != IPSET_INVALID_ID &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
list_set_del(set, i);
+ /* Check element moved to position i in next loop */
+ else
+ i++;
}
}
@@ -268,11 +248,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int ret;
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
return 0;
else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
else if (e->id != d->id)
continue;
@@ -299,14 +279,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
bool flag_exist = flags & IPSET_FLAG_EXIST;
u32 i, ret = 0;
+ if (SET_WITH_TIMEOUT(set))
+ set_cleanup_entries(set);
+
/* Check already added element */
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
goto insert;
- else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
- continue;
else if (e->id != d->id)
continue;
@@ -319,18 +299,22 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
/* Can't re-add */
return -IPSET_ERR_EXIST;
/* Update extensions */
+ ip_set_ext_destroy(set, e);
+
if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+ ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(e, map), ext);
+ ip_set_init_counter(ext_counter(e, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(ext_comment(e, set), ext);
/* Set is already added to the list */
- ip_set_put_byindex(d->id);
+ ip_set_put_byindex(map->net, d->id);
return 0;
}
insert:
ret = -IPSET_ERR_LIST_FULL;
for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
: list_set_add(set, i, d, ext);
@@ -355,12 +339,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
u32 i;
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
return d->before != 0 ? -IPSET_ERR_REF_EXIST
: -IPSET_ERR_EXIST;
else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
else if (e->id != d->id)
continue;
@@ -386,7 +370,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
struct list_set *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
- struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
struct ip_set *s;
int ret = 0;
@@ -403,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+ e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s);
if (e.id == IPSET_INVALID_ID)
return -IPSET_ERR_NAME;
/* "Loop detection" */
@@ -423,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (tb[IPSET_ATTR_NAMEREF]) {
- e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+ e.refid = ip_set_get_byname(map->net,
+ nla_data(tb[IPSET_ATTR_NAMEREF]),
&s);
if (e.refid == IPSET_INVALID_ID) {
ret = -IPSET_ERR_NAMEREF;
@@ -439,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
finish:
if (e.refid != IPSET_INVALID_ID)
- ip_set_put_byindex(e.refid);
+ ip_set_put_byindex(map->net, e.refid);
if (adt != IPSET_ADD || ret)
- ip_set_put_byindex(e.id);
+ ip_set_put_byindex(map->net, e.id);
return ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -454,9 +439,10 @@ list_set_flush(struct ip_set *set)
u32 i;
for (i = 0; i < map->size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id != IPSET_INVALID_ID) {
- ip_set_put_byindex(e->id);
+ ip_set_put_byindex(map->net, e->id);
+ ip_set_ext_destroy(set, e);
e->id = IPSET_INVALID_ID;
}
}
@@ -485,14 +471,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
- (SET_WITH_TIMEOUT(set) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
- (SET_WITH_COUNTER(set) &&
- nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
- htonl(IPSET_FLAG_WITH_COUNTERS))) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) + map->size * map->dsize)))
+ htonl(sizeof(*map) + map->size * set->dsize)))
+ goto nla_put_failure;
+ if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
@@ -515,11 +498,11 @@ list_set_list(const struct ip_set *set,
return -EMSGSIZE;
for (; cb->args[2] < map->size; cb->args[2]++) {
i = cb->args[2];
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
if (e->id == IPSET_INVALID_ID)
goto finish;
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, map)))
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested) {
@@ -530,15 +513,9 @@ list_set_list(const struct ip_set *set,
goto nla_put_failure;
}
if (nla_put_string(skb, IPSET_ATTR_NAME,
- ip_set_name_byindex(e->id)))
- goto nla_put_failure;
- if (SET_WITH_TIMEOUT(set) &&
- nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
- htonl(ip_set_timeout_get(
- ext_timeout(e, map)))))
+ ip_set_name_byindex(map->net, e->id)))
goto nla_put_failure;
- if (SET_WITH_COUNTER(set) &&
- ip_set_put_counter(skb, ext_counter(e, map)))
+ if (ip_set_put_extensions(skb, set, e, true))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
@@ -550,11 +527,11 @@ finish:
nla_put_failure:
nla_nest_cancel(skb, nested);
- ipset_nest_end(skb, atd);
if (unlikely(i == first)) {
cb->args[2] = 0;
return -EMSGSIZE;
}
+ ipset_nest_end(skb, atd);
return 0;
}
@@ -565,7 +542,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
const struct list_set *y = b->data;
return x->size == y->size &&
- x->timeout == y->timeout &&
+ a->timeout == b->timeout &&
a->extensions == b->extensions;
}
@@ -594,7 +571,7 @@ list_set_gc(unsigned long ul_set)
set_cleanup_entries(set);
write_unlock_bh(&set->lock);
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
}
@@ -606,43 +583,40 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
init_timer(&map->gc);
map->gc.data = (unsigned long) set;
map->gc.function = gc;
- map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+ map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
}
/* Create list:set type of sets */
-static struct list_set *
-init_list_set(struct ip_set *set, u32 size, size_t dsize,
- unsigned long timeout)
+static bool
+init_list_set(struct net *net, struct ip_set *set, u32 size)
{
struct list_set *map;
struct set_elem *e;
u32 i;
- map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
+ map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
if (!map)
- return NULL;
+ return false;
map->size = size;
- map->dsize = dsize;
- map->timeout = timeout;
+ map->net = net;
set->data = map;
for (i = 0; i < size; i++) {
- e = list_set_elem(map, i);
+ e = list_set_elem(set, map, i);
e->id = IPSET_INVALID_ID;
}
- return map;
+ return true;
}
static int
-list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
+ u32 flags)
{
- struct list_set *map;
- u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0;
- unsigned long timeout = 0;
+ u32 size = IP_SET_LIST_DEFAULT_SIZE;
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
@@ -654,45 +628,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
if (size < IP_SET_LIST_MIN_SIZE)
size = IP_SET_LIST_MIN_SIZE;
- if (tb[IPSET_ATTR_CADT_FLAGS])
- cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
- if (tb[IPSET_ATTR_TIMEOUT])
- timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
set->variant = &set_variant;
- if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
- set->extensions |= IPSET_EXT_COUNTER;
- if (tb[IPSET_ATTR_TIMEOUT]) {
- map = init_list_set(set, size,
- sizeof(struct setct_elem), timeout);
- if (!map)
- return -ENOMEM;
- set->extensions |= IPSET_EXT_TIMEOUT;
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct setct_elem, timeout);
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct setct_elem, counter);
- list_set_gc_init(set, list_set_gc);
- } else {
- map = init_list_set(set, size,
- sizeof(struct setc_elem), 0);
- if (!map)
- return -ENOMEM;
- map->offset[IPSET_OFFSET_COUNTER] =
- offsetof(struct setc_elem, counter);
- }
- } else if (tb[IPSET_ATTR_TIMEOUT]) {
- map = init_list_set(set, size,
- sizeof(struct sett_elem), timeout);
- if (!map)
- return -ENOMEM;
- set->extensions |= IPSET_EXT_TIMEOUT;
- map->offset[IPSET_OFFSET_TIMEOUT] =
- offsetof(struct sett_elem, timeout);
+ set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+ if (!init_list_set(net, set, size))
+ return -ENOMEM;
+ if (tb[IPSET_ATTR_TIMEOUT]) {
+ set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
list_set_gc_init(set, list_set_gc);
- } else {
- map = init_list_set(set, size, sizeof(struct set_elem), 0);
- if (!map)
- return -ENOMEM;
}
return 0;
}
@@ -703,8 +645,8 @@ static struct ip_set_type list_set_type __read_mostly = {
.features = IPSET_TYPE_NAME | IPSET_DUMP_LAST,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = REVISION_MIN,
- .revision_max = REVISION_MAX,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
.create = list_set_create,
.create_policy = {
[IPSET_ATTR_SIZE] = { .type = NLA_U32 },
@@ -721,6 +663,7 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f69e83..74fd00c 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(dest->svc->stats.cpustats);
+ rcu_read_lock();
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
+ rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(dest->svc->stats.cpustats);
+ rcu_read_lock();
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
+ rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.outpkts++;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c8148e4..a3df9bd 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
atomic_inc(&svc->refcnt);
- dest->svc = svc;
+ rcu_assign_pointer(dest->svc, svc);
}
static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
kfree(svc);
}
-static void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+static void ip_vs_service_rcu_free(struct rcu_head *head)
{
- struct ip_vs_service *svc = dest->svc;
+ struct ip_vs_service *svc;
+
+ svc = container_of(head, struct ip_vs_service, rcu_head);
+ ip_vs_service_free(svc);
+}
- dest->svc = NULL;
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
if (atomic_dec_and_test(&svc->refcnt)) {
IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port));
- ip_vs_service_free(svc);
+ if (do_delay)
+ call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+ else
+ ip_vs_service_free(svc);
}
}
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- /* We can not reuse dest while in grace period
- * because conns still can use dest->svc
- */
- if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
- continue;
if (dest->af == svc->af &&
ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -697,8 +699,10 @@ out:
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
+ struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
__ip_vs_dst_cache_reset(dest);
- __ip_vs_unbind_svc(dest);
+ __ip_vs_svc_put(svc, false);
free_percpu(dest->stats.cpustats);
kfree(dest);
}
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct ip_vs_service *old_svc;
struct ip_vs_scheduler *sched;
int conn_flags;
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
atomic_set(&dest->conn_flags, conn_flags);
/* bind the service */
- if (!dest->svc) {
+ old_svc = rcu_dereference_protected(dest->svc, 1);
+ if (!old_svc) {
__ip_vs_bind_svc(dest, svc);
} else {
- if (dest->svc != svc) {
- __ip_vs_unbind_svc(dest);
+ if (old_svc != svc) {
ip_vs_zero_stats(&dest->stats);
__ip_vs_bind_svc(dest, svc);
+ __ip_vs_svc_put(old_svc, true);
}
}
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
-static void ip_vs_dest_wait_readers(struct rcu_head *head)
-{
- struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
- rcu_head);
-
- /* End of grace period after unlinking */
- clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-}
-
-
/*
* Delete a destination (must be already unlinked from the service)
*/
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
*/
ip_vs_rs_unhash(dest);
- if (!cleanup) {
- set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
- call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
- }
-
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
- jiffies + IP_VS_DEST_TRASH_PERIOD);
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
/* dest lives in trash without reference */
list_add(&dest->t_list, &ipvs->dest_trash);
+ dest->idle_start = 0;
spin_unlock_bh(&ipvs->dest_trash_lock);
ip_vs_dest_put(dest);
}
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
struct net *net = (struct net *) data;
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest *dest, *next;
+ unsigned long now = jiffies;
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- /* Skip if dest is in grace period */
- if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
- continue;
if (atomic_read(&dest->refcnt) > 0)
continue;
+ if (dest->idle_start) {
+ if (time_before(now, dest->idle_start +
+ IP_VS_DEST_TRASH_PERIOD))
+ continue;
+ } else {
+ dest->idle_start = max(1UL, now);
+ continue;
+ }
IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
list_del(&dest->t_list);
ip_vs_dest_free(dest);
}
if (!list_empty(&ipvs->dest_trash))
mod_timer(&ipvs->dest_trash_timer,
- jiffies + IP_VS_DEST_TRASH_PERIOD);
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
spin_unlock(&ipvs->dest_trash_lock);
}
@@ -1320,14 +1318,6 @@ out:
return ret;
}
-static void ip_vs_service_rcu_free(struct rcu_head *head)
-{
- struct ip_vs_service *svc;
-
- svc = container_of(head, struct ip_vs_service, rcu_head);
- ip_vs_service_free(svc);
-}
-
/*
* Delete a service from the service list
* - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/*
* Free the service if nobody refers to it
*/
- if (atomic_dec_and_test(&svc->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
- svc->fwmark,
- IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port));
- call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
- }
+ __ip_vs_svc_put(svc, true);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0..1425e9a 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
+ bool add = false;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
__u64 inbytes, outbytes;
- if (i) {
+ if (add) {
sum->conns += s->ustats.conns;
sum->inpkts += s->ustats.inpkts;
sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
+ add = true;
sum->conns = s->ustats.conns;
sum->inpkts = s->ustats.inpkts;
sum->outpkts = s->ustats.outpkts;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1383b0e..eff13c9 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
- struct ip_vs_dest __rcu *dest; /* real server (cache) */
+ struct ip_vs_dest *dest; /* real server (cache) */
unsigned long lastuse; /* last used time */
struct rcu_head rcu_head;
};
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
};
#endif
-static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+static void ip_vs_lblc_rcu_free(struct rcu_head *head)
{
- struct ip_vs_dest *dest;
+ struct ip_vs_lblc_entry *en = container_of(head,
+ struct ip_vs_lblc_entry,
+ rcu_head);
- hlist_del_rcu(&en->list);
- /*
- * We don't kfree dest because it is referred either by its service
- * or the trash dest list.
- */
- dest = rcu_dereference_protected(en->dest, 1);
- ip_vs_dest_put(dest);
- kfree_rcu(en, rcu_head);
+ ip_vs_dest_put(en->dest);
+ kfree(en);
}
+static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
+{
+ hlist_del_rcu(&en->list);
+ call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
+}
/*
* Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
struct ip_vs_lblc_entry *en;
en = ip_vs_lblc_get(dest->af, tbl, daddr);
- if (!en) {
- en = kmalloc(sizeof(*en), GFP_ATOMIC);
- if (!en)
- return NULL;
-
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
- en->lastuse = jiffies;
+ if (en) {
+ if (en->dest == dest)
+ return en;
+ ip_vs_lblc_del(en);
+ }
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en)
+ return NULL;
- ip_vs_dest_hold(dest);
- RCU_INIT_POINTER(en->dest, dest);
+ en->af = dest->af;
+ ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->lastuse = jiffies;
- ip_vs_lblc_hash(tbl, en);
- } else {
- struct ip_vs_dest *old_dest;
+ ip_vs_dest_hold(dest);
+ en->dest = dest;
- old_dest = rcu_dereference_protected(en->dest, 1);
- if (old_dest != dest) {
- ip_vs_dest_put(old_dest);
- ip_vs_dest_hold(dest);
- /* No ordering constraints for refcnt */
- RCU_INIT_POINTER(en->dest, dest);
- }
- }
+ ip_vs_lblc_hash(tbl, en);
return en;
}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
tbl->dead = 1;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
}
}
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
sysctl_lblc_expiration(svc)))
continue;
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
}
spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
goal--;
}
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
* free up entries from the trash at any time.
*/
- dest = rcu_dereference(en->dest);
+ dest = en->dest;
if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
- synchronize_rcu();
+ rcu_barrier();
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 5199448..0b85500 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
*/
struct ip_vs_dest_set_elem {
struct list_head list; /* list link */
- struct ip_vs_dest __rcu *dest; /* destination server */
+ struct ip_vs_dest *dest; /* destination server */
struct rcu_head rcu_head;
};
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
if (check) {
list_for_each_entry(e, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- if (d == dest)
- /* already existed */
+ if (e->dest == dest)
return;
}
}
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
return;
ip_vs_dest_hold(dest);
- RCU_INIT_POINTER(e->dest, dest);
+ e->dest = dest;
list_add_rcu(&e->list, &set->list);
atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
set->lastmod = jiffies;
}
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_set_elem *e;
+
+ e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+ ip_vs_dest_put(e->dest);
+ kfree(e);
+}
+
static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
struct ip_vs_dest_set_elem *e;
list_for_each_entry(e, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- if (d == dest) {
+ if (e->dest == dest) {
/* HIT */
atomic_dec(&set->size);
set->lastmod = jiffies;
- ip_vs_dest_put(dest);
list_del_rcu(&e->list);
- kfree_rcu(e, rcu_head);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
break;
}
}
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
struct ip_vs_dest_set_elem *e, *ep;
list_for_each_entry_safe(e, ep, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- /*
- * We don't kfree dest because it is referred either
- * by its service or by the trash dest list.
- */
- ip_vs_dest_put(d);
list_del_rcu(&e->list);
- kfree_rcu(e, rcu_head);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
}
}
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
struct ip_vs_dest *dest, *least;
int loh, doh;
- if (set == NULL)
- return NULL;
-
/* select the first destination server, whose weight > 0 */
list_for_each_entry_rcu(e, &set->list, list) {
- least = rcu_dereference(e->dest);
+ least = e->dest;
if (least->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* find the destination with the weighted least load */
nextstage:
list_for_each_entry_continue_rcu(e, &set->list, list) {
- dest = rcu_dereference(e->dest);
+ dest = e->dest;
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if ((loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight))
+ if (((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))
&& (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
least = dest;
loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* select the first destination server, whose weight > 0 */
list_for_each_entry(e, &set->list, list) {
- most = rcu_dereference_protected(e->dest, 1);
+ most = e->dest;
if (atomic_read(&most->weight) > 0) {
moh = ip_vs_dest_conn_overhead(most);
goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* find the destination with the weighted most load */
nextstage:
list_for_each_entry_continue(e, &set->list, list) {
- dest = rcu_dereference_protected(e->dest, 1);
+ dest = e->dest;
doh = ip_vs_dest_conn_overhead(dest);
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
- if ((moh * atomic_read(&dest->weight) <
- doh * atomic_read(&most->weight))
+ if (((__s64)moh * atomic_read(&dest->weight) <
+ (__s64)doh * atomic_read(&most->weight))
&& (atomic_read(&dest->weight) > 0)) {
most = dest;
moh = doh;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
- synchronize_rcu();
+ rcu_barrier();
}
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index d8d9860..961a6de 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
#include <net/ip_vs.h>
-static inline unsigned int
+static inline int
ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
{
/*
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least = NULL;
- unsigned int loh = 0, doh;
+ int loh = 0, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
}
if (!least ||
- (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight))) {
+ ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index a5284cc..e446b9f 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
#include <net/ip_vs.h>
-static inline unsigned int
+static inline int
ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
{
/*
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least;
- unsigned int loh, doh;
+ int loh, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_sed_dest_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6dc1fa1..b5b4650 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least;
- unsigned int loh, doh;
+ int loh, doh;
IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index e0c4373..466410e 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600);
MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
"endpoints only (default 1)");
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
- unsigned int dataoff, const char **dptr,
- unsigned int *datalen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-
-void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff,
- s16 off) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
-
-unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
- unsigned int protoff,
- unsigned int dataoff,
- const char **dptr,
- unsigned int *datalen,
- struct nf_conntrack_expect *exp,
- unsigned int matchoff,
- unsigned int matchlen) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
-
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff,
- unsigned int dataoff,
- const char **dptr,
- unsigned int *datalen,
- unsigned int sdpoff,
- enum sdp_header_types type,
- enum sdp_header_types term,
- const union nf_inet_addr *addr)
- __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
-
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff,
- unsigned int dataoff,
- const char **dptr,
- unsigned int *datalen,
- unsigned int matchoff,
- unsigned int matchlen,
- u_int16_t port) __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
-
-unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
- unsigned int protoff,
- unsigned int dataoff,
- const char **dptr,
- unsigned int *datalen,
- unsigned int sdpoff,
- const union nf_inet_addr *addr)
- __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
-
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff,
- unsigned int dataoff,
- const char **dptr,
- unsigned int *datalen,
- struct nf_conntrack_expect *rtp_exp,
- struct nf_conntrack_expect *rtcp_exp,
- unsigned int mediaoff,
- unsigned int medialen,
- union nf_inet_addr *rtp_addr)
- __read_mostly;
-EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook);
+const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
+EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
static int string_len(const struct nf_conn *ct, const char *dptr,
const char *limit, int *shift)
@@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
int direct_rtp = 0, skip_expect = 0, ret = NF_DROP;
u_int16_t base_port;
__be16 rtp_port, rtcp_port;
- typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port;
- typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media;
+ const struct nf_nat_sip_hooks *hooks;
saddr = NULL;
if (sip_direct_media) {
@@ -966,22 +907,23 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
#endif
skip_expect = 1;
} while (!skip_expect);
- rcu_read_unlock();
base_port = ntohs(tuple.dst.u.udp.port) & ~1;
rtp_port = htons(base_port);
rtcp_port = htons(base_port + 1);
if (direct_rtp) {
- nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
- if (nf_nat_sdp_port &&
- !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks &&
+ !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen,
mediaoff, medialen, ntohs(rtp_port)))
goto err1;
}
- if (skip_expect)
+ if (skip_expect) {
+ rcu_read_unlock();
return NF_ACCEPT;
+ }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
@@ -995,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr,
IPPROTO_UDP, NULL, &rtcp_port);
- nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
- if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
- ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
- rtp_exp, rtcp_exp,
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp)
+ ret = hooks->sdp_media(skb, protoff, dataoff, dptr,
+ datalen, rtp_exp, rtcp_exp,
mediaoff, medialen, daddr);
else {
if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -1012,6 +954,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
err2:
nf_ct_expect_put(rtp_exp);
err1:
+ rcu_read_unlock();
return ret;
}
@@ -1051,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
unsigned int caddr_len, maddr_len;
unsigned int i;
union nf_inet_addr caddr, maddr, rtp_addr;
+ const struct nf_nat_sip_hooks *hooks;
unsigned int port;
const struct sdp_media_type *t;
int ret = NF_ACCEPT;
- typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
- typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
- nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
+ hooks = rcu_dereference(nf_nat_sip_hooks);
/* Find beginning of session description */
if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
@@ -1125,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
}
/* Update media connection address if present */
- if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
- ret = nf_nat_sdp_addr(skb, protoff, dataoff,
+ if (maddr_len && hooks && ct->status & IPS_NAT_MASK) {
+ ret = hooks->sdp_addr(skb, protoff, dataoff,
dptr, datalen, mediaoff,
- SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+ SDP_HDR_CONNECTION,
+ SDP_HDR_MEDIA,
&rtp_addr);
if (ret != NF_ACCEPT) {
nf_ct_helper_log(skb, ct, "cannot mangle SDP");
@@ -1139,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
}
/* Update session connection and owner addresses */
- nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
- if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sdp_session(skb, protoff, dataoff,
- dptr, datalen, sdpoff, &rtp_addr);
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && ct->status & IPS_NAT_MASK)
+ ret = hooks->sdp_session(skb, protoff, dataoff,
+ dptr, datalen, sdpoff,
+ &rtp_addr);
return ret;
}
@@ -1242,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
unsigned int matchoff, matchlen;
struct nf_conntrack_expect *exp;
union nf_inet_addr *saddr, daddr;
+ const struct nf_nat_sip_hooks *hooks;
__be16 port;
u8 proto;
unsigned int expires = 0;
int ret;
- typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
/* Expected connections can not register again. */
if (ct->status & IPS_EXPECTED)
@@ -1309,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
exp->helper = nfct_help(ct)->helper;
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
- nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
- if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
- exp, matchoff, matchlen);
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && ct->status & IPS_NAT_MASK)
+ ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
+ exp, matchoff, matchlen);
else {
if (nf_ct_expect_related(exp) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
@@ -1515,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
unsigned int protoff, unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
- typeof(nf_nat_sip_hook) nf_nat_sip;
+ const struct nf_nat_sip_hooks *hooks;
int ret;
if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
@@ -1524,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
- nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
- if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
- dptr, datalen)) {
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks && !hooks->msg(skb, protoff, dataoff,
+ dptr, datalen)) {
nf_ct_helper_log(skb, ct, "cannot NAT SIP message");
ret = NF_DROP;
}
@@ -1546,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
s16 diff, tdiff = 0;
int ret = NF_ACCEPT;
bool term;
- typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
if (ctinfo != IP_CT_ESTABLISHED &&
ctinfo != IP_CT_ESTABLISHED_REPLY)
@@ -1610,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
}
if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
- nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
- if (nf_nat_sip_seq_adjust)
- nf_nat_sip_seq_adjust(skb, protoff, tdiff);
+ const struct nf_nat_sip_hooks *hooks;
+
+ hooks = rcu_dereference(nf_nat_sip_hooks);
+ if (hooks)
+ hooks->seq_adjust(skb, protoff, tdiff);
}
return ret;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index f979040..b4d691d 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = {
static void __exit nf_nat_sip_fini(void)
{
- RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
- RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
- RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
- RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
- RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
- RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
- RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_hooks, NULL);
+
nf_ct_helper_expectfn_unregister(&sip_nat);
synchronize_rcu();
}
+static const struct nf_nat_sip_hooks sip_hooks = {
+ .msg = nf_nat_sip,
+ .seq_adjust = nf_nat_sip_seq_adjust,
+ .expect = nf_nat_sip_expect,
+ .sdp_addr = nf_nat_sdp_addr,
+ .sdp_port = nf_nat_sdp_port,
+ .sdp_session = nf_nat_sdp_session,
+ .sdp_media = nf_nat_sdp_media,
+};
+
static int __init nf_nat_sip_init(void)
{
- BUG_ON(nf_nat_sip_hook != NULL);
- BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
- BUG_ON(nf_nat_sip_expect_hook != NULL);
- BUG_ON(nf_nat_sdp_addr_hook != NULL);
- BUG_ON(nf_nat_sdp_port_hook != NULL);
- BUG_ON(nf_nat_sdp_session_hook != NULL);
- BUG_ON(nf_nat_sdp_media_hook != NULL);
- RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip);
- RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust);
- RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect);
- RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr);
- RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port);
- RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session);
- RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media);
+ BUG_ON(nf_nat_sip_hooks != NULL);
+ RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks);
nf_ct_helper_expectfn_register(&sip_nat);
return 0;
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 6fd967c..cdf4567 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -24,7 +24,7 @@
int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
-void
+bool
synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
const struct tcphdr *th, struct synproxy_options *opts)
{
@@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
u8 buf[40], *ptr;
ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
- BUG_ON(ptr == NULL);
+ if (ptr == NULL)
+ return false;
opts->options = 0;
while (length > 0) {
@@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
switch (opcode) {
case TCPOPT_EOL:
- return;
+ return true;
case TCPOPT_NOP:
length--;
continue;
default:
opsize = *ptr++;
if (opsize < 2)
- return;
+ return true;
if (opsize > length)
- return;
+ return true;
switch (opcode) {
case TCPOPT_MSS:
@@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
length -= opsize;
}
}
+ return true;
}
EXPORT_SYMBOL_GPL(synproxy_parse_options);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 5058049..476accd 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -49,10 +49,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(struct ctnl_timeout *timeout,
- struct nf_conntrack_l4proto *l4proto,
- struct net *net,
- const struct nlattr *attr)
+ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ struct net *net, const struct nlattr *attr)
{
int ret = 0;
@@ -64,8 +62,7 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout,
if (ret < 0)
return ret;
- ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net,
- &timeout->data);
+ ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
}
return ret;
}
@@ -123,7 +120,8 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
goto err_proto_put;
}
- ret = ctnl_timeout_parse_policy(matching, l4proto, net,
+ ret = ctnl_timeout_parse_policy(&matching->data,
+ l4proto, net,
cda[CTA_TIMEOUT_DATA]);
return ret;
}
@@ -138,7 +136,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
goto err_proto_put;
}
- ret = ctnl_timeout_parse_policy(timeout, l4proto, net,
+ ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net,
cda[CTA_TIMEOUT_DATA]);
if (ret < 0)
goto err;
@@ -342,6 +340,147 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
+static int
+cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
+{
+ __u16 l3num;
+ __u8 l4num;
+ struct nf_conntrack_l4proto *l4proto;
+ struct net *net = sock_net(skb->sk);
+ unsigned int *timeouts;
+ int ret;
+
+ if (!cda[CTA_TIMEOUT_L3PROTO] ||
+ !cda[CTA_TIMEOUT_L4PROTO] ||
+ !cda[CTA_TIMEOUT_DATA])
+ return -EINVAL;
+
+ l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+ l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+ l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+ /* This protocol is not supported, skip. */
+ if (l4proto->l4proto != l4num) {
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
+
+ timeouts = l4proto->get_timeouts(net);
+
+ ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
+ cda[CTA_TIMEOUT_DATA]);
+ if (ret < 0)
+ goto err;
+
+ nf_ct_l4proto_put(l4proto);
+ return 0;
+err:
+ nf_ct_l4proto_put(l4proto);
+ return ret;
+}
+
+static int
+cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
+ u32 seq, u32 type, int event,
+ struct nf_conntrack_l4proto *l4proto)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
+
+ event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
+ nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
+ goto nla_put_failure;
+
+ if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
+ struct nlattr *nest_parms;
+ unsigned int *timeouts = l4proto->get_timeouts(net);
+ int ret;
+
+ nest_parms = nla_nest_start(skb,
+ CTA_TIMEOUT_DATA | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+ }
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
+{
+ __u16 l3num;
+ __u8 l4num;
+ struct nf_conntrack_l4proto *l4proto;
+ struct net *net = sock_net(skb->sk);
+ struct sk_buff *skb2;
+ int ret, err;
+
+ if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
+ return -EINVAL;
+
+ l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
+ l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
+ l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+ /* This protocol is not supported, skip. */
+ if (l4proto->l4proto != l4num) {
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
+ l4proto);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ err = -ENOMEM;
+ goto err;
+ }
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ if (ret > 0)
+ ret = 0;
+
+ /* this avoids a loop in nfnetlink. */
+ return ret == -EAGAIN ? -ENOBUFS : ret;
+err:
+ nf_ct_l4proto_put(l4proto);
+ return err;
+}
+
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
{
@@ -384,6 +523,12 @@ static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
[IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout,
.attr_count = CTA_TIMEOUT_MAX,
.policy = cttimeout_nla_policy },
+ [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy },
+ [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy },
};
static const struct nfnetlink_subsystem cttimeout_subsys = {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d92cc31..3c4b69e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -319,7 +319,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
}
static struct sk_buff *
-nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
+nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
+ unsigned int pkt_size)
{
struct sk_buff *skb;
unsigned int n;
@@ -328,13 +329,13 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC);
+ skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
- skb = nfnetlink_alloc_skb(&init_net, pkt_size,
+ skb = nfnetlink_alloc_skb(net, pkt_size,
peer_portid, GFP_ATOMIC);
if (!skb)
pr_err("nfnetlink_log: can't even alloc %u bytes\n",
@@ -702,8 +703,8 @@ nfulnl_log_packet(struct net *net,
}
if (!inst->skb) {
- inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz,
- size);
+ inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
+ inst->nlbufsiz, size);
if (!inst->skb)
goto alloc_failure;
}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index ae2e5c1..21258cf 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -298,7 +298,7 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
}
static struct sk_buff *
-nfqnl_build_packet_message(struct nfqnl_instance *queue,
+nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
__be32 **packet_id_ptr)
{
@@ -372,7 +372,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (queue->flags & NFQA_CFG_F_CONNTRACK)
ct = nfqnl_ct_get(entskb, &size, &ctinfo);
- skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid,
+ skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
GFP_ATOMIC);
if (!skb)
return NULL;
@@ -525,7 +525,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
__be32 *packet_id_ptr;
int failopen = 0;
- nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
+ nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
if (nskb == NULL) {
err = -ENOMEM;
goto err_out;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index cd24290..e762de5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -43,10 +43,42 @@ optlen(const u_int8_t *opt, unsigned int offset)
return opt[offset+1];
}
+static u_int32_t tcpmss_reverse_mtu(struct net *net,
+ const struct sk_buff *skb,
+ unsigned int family)
+{
+ struct flowi fl;
+ const struct nf_afinfo *ai;
+ struct rtable *rt = NULL;
+ u_int32_t mtu = ~0U;
+
+ if (family == PF_INET) {
+ struct flowi4 *fl4 = &fl.u.ip4;
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->daddr = ip_hdr(skb)->saddr;
+ } else {
+ struct flowi6 *fl6 = &fl.u.ip6;
+
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->daddr = ipv6_hdr(skb)->saddr;
+ }
+ rcu_read_lock();
+ ai = nf_get_afinfo(family);
+ if (ai != NULL)
+ ai->route(net, (struct dst_entry **)&rt, &fl, false);
+ rcu_read_unlock();
+
+ if (rt != NULL) {
+ mtu = dst_mtu(&rt->dst);
+ dst_release(&rt->dst);
+ }
+ return mtu;
+}
+
static int
tcpmss_mangle_packet(struct sk_buff *skb,
const struct xt_action_param *par,
- unsigned int in_mtu,
+ unsigned int family,
unsigned int tcphoff,
unsigned int minlen)
{
@@ -76,6 +108,9 @@ tcpmss_mangle_packet(struct sk_buff *skb,
return -1;
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
+ struct net *net = dev_net(par->in ? par->in : par->out);
+ unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
+
if (dst_mtu(skb_dst(skb)) <= minlen) {
net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
dst_mtu(skb_dst(skb)));
@@ -165,37 +200,6 @@ tcpmss_mangle_packet(struct sk_buff *skb,
return TCPOLEN_MSS;
}
-static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
- unsigned int family)
-{
- struct flowi fl;
- const struct nf_afinfo *ai;
- struct rtable *rt = NULL;
- u_int32_t mtu = ~0U;
-
- if (family == PF_INET) {
- struct flowi4 *fl4 = &fl.u.ip4;
- memset(fl4, 0, sizeof(*fl4));
- fl4->daddr = ip_hdr(skb)->saddr;
- } else {
- struct flowi6 *fl6 = &fl.u.ip6;
-
- memset(fl6, 0, sizeof(*fl6));
- fl6->daddr = ipv6_hdr(skb)->saddr;
- }
- rcu_read_lock();
- ai = nf_get_afinfo(family);
- if (ai != NULL)
- ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
-
- if (rt != NULL) {
- mtu = dst_mtu(&rt->dst);
- dst_release(&rt->dst);
- }
- return mtu;
-}
-
static unsigned int
tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -204,7 +208,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
int ret;
ret = tcpmss_mangle_packet(skb, par,
- tcpmss_reverse_mtu(skb, PF_INET),
+ PF_INET,
iph->ihl * 4,
sizeof(*iph) + sizeof(struct tcphdr));
if (ret < 0)
@@ -233,7 +237,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
if (tcphoff < 0)
return NF_DROP;
ret = tcpmss_mangle_packet(skb, par,
- tcpmss_reverse_mtu(skb, PF_INET6),
+ PF_INET6,
tcphoff,
sizeof(*ipv6h) + sizeof(struct tcphdr));
if (ret < 0)
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 5d8a3a3..ef8a926 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -200,7 +200,7 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
in->ifindex);
if (sk) {
int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+ int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
/* NOTE: we return listeners even if bound to
* 0.0.0.0, those are filtered out in
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 31790e7..e7c4e0e 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
struct xt_set_info_match_v0 *info = par->matchinfo;
ip_set_id_t index;
- index = ip_set_nfnl_get_byindex(info->match_set.index);
+ index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
pr_warning("Cannot find set indentified by id %u to match\n",
@@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
pr_warning("Protocol error: set match dimension "
"is over the limit!\n");
- ip_set_nfnl_put(info->match_set.index);
+ ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -106,9 +106,104 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par)
{
struct xt_set_info_match_v0 *info = par->matchinfo;
- ip_set_nfnl_put(info->match_set.index);
+ ip_set_nfnl_put(par->net, info->match_set.index);
}
+/* Revision 1 match */
+
+static bool
+set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_set_info_match_v1 *info = par->matchinfo;
+ ADT_OPT(opt, par->family, info->match_set.dim,
+ info->match_set.flags, 0, UINT_MAX);
+
+ if (opt.flags & IPSET_RETURN_NOMATCH)
+ opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
+
+ return match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
+}
+
+static int
+set_match_v1_checkentry(const struct xt_mtchk_param *par)
+{
+ struct xt_set_info_match_v1 *info = par->matchinfo;
+ ip_set_id_t index;
+
+ index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
+
+ if (index == IPSET_INVALID_ID) {
+ pr_warning("Cannot find set indentified by id %u to match\n",
+ info->match_set.index);
+ return -ENOENT;
+ }
+ if (info->match_set.dim > IPSET_DIM_MAX) {
+ pr_warning("Protocol error: set match dimension "
+ "is over the limit!\n");
+ ip_set_nfnl_put(par->net, info->match_set.index);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static void
+set_match_v1_destroy(const struct xt_mtdtor_param *par)
+{
+ struct xt_set_info_match_v1 *info = par->matchinfo;
+
+ ip_set_nfnl_put(par->net, info->match_set.index);
+}
+
+/* Revision 3 match */
+
+static bool
+match_counter(u64 counter, const struct ip_set_counter_match *info)
+{
+ switch (info->op) {
+ case IPSET_COUNTER_NONE:
+ return true;
+ case IPSET_COUNTER_EQ:
+ return counter == info->value;
+ case IPSET_COUNTER_NE:
+ return counter != info->value;
+ case IPSET_COUNTER_LT:
+ return counter < info->value;
+ case IPSET_COUNTER_GT:
+ return counter > info->value;
+ }
+ return false;
+}
+
+static bool
+set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_set_info_match_v3 *info = par->matchinfo;
+ ADT_OPT(opt, par->family, info->match_set.dim,
+ info->match_set.flags, info->flags, UINT_MAX);
+ int ret;
+
+ if (info->packets.op != IPSET_COUNTER_NONE ||
+ info->bytes.op != IPSET_COUNTER_NONE)
+ opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
+
+ ret = match_set(info->match_set.index, skb, par, &opt,
+ info->match_set.flags & IPSET_INV_MATCH);
+
+ if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
+ return ret;
+
+ if (!match_counter(opt.ext.packets, &info->packets))
+ return 0;
+ return match_counter(opt.ext.bytes, &info->bytes);
+}
+
+#define set_match_v3_checkentry set_match_v1_checkentry
+#define set_match_v3_destroy set_match_v1_destroy
+
+/* Revision 0 interface: backward compatible with netfilter/iptables */
+
static unsigned int
set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -133,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
ip_set_id_t index;
if (info->add_set.index != IPSET_INVALID_ID) {
- index = ip_set_nfnl_get_byindex(info->add_set.index);
+ index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
pr_warning("Cannot find add_set index %u as target\n",
info->add_set.index);
@@ -142,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
}
if (info->del_set.index != IPSET_INVALID_ID) {
- index = ip_set_nfnl_get_byindex(info->del_set.index);
+ index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
pr_warning("Cannot find del_set index %u as target\n",
info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->add_set.index);
+ ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
}
}
@@ -156,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
pr_warning("Protocol error: SET target dimension "
"is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->add_set.index);
+ ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->del_set.index);
+ ip_set_nfnl_put(par->net, info->del_set.index);
return -ERANGE;
}
@@ -175,57 +270,12 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par)
const struct xt_set_info_target_v0 *info = par->targinfo;
if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->add_set.index);
+ ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->del_set.index);
+ ip_set_nfnl_put(par->net, info->del_set.index);
}
-/* Revision 1 match and target */
-
-static bool
-set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
-{
- const struct xt_set_info_match_v1 *info = par->matchinfo;
- ADT_OPT(opt, par->family, info->match_set.dim,
- info->match_set.flags, 0, UINT_MAX);
-
- if (opt.flags & IPSET_RETURN_NOMATCH)
- opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
-
- return match_set(info->match_set.index, skb, par, &opt,
- info->match_set.flags & IPSET_INV_MATCH);
-}
-
-static int
-set_match_v1_checkentry(const struct xt_mtchk_param *par)
-{
- struct xt_set_info_match_v1 *info = par->matchinfo;
- ip_set_id_t index;
-
- index = ip_set_nfnl_get_byindex(info->match_set.index);
-
- if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find set indentified by id %u to match\n",
- info->match_set.index);
- return -ENOENT;
- }
- if (info->match_set.dim > IPSET_DIM_MAX) {
- pr_warning("Protocol error: set match dimension "
- "is over the limit!\n");
- ip_set_nfnl_put(info->match_set.index);
- return -ERANGE;
- }
-
- return 0;
-}
-
-static void
-set_match_v1_destroy(const struct xt_mtdtor_param *par)
-{
- struct xt_set_info_match_v1 *info = par->matchinfo;
-
- ip_set_nfnl_put(info->match_set.index);
-}
+/* Revision 1 target */
static unsigned int
set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
@@ -251,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
ip_set_id_t index;
if (info->add_set.index != IPSET_INVALID_ID) {
- index = ip_set_nfnl_get_byindex(info->add_set.index);
+ index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
pr_warning("Cannot find add_set index %u as target\n",
info->add_set.index);
@@ -260,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
}
if (info->del_set.index != IPSET_INVALID_ID) {
- index = ip_set_nfnl_get_byindex(info->del_set.index);
+ index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
pr_warning("Cannot find del_set index %u as target\n",
info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->add_set.index);
+ ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
}
}
@@ -274,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
pr_warning("Protocol error: SET target dimension "
"is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->add_set.index);
+ ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->del_set.index);
+ ip_set_nfnl_put(par->net, info->del_set.index);
return -ERANGE;
}
@@ -289,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par)
const struct xt_set_info_target_v1 *info = par->targinfo;
if (info->add_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->add_set.index);
+ ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
- ip_set_nfnl_put(info->del_set.index);
+ ip_set_nfnl_put(par->net, info->del_set.index);
}
/* Revision 2 target */
@@ -320,52 +370,6 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
#define set_target_v2_checkentry set_target_v1_checkentry
#define set_target_v2_destroy set_target_v1_destroy
-/* Revision 3 match */
-
-static bool
-match_counter(u64 counter, const struct ip_set_counter_match *info)
-{
- switch (info->op) {
- case IPSET_COUNTER_NONE:
- return true;
- case IPSET_COUNTER_EQ:
- return counter == info->value;
- case IPSET_COUNTER_NE:
- return counter != info->value;
- case IPSET_COUNTER_LT:
- return counter < info->value;
- case IPSET_COUNTER_GT:
- return counter > info->value;
- }
- return false;
-}
-
-static bool
-set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
-{
- const struct xt_set_info_match_v3 *info = par->matchinfo;
- ADT_OPT(opt, par->family, info->match_set.dim,
- info->match_set.flags, info->flags, UINT_MAX);
- int ret;
-
- if (info->packets.op != IPSET_COUNTER_NONE ||
- info->bytes.op != IPSET_COUNTER_NONE)
- opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
-
- ret = match_set(info->match_set.index, skb, par, &opt,
- info->match_set.flags & IPSET_INV_MATCH);
-
- if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
- return ret;
-
- if (!match_counter(opt.ext.packets, &info->packets))
- return 0;
- return match_counter(opt.ext.bytes, &info->bytes);
-}
-
-#define set_match_v3_checkentry set_match_v1_checkentry
-#define set_match_v3_destroy set_match_v1_destroy
-
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 06df2b9..3dd0e37 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -370,7 +370,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
sk->sk_state != TCP_TIME_WAIT &&
- ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
+ ipv6_addr_any(&sk->sk_v6_rcv_saddr));
/* Ignore non-transparent sockets,
if XT_SOCKET_TRANSPARENT is used */
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 96a458e..dce1beb 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -817,7 +817,7 @@ int netlbl_req_setattr(struct request_sock *req,
switch (req->rsk_ops->family) {
case AF_INET:
entry = netlbl_domhsh_getentry_af4(secattr->domain,
- inet_rsk(req)->rmt_addr);
+ inet_rsk(req)->ir_rmt_addr);
if (entry == NULL) {
ret_val = -ENOENT;
goto req_setattr_return;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index a481c03..56e22b7 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -173,7 +173,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->local_df = 1;
- inet_get_local_port_range(&port_min, &port_max);
+ inet_get_local_port_range(net, &port_min, &port_max);
src_port = vxlan_src_port(port_min, port_max, skb);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 189e3c5..272d8e9 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -231,14 +231,14 @@ override:
}
if (R_tab) {
police->rate_present = true;
- psched_ratecfg_precompute(&police->rate, &R_tab->rate);
+ psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0);
qdisc_put_rtab(R_tab);
} else {
police->rate_present = false;
}
if (P_tab) {
police->peak_present = true;
- psched_ratecfg_precompute(&police->peak, &P_tab->rate);
+ psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0);
qdisc_put_rtab(P_tab);
} else {
police->peak_present = false;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d76a35d..636d913 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -137,7 +137,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr **tb,
struct nlattr *est)
{
- int err = -EINVAL;
+ int err;
struct tcf_exts e;
struct tcf_ematch_tree t;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 867b4a3..16006c9 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -72,11 +72,11 @@ static void cgrp_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
struct task_struct *p;
- void *v;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+ void *v = (void *)(unsigned long)cs->classid;
cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
- v = (void *)(unsigned long)task_cls_classid(p);
iterate_fd(p->files, 0, update_classid, v);
task_unlock(p);
}
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 938b7cb..1ac41d3 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
{
struct xt_set_info *set = data;
ip_set_id_t index;
+ struct net *net = qdisc_dev(tp->q)->nd_net;
if (data_len != sizeof(*set))
return -EINVAL;
- index = ip_set_nfnl_get_byindex(set->index);
+ index = ip_set_nfnl_get_byindex(net, set->index);
if (index == IPSET_INVALID_ID)
return -ENOENT;
@@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len,
if (em->data)
return 0;
- ip_set_nfnl_put(index);
+ ip_set_nfnl_put(net, index);
return -ENOMEM;
}
@@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em)
{
const struct xt_set_info *set = (const void *) em->data;
if (set) {
- ip_set_nfnl_put(set->index);
+ ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index);
kfree((void *) em->data);
}
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 7c3de6f..e5cef956 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -793,8 +793,10 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len,
goto errout;
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
- if (meta == NULL)
+ if (meta == NULL) {
+ err = -ENOMEM;
goto errout;
+ }
memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2adda7f..cd81505 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -737,9 +737,11 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
+ int drops;
if (n == 0)
return;
+ drops = max_t(int, n, 0);
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
return;
@@ -756,6 +758,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.drops += drops;
}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 32ad015..a9dfdda 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
/* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
+static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
{
struct sk_buff *skb = flow->head;
@@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
flow->head = skb->next;
skb->next = NULL;
flow->qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ sch->q.qlen--;
}
return skb;
}
@@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
+ u32 rate;
- skb = fq_dequeue_head(&q->internal);
+ skb = fq_dequeue_head(sch, &q->internal);
if (skb)
goto out;
fq_check_throttled(q, now);
@@ -449,7 +452,7 @@ begin:
goto begin;
}
- skb = fq_dequeue_head(f);
+ skb = fq_dequeue_head(sch, f);
if (!skb) {
head->first = f->next;
/* force a pass through old_flows to prevent starvation */
@@ -466,43 +469,70 @@ begin:
f->time_next_packet = now;
f->credit -= qdisc_pkt_len(skb);
- if (f->credit <= 0 &&
- q->rate_enable &&
- skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
- u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
+ if (f->credit > 0 || !q->rate_enable)
+ goto out;
- rate = min(rate, q->flow_max_rate);
- if (rate) {
- u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
+ rate = q->flow_max_rate;
+ if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
+ rate = min(skb->sk->sk_pacing_rate, rate);
- do_div(len, rate);
- /* Since socket rate can change later,
- * clamp the delay to 125 ms.
- * TODO: maybe segment the too big skb, as in commit
- * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
- */
- if (unlikely(len > 125 * NSEC_PER_MSEC)) {
- len = 125 * NSEC_PER_MSEC;
- q->stat_pkts_too_long++;
- }
+ if (rate != ~0U) {
+ u32 plen = max(qdisc_pkt_len(skb), q->quantum);
+ u64 len = (u64)plen * NSEC_PER_SEC;
- f->time_next_packet = now + len;
+ if (likely(rate))
+ do_div(len, rate);
+ /* Since socket rate can change later,
+ * clamp the delay to 125 ms.
+ * TODO: maybe segment the too big skb, as in commit
+ * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
+ */
+ if (unlikely(len > 125 * NSEC_PER_MSEC)) {
+ len = 125 * NSEC_PER_MSEC;
+ q->stat_pkts_too_long++;
}
+
+ f->time_next_packet = now + len;
}
out:
- sch->qstats.backlog -= qdisc_pkt_len(skb);
qdisc_bstats_update(sch, skb);
- sch->q.qlen--;
qdisc_unthrottled(sch);
return skb;
}
static void fq_reset(struct Qdisc *sch)
{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ struct rb_root *root;
struct sk_buff *skb;
+ struct rb_node *p;
+ struct fq_flow *f;
+ unsigned int idx;
- while ((skb = fq_dequeue(sch)) != NULL)
+ while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
kfree_skb(skb);
+
+ if (!q->fq_root)
+ return;
+
+ for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+ root = &q->fq_root[idx];
+ while ((p = rb_first(root)) != NULL) {
+ f = container_of(p, struct fq_flow, fq_node);
+ rb_erase(p, root);
+
+ while ((skb = fq_dequeue_head(sch, f)) != NULL)
+ kfree_skb(skb);
+
+ kmem_cache_free(fq_flow_cachep, f);
+ }
+ }
+ q->new_flows.first = NULL;
+ q->old_flows.first = NULL;
+ q->delayed = RB_ROOT;
+ q->flows = 0;
+ q->inactive_flows = 0;
+ q->throttled_flows = 0;
}
static void fq_rehash(struct fq_sched_data *q,
@@ -622,7 +652,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
if (tb[TCA_FQ_INITIAL_QUANTUM])
- q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+ q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
@@ -645,6 +675,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_dequeue(sch);
+ if (!skb)
+ break;
kfree_skb(skb);
drop_count++;
}
@@ -657,21 +689,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
static void fq_destroy(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
- struct rb_root *root;
- struct rb_node *p;
- unsigned int idx;
- if (q->fq_root) {
- for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
- root = &q->fq_root[idx];
- while ((p = rb_first(root)) != NULL) {
- rb_erase(p, root);
- kmem_cache_free(fq_flow_cachep,
- container_of(p, struct fq_flow, fq_node));
- }
- }
- kfree(q->fq_root);
- }
+ fq_reset(sch);
+ kfree(q->fq_root);
qdisc_watchdog_cancel(&q->watchdog);
}
@@ -711,12 +731,14 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
+ /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore,
+ * do not bother giving its value
+ */
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
- nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a74e278..7fc899a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head)
struct net_device *dev;
bool sync_needed = false;
- list_for_each_entry(dev, head, unreg_list) {
+ list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
&noop_qdisc);
if (dev_ingress_queue(dev))
@@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head)
synchronize_net();
/* Wait for outstanding qdisc_run calls. */
- list_for_each_entry(dev, head, unreg_list)
+ list_for_each_entry(dev, head, close_list)
while (some_qdisc_is_busy(dev))
yield();
}
@@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev)
{
LIST_HEAD(single);
- list_add(&dev->unreg_list, &single);
+ list_add(&dev->close_list, &single);
dev_deactivate_many(&single);
list_del(&single);
}
@@ -910,11 +910,12 @@ void dev_shutdown(struct net_device *dev)
}
void psched_ratecfg_precompute(struct psched_ratecfg *r,
- const struct tc_ratespec *conf)
+ const struct tc_ratespec *conf,
+ u64 rate64)
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
- r->rate_bytes_ps = conf->rate;
+ r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
r->mult = 1;
/*
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 863846c..0e1e38b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -997,6 +997,8 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
[TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
+ [TCA_HTB_RATE64] = { .type = NLA_U64 },
+ [TCA_HTB_CEIL64] = { .type = NLA_U64 },
};
static void htb_work_func(struct work_struct *work)
@@ -1114,6 +1116,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.level = cl->level;
if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps))
+ goto nla_put_failure;
+ if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
+ nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
spin_unlock_bh(root_lock);
@@ -1332,6 +1340,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
+ u64 rate64, ceil64;
/* extract all subattrs from opt attr */
if (!opt)
@@ -1491,8 +1500,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->prio = TC_HTB_NUMPRIO - 1;
}
- psched_ratecfg_precompute(&cl->rate, &hopt->rate);
- psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
+ rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+
+ ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
+ psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
+ psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1aaf1b6..b057122 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -341,9 +341,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->tokens = q->buffer;
q->ptokens = q->mtu;
- psched_ratecfg_precompute(&q->rate, &rtab->rate);
+ psched_ratecfg_precompute(&q->rate, &rtab->rate, 0);
if (ptab) {
- psched_ratecfg_precompute(&q->peak, &ptab->rate);
+ psched_ratecfg_precompute(&q->peak, &ptab->rate, 0);
q->peak_present = true;
} else {
q->peak_present = false;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e7b2d4f..f6334aa 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -426,20 +426,20 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
{
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = 0;
- addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
+ addr->v6.sin6_addr = sk->sk_v6_rcv_saddr;
}
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
{
if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
- inet6_sk(sk)->rcv_saddr.s6_addr32[0] = 0;
- inet6_sk(sk)->rcv_saddr.s6_addr32[1] = 0;
- inet6_sk(sk)->rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
- inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
+ sk->sk_v6_rcv_saddr.s6_addr32[0] = 0;
+ sk->sk_v6_rcv_saddr.s6_addr32[1] = 0;
+ sk->sk_v6_rcv_saddr.s6_addr32[2] = htonl(0x0000ffff);
+ sk->sk_v6_rcv_saddr.s6_addr32[3] =
addr->v4.sin_addr.s_addr;
} else {
- inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
+ sk->sk_v6_rcv_saddr = addr->v6.sin6_addr;
}
}
@@ -447,12 +447,12 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
{
if (addr->sa.sa_family == AF_INET && sctp_sk(sk)->v4mapped) {
- inet6_sk(sk)->daddr.s6_addr32[0] = 0;
- inet6_sk(sk)->daddr.s6_addr32[1] = 0;
- inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
- inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
+ sk->sk_v6_daddr.s6_addr32[0] = 0;
+ sk->sk_v6_daddr.s6_addr32[1] = 0;
+ sk->sk_v6_daddr.s6_addr32[2] = htonl(0x0000ffff);
+ sk->sk_v6_daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
} else {
- inet6_sk(sk)->daddr = addr->v6.sin6_addr;
+ sk->sk_v6_daddr = addr->v6.sin6_addr;
}
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 911b71b..72046b9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5890,7 +5890,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
int low, high, remaining, index;
unsigned int rover;
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(sock_net(sk), &low, &high);
remaining = (high - low) + 1;
rover = net_random() % remaining + low;
diff --git a/net/socket.c b/net/socket.c
index ebed4b6..c226ace 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1964,6 +1964,16 @@ struct used_address {
unsigned int name_len;
};
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct msghdr __user *umsg)
+{
+ if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
+ return -EFAULT;
+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+ return -EINVAL;
+ return 0;
+}
+
static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address)
@@ -1982,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
- return -EFAULT;
+ } else {
+ err = copy_msghdr_from_user(msg_sys, msg);
+ if (err)
+ return err;
+ }
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -2191,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
- return -EFAULT;
+ } else {
+ err = copy_msghdr_from_user(msg_sys, msg);
+ if (err)
+ return err;
+ }
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fcac5d1..0846566 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1075,6 +1075,15 @@ gss_destroy(struct rpc_auth *auth)
kref_put(&gss_auth->kref, gss_free_callback);
}
+/*
+ * Auths may be shared between rpc clients that were cloned from a
+ * common client with the same xprt, if they also share the flavor and
+ * target_name.
+ *
+ * The auth is looked up from the oldest parent sharing the same
+ * cl_xprt, and the auth itself references only that common parent
+ * (which is guaranteed to last as long as any of its descendants).
+ */
static struct gss_auth *
gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
struct rpc_clnt *clnt,
@@ -1088,6 +1097,8 @@ gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
gss_auth,
hash,
hashval) {
+ if (gss_auth->client != clnt)
+ continue;
if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
continue;
if (gss_auth->target_name != args->target_name) {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9c9caaa..b6e59f0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -291,12 +291,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
&inet_sk(sk)->inet_rcv_saddr,
inet_sk(sk)->inet_num);
break;
+#if IS_ENABLED(CONFIG_IPV6)
case PF_INET6:
len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",
proto_name,
- &inet6_sk(sk)->rcv_saddr,
+ &sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
break;
+#endif
default:
len = snprintf(buf, remaining, "*unknown-%d*\n",
sk->sk_family);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04..e7000be 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
/* Allow network administrator to have same access as root. */
if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
- uid_eq(root_uid, current_uid())) {
+ uid_eq(root_uid, current_euid())) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
/* Allow netns root group to have the same access as the root group */
- if (gid_eq(root_gid, current_gid())) {
+ if (in_egroup_p(root_gid)) {
int mode = (table->mode >> 3) & 7;
return (mode << 3) | mode;
}
diff --git a/net/unix/diag.c b/net/unix/diag.c
index d591091..86fa0f3 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
rep->udiag_family = AF_UNIX;
rep->udiag_type = sk->sk_type;
rep->udiag_state = sk->sk_state;
+ rep->pad = 0;
rep->udiag_ino = sk_ino;
sock_diag_save_cookie(sk, rep->udiag_cookie);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6715396..fe8d4f2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -566,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy)
/* check and set up bitrates */
ieee80211_set_bitrate_flags(wiphy);
-
+ rtnl_lock();
res = device_add(&rdev->wiphy.dev);
- if (res)
- return res;
-
- res = rfkill_register(rdev->rfkill);
if (res) {
- device_del(&rdev->wiphy.dev);
+ rtnl_unlock();
return res;
}
- rtnl_lock();
/* set up regulatory info */
wiphy_regulatory_register(wiphy);
@@ -606,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy)
rdev->wiphy.registered = true;
rtnl_unlock();
+
+ res = rfkill_register(rdev->rfkill);
+ if (res) {
+ rfkill_destroy(rdev->rfkill);
+ rdev->rfkill = NULL;
+ wiphy_unregister(&rdev->wiphy);
+ return res;
+ }
+
return 0;
}
EXPORT_SYMBOL(wiphy_register);
@@ -640,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy)
rtnl_unlock();
__count == 0; }));
- rfkill_unregister(rdev->rfkill);
+ if (rdev->rfkill)
+ rfkill_unregister(rdev->rfkill);
rtnl_lock();
rdev->wiphy.registered = false;
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 39bff7d..403fe29 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
if (chan->flags & IEEE80211_CHAN_DISABLED)
continue;
wdev->wext.ibss.chandef.chan = chan;
+ wdev->wext.ibss.chandef.center_freq1 =
+ chan->center_freq;
break;
}
@@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
if (chan) {
wdev->wext.ibss.chandef.chan = chan;
wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+ wdev->wext.ibss.chandef.center_freq1 = freq;
wdev->wext.ibss.channel_fixed = true;
} else {
/* cfg80211_ibss_wext_join will pick one if needed */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2838206..cbbef88 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2421,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
change = true;
}
- if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) &&
+ if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
!(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
return -EOPNOTSUPP;
@@ -2483,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
&flags);
- if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) &&
+ if (!err && (flags & MONITOR_FLAG_ACTIVE) &&
!(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
return -EOPNOTSUPP;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b9c3f9e..d6e7f98 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -468,7 +468,7 @@ expired:
}
err = __xfrm_state_delete(x);
- if (!err && x->id.spi)
+ if (!err)
km_state_expired(x, 1, 0);
xfrm_audit_state_delete(x, err ? 0 : 1,
OpenPOWER on IntegriCloud