diff options
Diffstat (limited to 'net')
247 files changed, 7627 insertions, 3086 deletions
diff --git a/net/802/garp.c b/net/802/garp.c index 1610295..070bf44 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -553,7 +553,7 @@ static void garp_release_port(struct net_device *dev) if (rtnl_dereference(port->applicants[i])) return; } - rcu_assign_pointer(dev->garp_port, NULL); + RCU_INIT_POINTER(dev->garp_port, NULL); kfree_rcu(port, rcu); } @@ -605,7 +605,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl ASSERT_RTNL(); - rcu_assign_pointer(port->applicants[appl->type], NULL); + RCU_INIT_POINTER(port->applicants[appl->type], NULL); /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ diff --git a/net/802/stp.c b/net/802/stp.c index 978c30b..0e136ef 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -88,9 +88,9 @@ void stp_proto_unregister(const struct stp_proto *proto) { mutex_lock(&stp_proto_mutex); if (is_zero_ether_addr(proto->group_address)) - rcu_assign_pointer(stp_proto, NULL); + RCU_INIT_POINTER(stp_proto, NULL); else - rcu_assign_pointer(garp_protos[proto->group_address[5] - + RCU_INIT_POINTER(garp_protos[proto->group_address[5] - GARP_ADDR_MIN], NULL); synchronize_rcu(); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8970ba1..5471628 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -133,7 +133,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) if (grp->nr_vlans == 0) { vlan_gvrp_uninit_applicant(real_dev); - rcu_assign_pointer(real_dev->vlgrp, NULL); + RCU_INIT_POINTER(real_dev->vlgrp, NULL); /* Free the group, after all cpu's are done. */ call_rcu(&grp->rcu, vlan_rcu_free); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 5f27f8e..f1f2f7b 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) if (unlikely(!skb)) goto err_free; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); return skb; err_free: diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9d40a07..c8cf939 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -610,7 +610,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_settings(vlan->real_dev, cmd); + + return __ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, @@ -674,7 +675,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = vlan_dev_set_mac_address, .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 175b513..e317583 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int in, out, inp, outp; struct virtio_chan *chan = client->trans; - char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; size_t pdata_off = 0; struct trans_rpage_info *rpinfo = NULL; @@ -346,7 +345,8 @@ req_retry_pinned: * Arrange in such a way that server places header in the * alloced memory and payload onto the user buffer. */ - inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + inp = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, 11); /* * Running executables in the filesystem may result in * a read request with kernel buffer as opposed to user buffer. @@ -366,8 +366,8 @@ req_retry_pinned: } in += inp; } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, - req->rc->capacity); + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, + req->rc->sdata, req->rc->capacity); } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); @@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = { .close = p9_virtio_close, .request = p9_virtio_request, .cancel = p9_virtio_cancel, - .maxsize = PAGE_SIZE*VIRTQUEUE_NUM, + + /* + * We leave one entry for input and one entry for response + * headers. We also skip one more entry to accomodate, address + * that are not at page boundary, that can result in an extra + * page in zero copy. + */ + .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 2252c20..d07223c 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, if (brdev->payload == p_bridged) { skb_push(skb, 2); memset(skb->data, 0, 2); - } else { /* p_routed */ - skb_pull(skb, ETH_HLEN); } } skb_debug(skb); @@ -560,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) spin_unlock_irqrestore(&rq->lock, flags); skb_queue_walk_safe(&queue, skb, tmp) { - struct net_device *dev = skb->dev; + struct net_device *dev; + + br2684_push(atmvcc, skb); + dev = skb->dev; dev->stats.rx_bytes -= skb->len; dev->stats.rx_packets--; - - br2684_push(atmvcc, skb); } /* initialize netdev carrier state */ diff --git a/net/atm/lec.c b/net/atm/lec.c index 215c9fa..f1964ca 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -643,7 +643,7 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_start_xmit = lec_start_xmit, .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, - .ndo_set_multicast_list = lec_set_multicast_list, + .ndo_set_rx_mode = lec_set_multicast_list, }; static const unsigned char lec_ctrl_magic[] = { diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 2de93d0..ce68611 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -19,8 +19,8 @@ # obj-$(CONFIG_BATMAN_ADV) += batman-adv.o -batman-adv-y += aggregation.o batman-adv-y += bat_debugfs.o +batman-adv-y += bat_iv_ogm.o batman-adv-y += bat_sysfs.o batman-adv-y += bitarray.o batman-adv-y += gateway_client.o diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c deleted file mode 100644 index 69467fe..0000000 --- a/net/batman-adv/aggregation.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: - * - * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - * - */ - -#include "main.h" -#include "translation-table.h" -#include "aggregation.h" -#include "send.h" -#include "routing.h" -#include "hard-interface.h" - -/* return true if new_packet can be aggregated with forw_packet */ -static bool can_aggregate_with(const struct batman_packet *new_batman_packet, - struct bat_priv *bat_priv, - int packet_len, - unsigned long send_time, - bool directlink, - const struct hard_iface *if_incoming, - const struct forw_packet *forw_packet) -{ - struct batman_packet *batman_packet = - (struct batman_packet *)forw_packet->skb->data; - int aggregated_bytes = forw_packet->packet_len + packet_len; - struct hard_iface *primary_if = NULL; - bool res = false; - - /** - * we can aggregate the current packet to this aggregated packet - * if: - * - * - the send time is within our MAX_AGGREGATION_MS time - * - the resulting packet wont be bigger than - * MAX_AGGREGATION_BYTES - */ - - if (time_before(send_time, forw_packet->send_time) && - time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS), - forw_packet->send_time) && - (aggregated_bytes <= MAX_AGGREGATION_BYTES)) { - - /** - * check aggregation compatibility - * -> direct link packets are broadcasted on - * their interface only - * -> aggregate packet if the current packet is - * a "global" packet as well as the base - * packet - */ - - primary_if = primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - /* packets without direct link flag and high TTL - * are flooded through the net */ - if ((!directlink) && - (!(batman_packet->flags & DIRECTLINK)) && - (batman_packet->ttl != 1) && - - /* own packets originating non-primary - * interfaces leave only that interface */ - ((!forw_packet->own) || - (forw_packet->if_incoming == primary_if))) { - res = true; - goto out; - } - - /* if the incoming packet is sent via this one - * interface only - we still can aggregate */ - if ((directlink) && - (new_batman_packet->ttl == 1) && - (forw_packet->if_incoming == if_incoming) && - - /* packets from direct neighbors or - * own secondary interface packets - * (= secondary interface packets in general) */ - (batman_packet->flags & DIRECTLINK || - (forw_packet->own && - forw_packet->if_incoming != primary_if))) { - res = true; - goto out; - } - } - -out: - if (primary_if) - hardif_free_ref(primary_if); - return res; -} - -/* create a new aggregated packet and add this packet to it */ -static void new_aggregated_packet(const unsigned char *packet_buff, - int packet_len, unsigned long send_time, - bool direct_link, - struct hard_iface *if_incoming, - int own_packet) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct forw_packet *forw_packet_aggr; - unsigned char *skb_buff; - - if (!atomic_inc_not_zero(&if_incoming->refcount)) - return; - - /* own packet should always be scheduled */ - if (!own_packet) { - if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) { - bat_dbg(DBG_BATMAN, bat_priv, - "batman packet queue full\n"); - goto out; - } - } - - forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); - if (!forw_packet_aggr) { - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); - goto out; - } - - if ((atomic_read(&bat_priv->aggregated_ogms)) && - (packet_len < MAX_AGGREGATION_BYTES)) - forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES + - sizeof(struct ethhdr)); - else - forw_packet_aggr->skb = dev_alloc_skb(packet_len + - sizeof(struct ethhdr)); - - if (!forw_packet_aggr->skb) { - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); - kfree(forw_packet_aggr); - goto out; - } - skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr)); - - INIT_HLIST_NODE(&forw_packet_aggr->list); - - skb_buff = skb_put(forw_packet_aggr->skb, packet_len); - forw_packet_aggr->packet_len = packet_len; - memcpy(skb_buff, packet_buff, packet_len); - - forw_packet_aggr->own = own_packet; - forw_packet_aggr->if_incoming = if_incoming; - forw_packet_aggr->num_packets = 0; - forw_packet_aggr->direct_link_flags = NO_FLAGS; - forw_packet_aggr->send_time = send_time; - - /* save packet direct link flag status */ - if (direct_link) - forw_packet_aggr->direct_link_flags |= 1; - - /* add new packet to packet list */ - spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list); - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - - /* start timer for this packet */ - INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, - send_outstanding_bat_packet); - queue_delayed_work(bat_event_workqueue, - &forw_packet_aggr->delayed_work, - send_time - jiffies); - - return; -out: - hardif_free_ref(if_incoming); -} - -/* aggregate a new packet into the existing aggregation */ -static void aggregate(struct forw_packet *forw_packet_aggr, - const unsigned char *packet_buff, int packet_len, - bool direct_link) -{ - unsigned char *skb_buff; - - skb_buff = skb_put(forw_packet_aggr->skb, packet_len); - memcpy(skb_buff, packet_buff, packet_len); - forw_packet_aggr->packet_len += packet_len; - forw_packet_aggr->num_packets++; - - /* save packet direct link flag status */ - if (direct_link) - forw_packet_aggr->direct_link_flags |= - (1 << forw_packet_aggr->num_packets); -} - -void add_bat_packet_to_list(struct bat_priv *bat_priv, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, int own_packet, - unsigned long send_time) -{ - /** - * _aggr -> pointer to the packet we want to aggregate with - * _pos -> pointer to the position in the queue - */ - struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL; - struct hlist_node *tmp_node; - struct batman_packet *batman_packet = - (struct batman_packet *)packet_buff; - bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0; - - /* find position for the packet in the forward queue */ - spin_lock_bh(&bat_priv->forw_bat_list_lock); - /* own packets are not to be aggregated */ - if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { - hlist_for_each_entry(forw_packet_pos, tmp_node, - &bat_priv->forw_bat_list, list) { - if (can_aggregate_with(batman_packet, - bat_priv, - packet_len, - send_time, - direct_link, - if_incoming, - forw_packet_pos)) { - forw_packet_aggr = forw_packet_pos; - break; - } - } - } - - /* nothing to aggregate with - either aggregation disabled or no - * suitable aggregation packet found */ - if (!forw_packet_aggr) { - /* the following section can run without the lock */ - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - - /** - * if we could not aggregate this packet with one of the others - * we hold it back for a while, so that it might be aggregated - * later on - */ - if ((!own_packet) && - (atomic_read(&bat_priv->aggregated_ogms))) - send_time += msecs_to_jiffies(MAX_AGGREGATION_MS); - - new_aggregated_packet(packet_buff, packet_len, - send_time, direct_link, - if_incoming, own_packet); - } else { - aggregate(forw_packet_aggr, - packet_buff, packet_len, - direct_link); - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - } -} - -/* unpack the aggregated packets and process them one by one */ -void receive_aggr_bat_packet(const struct ethhdr *ethhdr, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming) -{ - struct batman_packet *batman_packet; - int buff_pos = 0; - unsigned char *tt_buff; - - batman_packet = (struct batman_packet *)packet_buff; - - do { - /* network to host order for our 32bit seqno and the - orig_interval */ - batman_packet->seqno = ntohl(batman_packet->seqno); - batman_packet->tt_crc = ntohs(batman_packet->tt_crc); - - tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN; - - receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming); - - buff_pos += BAT_PACKET_LEN + - tt_len(batman_packet->tt_num_changes); - - batman_packet = (struct batman_packet *) - (packet_buff + buff_pos); - } while (aggregated_packet(buff_pos, packet_len, - batman_packet->tt_num_changes)); -} diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h deleted file mode 100644 index 216337b..0000000 --- a/net/batman-adv/aggregation.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: - * - * Marek Lindner, Simon Wunderlich - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - * - */ - -#ifndef _NET_BATMAN_ADV_AGGREGATION_H_ -#define _NET_BATMAN_ADV_AGGREGATION_H_ - -#include "main.h" - -/* is there another aggregated packet here? */ -static inline int aggregated_packet(int buff_pos, int packet_len, - int tt_num_changes) -{ - int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes * - sizeof(struct tt_change)); - - return (next_buff_pos <= packet_len) && - (next_buff_pos <= MAX_AGGREGATION_BYTES); -} - -void add_bat_packet_to_list(struct bat_priv *bat_priv, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, int own_packet, - unsigned long send_time); -void receive_aggr_bat_packet(const struct ethhdr *ethhdr, - unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming); - -#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c new file mode 100644 index 0000000..3512e25 --- /dev/null +++ b/net/batman-adv/bat_iv_ogm.c @@ -0,0 +1,1170 @@ +/* + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#include "main.h" +#include "bat_ogm.h" +#include "translation-table.h" +#include "ring_buffer.h" +#include "originator.h" +#include "routing.h" +#include "gateway_common.h" +#include "gateway_client.h" +#include "hard-interface.h" +#include "send.h" + +void bat_ogm_init(struct hard_iface *hard_iface) +{ + struct batman_ogm_packet *batman_ogm_packet; + + hard_iface->packet_len = BATMAN_OGM_LEN; + hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + batman_ogm_packet->packet_type = BAT_OGM; + batman_ogm_packet->version = COMPAT_VERSION; + batman_ogm_packet->flags = NO_FLAGS; + batman_ogm_packet->ttl = 2; + batman_ogm_packet->tq = TQ_MAX_VALUE; + batman_ogm_packet->tt_num_changes = 0; + batman_ogm_packet->ttvn = 0; +} + +void bat_ogm_init_primary(struct hard_iface *hard_iface) +{ + struct batman_ogm_packet *batman_ogm_packet; + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + batman_ogm_packet->flags = PRIMARIES_FIRST_HOP; + batman_ogm_packet->ttl = TTL; +} + +void bat_ogm_update_mac(struct hard_iface *hard_iface) +{ + struct batman_ogm_packet *batman_ogm_packet; + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + memcpy(batman_ogm_packet->orig, + hard_iface->net_dev->dev_addr, ETH_ALEN); + memcpy(batman_ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr, ETH_ALEN); +} + +/* when do we schedule our own ogm to be sent */ +static unsigned long bat_ogm_emit_send_time(const struct bat_priv *bat_priv) +{ + return jiffies + msecs_to_jiffies( + atomic_read(&bat_priv->orig_interval) - + JITTER + (random32() % 2*JITTER)); +} + +/* when do we schedule a ogm packet to be sent */ +static unsigned long bat_ogm_fwd_send_time(void) +{ + return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); +} + +/* apply hop penalty for a normal link */ +static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv) +{ + int hop_penalty = atomic_read(&bat_priv->hop_penalty); + return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); +} + +/* is there another aggregated packet here? */ +static int bat_ogm_aggr_packet(int buff_pos, int packet_len, + int tt_num_changes) +{ + int next_buff_pos = buff_pos + BATMAN_OGM_LEN + tt_len(tt_num_changes); + + return (next_buff_pos <= packet_len) && + (next_buff_pos <= MAX_AGGREGATION_BYTES); +} + +/* send a batman ogm to a given interface */ +static void bat_ogm_send_to_if(struct forw_packet *forw_packet, + struct hard_iface *hard_iface) +{ + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + char *fwd_str; + uint8_t packet_num; + int16_t buff_pos; + struct batman_ogm_packet *batman_ogm_packet; + struct sk_buff *skb; + + if (hard_iface->if_status != IF_ACTIVE) + return; + + packet_num = 0; + buff_pos = 0; + batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data; + + /* adjust all flags and log packets */ + while (bat_ogm_aggr_packet(buff_pos, forw_packet->packet_len, + batman_ogm_packet->tt_num_changes)) { + + /* we might have aggregated direct link packets with an + * ordinary base packet */ + if ((forw_packet->direct_link_flags & (1 << packet_num)) && + (forw_packet->if_incoming == hard_iface)) + batman_ogm_packet->flags |= DIRECTLINK; + else + batman_ogm_packet->flags &= ~DIRECTLINK; + + fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? + "Sending own" : + "Forwarding")); + bat_dbg(DBG_BATMAN, bat_priv, + "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," + " IDF %s, ttvn %d) on interface %s [%pM]\n", + fwd_str, (packet_num > 0 ? "aggregated " : ""), + batman_ogm_packet->orig, + ntohl(batman_ogm_packet->seqno), + batman_ogm_packet->tq, batman_ogm_packet->ttl, + (batman_ogm_packet->flags & DIRECTLINK ? + "on" : "off"), + batman_ogm_packet->ttvn, hard_iface->net_dev->name, + hard_iface->net_dev->dev_addr); + + buff_pos += BATMAN_OGM_LEN + + tt_len(batman_ogm_packet->tt_num_changes); + packet_num++; + batman_ogm_packet = (struct batman_ogm_packet *) + (forw_packet->skb->data + buff_pos); + } + + /* create clone because function is called more than once */ + skb = skb_clone(forw_packet->skb, GFP_ATOMIC); + if (skb) + send_skb_packet(skb, hard_iface, broadcast_addr); +} + +/* send a batman ogm packet */ +void bat_ogm_emit(struct forw_packet *forw_packet) +{ + struct hard_iface *hard_iface; + struct net_device *soft_iface; + struct bat_priv *bat_priv; + struct hard_iface *primary_if = NULL; + struct batman_ogm_packet *batman_ogm_packet; + unsigned char directlink; + + batman_ogm_packet = (struct batman_ogm_packet *) + (forw_packet->skb->data); + directlink = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0); + + if (!forw_packet->if_incoming) { + pr_err("Error - can't forward packet: incoming iface not " + "specified\n"); + goto out; + } + + soft_iface = forw_packet->if_incoming->soft_iface; + bat_priv = netdev_priv(soft_iface); + + if (forw_packet->if_incoming->if_status != IF_ACTIVE) + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* multihomed peer assumed */ + /* non-primary OGMs are only broadcasted on their interface */ + if ((directlink && (batman_ogm_packet->ttl == 1)) || + (forw_packet->own && (forw_packet->if_incoming != primary_if))) { + + /* FIXME: what about aggregated packets ? */ + bat_dbg(DBG_BATMAN, bat_priv, + "%s packet (originator %pM, seqno %d, TTL %d) " + "on interface %s [%pM]\n", + (forw_packet->own ? "Sending own" : "Forwarding"), + batman_ogm_packet->orig, + ntohl(batman_ogm_packet->seqno), + batman_ogm_packet->ttl, + forw_packet->if_incoming->net_dev->name, + forw_packet->if_incoming->net_dev->dev_addr); + + /* skb is only used once and than forw_packet is free'd */ + send_skb_packet(forw_packet->skb, forw_packet->if_incoming, + broadcast_addr); + forw_packet->skb = NULL; + + goto out; + } + + /* broadcast on every interface */ + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) + continue; + + bat_ogm_send_to_if(forw_packet, hard_iface); + } + rcu_read_unlock(); + +out: + if (primary_if) + hardif_free_ref(primary_if); +} + +/* return true if new_packet can be aggregated with forw_packet */ +static bool bat_ogm_can_aggregate(const struct batman_ogm_packet + *new_batman_ogm_packet, + struct bat_priv *bat_priv, + int packet_len, unsigned long send_time, + bool directlink, + const struct hard_iface *if_incoming, + const struct forw_packet *forw_packet) +{ + struct batman_ogm_packet *batman_ogm_packet; + int aggregated_bytes = forw_packet->packet_len + packet_len; + struct hard_iface *primary_if = NULL; + bool res = false; + + batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data; + + /** + * we can aggregate the current packet to this aggregated packet + * if: + * + * - the send time is within our MAX_AGGREGATION_MS time + * - the resulting packet wont be bigger than + * MAX_AGGREGATION_BYTES + */ + + if (time_before(send_time, forw_packet->send_time) && + time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS), + forw_packet->send_time) && + (aggregated_bytes <= MAX_AGGREGATION_BYTES)) { + + /** + * check aggregation compatibility + * -> direct link packets are broadcasted on + * their interface only + * -> aggregate packet if the current packet is + * a "global" packet as well as the base + * packet + */ + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* packets without direct link flag and high TTL + * are flooded through the net */ + if ((!directlink) && + (!(batman_ogm_packet->flags & DIRECTLINK)) && + (batman_ogm_packet->ttl != 1) && + + /* own packets originating non-primary + * interfaces leave only that interface */ + ((!forw_packet->own) || + (forw_packet->if_incoming == primary_if))) { + res = true; + goto out; + } + + /* if the incoming packet is sent via this one + * interface only - we still can aggregate */ + if ((directlink) && + (new_batman_ogm_packet->ttl == 1) && + (forw_packet->if_incoming == if_incoming) && + + /* packets from direct neighbors or + * own secondary interface packets + * (= secondary interface packets in general) */ + (batman_ogm_packet->flags & DIRECTLINK || + (forw_packet->own && + forw_packet->if_incoming != primary_if))) { + res = true; + goto out; + } + } + +out: + if (primary_if) + hardif_free_ref(primary_if); + return res; +} + +/* create a new aggregated packet and add this packet to it */ +static void bat_ogm_aggregate_new(const unsigned char *packet_buff, + int packet_len, unsigned long send_time, + bool direct_link, + struct hard_iface *if_incoming, + int own_packet) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct forw_packet *forw_packet_aggr; + unsigned char *skb_buff; + + if (!atomic_inc_not_zero(&if_incoming->refcount)) + return; + + /* own packet should always be scheduled */ + if (!own_packet) { + if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) { + bat_dbg(DBG_BATMAN, bat_priv, + "batman packet queue full\n"); + goto out; + } + } + + forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); + if (!forw_packet_aggr) { + if (!own_packet) + atomic_inc(&bat_priv->batman_queue_left); + goto out; + } + + if ((atomic_read(&bat_priv->aggregated_ogms)) && + (packet_len < MAX_AGGREGATION_BYTES)) + forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES + + sizeof(struct ethhdr)); + else + forw_packet_aggr->skb = dev_alloc_skb(packet_len + + sizeof(struct ethhdr)); + + if (!forw_packet_aggr->skb) { + if (!own_packet) + atomic_inc(&bat_priv->batman_queue_left); + kfree(forw_packet_aggr); + goto out; + } + skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr)); + + INIT_HLIST_NODE(&forw_packet_aggr->list); + + skb_buff = skb_put(forw_packet_aggr->skb, packet_len); + forw_packet_aggr->packet_len = packet_len; + memcpy(skb_buff, packet_buff, packet_len); + + forw_packet_aggr->own = own_packet; + forw_packet_aggr->if_incoming = if_incoming; + forw_packet_aggr->num_packets = 0; + forw_packet_aggr->direct_link_flags = NO_FLAGS; + forw_packet_aggr->send_time = send_time; + + /* save packet direct link flag status */ + if (direct_link) + forw_packet_aggr->direct_link_flags |= 1; + + /* add new packet to packet list */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /* start timer for this packet */ + INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, + send_outstanding_bat_ogm_packet); + queue_delayed_work(bat_event_workqueue, + &forw_packet_aggr->delayed_work, + send_time - jiffies); + + return; +out: + hardif_free_ref(if_incoming); +} + +/* aggregate a new packet into the existing ogm packet */ +static void bat_ogm_aggregate(struct forw_packet *forw_packet_aggr, + const unsigned char *packet_buff, + int packet_len, bool direct_link) +{ + unsigned char *skb_buff; + + skb_buff = skb_put(forw_packet_aggr->skb, packet_len); + memcpy(skb_buff, packet_buff, packet_len); + forw_packet_aggr->packet_len += packet_len; + forw_packet_aggr->num_packets++; + + /* save packet direct link flag status */ + if (direct_link) + forw_packet_aggr->direct_link_flags |= + (1 << forw_packet_aggr->num_packets); +} + +static void bat_ogm_queue_add(struct bat_priv *bat_priv, + unsigned char *packet_buff, + int packet_len, struct hard_iface *if_incoming, + int own_packet, unsigned long send_time) +{ + /** + * _aggr -> pointer to the packet we want to aggregate with + * _pos -> pointer to the position in the queue + */ + struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL; + struct hlist_node *tmp_node; + struct batman_ogm_packet *batman_ogm_packet; + bool direct_link; + + batman_ogm_packet = (struct batman_ogm_packet *)packet_buff; + direct_link = batman_ogm_packet->flags & DIRECTLINK ? 1 : 0; + + /* find position for the packet in the forward queue */ + spin_lock_bh(&bat_priv->forw_bat_list_lock); + /* own packets are not to be aggregated */ + if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { + hlist_for_each_entry(forw_packet_pos, tmp_node, + &bat_priv->forw_bat_list, list) { + if (bat_ogm_can_aggregate(batman_ogm_packet, + bat_priv, packet_len, + send_time, direct_link, + if_incoming, + forw_packet_pos)) { + forw_packet_aggr = forw_packet_pos; + break; + } + } + } + + /* nothing to aggregate with - either aggregation disabled or no + * suitable aggregation packet found */ + if (!forw_packet_aggr) { + /* the following section can run without the lock */ + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + /** + * if we could not aggregate this packet with one of the others + * we hold it back for a while, so that it might be aggregated + * later on + */ + if ((!own_packet) && + (atomic_read(&bat_priv->aggregated_ogms))) + send_time += msecs_to_jiffies(MAX_AGGREGATION_MS); + + bat_ogm_aggregate_new(packet_buff, packet_len, + send_time, direct_link, + if_incoming, own_packet); + } else { + bat_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, + direct_link); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + } +} + +static void bat_ogm_forward(struct orig_node *orig_node, + const struct ethhdr *ethhdr, + struct batman_ogm_packet *batman_ogm_packet, + int directlink, struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct neigh_node *router; + uint8_t in_tq, in_ttl, tq_avg = 0; + uint8_t tt_num_changes; + + if (batman_ogm_packet->ttl <= 1) { + bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); + return; + } + + router = orig_node_get_router(orig_node); + + in_tq = batman_ogm_packet->tq; + in_ttl = batman_ogm_packet->ttl; + tt_num_changes = batman_ogm_packet->tt_num_changes; + + batman_ogm_packet->ttl--; + memcpy(batman_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); + + /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast + * of our best tq value */ + if (router && router->tq_avg != 0) { + + /* rebroadcast ogm of best ranking neighbor as is */ + if (!compare_eth(router->addr, ethhdr->h_source)) { + batman_ogm_packet->tq = router->tq_avg; + + if (router->last_ttl) + batman_ogm_packet->ttl = router->last_ttl - 1; + } + + tq_avg = router->tq_avg; + } + + if (router) + neigh_node_free_ref(router); + + /* apply hop penalty */ + batman_ogm_packet->tq = hop_penalty(batman_ogm_packet->tq, bat_priv); + + bat_dbg(DBG_BATMAN, bat_priv, + "Forwarding packet: tq_orig: %i, tq_avg: %i, " + "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n", + in_tq, tq_avg, batman_ogm_packet->tq, in_ttl - 1, + batman_ogm_packet->ttl); + + batman_ogm_packet->seqno = htonl(batman_ogm_packet->seqno); + batman_ogm_packet->tt_crc = htons(batman_ogm_packet->tt_crc); + + /* switch of primaries first hop flag when forwarding */ + batman_ogm_packet->flags &= ~PRIMARIES_FIRST_HOP; + if (directlink) + batman_ogm_packet->flags |= DIRECTLINK; + else + batman_ogm_packet->flags &= ~DIRECTLINK; + + bat_ogm_queue_add(bat_priv, (unsigned char *)batman_ogm_packet, + BATMAN_OGM_LEN + tt_len(tt_num_changes), + if_incoming, 0, bat_ogm_fwd_send_time()); +} + +void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes) +{ + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batman_ogm_packet *batman_ogm_packet; + struct hard_iface *primary_if; + int vis_server; + + vis_server = atomic_read(&bat_priv->vis_mode); + primary_if = primary_if_get_selected(bat_priv); + + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; + + /* change sequence number to network order */ + batman_ogm_packet->seqno = + htonl((uint32_t)atomic_read(&hard_iface->seqno)); + + batman_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn); + batman_ogm_packet->tt_crc = htons((uint16_t) + atomic_read(&bat_priv->tt_crc)); + if (tt_num_changes >= 0) + batman_ogm_packet->tt_num_changes = tt_num_changes; + + if (vis_server == VIS_TYPE_SERVER_SYNC) + batman_ogm_packet->flags |= VIS_SERVER; + else + batman_ogm_packet->flags &= ~VIS_SERVER; + + if ((hard_iface == primary_if) && + (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) + batman_ogm_packet->gw_flags = + (uint8_t)atomic_read(&bat_priv->gw_bandwidth); + else + batman_ogm_packet->gw_flags = NO_FLAGS; + + atomic_inc(&hard_iface->seqno); + + slide_own_bcast_window(hard_iface); + bat_ogm_queue_add(bat_priv, hard_iface->packet_buff, + hard_iface->packet_len, hard_iface, 1, + bat_ogm_emit_send_time(bat_priv)); + + if (primary_if) + hardif_free_ref(primary_if); +} + +static void bat_ogm_orig_update(struct bat_priv *bat_priv, + struct orig_node *orig_node, + const struct ethhdr *ethhdr, + const struct batman_ogm_packet + *batman_ogm_packet, + struct hard_iface *if_incoming, + const unsigned char *tt_buff, int is_duplicate) +{ + struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct neigh_node *router = NULL; + struct orig_node *orig_node_tmp; + struct hlist_node *node; + uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + + bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " + "Searching and updating originator entry of received packet\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming) && + atomic_inc_not_zero(&tmp_neigh_node->refcount)) { + if (neigh_node) + neigh_node_free_ref(neigh_node); + neigh_node = tmp_neigh_node; + continue; + } + + if (is_duplicate) + continue; + + spin_lock_bh(&tmp_neigh_node->tq_lock); + ring_buffer_set(tmp_neigh_node->tq_recv, + &tmp_neigh_node->tq_index, 0); + tmp_neigh_node->tq_avg = + ring_buffer_avg(tmp_neigh_node->tq_recv); + spin_unlock_bh(&tmp_neigh_node->tq_lock); + } + + if (!neigh_node) { + struct orig_node *orig_tmp; + + orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); + if (!orig_tmp) + goto unlock; + + neigh_node = create_neighbor(orig_node, orig_tmp, + ethhdr->h_source, if_incoming); + + orig_node_free_ref(orig_tmp); + if (!neigh_node) + goto unlock; + } else + bat_dbg(DBG_BATMAN, bat_priv, + "Updating existing last-hop neighbor of originator\n"); + + rcu_read_unlock(); + + orig_node->flags = batman_ogm_packet->flags; + neigh_node->last_valid = jiffies; + + spin_lock_bh(&neigh_node->tq_lock); + ring_buffer_set(neigh_node->tq_recv, + &neigh_node->tq_index, + batman_ogm_packet->tq); + neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv); + spin_unlock_bh(&neigh_node->tq_lock); + + if (!is_duplicate) { + orig_node->last_ttl = batman_ogm_packet->ttl; + neigh_node->last_ttl = batman_ogm_packet->ttl; + } + + bonding_candidate_add(orig_node, neigh_node); + + /* if this neighbor already is our next hop there is nothing + * to change */ + router = orig_node_get_router(orig_node); + if (router == neigh_node) + goto update_tt; + + /* if this neighbor does not offer a better TQ we won't consider it */ + if (router && (router->tq_avg > neigh_node->tq_avg)) + goto update_tt; + + /* if the TQ is the same and the link not more symmetric we + * won't consider it either */ + if (router && (neigh_node->tq_avg == router->tq_avg)) { + orig_node_tmp = router->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_orig = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + orig_node_tmp = neigh_node->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_neigh = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + if (bcast_own_sum_orig >= bcast_own_sum_neigh) + goto update_tt; + } + + update_route(bat_priv, orig_node, neigh_node); + +update_tt: + /* I have to check for transtable changes only if the OGM has been + * sent through a primary interface */ + if (((batman_ogm_packet->orig != ethhdr->h_source) && + (batman_ogm_packet->ttl > 2)) || + (batman_ogm_packet->flags & PRIMARIES_FIRST_HOP)) + tt_update_orig(bat_priv, orig_node, tt_buff, + batman_ogm_packet->tt_num_changes, + batman_ogm_packet->ttvn, + batman_ogm_packet->tt_crc); + + if (orig_node->gw_flags != batman_ogm_packet->gw_flags) + gw_node_update(bat_priv, orig_node, + batman_ogm_packet->gw_flags); + + orig_node->gw_flags = batman_ogm_packet->gw_flags; + + /* restart gateway selection if fast or late switching was enabled */ + if ((orig_node->gw_flags) && + (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && + (atomic_read(&bat_priv->gw_sel_class) > 2)) + gw_check_election(bat_priv, orig_node); + + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (router) + neigh_node_free_ref(router); +} + +static int bat_ogm_calc_tq(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + struct batman_ogm_packet *batman_ogm_packet, + struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct neigh_node *neigh_node = NULL, *tmp_neigh_node; + struct hlist_node *node; + uint8_t total_count; + uint8_t orig_eq_count, neigh_rq_count, tq_own; + int tq_asym_penalty, ret = 0; + + /* find corresponding one hop neighbor */ + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_neigh_node->neigh_list, list) { + + if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) + continue; + + if (tmp_neigh_node->if_incoming != if_incoming) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + neigh_node = tmp_neigh_node; + break; + } + rcu_read_unlock(); + + if (!neigh_node) + neigh_node = create_neighbor(orig_neigh_node, + orig_neigh_node, + orig_neigh_node->orig, + if_incoming); + + if (!neigh_node) + goto out; + + /* if orig_node is direct neighbor update neigh_node last_valid */ + if (orig_node == orig_neigh_node) + neigh_node->last_valid = jiffies; + + orig_node->last_valid = jiffies; + + /* find packet count of corresponding one hop neighbor */ + spin_lock_bh(&orig_node->ogm_cnt_lock); + orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; + neigh_rq_count = neigh_node->real_packet_count; + spin_unlock_bh(&orig_node->ogm_cnt_lock); + + /* pay attention to not get a value bigger than 100 % */ + total_count = (orig_eq_count > neigh_rq_count ? + neigh_rq_count : orig_eq_count); + + /* if we have too few packets (too less data) we set tq_own to zero */ + /* if we receive too few packets it is not considered bidirectional */ + if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || + (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) + tq_own = 0; + else + /* neigh_node->real_packet_count is never zero as we + * only purge old information when getting new + * information */ + tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count; + + /* + * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does + * affect the nearly-symmetric links only a little, but + * punishes asymmetric links more. This will give a value + * between 0 and TQ_MAX_VALUE + */ + tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) / + (TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE); + + batman_ogm_packet->tq = ((batman_ogm_packet->tq * tq_own + * tq_asym_penalty) / + (TQ_MAX_VALUE * TQ_MAX_VALUE)); + + bat_dbg(DBG_BATMAN, bat_priv, + "bidirectional: " + "orig = %-15pM neigh = %-15pM => own_bcast = %2i, " + "real recv = %2i, local tq: %3i, asym_penalty: %3i, " + "total tq: %3i\n", + orig_node->orig, orig_neigh_node->orig, total_count, + neigh_rq_count, tq_own, tq_asym_penalty, batman_ogm_packet->tq); + + /* if link has the minimum required transmission quality + * consider it bidirectional */ + if (batman_ogm_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) + ret = 1; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + return ret; +} + +/* processes a batman packet for all interfaces, adjusts the sequence number and + * finds out whether it is a duplicate. + * returns: + * 1 the packet is a duplicate + * 0 the packet has not yet been received + * -1 the packet is old and has been received while the seqno window + * was protected. Caller should drop it. + */ +static int bat_ogm_update_seqnos(const struct ethhdr *ethhdr, + const struct batman_ogm_packet + *batman_ogm_packet, + const struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct orig_node *orig_node; + struct neigh_node *tmp_neigh_node; + struct hlist_node *node; + int is_duplicate = 0; + int32_t seq_diff; + int need_update = 0; + int set_mark, ret = -1; + + orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig); + if (!orig_node) + return 0; + + spin_lock_bh(&orig_node->ogm_cnt_lock); + seq_diff = batman_ogm_packet->seqno - orig_node->last_real_seqno; + + /* signalize caller that the packet is to be dropped. */ + if (window_protected(bat_priv, seq_diff, + &orig_node->batman_seqno_reset)) + goto out; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + + is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, + orig_node->last_real_seqno, + batman_ogm_packet->seqno); + + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && + (tmp_neigh_node->if_incoming == if_incoming)) + set_mark = 1; + else + set_mark = 0; + + /* if the window moved, set the update flag. */ + need_update |= bit_get_packet(bat_priv, + tmp_neigh_node->real_bits, + seq_diff, set_mark); + + tmp_neigh_node->real_packet_count = + bit_packet_count(tmp_neigh_node->real_bits); + } + rcu_read_unlock(); + + if (need_update) { + bat_dbg(DBG_BATMAN, bat_priv, + "updating last_seqno: old %d, new %d\n", + orig_node->last_real_seqno, batman_ogm_packet->seqno); + orig_node->last_real_seqno = batman_ogm_packet->seqno; + } + + ret = is_duplicate; + +out: + spin_unlock_bh(&orig_node->ogm_cnt_lock); + orig_node_free_ref(orig_node); + return ret; +} + +static void bat_ogm_process(const struct ethhdr *ethhdr, + struct batman_ogm_packet *batman_ogm_packet, + const unsigned char *tt_buff, + struct hard_iface *if_incoming) +{ + struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct hard_iface *hard_iface; + struct orig_node *orig_neigh_node, *orig_node; + struct neigh_node *router = NULL, *router_router = NULL; + struct neigh_node *orig_neigh_router = NULL; + int has_directlink_flag; + int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; + int is_broadcast = 0, is_bidirectional, is_single_hop_neigh; + int is_duplicate; + uint32_t if_incoming_seqno; + + /* Silently drop when the batman packet is actually not a + * correct packet. + * + * This might happen if a packet is padded (e.g. Ethernet has a + * minimum frame length of 64 byte) and the aggregation interprets + * it as an additional length. + * + * TODO: A more sane solution would be to have a bit in the + * batman_ogm_packet to detect whether the packet is the last + * packet in an aggregation. Here we expect that the padding + * is always zero (or not 0x01) + */ + if (batman_ogm_packet->packet_type != BAT_OGM) + return; + + /* could be changed by schedule_own_packet() */ + if_incoming_seqno = atomic_read(&if_incoming->seqno); + + has_directlink_flag = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0); + + is_single_hop_neigh = (compare_eth(ethhdr->h_source, + batman_ogm_packet->orig) ? 1 : 0); + + bat_dbg(DBG_BATMAN, bat_priv, + "Received BATMAN packet via NB: %pM, IF: %s [%pM] " + "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, " + "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", + ethhdr->h_source, if_incoming->net_dev->name, + if_incoming->net_dev->dev_addr, batman_ogm_packet->orig, + batman_ogm_packet->prev_sender, batman_ogm_packet->seqno, + batman_ogm_packet->ttvn, batman_ogm_packet->tt_crc, + batman_ogm_packet->tt_num_changes, batman_ogm_packet->tq, + batman_ogm_packet->ttl, batman_ogm_packet->version, + has_directlink_flag); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->if_status != IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != if_incoming->soft_iface) + continue; + + if (compare_eth(ethhdr->h_source, + hard_iface->net_dev->dev_addr)) + is_my_addr = 1; + + if (compare_eth(batman_ogm_packet->orig, + hard_iface->net_dev->dev_addr)) + is_my_orig = 1; + + if (compare_eth(batman_ogm_packet->prev_sender, + hard_iface->net_dev->dev_addr)) + is_my_oldorig = 1; + + if (is_broadcast_ether_addr(ethhdr->h_source)) + is_broadcast = 1; + } + rcu_read_unlock(); + + if (batman_ogm_packet->version != COMPAT_VERSION) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: incompatible batman version (%i)\n", + batman_ogm_packet->version); + return; + } + + if (is_my_addr) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: received my own broadcast (sender: %pM" + ")\n", + ethhdr->h_source); + return; + } + + if (is_broadcast) { + bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " + "ignoring all packets with broadcast source addr (sender: %pM" + ")\n", ethhdr->h_source); + return; + } + + if (is_my_orig) { + unsigned long *word; + int offset; + + orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); + if (!orig_neigh_node) + return; + + /* neighbor has to indicate direct link and it has to + * come via the corresponding interface */ + /* save packet seqno for bidirectional check */ + if (has_directlink_flag && + compare_eth(if_incoming->net_dev->dev_addr, + batman_ogm_packet->orig)) { + offset = if_incoming->if_num * NUM_WORDS; + + spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); + word = &(orig_neigh_node->bcast_own[offset]); + bit_mark(word, + if_incoming_seqno - + batman_ogm_packet->seqno - 2); + orig_neigh_node->bcast_own_sum[if_incoming->if_num] = + bit_packet_count(word); + spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); + } + + bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " + "originator packet from myself (via neighbor)\n"); + orig_node_free_ref(orig_neigh_node); + return; + } + + if (is_my_oldorig) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast echos (sender: " + "%pM)\n", ethhdr->h_source); + return; + } + + orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig); + if (!orig_node) + return; + + is_duplicate = bat_ogm_update_seqnos(ethhdr, batman_ogm_packet, + if_incoming); + + if (is_duplicate == -1) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: packet within seqno protection time " + "(sender: %pM)\n", ethhdr->h_source); + goto out; + } + + if (batman_ogm_packet->tq == 0) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: originator packet with tq equal 0\n"); + goto out; + } + + router = orig_node_get_router(orig_node); + if (router) + router_router = orig_node_get_router(router->orig_node); + + /* avoid temporary routing loops */ + if (router && router_router && + (compare_eth(router->addr, batman_ogm_packet->prev_sender)) && + !(compare_eth(batman_ogm_packet->orig, + batman_ogm_packet->prev_sender)) && + (compare_eth(router->addr, router_router->addr))) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: ignoring all rebroadcast packets that " + "may make me loop (sender: %pM)\n", ethhdr->h_source); + goto out; + } + + /* if sender is a direct neighbor the sender mac equals + * originator mac */ + orig_neigh_node = (is_single_hop_neigh ? + orig_node : + get_orig_node(bat_priv, ethhdr->h_source)); + if (!orig_neigh_node) + goto out; + + orig_neigh_router = orig_node_get_router(orig_neigh_node); + + /* drop packet if sender is not a direct neighbor and if we + * don't route towards it */ + if (!is_single_hop_neigh && (!orig_neigh_router)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: OGM via unknown neighbor!\n"); + goto out_neigh; + } + + is_bidirectional = bat_ogm_calc_tq(orig_node, orig_neigh_node, + batman_ogm_packet, if_incoming); + + bonding_save_primary(orig_node, orig_neigh_node, batman_ogm_packet); + + /* update ranking if it is not a duplicate or has the same + * seqno and similar ttl as the non-duplicate */ + if (is_bidirectional && + (!is_duplicate || + ((orig_node->last_real_seqno == batman_ogm_packet->seqno) && + (orig_node->last_ttl - 3 <= batman_ogm_packet->ttl)))) + bat_ogm_orig_update(bat_priv, orig_node, ethhdr, + batman_ogm_packet, if_incoming, + tt_buff, is_duplicate); + + /* is single hop (direct) neighbor */ + if (is_single_hop_neigh) { + + /* mark direct link on incoming interface */ + bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet, + 1, if_incoming); + + bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " + "rebroadcast neighbor packet with direct link flag\n"); + goto out_neigh; + } + + /* multihop originator */ + if (!is_bidirectional) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: not received via bidirectional link\n"); + goto out_neigh; + } + + if (is_duplicate) { + bat_dbg(DBG_BATMAN, bat_priv, + "Drop packet: duplicate packet received\n"); + goto out_neigh; + } + + bat_dbg(DBG_BATMAN, bat_priv, + "Forwarding packet: rebroadcast originator packet\n"); + bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet, 0, if_incoming); + +out_neigh: + if ((orig_neigh_node) && (!is_single_hop_neigh)) + orig_node_free_ref(orig_neigh_node); +out: + if (router) + neigh_node_free_ref(router); + if (router_router) + neigh_node_free_ref(router_router); + if (orig_neigh_router) + neigh_node_free_ref(orig_neigh_router); + + orig_node_free_ref(orig_node); +} + +void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff, + int packet_len, struct hard_iface *if_incoming) +{ + struct batman_ogm_packet *batman_ogm_packet; + int buff_pos = 0; + unsigned char *tt_buff; + + batman_ogm_packet = (struct batman_ogm_packet *)packet_buff; + + /* unpack the aggregated packets and process them one by one */ + do { + /* network to host order for our 32bit seqno and the + orig_interval */ + batman_ogm_packet->seqno = ntohl(batman_ogm_packet->seqno); + batman_ogm_packet->tt_crc = ntohs(batman_ogm_packet->tt_crc); + + tt_buff = packet_buff + buff_pos + BATMAN_OGM_LEN; + + bat_ogm_process(ethhdr, batman_ogm_packet, + tt_buff, if_incoming); + + buff_pos += BATMAN_OGM_LEN + + tt_len(batman_ogm_packet->tt_num_changes); + + batman_ogm_packet = (struct batman_ogm_packet *) + (packet_buff + buff_pos); + } while (bat_ogm_aggr_packet(buff_pos, packet_len, + batman_ogm_packet->tt_num_changes)); +} diff --git a/net/batman-adv/bat_ogm.h b/net/batman-adv/bat_ogm.h new file mode 100644 index 0000000..69329c1 --- /dev/null +++ b/net/batman-adv/bat_ogm.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + * + */ + +#ifndef _NET_BATMAN_ADV_OGM_H_ +#define _NET_BATMAN_ADV_OGM_H_ + +#include "main.h" + +void bat_ogm_init(struct hard_iface *hard_iface); +void bat_ogm_init_primary(struct hard_iface *hard_iface); +void bat_ogm_update_mac(struct hard_iface *hard_iface); +void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes); +void bat_ogm_emit(struct forw_packet *forw_packet); +void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff, + int packet_len, struct hard_iface *if_incoming); + +#endif /* _NET_BATMAN_ADV_OGM_H_ */ diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index cd15deb..b8a7414 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -380,6 +380,7 @@ static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr, BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu); +BAT_ATTR_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode); static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode); BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL); @@ -396,6 +397,7 @@ static struct bat_attribute *mesh_attrs[] = { &bat_attr_aggregated_ogms, &bat_attr_bonding, &bat_attr_fragmentation, + &bat_attr_ap_isolation, &bat_attr_vis_mode, &bat_attr_gw_mode, &bat_attr_orig_interval, diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index c1f4bfc..0be9ff3 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -97,12 +97,12 @@ static void bit_shift(unsigned long *seq_bits, int32_t n) (seq_bits[i - word_num - 1] >> (WORD_BIT_SIZE-word_offset)); /* and the upper part of the right half and shift it left to - * it's position */ + * its position */ /* for our example that would be: word[0] = 9800 + 0076 = * 9876 */ } - /* now for our last word, i==word_num, we only have the it's "left" - * half. that's the 1000 word in our example.*/ + /* now for our last word, i==word_num, we only have its "left" half. + * that's the 1000 word in our example.*/ seq_bits[i] = (seq_bits[i - word_num] << word_offset); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 056180e..619fb73 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -532,14 +532,14 @@ static bool is_type_dhcprequest(struct sk_buff *skb, int header_len) pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1; /* Access the dhcp option lists. Each entry is made up by: - * - octect 1: option type - * - octect 2: option data len (only if type != 255 and 0) - * - octect 3: option data */ + * - octet 1: option type + * - octet 2: option data len (only if type != 255 and 0) + * - octet 3: option data */ while (*p != 255 && !ret) { - /* p now points to the first octect: option type */ + /* p now points to the first octet: option type */ if (*p == 53) { /* type 53 is the message type option. - * Jump the len octect and go to the data octect */ + * Jump the len octet and go to the data octet */ if (pkt_len < 2) goto out; p += 2; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index db7aacf..7704df4 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include "bat_sysfs.h" #include "originator.h" #include "hash.h" +#include "bat_ogm.h" #include <linux/if_arp.h> @@ -131,7 +132,6 @@ static void primary_if_select(struct bat_priv *bat_priv, struct hard_iface *new_hard_iface) { struct hard_iface *curr_hard_iface; - struct batman_packet *batman_packet; ASSERT_RTNL(); @@ -147,10 +147,7 @@ static void primary_if_select(struct bat_priv *bat_priv, if (!new_hard_iface) return; - batman_packet = (struct batman_packet *)(new_hard_iface->packet_buff); - batman_packet->flags = PRIMARIES_FIRST_HOP; - batman_packet->ttl = TTL; - + bat_ogm_init_primary(new_hard_iface); primary_if_update_addr(bat_priv); } @@ -162,14 +159,6 @@ static bool hardif_is_iface_up(const struct hard_iface *hard_iface) return false; } -static void update_mac_addresses(struct hard_iface *hard_iface) -{ - memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig, - hard_iface->net_dev->dev_addr, ETH_ALEN); - memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender, - hard_iface->net_dev->dev_addr, ETH_ALEN); -} - static void check_known_mac_addr(const struct net_device *net_dev) { const struct hard_iface *hard_iface; @@ -244,12 +233,12 @@ static void hardif_activate_interface(struct hard_iface *hard_iface) bat_priv = netdev_priv(hard_iface->soft_iface); - update_mac_addresses(hard_iface); + bat_ogm_update_mac(hard_iface); hard_iface->if_status = IF_TO_BE_ACTIVATED; /** * the first active interface becomes our primary interface or - * the next active interface after the old primay interface was removed + * the next active interface after the old primary interface was removed */ primary_if = primary_if_get_selected(bat_priv); if (!primary_if) @@ -283,7 +272,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface, const char *iface_name) { struct bat_priv *bat_priv; - struct batman_packet *batman_packet; struct net_device *soft_iface; int ret; @@ -318,8 +306,8 @@ int hardif_enable_interface(struct hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - hard_iface->packet_len = BAT_PACKET_LEN; - hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); + + bat_ogm_init(hard_iface); if (!hard_iface->packet_buff) { bat_err(hard_iface->soft_iface, "Can't add interface packet " @@ -328,15 +316,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface, goto err; } - batman_packet = (struct batman_packet *)(hard_iface->packet_buff); - batman_packet->packet_type = BAT_PACKET; - batman_packet->version = COMPAT_VERSION; - batman_packet->flags = NO_FLAGS; - batman_packet->ttl = 2; - batman_packet->tq = TQ_MAX_VALUE; - batman_packet->tt_num_changes = 0; - batman_packet->ttvn = 0; - hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; hard_iface->if_status = IF_INACTIVE; @@ -381,7 +360,7 @@ int hardif_enable_interface(struct hard_iface *hard_iface, hard_iface->net_dev->name); /* begin scheduling originator messages on that interface */ - schedule_own_packet(hard_iface); + schedule_bat_ogm(hard_iface); out: return 0; @@ -455,11 +434,8 @@ static struct hard_iface *hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); - if (!hard_iface) { - pr_err("Can't add interface (%s): out of memory\n", - net_dev->name); + if (!hard_iface) goto release_dev; - } ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev); if (ret) @@ -551,7 +527,7 @@ static int hard_if_event(struct notifier_block *this, goto hardif_put; check_known_mac_addr(hard_iface->net_dev); - update_mac_addresses(hard_iface); + bat_ogm_update_mac(hard_iface); bat_priv = netdev_priv(hard_iface->soft_iface); primary_if = primary_if_get_selected(bat_priv); @@ -573,14 +549,14 @@ out: return NOTIFY_DONE; } -/* receive a packet with the batman ethertype coming on a hard +/* incoming packets with the batman ethertype received on any active hard * interface */ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct bat_priv *bat_priv; - struct batman_packet *batman_packet; + struct batman_ogm_packet *batman_ogm_packet; struct hard_iface *hard_iface; int ret; @@ -612,22 +588,22 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, if (hard_iface->if_status != IF_ACTIVE) goto err_free; - batman_packet = (struct batman_packet *)skb->data; + batman_ogm_packet = (struct batman_ogm_packet *)skb->data; - if (batman_packet->version != COMPAT_VERSION) { + if (batman_ogm_packet->version != COMPAT_VERSION) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", - batman_packet->version); + batman_ogm_packet->version); goto err_free; } /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ - switch (batman_packet->packet_type) { + switch (batman_ogm_packet->packet_type) { /* batman originator packet */ - case BAT_PACKET: - ret = recv_bat_packet(skb, hard_iface); + case BAT_OGM: + ret = recv_bat_ogm_packet(skb, hard_iface); break; /* batman icmp packet */ @@ -681,6 +657,36 @@ err_out: return NET_RX_DROP; } +/* This function returns true if the interface represented by ifindex is a + * 802.11 wireless device */ +bool is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL */ + if (net_device->wireless_handlers) + ret = true; + else +#endif + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + ret = true; +out: + if (net_device) + dev_put(net_device); + return ret; +} + struct notifier_block hard_if_notifier = { .notifier_call = hard_if_event, }; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 442eacb..67f78d1 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -42,6 +42,7 @@ void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); void hardif_free_rcu(struct rcu_head *rcu); +bool is_wifi_iface(int ifindex); static inline void hardif_free_ref(struct hard_iface *hard_iface) { diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index dd5c9fd..d20aa71 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -76,19 +76,30 @@ static inline void hash_delete(struct hashtable_t *hash, hash_destroy(hash); } -/* adds data to the hashtable. returns 0 on success, -1 on error */ +/** + * hash_add - adds data to the hashtable + * @hash: storage hash table + * @compare: callback to determine if 2 hash elements are identical + * @choose: callback calculating the hash index + * @data: data passed to the aforementioned callbacks as argument + * @data_node: to be added element + * + * Returns 0 on success, 1 if the element already is in the hash + * and -1 on error. + */ + static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, const void *data, struct hlist_node *data_node) { - int index; + int index, ret = -1; struct hlist_head *head; struct hlist_node *node; spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) - goto err; + goto out; index = choose(data, hash->size); head = &hash->table[index]; @@ -99,6 +110,7 @@ static inline int hash_add(struct hashtable_t *hash, if (!compare(node, data)) continue; + ret = 1; goto err_unlock; } rcu_read_unlock(); @@ -108,12 +120,13 @@ static inline int hash_add(struct hashtable_t *hash, hlist_add_head_rcu(data_node, head); spin_unlock_bh(list_lock); - return 0; + ret = 0; + goto out; err_unlock: rcu_read_unlock(); -err: - return -1; +out: + return ret; } /* removes data from hash, if found. returns pointer do data on success, so you diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b0f9068..fb87bdc 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -107,7 +107,7 @@ int mesh_init(struct net_device *soft_iface) if (tt_init(bat_priv) < 1) goto err; - tt_local_add(soft_iface, soft_iface->dev_addr); + tt_local_add(soft_iface, soft_iface->dev_addr, NULL_IFINDEX); if (vis_init(bat_priv) < 1) goto err; @@ -117,8 +117,6 @@ int mesh_init(struct net_device *soft_iface) goto end; err: - pr_err("Unable to allocate memory for mesh information structures: " - "out of mem ?\n"); mesh_free(soft_iface); return -1; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index a6df61a..964ad4d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -28,7 +28,7 @@ #define DRIVER_DEVICE "batman-adv" #ifndef SOURCE_VERSION -#define SOURCE_VERSION "2011.3.0" +#define SOURCE_VERSION "2011.4.0" #endif /* B.A.T.M.A.N. parameters */ @@ -44,7 +44,7 @@ #define PURGE_TIMEOUT 200 #define TT_LOCAL_TIMEOUT 3600 /* in seconds */ #define TT_CLIENT_ROAM_TIMEOUT 600 -/* sliding packet range of received originator messages in squence numbers +/* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ #define TQ_LOCAL_WINDOW_SIZE 64 #define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */ @@ -62,6 +62,8 @@ #define NO_FLAGS 0 +#define NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */ + #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -133,7 +135,7 @@ enum dbg_level { #include <linux/mutex.h> /* mutex */ #include <linux/module.h> /* needed by all modules */ #include <linux/netdevice.h> /* netdevice */ -#include <linux/etherdevice.h> /* ethernet address classifaction */ +#include <linux/etherdevice.h> /* ethernet address classification */ #include <linux/if_ether.h> /* ethernet header */ #include <linux/poll.h> /* poll_table */ #include <linux/kthread.h> /* kernel threads */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index f3c3f62..0e5b772 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -252,7 +252,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr) hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, orig_node, &orig_node->hash_entry); - if (hash_added < 0) + if (hash_added != 0) goto free_bcast_own_sum; return orig_node; @@ -336,8 +336,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv, } else { if (purge_orig_neighbors(bat_priv, orig_node, &best_neigh_node)) { - update_routes(bat_priv, orig_node, - best_neigh_node); + update_route(bat_priv, orig_node, best_neigh_node); } } @@ -493,10 +492,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS, GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } memcpy(data_ptr, orig_node->bcast_own, (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS); @@ -504,10 +501,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) orig_node->bcast_own = data_ptr; data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } memcpy(data_ptr, orig_node->bcast_own_sum, (max_if_num - 1) * sizeof(uint8_t)); @@ -562,10 +557,8 @@ static int orig_node_del_if(struct orig_node *orig_node, chunk_size = sizeof(unsigned long) * NUM_WORDS; data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } /* copy first part */ memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); @@ -583,10 +576,8 @@ free_bcast_own: goto free_own_sum; data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); - if (!data_ptr) { - pr_err("Can't resize orig: out of memory\n"); + if (!data_ptr) return -1; - } memcpy(data_ptr, orig_node->bcast_own_sum, del_if_num * sizeof(uint8_t)); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index b76b4be..4d9e54c 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -25,14 +25,14 @@ #define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ enum bat_packettype { - BAT_PACKET = 0x01, - BAT_ICMP = 0x02, - BAT_UNICAST = 0x03, - BAT_BCAST = 0x04, - BAT_VIS = 0x05, + BAT_OGM = 0x01, + BAT_ICMP = 0x02, + BAT_UNICAST = 0x03, + BAT_BCAST = 0x04, + BAT_VIS = 0x05, BAT_UNICAST_FRAG = 0x06, - BAT_TT_QUERY = 0x07, - BAT_ROAM_ADV = 0x08 + BAT_TT_QUERY = 0x07, + BAT_ROAM_ADV = 0x08 }; /* this file is included by batctl which needs these defines */ @@ -84,12 +84,13 @@ enum tt_query_flags { enum tt_client_flags { TT_CLIENT_DEL = 1 << 0, TT_CLIENT_ROAM = 1 << 1, + TT_CLIENT_WIFI = 1 << 2, TT_CLIENT_NOPURGE = 1 << 8, TT_CLIENT_NEW = 1 << 9, TT_CLIENT_PENDING = 1 << 10 }; -struct batman_packet { +struct batman_ogm_packet { uint8_t packet_type; uint8_t version; /* batman version field */ uint8_t ttl; @@ -104,7 +105,7 @@ struct batman_packet { uint16_t tt_crc; } __packed; -#define BAT_PACKET_LEN sizeof(struct batman_packet) +#define BATMAN_OGM_LEN sizeof(struct batman_ogm_packet) struct icmp_packet { uint8_t packet_type; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 0f32c81..f961cc5 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -22,18 +22,14 @@ #include "main.h" #include "routing.h" #include "send.h" -#include "hash.h" #include "soft-interface.h" #include "hard-interface.h" #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "ring_buffer.h" #include "vis.h" -#include "aggregation.h" -#include "gateway_common.h" -#include "gateway_client.h" #include "unicast.h" +#include "bat_ogm.h" void slide_own_bcast_window(struct hard_iface *hard_iface) { @@ -64,69 +60,9 @@ void slide_own_bcast_window(struct hard_iface *hard_iface) } } -static void update_transtable(struct bat_priv *bat_priv, - struct orig_node *orig_node, - const unsigned char *tt_buff, - uint8_t tt_num_changes, uint8_t ttvn, - uint16_t tt_crc) -{ - uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); - bool full_table = true; - - /* the ttvn increased by one -> we can apply the attached changes */ - if (ttvn - orig_ttvn == 1) { - /* the OGM could not contain the changes because they were too - * many to fit in one frame or because they have already been - * sent TT_OGM_APPEND_MAX times. In this case send a tt - * request */ - if (!tt_num_changes) { - full_table = false; - goto request_table; - } - - tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, - (struct tt_change *)tt_buff); - - /* Even if we received the crc into the OGM, we prefer - * to recompute it to spot any possible inconsistency - * in the global table */ - orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); - - /* The ttvn alone is not enough to guarantee consistency - * because a single value could repesent different states - * (due to the wrap around). Thus a node has to check whether - * the resulting table (after applying the changes) is still - * consistent or not. E.g. a node could disconnect while its - * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case - * checking the CRC value is mandatory to detect the - * inconsistency */ - if (orig_node->tt_crc != tt_crc) - goto request_table; - - /* Roaming phase is over: tables are in sync again. I can - * unset the flag */ - orig_node->tt_poss_change = false; - } else { - /* if we missed more than one change or our tables are not - * in sync anymore -> request fresh tt data */ - if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { -request_table: - bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " - "Need to retrieve the correct information " - "(ttvn: %u last_ttvn: %u crc: %u last_crc: " - "%u num_changes: %u)\n", orig_node->orig, ttvn, - orig_ttvn, tt_crc, orig_node->tt_crc, - tt_num_changes); - send_tt_request(bat_priv, orig_node, ttvn, tt_crc, - full_table); - return; - } - } -} - -static void update_route(struct bat_priv *bat_priv, - struct orig_node *orig_node, - struct neigh_node *neigh_node) +static void _update_route(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct neigh_node *neigh_node) { struct neigh_node *curr_router; @@ -170,8 +106,8 @@ static void update_route(struct bat_priv *bat_priv, neigh_node_free_ref(curr_router); } -void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node) +void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, + struct neigh_node *neigh_node) { struct neigh_node *router = NULL; @@ -181,116 +117,13 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, router = orig_node_get_router(orig_node); if (router != neigh_node) - update_route(bat_priv, orig_node, neigh_node); + _update_route(bat_priv, orig_node, neigh_node); out: if (router) neigh_node_free_ref(router); } -static int is_bidirectional_neigh(struct orig_node *orig_node, - struct orig_node *orig_neigh_node, - struct batman_packet *batman_packet, - struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct neigh_node *neigh_node = NULL, *tmp_neigh_node; - struct hlist_node *node; - uint8_t total_count; - uint8_t orig_eq_count, neigh_rq_count, tq_own; - int tq_asym_penalty, ret = 0; - - /* find corresponding one hop neighbor */ - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_neigh_node->neigh_list, list) { - - if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) - continue; - - if (tmp_neigh_node->if_incoming != if_incoming) - continue; - - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) - continue; - - neigh_node = tmp_neigh_node; - break; - } - rcu_read_unlock(); - - if (!neigh_node) - neigh_node = create_neighbor(orig_neigh_node, - orig_neigh_node, - orig_neigh_node->orig, - if_incoming); - - if (!neigh_node) - goto out; - - /* if orig_node is direct neighbour update neigh_node last_valid */ - if (orig_node == orig_neigh_node) - neigh_node->last_valid = jiffies; - - orig_node->last_valid = jiffies; - - /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->ogm_cnt_lock); - orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; - neigh_rq_count = neigh_node->real_packet_count; - spin_unlock_bh(&orig_node->ogm_cnt_lock); - - /* pay attention to not get a value bigger than 100 % */ - total_count = (orig_eq_count > neigh_rq_count ? - neigh_rq_count : orig_eq_count); - - /* if we have too few packets (too less data) we set tq_own to zero */ - /* if we receive too few packets it is not considered bidirectional */ - if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || - (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) - tq_own = 0; - else - /* neigh_node->real_packet_count is never zero as we - * only purge old information when getting new - * information */ - tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count; - - /* - * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does - * affect the nearly-symmetric links only a little, but - * punishes asymmetric links more. This will give a value - * between 0 and TQ_MAX_VALUE - */ - tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE * - (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) / - (TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE); - - batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) / - (TQ_MAX_VALUE * TQ_MAX_VALUE)); - - bat_dbg(DBG_BATMAN, bat_priv, - "bidirectional: " - "orig = %-15pM neigh = %-15pM => own_bcast = %2i, " - "real recv = %2i, local tq: %3i, asym_penalty: %3i, " - "total tq: %3i\n", - orig_node->orig, orig_neigh_node->orig, total_count, - neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq); - - /* if link has the minimum required transmission quality - * consider it bidirectional */ - if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) - ret = 1; - -out: - if (neigh_node) - neigh_node_free_ref(neigh_node); - return ret; -} - /* caller must hold the neigh_list_lock */ void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node) @@ -308,8 +141,8 @@ out: return; } -static void bonding_candidate_add(struct orig_node *orig_node, - struct neigh_node *neigh_node) +void bonding_candidate_add(struct orig_node *orig_node, + struct neigh_node *neigh_node) { struct hlist_node *node; struct neigh_node *tmp_neigh_node, *router = NULL; @@ -379,162 +212,23 @@ out: } /* copy primary address for bonding */ -static void bonding_save_primary(const struct orig_node *orig_node, - struct orig_node *orig_neigh_node, - const struct batman_packet *batman_packet) +void bonding_save_primary(const struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + const struct batman_ogm_packet *batman_ogm_packet) { - if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP)) return; memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); } -static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, - const struct ethhdr *ethhdr, - const struct batman_packet *batman_packet, - struct hard_iface *if_incoming, - const unsigned char *tt_buff, int is_duplicate) -{ - struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; - struct neigh_node *router = NULL; - struct orig_node *orig_node_tmp; - struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; - - bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " - "Searching and updating originator entry of received packet\n"); - - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_node->neigh_list, list) { - if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && - (tmp_neigh_node->if_incoming == if_incoming) && - atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) - neigh_node_free_ref(neigh_node); - neigh_node = tmp_neigh_node; - continue; - } - - if (is_duplicate) - continue; - - spin_lock_bh(&tmp_neigh_node->tq_lock); - ring_buffer_set(tmp_neigh_node->tq_recv, - &tmp_neigh_node->tq_index, 0); - tmp_neigh_node->tq_avg = - ring_buffer_avg(tmp_neigh_node->tq_recv); - spin_unlock_bh(&tmp_neigh_node->tq_lock); - } - - if (!neigh_node) { - struct orig_node *orig_tmp; - - orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); - if (!orig_tmp) - goto unlock; - - neigh_node = create_neighbor(orig_node, orig_tmp, - ethhdr->h_source, if_incoming); - - orig_node_free_ref(orig_tmp); - if (!neigh_node) - goto unlock; - } else - bat_dbg(DBG_BATMAN, bat_priv, - "Updating existing last-hop neighbor of originator\n"); - - rcu_read_unlock(); - - orig_node->flags = batman_packet->flags; - neigh_node->last_valid = jiffies; - - spin_lock_bh(&neigh_node->tq_lock); - ring_buffer_set(neigh_node->tq_recv, - &neigh_node->tq_index, - batman_packet->tq); - neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv); - spin_unlock_bh(&neigh_node->tq_lock); - - if (!is_duplicate) { - orig_node->last_ttl = batman_packet->ttl; - neigh_node->last_ttl = batman_packet->ttl; - } - - bonding_candidate_add(orig_node, neigh_node); - - /* if this neighbor already is our next hop there is nothing - * to change */ - router = orig_node_get_router(orig_node); - if (router == neigh_node) - goto update_tt; - - /* if this neighbor does not offer a better TQ we won't consider it */ - if (router && (router->tq_avg > neigh_node->tq_avg)) - goto update_tt; - - /* if the TQ is the same and the link not more symetric we - * won't consider it either */ - if (router && (neigh_node->tq_avg == router->tq_avg)) { - orig_node_tmp = router->orig_node; - spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; - spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - - orig_node_tmp = neigh_node->orig_node; - spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; - spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - - if (bcast_own_sum_orig >= bcast_own_sum_neigh) - goto update_tt; - } - - update_routes(bat_priv, orig_node, neigh_node); - -update_tt: - /* I have to check for transtable changes only if the OGM has been - * sent through a primary interface */ - if (((batman_packet->orig != ethhdr->h_source) && - (batman_packet->ttl > 2)) || - (batman_packet->flags & PRIMARIES_FIRST_HOP)) - update_transtable(bat_priv, orig_node, tt_buff, - batman_packet->tt_num_changes, - batman_packet->ttvn, - batman_packet->tt_crc); - - if (orig_node->gw_flags != batman_packet->gw_flags) - gw_node_update(bat_priv, orig_node, batman_packet->gw_flags); - - orig_node->gw_flags = batman_packet->gw_flags; - - /* restart gateway selection if fast or late switching was enabled */ - if ((orig_node->gw_flags) && - (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw_sel_class) > 2)) - gw_check_election(bat_priv, orig_node); - - goto out; - -unlock: - rcu_read_unlock(); -out: - if (neigh_node) - neigh_node_free_ref(neigh_node); - if (router) - neigh_node_free_ref(router); -} - /* checks whether the host restarted and is in the protection time. * returns: * 0 if the packet is to be accepted * 1 if the packet is to be ignored. */ -static int window_protected(struct bat_priv *bat_priv, - int32_t seq_num_diff, - unsigned long *last_reset) +int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff, + unsigned long *last_reset) { if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE) || (seq_num_diff >= EXPECTED_SEQNO_RANGE)) { @@ -552,330 +246,12 @@ static int window_protected(struct bat_priv *bat_priv, return 0; } -/* processes a batman packet for all interfaces, adjusts the sequence number and - * finds out whether it is a duplicate. - * returns: - * 1 the packet is a duplicate - * 0 the packet has not yet been received - * -1 the packet is old and has been received while the seqno window - * was protected. Caller should drop it. - */ -static int count_real_packets(const struct ethhdr *ethhdr, - const struct batman_packet *batman_packet, - const struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct orig_node *orig_node; - struct neigh_node *tmp_neigh_node; - struct hlist_node *node; - int is_duplicate = 0; - int32_t seq_diff; - int need_update = 0; - int set_mark, ret = -1; - - orig_node = get_orig_node(bat_priv, batman_packet->orig); - if (!orig_node) - return 0; - - spin_lock_bh(&orig_node->ogm_cnt_lock); - seq_diff = batman_packet->seqno - orig_node->last_real_seqno; - - /* signalize caller that the packet is to be dropped. */ - if (window_protected(bat_priv, seq_diff, - &orig_node->batman_seqno_reset)) - goto out; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_node->neigh_list, list) { - - is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, - orig_node->last_real_seqno, - batman_packet->seqno); - - if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && - (tmp_neigh_node->if_incoming == if_incoming)) - set_mark = 1; - else - set_mark = 0; - - /* if the window moved, set the update flag. */ - need_update |= bit_get_packet(bat_priv, - tmp_neigh_node->real_bits, - seq_diff, set_mark); - - tmp_neigh_node->real_packet_count = - bit_packet_count(tmp_neigh_node->real_bits); - } - rcu_read_unlock(); - - if (need_update) { - bat_dbg(DBG_BATMAN, bat_priv, - "updating last_seqno: old %d, new %d\n", - orig_node->last_real_seqno, batman_packet->seqno); - orig_node->last_real_seqno = batman_packet->seqno; - } - - ret = is_duplicate; - -out: - spin_unlock_bh(&orig_node->ogm_cnt_lock); - orig_node_free_ref(orig_node); - return ret; -} - -void receive_bat_packet(const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - const unsigned char *tt_buff, - struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct hard_iface *hard_iface; - struct orig_node *orig_neigh_node, *orig_node; - struct neigh_node *router = NULL, *router_router = NULL; - struct neigh_node *orig_neigh_router = NULL; - int has_directlink_flag; - int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirectional, is_single_hop_neigh; - int is_duplicate; - uint32_t if_incoming_seqno; - - /* Silently drop when the batman packet is actually not a - * correct packet. - * - * This might happen if a packet is padded (e.g. Ethernet has a - * minimum frame length of 64 byte) and the aggregation interprets - * it as an additional length. - * - * TODO: A more sane solution would be to have a bit in the - * batman_packet to detect whether the packet is the last - * packet in an aggregation. Here we expect that the padding - * is always zero (or not 0x01) - */ - if (batman_packet->packet_type != BAT_PACKET) - return; - - /* could be changed by schedule_own_packet() */ - if_incoming_seqno = atomic_read(&if_incoming->seqno); - - has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); - - is_single_hop_neigh = (compare_eth(ethhdr->h_source, - batman_packet->orig) ? 1 : 0); - - bat_dbg(DBG_BATMAN, bat_priv, - "Received BATMAN packet via NB: %pM, IF: %s [%pM] " - "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, " - "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", - ethhdr->h_source, if_incoming->net_dev->name, - if_incoming->net_dev->dev_addr, batman_packet->orig, - batman_packet->prev_sender, batman_packet->seqno, - batman_packet->ttvn, batman_packet->tt_crc, - batman_packet->tt_num_changes, batman_packet->tq, - batman_packet->ttl, batman_packet->version, - has_directlink_flag); - - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &hardif_list, list) { - if (hard_iface->if_status != IF_ACTIVE) - continue; - - if (hard_iface->soft_iface != if_incoming->soft_iface) - continue; - - if (compare_eth(ethhdr->h_source, - hard_iface->net_dev->dev_addr)) - is_my_addr = 1; - - if (compare_eth(batman_packet->orig, - hard_iface->net_dev->dev_addr)) - is_my_orig = 1; - - if (compare_eth(batman_packet->prev_sender, - hard_iface->net_dev->dev_addr)) - is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; - } - rcu_read_unlock(); - - if (batman_packet->version != COMPAT_VERSION) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batman_packet->version); - return; - } - - if (is_my_addr) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: received my own broadcast (sender: %pM" - ")\n", - ethhdr->h_source); - return; - } - - if (is_broadcast) { - bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " - "ignoring all packets with broadcast source addr (sender: %pM" - ")\n", ethhdr->h_source); - return; - } - - if (is_my_orig) { - unsigned long *word; - int offset; - - orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); - if (!orig_neigh_node) - return; - - /* neighbor has to indicate direct link and it has to - * come via the corresponding interface */ - /* save packet seqno for bidirectional check */ - if (has_directlink_flag && - compare_eth(if_incoming->net_dev->dev_addr, - batman_packet->orig)) { - offset = if_incoming->if_num * NUM_WORDS; - - spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); - word = &(orig_neigh_node->bcast_own[offset]); - bit_mark(word, - if_incoming_seqno - batman_packet->seqno - 2); - orig_neigh_node->bcast_own_sum[if_incoming->if_num] = - bit_packet_count(word); - spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); - } - - bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " - "originator packet from myself (via neighbor)\n"); - orig_node_free_ref(orig_neigh_node); - return; - } - - if (is_my_oldorig) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: ignoring all rebroadcast echos (sender: " - "%pM)\n", ethhdr->h_source); - return; - } - - orig_node = get_orig_node(bat_priv, batman_packet->orig); - if (!orig_node) - return; - - is_duplicate = count_real_packets(ethhdr, batman_packet, if_incoming); - - if (is_duplicate == -1) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: packet within seqno protection time " - "(sender: %pM)\n", ethhdr->h_source); - goto out; - } - - if (batman_packet->tq == 0) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: originator packet with tq equal 0\n"); - goto out; - } - - router = orig_node_get_router(orig_node); - if (router) - router_router = orig_node_get_router(router->orig_node); - - /* avoid temporary routing loops */ - if (router && router_router && - (compare_eth(router->addr, batman_packet->prev_sender)) && - !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) && - (compare_eth(router->addr, router_router->addr))) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: ignoring all rebroadcast packets that " - "may make me loop (sender: %pM)\n", ethhdr->h_source); - goto out; - } - - /* if sender is a direct neighbor the sender mac equals - * originator mac */ - orig_neigh_node = (is_single_hop_neigh ? - orig_node : - get_orig_node(bat_priv, ethhdr->h_source)); - if (!orig_neigh_node) - goto out; - - orig_neigh_router = orig_node_get_router(orig_neigh_node); - - /* drop packet if sender is not a direct neighbor and if we - * don't route towards it */ - if (!is_single_hop_neigh && (!orig_neigh_router)) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: OGM via unknown neighbor!\n"); - goto out_neigh; - } - - is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, - batman_packet, if_incoming); - - bonding_save_primary(orig_node, orig_neigh_node, batman_packet); - - /* update ranking if it is not a duplicate or has the same - * seqno and similar ttl as the non-duplicate */ - if (is_bidirectional && - (!is_duplicate || - ((orig_node->last_real_seqno == batman_packet->seqno) && - (orig_node->last_ttl - 3 <= batman_packet->ttl)))) - update_orig(bat_priv, orig_node, ethhdr, batman_packet, - if_incoming, tt_buff, is_duplicate); - - /* is single hop (direct) neighbor */ - if (is_single_hop_neigh) { - - /* mark direct link on incoming interface */ - schedule_forward_packet(orig_node, ethhdr, batman_packet, - 1, if_incoming); - - bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " - "rebroadcast neighbor packet with direct link flag\n"); - goto out_neigh; - } - - /* multihop originator */ - if (!is_bidirectional) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: not received via bidirectional link\n"); - goto out_neigh; - } - - if (is_duplicate) { - bat_dbg(DBG_BATMAN, bat_priv, - "Drop packet: duplicate packet received\n"); - goto out_neigh; - } - - bat_dbg(DBG_BATMAN, bat_priv, - "Forwarding packet: rebroadcast originator packet\n"); - schedule_forward_packet(orig_node, ethhdr, batman_packet, - 0, if_incoming); - -out_neigh: - if ((orig_neigh_node) && (!is_single_hop_neigh)) - orig_node_free_ref(orig_neigh_node); -out: - if (router) - neigh_node_free_ref(router); - if (router_router) - neigh_node_free_ref(router_router); - if (orig_neigh_router) - neigh_node_free_ref(orig_neigh_router); - - orig_node_free_ref(orig_node); -} - -int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface) +int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *hard_iface) { struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, sizeof(struct batman_packet)))) + if (unlikely(!pskb_may_pull(skb, BATMAN_OGM_LEN))) return NET_RX_DROP; ethhdr = (struct ethhdr *)skb_mac_header(skb); @@ -898,10 +274,7 @@ int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface) ethhdr = (struct ethhdr *)skb_mac_header(skb); - receive_aggr_bat_packet(ethhdr, - skb->data, - skb_headlen(skb), - hard_iface); + bat_ogm_receive(ethhdr, skb->data, skb_headlen(skb), hard_iface); kfree_skb(skb); return NET_RX_SUCCESS; @@ -1243,7 +616,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) } break; case TT_RESPONSE: - /* packet needs to be linearised to access the TT changes */ + /* packet needs to be linearized to access the TT changes */ if (skb_linearize(skb) < 0) goto out; @@ -1300,7 +673,7 @@ int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if) roam_adv_packet->client); tt_global_add(bat_priv, orig_node, roam_adv_packet->client, - atomic_read(&orig_node->last_ttvn) + 1, true); + atomic_read(&orig_node->last_ttvn) + 1, true, false); /* Roaming phase starts: I have new information but the ttvn has not * been incremented yet. This flag will make me check all the incoming @@ -1536,7 +909,7 @@ static int check_unicast_ttvn(struct bat_priv *bat_priv, ethhdr = (struct ethhdr *)(skb->data + sizeof(struct unicast_packet)); - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + orig_node = transtable_search(bat_priv, NULL, ethhdr->h_dest); if (!orig_node) { if (!is_my_client(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index fb14e95..7aaee0f 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -23,19 +23,15 @@ #define _NET_BATMAN_ADV_ROUTING_H_ void slide_own_bcast_window(struct hard_iface *hard_iface); -void receive_bat_packet(const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - const unsigned char *tt_buff, - struct hard_iface *if_incoming); -void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node); +void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, + struct neigh_node *neigh_node); int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); -int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if); int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, @@ -43,5 +39,12 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, const struct hard_iface *recv_if); void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node); +void bonding_candidate_add(struct orig_node *orig_node, + struct neigh_node *neigh_node); +void bonding_save_primary(const struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + const struct batman_ogm_packet *batman_ogm_packet); +int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff, + unsigned long *last_reset); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 58d1447..8a684eb 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -26,33 +26,12 @@ #include "soft-interface.h" #include "hard-interface.h" #include "vis.h" -#include "aggregation.h" #include "gateway_common.h" #include "originator.h" +#include "bat_ogm.h" static void send_outstanding_bcast_packet(struct work_struct *work); -/* apply hop penalty for a normal link */ -static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv) -{ - int hop_penalty = atomic_read(&bat_priv->hop_penalty); - return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); -} - -/* when do we schedule our own packet to be sent */ -static unsigned long own_send_time(const struct bat_priv *bat_priv) -{ - return jiffies + msecs_to_jiffies( - atomic_read(&bat_priv->orig_interval) - - JITTER + (random32() % 2*JITTER)); -} - -/* when do we schedule a forwarded packet to be sent */ -static unsigned long forward_send_time(void) -{ - return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); -} - /* send out an already prepared packet to the given address via the * specified batman interface */ int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, @@ -99,141 +78,17 @@ send_skb_err: return NET_XMIT_DROP; } -/* Send a packet to a given interface */ -static void send_packet_to_if(struct forw_packet *forw_packet, - struct hard_iface *hard_iface) -{ - struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - char *fwd_str; - uint8_t packet_num; - int16_t buff_pos; - struct batman_packet *batman_packet; - struct sk_buff *skb; - - if (hard_iface->if_status != IF_ACTIVE) - return; - - packet_num = 0; - buff_pos = 0; - batman_packet = (struct batman_packet *)forw_packet->skb->data; - - /* adjust all flags and log packets */ - while (aggregated_packet(buff_pos, - forw_packet->packet_len, - batman_packet->tt_num_changes)) { - - /* we might have aggregated direct link packets with an - * ordinary base packet */ - if ((forw_packet->direct_link_flags & (1 << packet_num)) && - (forw_packet->if_incoming == hard_iface)) - batman_packet->flags |= DIRECTLINK; - else - batman_packet->flags &= ~DIRECTLINK; - - fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ? - "Sending own" : - "Forwarding")); - bat_dbg(DBG_BATMAN, bat_priv, - "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," - " IDF %s, hvn %d) on interface %s [%pM]\n", - fwd_str, (packet_num > 0 ? "aggregated " : ""), - batman_packet->orig, ntohl(batman_packet->seqno), - batman_packet->tq, batman_packet->ttl, - (batman_packet->flags & DIRECTLINK ? - "on" : "off"), - batman_packet->ttvn, hard_iface->net_dev->name, - hard_iface->net_dev->dev_addr); - - buff_pos += sizeof(*batman_packet) + - tt_len(batman_packet->tt_num_changes); - packet_num++; - batman_packet = (struct batman_packet *) - (forw_packet->skb->data + buff_pos); - } - - /* create clone because function is called more than once */ - skb = skb_clone(forw_packet->skb, GFP_ATOMIC); - if (skb) - send_skb_packet(skb, hard_iface, broadcast_addr); -} - -/* send a batman packet */ -static void send_packet(struct forw_packet *forw_packet) -{ - struct hard_iface *hard_iface; - struct net_device *soft_iface; - struct bat_priv *bat_priv; - struct hard_iface *primary_if = NULL; - struct batman_packet *batman_packet = - (struct batman_packet *)(forw_packet->skb->data); - int directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0); - - if (!forw_packet->if_incoming) { - pr_err("Error - can't forward packet: incoming iface not " - "specified\n"); - goto out; - } - - soft_iface = forw_packet->if_incoming->soft_iface; - bat_priv = netdev_priv(soft_iface); - - if (forw_packet->if_incoming->if_status != IF_ACTIVE) - goto out; - - primary_if = primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - /* multihomed peer assumed */ - /* non-primary OGMs are only broadcasted on their interface */ - if ((directlink && (batman_packet->ttl == 1)) || - (forw_packet->own && (forw_packet->if_incoming != primary_if))) { - - /* FIXME: what about aggregated packets ? */ - bat_dbg(DBG_BATMAN, bat_priv, - "%s packet (originator %pM, seqno %d, TTL %d) " - "on interface %s [%pM]\n", - (forw_packet->own ? "Sending own" : "Forwarding"), - batman_packet->orig, ntohl(batman_packet->seqno), - batman_packet->ttl, - forw_packet->if_incoming->net_dev->name, - forw_packet->if_incoming->net_dev->dev_addr); - - /* skb is only used once and than forw_packet is free'd */ - send_skb_packet(forw_packet->skb, forw_packet->if_incoming, - broadcast_addr); - forw_packet->skb = NULL; - - goto out; - } - - /* broadcast on every interface */ - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &hardif_list, list) { - if (hard_iface->soft_iface != soft_iface) - continue; - - send_packet_to_if(forw_packet, hard_iface); - } - rcu_read_unlock(); - -out: - if (primary_if) - hardif_free_ref(primary_if); -} - static void realloc_packet_buffer(struct hard_iface *hard_iface, - int new_len) + int new_len) { unsigned char *new_buff; - struct batman_packet *batman_packet; new_buff = kmalloc(new_len, GFP_ATOMIC); /* keep old buffer if kmalloc should fail */ if (new_buff) { memcpy(new_buff, hard_iface->packet_buff, - sizeof(*batman_packet)); + BATMAN_OGM_LEN); kfree(hard_iface->packet_buff); hard_iface->packet_buff = new_buff; @@ -242,60 +97,48 @@ static void realloc_packet_buffer(struct hard_iface *hard_iface, } /* when calling this function (hard_iface == primary_if) has to be true */ -static void prepare_packet_buffer(struct bat_priv *bat_priv, +static int prepare_packet_buffer(struct bat_priv *bat_priv, struct hard_iface *hard_iface) { int new_len; - struct batman_packet *batman_packet; - new_len = BAT_PACKET_LEN + + new_len = BATMAN_OGM_LEN + tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ if (new_len > hard_iface->soft_iface->mtu) - new_len = BAT_PACKET_LEN; + new_len = BATMAN_OGM_LEN; realloc_packet_buffer(hard_iface, new_len); - batman_packet = (struct batman_packet *)hard_iface->packet_buff; atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv)); /* reset the sending counter */ atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); - batman_packet->tt_num_changes = tt_changes_fill_buffer(bat_priv, - hard_iface->packet_buff + BAT_PACKET_LEN, - hard_iface->packet_len - BAT_PACKET_LEN); - + return tt_changes_fill_buffer(bat_priv, + hard_iface->packet_buff + BATMAN_OGM_LEN, + hard_iface->packet_len - BATMAN_OGM_LEN); } -static void reset_packet_buffer(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) +static int reset_packet_buffer(struct bat_priv *bat_priv, + struct hard_iface *hard_iface) { - struct batman_packet *batman_packet; - - realloc_packet_buffer(hard_iface, BAT_PACKET_LEN); - - batman_packet = (struct batman_packet *)hard_iface->packet_buff; - batman_packet->tt_num_changes = 0; + realloc_packet_buffer(hard_iface, BATMAN_OGM_LEN); + return 0; } -void schedule_own_packet(struct hard_iface *hard_iface) +void schedule_bat_ogm(struct hard_iface *hard_iface) { struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hard_iface *primary_if; - unsigned long send_time; - struct batman_packet *batman_packet; - int vis_server; + int tt_num_changes = -1; if ((hard_iface->if_status == IF_NOT_IN_USE) || (hard_iface->if_status == IF_TO_BE_REMOVED)) return; - vis_server = atomic_read(&bat_priv->vis_mode); - primary_if = primary_if_get_selected(bat_priv); - /** * the interface gets activated here to avoid race conditions between * the moment of activating the interface in @@ -306,124 +149,26 @@ void schedule_own_packet(struct hard_iface *hard_iface) if (hard_iface->if_status == IF_TO_BE_ACTIVATED) hard_iface->if_status = IF_ACTIVE; + primary_if = primary_if_get_selected(bat_priv); + if (hard_iface == primary_if) { /* if at least one change happened */ if (atomic_read(&bat_priv->tt_local_changes) > 0) { tt_commit_changes(bat_priv); - prepare_packet_buffer(bat_priv, hard_iface); + tt_num_changes = prepare_packet_buffer(bat_priv, + hard_iface); } - /* if the changes have been sent enough times */ + /* if the changes have been sent often enough */ if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) - reset_packet_buffer(bat_priv, hard_iface); + tt_num_changes = reset_packet_buffer(bat_priv, + hard_iface); } - /** - * NOTE: packet_buff might just have been re-allocated in - * prepare_packet_buffer() or in reset_packet_buffer() - */ - batman_packet = (struct batman_packet *)hard_iface->packet_buff; - - /* change sequence number to network order */ - batman_packet->seqno = - htonl((uint32_t)atomic_read(&hard_iface->seqno)); - - batman_packet->ttvn = atomic_read(&bat_priv->ttvn); - batman_packet->tt_crc = htons((uint16_t)atomic_read(&bat_priv->tt_crc)); - - if (vis_server == VIS_TYPE_SERVER_SYNC) - batman_packet->flags |= VIS_SERVER; - else - batman_packet->flags &= ~VIS_SERVER; - - if ((hard_iface == primary_if) && - (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) - batman_packet->gw_flags = - (uint8_t)atomic_read(&bat_priv->gw_bandwidth); - else - batman_packet->gw_flags = NO_FLAGS; - - atomic_inc(&hard_iface->seqno); - - slide_own_bcast_window(hard_iface); - send_time = own_send_time(bat_priv); - add_bat_packet_to_list(bat_priv, - hard_iface->packet_buff, - hard_iface->packet_len, - hard_iface, 1, send_time); - if (primary_if) hardif_free_ref(primary_if); -} - -void schedule_forward_packet(struct orig_node *orig_node, - const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - int directlink, - struct hard_iface *if_incoming) -{ - struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct neigh_node *router; - uint8_t in_tq, in_ttl, tq_avg = 0; - unsigned long send_time; - uint8_t tt_num_changes; - - if (batman_packet->ttl <= 1) { - bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); - return; - } - - router = orig_node_get_router(orig_node); - - in_tq = batman_packet->tq; - in_ttl = batman_packet->ttl; - tt_num_changes = batman_packet->tt_num_changes; - - batman_packet->ttl--; - memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN); - - /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast - * of our best tq value */ - if (router && router->tq_avg != 0) { - - /* rebroadcast ogm of best ranking neighbor as is */ - if (!compare_eth(router->addr, ethhdr->h_source)) { - batman_packet->tq = router->tq_avg; - - if (router->last_ttl) - batman_packet->ttl = router->last_ttl - 1; - } - - tq_avg = router->tq_avg; - } - - if (router) - neigh_node_free_ref(router); - - /* apply hop penalty */ - batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv); - - bat_dbg(DBG_BATMAN, bat_priv, - "Forwarding packet: tq_orig: %i, tq_avg: %i, " - "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n", - in_tq, tq_avg, batman_packet->tq, in_ttl - 1, - batman_packet->ttl); - - batman_packet->seqno = htonl(batman_packet->seqno); - batman_packet->tt_crc = htons(batman_packet->tt_crc); - - /* switch of primaries first hop flag when forwarding */ - batman_packet->flags &= ~PRIMARIES_FIRST_HOP; - if (directlink) - batman_packet->flags |= DIRECTLINK; - else - batman_packet->flags &= ~DIRECTLINK; - send_time = forward_send_time(); - add_bat_packet_to_list(bat_priv, - (unsigned char *)batman_packet, - sizeof(*batman_packet) + tt_len(tt_num_changes), - if_incoming, 0, send_time); + bat_ogm_schedule(hard_iface, tt_num_changes); } static void forw_packet_free(struct forw_packet *forw_packet) @@ -454,7 +199,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv, } /* add a broadcast packet to the queue and setup timers. broadcast packets - * are sent multiple times to increase probability for beeing received. + * are sent multiple times to increase probability for being received. * * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on * errors. @@ -557,7 +302,7 @@ out: atomic_inc(&bat_priv->bcast_queue_left); } -void send_outstanding_bat_packet(struct work_struct *work) +void send_outstanding_bat_ogm_packet(struct work_struct *work) { struct delayed_work *delayed_work = container_of(work, struct delayed_work, work); @@ -573,7 +318,7 @@ void send_outstanding_bat_packet(struct work_struct *work) if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING) goto out; - send_packet(forw_packet); + bat_ogm_emit(forw_packet); /** * we have to have at least one packet in the queue @@ -581,7 +326,7 @@ void send_outstanding_bat_packet(struct work_struct *work) * shutting down */ if (forw_packet->own) - schedule_own_packet(forw_packet->if_incoming); + schedule_bat_ogm(forw_packet->if_incoming); out: /* don't count own packet */ @@ -612,7 +357,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, &bat_priv->forw_bcast_list, list) { /** - * if purge_outstanding_packets() was called with an argmument + * if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if ((hard_iface) && @@ -641,7 +386,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, &bat_priv->forw_bat_list, list) { /** - * if purge_outstanding_packets() was called with an argmument + * if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface */ if ((hard_iface) && diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 1f2d1e8..c8ca3ef 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -24,15 +24,10 @@ int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, const uint8_t *dst_addr); -void schedule_own_packet(struct hard_iface *hard_iface); -void schedule_forward_packet(struct orig_node *orig_node, - const struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - int directlink, - struct hard_iface *if_outgoing); +void schedule_bat_ogm(struct hard_iface *hard_iface); int add_bcast_packet_to_list(struct bat_priv *bat_priv, const struct sk_buff *skb, unsigned long delay); -void send_outstanding_bat_packet(struct work_struct *work); +void send_outstanding_bat_ogm_packet(struct work_struct *work); void purge_outstanding_packets(struct bat_priv *bat_priv, const struct hard_iface *hard_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3e2f91f..f9cc957 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -445,30 +445,31 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, { struct bat_priv *bat_priv = netdev_priv(dev); struct ethhdr *ethhdr = (struct ethhdr *)skb->data; - struct batman_packet *batman_packet; + struct batman_ogm_packet *batman_ogm_packet; struct softif_neigh *softif_neigh = NULL; struct hard_iface *primary_if = NULL; struct softif_neigh *curr_softif_neigh = NULL; if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) - batman_packet = (struct batman_packet *) + batman_ogm_packet = (struct batman_ogm_packet *) (skb->data + ETH_HLEN + VLAN_HLEN); else - batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN); + batman_ogm_packet = (struct batman_ogm_packet *) + (skb->data + ETH_HLEN); - if (batman_packet->version != COMPAT_VERSION) + if (batman_ogm_packet->version != COMPAT_VERSION) goto out; - if (batman_packet->packet_type != BAT_PACKET) + if (batman_ogm_packet->packet_type != BAT_OGM) goto out; - if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP)) goto out; - if (is_my_mac(batman_packet->orig)) + if (is_my_mac(batman_ogm_packet->orig)) goto out; - softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid); + softif_neigh = softif_neigh_get(bat_priv, batman_ogm_packet->orig, vid); if (!softif_neigh) goto out; @@ -532,11 +533,11 @@ static int interface_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - /* only modify transtable if it has been initialised before */ + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { tt_local_remove(bat_priv, dev->dev_addr, "mac address changed", false); - tt_local_add(dev, addr->sa_data); + tt_local_add(dev, addr->sa_data, NULL_IFINDEX); } memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); @@ -565,7 +566,7 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) struct orig_node *orig_node = NULL; int data_len = skb->len, ret; short vid = -1; - bool do_bcast = false; + bool do_bcast; if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto dropped; @@ -595,18 +596,19 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) goto dropped; /* Register the client MAC in the transtable */ - tt_local_add(soft_iface, ethhdr->h_source); + tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif); - orig_node = transtable_search(bat_priv, ethhdr->h_dest); - if (is_multicast_ether_addr(ethhdr->h_dest) || - (orig_node && orig_node->gw_flags)) { + orig_node = transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); + do_bcast = is_multicast_ether_addr(ethhdr->h_dest); + if (do_bcast || (orig_node && orig_node->gw_flags)) { ret = gw_is_target(bat_priv, skb, orig_node); if (ret < 0) goto dropped; - if (ret == 0) - do_bcast = true; + if (ret) + do_bcast = false; } /* ethernet packet should be broadcasted */ @@ -739,6 +741,9 @@ void interface_rx(struct net_device *soft_iface, soft_iface->last_rx = jiffies; + if (is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest)) + goto dropped; + netif_rx(skb); goto out; @@ -796,10 +801,8 @@ struct net_device *softif_create(const char *name) soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup); - if (!soft_iface) { - pr_err("Unable to allocate the batman interface: %s\n", name); + if (!soft_iface) goto out; - } ret = register_netdevice(soft_iface); if (ret < 0) { @@ -812,6 +815,7 @@ struct net_device *softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); + atomic_set(&bat_priv->ap_isolation, 0); atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE); atomic_set(&bat_priv->gw_mode, GW_MODE_OFF); atomic_set(&bat_priv->gw_sel_class, 20); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index fb6931d..cc53f78 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -183,7 +183,8 @@ static int tt_local_init(struct bat_priv *bat_priv) return 1; } -void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + int ifindex) { struct bat_priv *bat_priv = netdev_priv(soft_iface); struct tt_local_entry *tt_local_entry = NULL; @@ -207,6 +208,8 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) memcpy(tt_local_entry->addr, addr, ETH_ALEN); tt_local_entry->last_seen = jiffies; tt_local_entry->flags = NO_FLAGS; + if (is_wifi_iface(ifindex)) + tt_local_entry->flags |= TT_CLIENT_WIFI; atomic_set(&tt_local_entry->refcount, 2); /* the batman interface mac address should never be purged */ @@ -329,7 +332,7 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); __hlist_for_each_rcu(node, head) - buf_size += 21; + buf_size += 29; rcu_read_unlock(); } @@ -348,8 +351,19 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_local_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 22, " * %pM\n", - tt_local_entry->addr); + pos += snprintf(buff + pos, 30, " * %pM " + "[%c%c%c%c%c]\n", + tt_local_entry->addr, + (tt_local_entry->flags & + TT_CLIENT_ROAM ? 'R' : '.'), + (tt_local_entry->flags & + TT_CLIENT_NOPURGE ? 'P' : '.'), + (tt_local_entry->flags & + TT_CLIENT_NEW ? 'N' : '.'), + (tt_local_entry->flags & + TT_CLIENT_PENDING ? 'X' : '.'), + (tt_local_entry->flags & + TT_CLIENT_WIFI ? 'W' : '.')); } rcu_read_unlock(); } @@ -369,8 +383,8 @@ static void tt_local_set_pending(struct bat_priv *bat_priv, tt_local_event(bat_priv, tt_local_entry->addr, tt_local_entry->flags | flags); - /* The local client has to be merked as "pending to be removed" but has - * to be kept in the table in order to send it in an full tables + /* The local client has to be marked as "pending to be removed" but has + * to be kept in the table in order to send it in a full table * response issued before the net ttvn increment (consistency check) */ tt_local_entry->flags |= TT_CLIENT_PENDING; } @@ -495,7 +509,8 @@ static void tt_changes_list_free(struct bat_priv *bat_priv) /* caller must hold orig_node refcount */ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, - const unsigned char *tt_addr, uint8_t ttvn, bool roaming) + const unsigned char *tt_addr, uint8_t ttvn, bool roaming, + bool wifi) { struct tt_global_entry *tt_global_entry; struct orig_node *orig_node_tmp; @@ -537,6 +552,9 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, tt_global_entry->roam_at = 0; } + if (wifi) + tt_global_entry->flags |= TT_CLIENT_WIFI; + bat_dbg(DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", tt_global_entry->addr, orig_node->orig); @@ -582,8 +600,8 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - seq_printf(seq, " %-13s %s %-15s %s\n", - "Client", "(TTVN)", "Originator", "(Curr TTVN)"); + seq_printf(seq, " %-13s %s %-15s %s %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags"); buf_size = 1; /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via @@ -593,7 +611,7 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); __hlist_for_each_rcu(node, head) - buf_size += 59; + buf_size += 67; rcu_read_unlock(); } @@ -612,14 +630,20 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 61, - " * %pM (%3u) via %pM (%3u)\n", - tt_global_entry->addr, + pos += snprintf(buff + pos, 69, + " * %pM (%3u) via %pM (%3u) " + "[%c%c%c]\n", tt_global_entry->addr, tt_global_entry->ttvn, tt_global_entry->orig_node->orig, (uint8_t) atomic_read( &tt_global_entry->orig_node-> - last_ttvn)); + last_ttvn), + (tt_global_entry->flags & + TT_CLIENT_ROAM ? 'R' : '.'), + (tt_global_entry->flags & + TT_CLIENT_PENDING ? 'X' : '.'), + (tt_global_entry->flags & + TT_CLIENT_WIFI ? 'W' : '.')); } rcu_read_unlock(); } @@ -774,30 +798,56 @@ static void tt_global_table_free(struct bat_priv *bat_priv) bat_priv->tt_global_hash = NULL; } +static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry, + struct tt_global_entry *tt_global_entry) +{ + bool ret = false; + + if (tt_local_entry->flags & TT_CLIENT_WIFI && + tt_global_entry->flags & TT_CLIENT_WIFI) + ret = true; + + return ret; +} + struct orig_node *transtable_search(struct bat_priv *bat_priv, - const uint8_t *addr) + const uint8_t *src, const uint8_t *addr) { - struct tt_global_entry *tt_global_entry; + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; struct orig_node *orig_node = NULL; - tt_global_entry = tt_global_hash_find(bat_priv, addr); + if (src && atomic_read(&bat_priv->ap_isolation)) { + tt_local_entry = tt_local_hash_find(bat_priv, src); + if (!tt_local_entry) + goto out; + } + tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) goto out; + /* check whether the clients should not communicate due to AP + * isolation */ + if (tt_local_entry && _is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount)) - goto free_tt; + goto out; /* A global client marked as PENDING has already moved from that * originator */ if (tt_global_entry->flags & TT_CLIENT_PENDING) - goto free_tt; + goto out; orig_node = tt_global_entry->orig_node; -free_tt: - tt_global_entry_free_ref(tt_global_entry); out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return orig_node; } @@ -1029,8 +1079,9 @@ out: return skb; } -int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node, - uint8_t ttvn, uint16_t tt_crc, bool full_table) +static int send_tt_request(struct bat_priv *bat_priv, + struct orig_node *dst_orig_node, + uint8_t ttvn, uint16_t tt_crc, bool full_table) { struct sk_buff *skb = NULL; struct tt_query_packet *tt_request; @@ -1137,12 +1188,12 @@ static bool send_other_tt_response(struct bat_priv *bat_priv, orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_request->ttvn; - /* I have not the requested data */ + /* I don't have the requested data */ if (orig_ttvn != req_ttvn || tt_request->tt_data != req_dst_orig_node->tt_crc) goto out; - /* If it has explicitly been requested the full table */ + /* If the full table has been explicitly requested */ if (tt_request->flags & TT_FULL_TABLE || !req_dst_orig_node->tt_buff) full_table = true; @@ -1363,7 +1414,9 @@ static void _tt_update_changes(struct bat_priv *bat_priv, (tt_change + i)->flags & TT_CLIENT_ROAM); else if (!tt_global_add(bat_priv, orig_node, - (tt_change + i)->addr, ttvn, false)) + (tt_change + i)->addr, ttvn, false, + (tt_change + i)->flags & + TT_CLIENT_WIFI)) /* In case of problem while storing a * global_entry, we stop the updating * procedure without committing the @@ -1403,9 +1456,10 @@ out: orig_node_free_ref(orig_node); } -void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, - struct tt_change *tt_change) +static void tt_update_changes(struct bat_priv *bat_priv, + struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change) { _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, ttvn); @@ -1720,3 +1774,90 @@ void tt_commit_changes(struct bat_priv *bat_priv) atomic_inc(&bat_priv->ttvn); bat_priv->tt_poss_change = false; } + +bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst) +{ + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; + bool ret = true; + + if (!atomic_read(&bat_priv->ap_isolation)) + return false; + + tt_local_entry = tt_local_hash_find(bat_priv, dst); + if (!tt_local_entry) + goto out; + + tt_global_entry = tt_global_hash_find(bat_priv, src); + if (!tt_global_entry) + goto out; + + if (_is_ap_isolated(tt_local_entry, tt_global_entry)) + goto out; + + ret = false; + +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return ret; +} + +void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes, + uint8_t ttvn, uint16_t tt_crc) +{ + uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + bool full_table = true; + + /* the ttvn increased by one -> we can apply the attached changes */ + if (ttvn - orig_ttvn == 1) { + /* the OGM could not contain the changes due to their size or + * because they have already been sent TT_OGM_APPEND_MAX times. + * In this case send a tt request */ + if (!tt_num_changes) { + full_table = false; + goto request_table; + } + + tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, + (struct tt_change *)tt_buff); + + /* Even if we received the precomputed crc with the OGM, we + * prefer to recompute it to spot any possible inconsistency + * in the global table */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + + /* The ttvn alone is not enough to guarantee consistency + * because a single value could represent different states + * (due to the wrap around). Thus a node has to check whether + * the resulting table (after applying the changes) is still + * consistent or not. E.g. a node could disconnect while its + * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case + * checking the CRC value is mandatory to detect the + * inconsistency */ + if (orig_node->tt_crc != tt_crc) + goto request_table; + + /* Roaming phase is over: tables are in sync again. I can + * unset the flag */ + orig_node->tt_poss_change = false; + } else { + /* if we missed more than one change or our tables are not + * in sync anymore -> request fresh tt data */ + if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { +request_table: + bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " + "Need to retrieve the correct information " + "(ttvn: %u last_ttvn: %u crc: %u last_crc: " + "%u num_changes: %u)\n", orig_node->orig, ttvn, + orig_ttvn, tt_crc, orig_node->tt_crc, + tt_num_changes); + send_tt_request(bat_priv, orig_node, ttvn, tt_crc, + full_table); + return; + } + } +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index d4122cb..30efd49 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -26,15 +26,16 @@ int tt_len(int changes_num); int tt_changes_fill_buffer(struct bat_priv *bat_priv, unsigned char *buff, int buff_len); int tt_init(struct bat_priv *bat_priv); -void tt_local_add(struct net_device *soft_iface, const uint8_t *addr); +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, + int ifindex); void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, const char *message, bool roaming); int tt_local_seq_print_text(struct seq_file *seq, void *offset); void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, int tt_buff_len); -int tt_global_add(struct bat_priv *bat_priv, - struct orig_node *orig_node, const unsigned char *addr, - uint8_t ttvn, bool roaming); +int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *addr, uint8_t ttvn, bool roaming, + bool wifi); int tt_global_seq_print_text(struct seq_file *seq, void *offset); void tt_global_del_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const char *message); @@ -42,25 +43,23 @@ void tt_global_del(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *addr, const char *message, bool roaming); struct orig_node *transtable_search(struct bat_priv *bat_priv, - const uint8_t *addr); + const uint8_t *src, const uint8_t *addr); void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, uint8_t tt_num_changes); uint16_t tt_local_crc(struct bat_priv *bat_priv); uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node); void tt_free(struct bat_priv *bat_priv); -int send_tt_request(struct bat_priv *bat_priv, - struct orig_node *dst_orig_node, uint8_t hvn, - uint16_t tt_crc, bool full_table); bool send_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_request); -void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, - uint16_t tt_num_changes, uint8_t ttvn, - struct tt_change *tt_change); bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); void handle_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_response); void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, struct orig_node *orig_node); void tt_commit_changes(struct bat_priv *bat_priv); +bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst); +void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes, + uint8_t ttvn, uint16_t tt_crc); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 25bd1db..1ae3557 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -57,7 +57,7 @@ struct hard_iface { * @batman_seqno_reset: time when the batman seqno window was reset * @gw_flags: flags related to gateway class * @flags: for now only VIS_SERVER flag - * @last_real_seqno: last and best known squence number + * @last_real_seqno: last and best known sequence number * @last_ttl: ttl of last received packet * @last_bcast_seqno: last broadcast sequence number received by this host * @@ -146,6 +146,7 @@ struct bat_priv { atomic_t aggregated_ogms; /* boolean */ atomic_t bonding; /* boolean */ atomic_t fragmentation; /* boolean */ + atomic_t ap_isolation; /* boolean */ atomic_t vis_mode; /* VIS_TYPE_* */ atomic_t gw_mode; /* GW_MODE_* */ atomic_t gw_sel_class; /* uint */ @@ -156,7 +157,7 @@ struct bat_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - atomic_t ttvn; /* tranlation table version number */ + atomic_t ttvn; /* translation table version number */ atomic_t tt_ogm_append_cnt; atomic_t tt_local_changes; /* changes registered in a OGM interval */ /* The tt_poss_change flag is used to detect an ongoing roaming phase. diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 32b125f..07d1c1d 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -299,8 +299,10 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) goto find_router; } - /* check for tt host - increases orig_node refcount */ - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + /* check for tt host - increases orig_node refcount. + * returns NULL in case of AP isolation */ + orig_node = transtable_search(bat_priv, ethhdr->h_source, + ethhdr->h_dest); find_router: /** diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 62f54b9..8fd5535 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -24,7 +24,7 @@ #include "packet.h" -#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ +#define FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */ #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 8a1b985..f81a6b6 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -131,7 +131,7 @@ static void vis_data_insert_interface(const uint8_t *interface, return; } - /* its a new address, add it to the list */ + /* it's a new address, add it to the list */ entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return; @@ -465,7 +465,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, /* try to add it */ hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, info, &info->hash_entry); - if (hash_added < 0) { + if (hash_added != 0) { /* did not work (for some reason) */ kref_put(&info->refcount, free_info); info = NULL; @@ -887,10 +887,8 @@ int vis_init(struct bat_priv *bat_priv) } bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC); - if (!bat_priv->my_vis_info) { - pr_err("Can't initialize vis packet\n"); + if (!bat_priv->my_vis_info) goto err; - } bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) + MAX_VIS_PACKET_SIZE + @@ -920,7 +918,7 @@ int vis_init(struct bat_priv *bat_priv) hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, bat_priv->my_vis_info, &bat_priv->my_vis_info->hash_entry); - if (hash_added < 0) { + if (hash_added != 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. */ kref_put(&bat_priv->my_vis_info->refcount, free_info); diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d4f5dff..bc40864 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -217,7 +217,7 @@ static const struct net_device_ops bnep_netdev_ops = { .ndo_stop = bnep_net_close, .ndo_start_xmit = bnep_net_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_set_multicast_list = bnep_net_set_mc_list, + .ndo_set_rx_mode = bnep_net_set_mc_list, .ndo_set_mac_address = bnep_net_set_mac_addr, .ndo_tx_timeout = bnep_net_timeout, .ndo_change_mtu = eth_change_mtu, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 32b8f9f..feb77ea 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - netif_carrier_off(dev); netdev_update_features(dev); netif_start_queue(dev); br_stp_enable_bridge(br); @@ -108,8 +107,6 @@ static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - netif_carrier_off(dev); - br_stp_disable_bridge(br); br_multicast_stop(br); @@ -304,7 +301,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, - .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_set_rx_mode = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -361,6 +358,8 @@ void br_dev_setup(struct net_device *dev) memcpy(br->group_addr, br_group_address, ETH_ALEN); br->stp_enabled = BR_NO_STP; + br->group_fwd_mask = BR_GROUPFWD_DEFAULT; + br->designated_root = br->bridge_id; br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 68def3b..c8e7861 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -558,19 +558,28 @@ skip: /* Create new static fdb entry */ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state) + __u16 state, __u16 flags) { struct net_bridge *br = source->br; struct hlist_head *head = &br->hash[br_mac_hash(addr)]; struct net_bridge_fdb_entry *fdb; fdb = fdb_find(head, addr); - if (fdb) - return -EEXIST; + if (fdb == NULL) { + if (!(flags & NLM_F_CREATE)) + return -ENOENT; - fdb = fdb_create(head, source, addr); - if (!fdb) - return -ENOMEM; + fdb = fdb_create(head, source, addr); + if (!fdb) + return -ENOMEM; + } else { + if (flags & NLM_F_EXCL) + return -EEXIST; + + if (flags & NLM_F_REPLACE) + fdb->updated = fdb->used = jiffies; + fdb->is_local = fdb->is_static = 0; + } if (state & NUD_PERMANENT) fdb->is_local = fdb->is_static = 1; @@ -626,7 +635,7 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state); + err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags); spin_unlock_bh(&p->br->hash_lock); return err; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 3176e2e..c3b77dc 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/netpoll.h> #include <linux/ethtool.h> #include <linux/if_arp.h> @@ -33,20 +34,18 @@ */ static int port_cost(struct net_device *dev) { - if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; - - if (!dev_ethtool_get_settings(dev, &ecmd)) { - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_10000: - return 2; - case SPEED_1000: - return 4; - case SPEED_100: - return 19; - case SPEED_10: - return 100; - } + struct ethtool_cmd ecmd; + + if (!__ethtool_get_settings(dev, &ecmd)) { + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_10000: + return 2; + case SPEED_1000: + return 4; + case SPEED_100: + return 19; + case SPEED_10: + return 100; } } @@ -231,6 +230,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; + int res; dev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup); @@ -240,7 +240,10 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); - return register_netdev(dev); + res = register_netdev(dev); + if (res) + free_netdev(dev); + return res; } int br_del_bridge(struct net *net, const char *name) @@ -320,7 +323,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) /* Don't allow bridging non-ethernet like devices */ if ((dev->flags & IFF_LOOPBACK) || - dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN) + dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || + !is_valid_ether_addr(dev->dev_addr)) return -EINVAL; /* No bridging of bridges */ @@ -348,10 +352,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) - goto err0; - - err = br_fdb_insert(br, p, dev->dev_addr); - if (err) goto err1; err = br_sysfs_addif(p); @@ -392,6 +392,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev_set_mtu(br->dev, br_min_mtu(br)); + if (br_fdb_insert(br, p, dev->dev_addr)) + netdev_err(dev, "failed insert local address bridge forwarding table\n"); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; @@ -401,11 +404,9 @@ err4: err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: - br_fdb_delete_by_port(br, p, 1); -err1: kobject_put(&p->kobj); p = NULL; /* kobject_put frees */ -err0: +err1: dev_set_promiscuity(dev, -1); put_back: dev_put(dev); @@ -417,6 +418,7 @@ put_back: int br_del_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; + bool changed_addr; p = br_port_get_rtnl(dev); if (!p || p->br != br) @@ -425,9 +427,12 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) del_nbp(p); spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); + changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + netdev_update_features(br->dev); return 0; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f06ee39..6f9f8c0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -162,14 +162,37 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) p = br_port_get_rcu(skb->dev); if (unlikely(is_link_local(dest))) { - /* Pause frames shouldn't be passed up by driver anyway */ - if (skb->protocol == htons(ETH_P_PAUSE)) + /* + * See IEEE 802.1D Table 7-10 Reserved addresses + * + * Assignment Value + * Bridge Group Address 01-80-C2-00-00-00 + * (MAC Control) 802.3 01-80-C2-00-00-01 + * (Link Aggregation) 802.3 01-80-C2-00-00-02 + * 802.1X PAE address 01-80-C2-00-00-03 + * + * 802.1AB LLDP 01-80-C2-00-00-0E + * + * Others reserved for future standardization + */ + switch (dest[5]) { + case 0x00: /* Bridge Group Address */ + /* If STP is turned off, + then must forward to keep loop detection */ + if (p->br->stp_enabled == BR_NO_STP) + goto forward; + break; + + case 0x01: /* IEEE MAC (Pause) */ goto drop; - /* If STP is turned off, then forward */ - if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) - goto forward; + default: + /* Allow selective forwarding for most other protocols */ + if (p->br->group_fwd_mask & (1u << dest[5])) + goto forward; + } + /* Deliver packet to local host only */ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2d85ca7..995cbe0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, { struct sk_buff *skb2; const struct ipv6hdr *ip6h; - struct icmp6hdr *icmp6h; + u8 icmp6_type; u8 nexthdr; unsigned len; int offset; @@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, __skb_pull(skb2, offset); skb_reset_transport_header(skb2); - icmp6h = icmp6_hdr(skb2); + icmp6_type = icmp6_hdr(skb2)->icmp6_type; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: @@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) goto out; + err = -EINVAL; } + ip6h = ipv6_hdr(skb2); + switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: - if (!csum_fold(skb2->csum)) + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, + IPPROTO_ICMPV6, skb2->csum)) break; /*FALLTHROUGH*/ case CHECKSUM_NONE: - skb2->csum = 0; - if (skb_checksum_complete(skb2)) + skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb2->len, + IPPROTO_ICMPV6, 0)); + if (__skb_checksum_complete(skb2)) goto out; } @@ -1537,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->igmp = 1; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_REPORT: { struct mld_msg *mld; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 6545ee9..a76b621 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_device *dev = ptr; struct net_bridge_port *p; struct net_bridge *br; + bool changed_addr; int err; /* register of bridge completed, add sysfs entries */ @@ -57,8 +58,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v case NETDEV_CHANGEADDR: spin_lock_bh(&br->lock); br_fdb_changeaddr(p, dev->dev_addr); - br_stp_recalculate_bridge_id(br); + changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + break; case NETDEV_CHANGE: diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 78cc364..a248fe6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,11 @@ #define BR_VERSION "2.3" +/* Control of forwarding link local multicast */ +#define BR_GROUPFWD_DEFAULT 0 +/* Don't allow forwarding control protocols like STP and LLDP */ +#define BR_GROUPFWD_RESTRICTED 0x4007u + /* Path to usermode spanning tree program */ #define BR_STP_PROG "/sbin/bridge-stp" @@ -193,6 +198,8 @@ struct net_bridge unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 + u16 group_fwd_mask; + /* STP */ bridge_id designated_root; bridge_id bridge_id; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 68b893e..c236c0e 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -149,6 +149,39 @@ static ssize_t store_stp_state(struct device *d, static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, store_stp_state); +static ssize_t show_group_fwd_mask(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#x\n", br->group_fwd_mask); +} + + +static ssize_t store_group_fwd_mask(struct device *d, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct net_bridge *br = to_bridge(d); + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + if (val & BR_GROUPFWD_RESTRICTED) + return -EINVAL; + + br->group_fwd_mask = val; + + return len; +} +static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, + store_group_fwd_mask); + static ssize_t show_priority(struct device *d, struct device_attribute *attr, char *buf) { @@ -652,6 +685,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_max_age.attr, &dev_attr_ageing_time.attr, &dev_attr_stp_state.attr, + &dev_attr_group_fwd_mask.attr, &dev_attr_priority.attr, &dev_attr_bridge_id.attr, &dev_attr_root_id.attr, diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ba6f73e..a9aff9c 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,7 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1bcaf36a..40d8258 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -87,14 +87,14 @@ static int __init ebtable_broute_init(void) if (ret < 0) return ret; /* see br_input.c */ - rcu_assign_pointer(br_should_route_hook, + RCU_INIT_POINTER(br_should_route_hook, (br_should_route_hook_t *)ebt_broute); return 0; } static void __exit ebtable_broute_fini(void) { - rcu_assign_pointer(br_should_route_hook, NULL); + RCU_INIT_POINTER(br_should_route_hook, NULL); synchronize_net(); unregister_pernet_subsys(&broute_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 2b5ca1a..5864cc4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1198,7 +1198,8 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) if (table->check && table->check(newinfo, table->valid_hooks)) { BUGPRINT("The table doesn't like its own initial data, lol\n"); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto free_chainstack; } table->private = newinfo; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7c2fa0a..7f9ac07 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) caifdevs = caif_device_list(dev_net(dev)); BUG_ON(!caifdevs); - caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); + caifd = kzalloc(sizeof(*caifd), GFP_KERNEL); if (!caifd) return NULL; caifd->pcpu_refcnt = alloc_percpu(int); + if (!caifd->pcpu_refcnt) { + kfree(caifd); + return NULL; + } caifd->netdev = dev; dev_hold(dev); return caifd; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 52fe33b..00523ec 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -78,10 +78,8 @@ struct cfcnfg *cfcnfg_create(void) /* Initiate this layer */ this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } this->mux = cfmuxl_create(); if (!this->mux) goto out_of_mem; @@ -108,8 +106,6 @@ struct cfcnfg *cfcnfg_create(void) return this; out_of_mem: - pr_warn("Out of memory\n"); - synchronize_rcu(); kfree(this->mux); @@ -448,10 +444,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, "- unknown channel type\n"); goto unlock; } - if (!servicel) { - pr_warn("Out of memory\n"); + if (!servicel) goto unlock; - } layer_set_dn(servicel, cnfg->mux); cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); layer_set_up(servicel, adapt_layer); @@ -473,7 +467,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, { struct cflayer *frml; struct cflayer *phy_driver = NULL; - struct cfcnfg_phyinfo *phyinfo; + struct cfcnfg_phyinfo *phyinfo = NULL; int i; u8 phyid; @@ -488,25 +482,25 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, goto got_phyid; } pr_warn("Too many CAIF Link Layers (max 6)\n"); - goto out; + goto out_err; got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); + if (!phyinfo) + goto out_err; switch (phy_type) { case CFPHYTYPE_FRAG: phy_driver = cfserl_create(CFPHYTYPE_FRAG, phyid, stx); - if (!phy_driver) { - pr_warn("Out of memory\n"); - goto out; - } + if (!phy_driver) + goto out_err; break; case CFPHYTYPE_CAIF: phy_driver = NULL; break; default: - goto out; + goto out_err; } phy_layer->id = phyid; phyinfo->pref = pref; @@ -520,11 +514,8 @@ got_phyid: frml = cffrml_create(phyid, fcs); - if (!frml) { - pr_warn("Out of memory\n"); - kfree(phyinfo); - goto out; - } + if (!frml) + goto out_err; phyinfo->frm_layer = frml; layer_set_up(frml, cnfg->mux); @@ -540,7 +531,12 @@ got_phyid: } list_add_rcu(&phyinfo->node, &cnfg->phys); -out: + mutex_unlock(&cnfg->lock); + return; + +out_err: + kfree(phy_driver); + kfree(phyinfo); mutex_unlock(&cnfg->lock); } EXPORT_SYMBOL(cfcnfg_add_phy_layer); diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index e22671b..5cf5222 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -35,15 +35,12 @@ struct cflayer *cfctrl_create(void) { struct dev_info dev_info; struct cfctrl *this = - kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + kzalloc(sizeof(struct cfctrl), GFP_ATOMIC); + if (!this) return NULL; - } caif_assert(offsetof(struct cfctrl, serv.layer) == 0); memset(&dev_info, 0, sizeof(dev_info)); dev_info.id = 0xff; - memset(this, 0, sizeof(*this)); cfsrvl_init(&this->serv, 0, &dev_info, false); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); @@ -180,10 +177,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) struct cfctrl *cfctrl = container_obj(layer); struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); struct cflayer *dn = cfctrl->serv.layer.dn; - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return; - } if (!dn) { pr_debug("not able to send enum request\n"); return; @@ -224,10 +219,8 @@ int cfctrl_linkup_request(struct cflayer *layer, } pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype); cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid); @@ -275,10 +268,8 @@ int cfctrl_linkup_request(struct cflayer *layer, return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) { - pr_warn("Out of memory\n"); + if (!req) return -ENOMEM; - } req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; @@ -312,10 +303,8 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); struct cflayer *dn = cfctrl->serv.layer.dn; - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (!dn) { pr_debug("not able to send link-down request\n"); diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 11a2af4..65d6ef3 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -19,13 +19,10 @@ static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!dbg) { - pr_warn("Out of memory\n"); + struct cfsrvl *dbg = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dbg) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(dbg, 0, sizeof(struct cfsrvl)); cfsrvl_init(dbg, channel_id, dev_info, false); dbg->layer.receive = cfdbgl_receive; dbg->layer.transmit = cfdbgl_transmit; diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 0382dec..0f5ff27 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -26,13 +26,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!dgm) { - pr_warn("Out of memory\n"); + struct cfsrvl *dgm = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!dgm) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(dgm, 0, sizeof(struct cfsrvl)); cfsrvl_init(dgm, channel_id, dev_info, true); dgm->layer.receive = cfdgml_receive; dgm->layer.transmit = cfdgml_transmit; diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 04204b2..f399211 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -34,11 +34,9 @@ static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; struct cflayer *cffrml_create(u16 phyid, bool use_fcs) { - struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + struct cffrml *this = kzalloc(sizeof(struct cffrml), GFP_ATOMIC); + if (!this) return NULL; - } this->pcpu_refcnt = alloc_percpu(int); if (this->pcpu_refcnt == NULL) { kfree(this); @@ -47,7 +45,6 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs) caif_assert(offsetof(struct cffrml, layer) == 0); - memset(this, 0, sizeof(struct cflayer)); this->layer.receive = cffrml_receive; this->layer.transmit = cffrml_transmit; this->layer.ctrlcmd = cffrml_ctrlcmd; diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index c23979e..b36f24a 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -108,7 +108,7 @@ struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid) int idx = phyid % DN_CACHE_SIZE; spin_lock_bh(&muxl->transmit_lock); - rcu_assign_pointer(muxl->dn_cache[idx], NULL); + RCU_INIT_POINTER(muxl->dn_cache[idx], NULL); dn = get_from_id(&muxl->frml_list, phyid); if (dn == NULL) goto out; @@ -164,7 +164,7 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) if (up == NULL) goto out; - rcu_assign_pointer(muxl->up_cache[idx], NULL); + RCU_INIT_POINTER(muxl->up_cache[idx], NULL); list_del_rcu(&up->node); out: spin_unlock_bh(&muxl->receive_lock); @@ -261,7 +261,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, idx = layer->id % UP_CACHE_SIZE; spin_lock_bh(&muxl->receive_lock); - rcu_assign_pointer(muxl->up_cache[idx], NULL); + RCU_INIT_POINTER(muxl->up_cache[idx], NULL); list_del_rcu(&layer->node); spin_unlock_bh(&muxl->receive_lock); } diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 0deabb4..81660f8 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -46,13 +46,10 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, int mtu_size) { int tmp; - struct cfrfml *this = - kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); + struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } cfsrvl_init(&this->serv, channel_id, dev_info, false); this->serv.release = cfrfml_release; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 2715c84..797c8d1 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -33,13 +33,10 @@ static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, struct cflayer *cfserl_create(int type, int instance, bool use_stx) { - struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC); + if (!this) return NULL; - } caif_assert(offsetof(struct cfserl, layer) == 0); - memset(this, 0, sizeof(struct cfserl)); this->layer.receive = cfserl_receive; this->layer.transmit = cfserl_transmit; this->layer.ctrlcmd = cfserl_ctrlcmd; diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 535a1e7..b99f5b2 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -108,10 +108,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_on = SRVL_FLOW_ON; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { pr_err("Packet is erroneous!\n"); @@ -130,10 +128,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_off = SRVL_FLOW_OFF; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); - if (!pkt) { - pr_warn("Out of memory\n"); + if (!pkt) return -ENOMEM; - } if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { pr_err("Packet is erroneous!\n"); diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 98e027d..53e49f3 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -26,13 +26,10 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!util) { - pr_warn("Out of memory\n"); + struct cfsrvl *util = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!util) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(util, 0, sizeof(struct cfsrvl)); cfsrvl_init(util, channel_id, dev_info, true); util->layer.receive = cfutill_receive; util->layer.transmit = cfutill_transmit; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 3ec83fb..910ab06 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -25,13 +25,10 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!vei) { - pr_warn("Out of memory\n"); + struct cfsrvl *vei = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vei) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(vei, 0, sizeof(struct cfsrvl)); cfsrvl_init(vei, channel_id, dev_info, true); vei->layer.receive = cfvei_receive; vei->layer.transmit = cfvei_transmit; diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index b2f5989..e3f37db 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -21,14 +21,11 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt); struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) { - struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!vid) { - pr_warn("Out of memory\n"); + struct cfsrvl *vid = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC); + if (!vid) return NULL; - } caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(vid, 0, sizeof(struct cfsrvl)); cfsrvl_init(vid, channel_id, dev_info, false); vid->layer.receive = cfvidl_receive; vid->layer.transmit = cfvidl_transmit; diff --git a/net/can/Kconfig b/net/can/Kconfig index 89395b2..0320069 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -40,5 +40,16 @@ config CAN_BCM CAN messages are used on the bus (e.g. in automotive environments). To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM. +config CAN_GW + tristate "CAN Gateway/Router (with netlink configuration)" + depends on CAN + default N + ---help--- + The CAN Gateway/Router is used to route (and modify) CAN frames. + It is based on the PF_CAN core infrastructure for msg filtering and + msg sending and can optionally modify routed CAN frames on the fly. + CAN frames can be routed between CAN network interfaces (one hop). + They can be modified with AND/OR/XOR/SET operations as configured + by the netlink configuration interface known e.g. from iptables. source "drivers/net/can/Kconfig" diff --git a/net/can/Makefile b/net/can/Makefile index 2d3894b3..cef49eb 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -10,3 +10,6 @@ can-raw-y := raw.o obj-$(CONFIG_CAN_BCM) += can-bcm.o can-bcm-y := bcm.o + +obj-$(CONFIG_CAN_GW) += can-gw.o +can-gw-y := gw.o diff --git a/net/can/af_can.c b/net/can/af_can.c index 8ce926d..d1ff515 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -719,7 +719,7 @@ int can_proto_register(const struct can_proto *cp) proto); err = -EBUSY; } else - rcu_assign_pointer(proto_tab[proto], cp); + RCU_INIT_POINTER(proto_tab[proto], cp); mutex_unlock(&proto_tab_lock); @@ -740,7 +740,7 @@ void can_proto_unregister(const struct can_proto *cp) mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[proto] != cp); - rcu_assign_pointer(proto_tab[proto], NULL); + RCU_INIT_POINTER(proto_tab[proto], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); @@ -857,7 +857,7 @@ static __exit void can_exit(void) struct net_device *dev; if (stats_timer) - del_timer(&can_stattimer); + del_timer_sync(&can_stattimer); can_remove_proc(); diff --git a/net/can/bcm.c b/net/can/bcm.c index d6c8ae5..c84963d 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_start_timer(struct bcm_op *op) +{ + if (op->kt_ival1.tv64 && op->count) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); + else if (op->kt_ival2.tv64) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); +} + static void bcm_tx_timeout_tsklet(unsigned long data) { struct bcm_op *op = (struct bcm_op *)data; @@ -365,26 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - } - - if (op->kt_ival1.tv64 && (op->count > 0)) { - - /* send (next) frame */ bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); - } else { - if (op->kt_ival2.tv64) { + } else if (op->kt_ival2.tv64) + bcm_can_tx(op); - /* send (next) frame */ - bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); - } - } + bcm_tx_start_timer(op); } /* @@ -964,23 +962,20 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_cancel(&op->timer); } - if ((op->flags & STARTTIMER) && - ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) { - + if (op->flags & STARTTIMER) { + hrtimer_cancel(&op->timer); /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - - if (op->kt_ival1.tv64 && (op->count > 0)) { - /* op->count-- is done in bcm_tx_timeout_handler */ - hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); - } else - hrtimer_start(&op->timer, op->kt_ival2, - HRTIMER_MODE_REL); } - if (op->flags & TX_ANNOUNCE) + if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); + if (op->count) + op->count--; + } + + if (op->flags & STARTTIMER) + bcm_tx_start_timer(op); return msg_head->nframes * CFSIZ + MHSIZ; } diff --git a/net/can/gw.c b/net/can/gw.c new file mode 100644 index 0000000..ac11407 --- /dev/null +++ b/net/can/gw.c @@ -0,0 +1,959 @@ +/* + * gw.c - CAN frame Gateway/Router/Bridge with netlink interface + * + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * Send feedback to <socketcan-users@lists.berlios.de> + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/rculist.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/skbuff.h> +#include <linux/can.h> +#include <linux/can/core.h> +#include <linux/can/gw.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#define CAN_GW_VERSION "20101209" +static __initdata const char banner[] = + KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; + +MODULE_DESCRIPTION("PF_CAN netlink gateway"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); +MODULE_ALIAS("can-gw"); + +HLIST_HEAD(cgw_list); +static struct notifier_block notifier; + +static struct kmem_cache *cgw_cache __read_mostly; + +/* structure that contains the (on-the-fly) CAN frame modifications */ +struct cf_mod { + struct { + struct can_frame and; + struct can_frame or; + struct can_frame xor; + struct can_frame set; + } modframe; + struct { + u8 and; + u8 or; + u8 xor; + u8 set; + } modtype; + void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf, + struct cf_mod *mod); + + /* CAN frame checksum calculation after CAN frame modifications */ + struct { + struct cgw_csum_xor xor; + struct cgw_csum_crc8 crc8; + } csum; + struct { + void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); + void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); + } csumfunc; +}; + + +/* + * So far we just support CAN -> CAN routing and frame modifications. + * + * The internal can_can_gw structure contains data and attributes for + * a CAN -> CAN gateway job. + */ +struct can_can_gw { + struct can_filter filter; + int src_idx; + int dst_idx; +}; + +/* list entry for CAN gateways jobs */ +struct cgw_job { + struct hlist_node list; + struct rcu_head rcu; + u32 handled_frames; + u32 dropped_frames; + struct cf_mod mod; + union { + /* CAN frame data source */ + struct net_device *dev; + } src; + union { + /* CAN frame data destination */ + struct net_device *dev; + } dst; + union { + struct can_can_gw ccgw; + /* tbc */ + }; + u8 gwtype; + u16 flags; +}; + +/* modification functions that are invoked in the hot path in can_can_gw_rcv */ + +#define MODFUNC(func, op) static void func(struct can_frame *cf, \ + struct cf_mod *mod) { op ; } + +MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) +MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc) +MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) +MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) +MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc) +MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) +MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) +MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc) +MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) +MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) +MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc) +MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) + +static inline void canframecpy(struct can_frame *dst, struct can_frame *src) +{ + /* + * Copy the struct members separately to ensure that no uninitialized + * data are copied in the 3 bytes hole of the struct. This is needed + * to make easy compares of the data in the struct cf_mod. + */ + + dst->can_id = src->can_id; + dst->can_dlc = src->can_dlc; + *(u64 *)dst->data = *(u64 *)src->data; +} + +static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) +{ + /* + * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] + * relative to received dlc -1 .. -8 : + * e.g. for received dlc = 8 + * -1 => index = 7 (data[7]) + * -3 => index = 5 (data[5]) + * -8 => index = 0 (data[0]) + */ + + if (fr > -9 && fr < 8 && + to > -9 && to < 8 && + re > -9 && re < 8) + return 0; + else + return -EINVAL; +} + +static inline int calc_idx(int idx, int rx_dlc) +{ + if (idx < 0) + return rx_dlc + idx; + else + return idx; +} + +static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + int from = calc_idx(xor->from_idx, cf->can_dlc); + int to = calc_idx(xor->to_idx, cf->can_dlc); + int res = calc_idx(xor->result_idx, cf->can_dlc); + u8 val = xor->init_xor_val; + int i; + + if (from < 0 || to < 0 || res < 0) + return; + + if (from <= to) { + for (i = from; i <= to; i++) + val ^= cf->data[i]; + } else { + for (i = from; i >= to; i--) + val ^= cf->data[i]; + } + + cf->data[res] = val; +} + +static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + u8 val = xor->init_xor_val; + int i; + + for (i = xor->from_idx; i <= xor->to_idx; i++) + val ^= cf->data[i]; + + cf->data[xor->result_idx] = val; +} + +static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) +{ + u8 val = xor->init_xor_val; + int i; + + for (i = xor->from_idx; i >= xor->to_idx; i--) + val ^= cf->data[i]; + + cf->data[xor->result_idx] = val; +} + +static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + int from = calc_idx(crc8->from_idx, cf->can_dlc); + int to = calc_idx(crc8->to_idx, cf->can_dlc); + int res = calc_idx(crc8->result_idx, cf->can_dlc); + u8 crc = crc8->init_crc_val; + int i; + + if (from < 0 || to < 0 || res < 0) + return; + + if (from <= to) { + for (i = crc8->from_idx; i <= crc8->to_idx; i++) + crc = crc8->crctab[crc^cf->data[i]]; + } else { + for (i = crc8->from_idx; i >= crc8->to_idx; i--) + crc = crc8->crctab[crc^cf->data[i]]; + } + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + u8 crc = crc8->init_crc_val; + int i; + + for (i = crc8->from_idx; i <= crc8->to_idx; i++) + crc = crc8->crctab[crc^cf->data[i]]; + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +{ + u8 crc = crc8->init_crc_val; + int i; + + for (i = crc8->from_idx; i >= crc8->to_idx; i--) + crc = crc8->crctab[crc^cf->data[i]]; + + switch (crc8->profile) { + + case CGW_CRC8PRF_1U8: + crc = crc8->crctab[crc^crc8->profile_data[0]]; + break; + + case CGW_CRC8PRF_16U8: + crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + break; + + case CGW_CRC8PRF_SFFID_XOR: + crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + (cf->can_id >> 8 & 0xFF)]; + break; + } + + cf->data[crc8->result_idx] = crc^crc8->final_xor_val; +} + +/* the receive & process & send function */ +static void can_can_gw_rcv(struct sk_buff *skb, void *data) +{ + struct cgw_job *gwj = (struct cgw_job *)data; + struct can_frame *cf; + struct sk_buff *nskb; + int modidx = 0; + + /* do not handle already routed frames - see comment below */ + if (skb_mac_header_was_set(skb)) + return; + + if (!(gwj->dst.dev->flags & IFF_UP)) { + gwj->dropped_frames++; + return; + } + + /* + * clone the given skb, which has not been done in can_rcv() + * + * When there is at least one modification function activated, + * we need to copy the skb as we want to modify skb->data. + */ + if (gwj->mod.modfunc[0]) + nskb = skb_copy(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); + + if (!nskb) { + gwj->dropped_frames++; + return; + } + + /* + * Mark routed frames by setting some mac header length which is + * not relevant for the CAN frames located in the skb->data section. + * + * As dev->header_ops is not set in CAN netdevices no one is ever + * accessing the various header offsets in the CAN skbuffs anyway. + * E.g. using the packet socket to read CAN frames is still working. + */ + skb_set_mac_header(nskb, 8); + nskb->dev = gwj->dst.dev; + + /* pointer to modifiable CAN frame */ + cf = (struct can_frame *)nskb->data; + + /* perform preprocessed modification functions if there are any */ + while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) + (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + + /* check for checksum updates when the CAN frame has been modified */ + if (modidx) { + if (gwj->mod.csumfunc.crc8) + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + + if (gwj->mod.csumfunc.xor) + (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + } + + /* clear the skb timestamp if not configured the other way */ + if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP)) + nskb->tstamp.tv64 = 0; + + /* send to netdevice */ + if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO)) + gwj->dropped_frames++; + else + gwj->handled_frames++; +} + +static inline int cgw_register_filter(struct cgw_job *gwj) +{ + return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, + gwj, "gw"); +} + +static inline void cgw_unregister_filter(struct cgw_job *gwj) +{ + can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id, + gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); +} + +static int cgw_notifier(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct net_device *dev = (struct net_device *)data; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + + if (msg == NETDEV_UNREGISTER) { + + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + + if (gwj->src.dev == dev || gwj->dst.dev == dev) { + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + } + } + } + + return NOTIFY_DONE; +} + +static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj) +{ + struct cgw_frame_mod mb; + struct rtcanmsg *rtcan; + struct nlmsghdr *nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*rtcan), 0); + if (!nlh) + return -EMSGSIZE; + + rtcan = nlmsg_data(nlh); + rtcan->can_family = AF_CAN; + rtcan->gwtype = gwj->gwtype; + rtcan->flags = gwj->flags; + + /* add statistics if available */ + + if (gwj->handled_frames) { + if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + if (gwj->dropped_frames) { + if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + /* check non default settings of attributes */ + + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb)); + } + + if (gwj->mod.csumfunc.crc8) { + if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, + &gwj->mod.csum.crc8) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + \ + NLA_ALIGN(CGW_CS_CRC8_LEN); + } + + if (gwj->mod.csumfunc.xor) { + if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, + &gwj->mod.csum.xor) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + \ + NLA_ALIGN(CGW_CS_XOR_LEN); + } + + if (gwj->gwtype == CGW_TYPE_CAN_CAN) { + + if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { + if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), + &gwj->ccgw.filter) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + + NLA_ALIGN(sizeof(struct can_filter)); + } + + if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + + if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0) + goto cancel; + else + nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32)); + } + + return skb->len; + +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */ +static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n; + int idx = 0; + int s_idx = cb->args[0]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + if (idx < s_idx) + goto cont; + + if (cgw_put_job(skb, gwj) < 0) + break; +cont: + idx++; + } + rcu_read_unlock(); + + cb->args[0] = idx; + + return skb->len; +} + +/* check for common and gwtype specific attributes */ +static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, + u8 gwtype, void *gwtypeattr) +{ + struct nlattr *tb[CGW_MAX+1]; + struct cgw_frame_mod mb; + int modidx = 0; + int err = 0; + + /* initialize modification & checksum data space */ + memset(mod, 0, sizeof(*mod)); + + err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, NULL); + if (err < 0) + return err; + + /* check for AND/OR/XOR/SET modifications */ + + if (tb[CGW_MOD_AND] && + nla_len(tb[CGW_MOD_AND]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_and_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_data; + } + + if (tb[CGW_MOD_OR] && + nla_len(tb[CGW_MOD_OR]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_or_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_data; + } + + if (tb[CGW_MOD_XOR] && + nla_len(tb[CGW_MOD_XOR]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_xor_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_data; + } + + if (tb[CGW_MOD_SET] && + nla_len(tb[CGW_MOD_SET]) == CGW_MODATTR_LEN) { + nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_DLC) + mod->modfunc[modidx++] = mod_set_dlc; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_data; + } + + /* check for checksum operations after CAN frame modifications */ + if (modidx) { + + if (tb[CGW_CS_CRC8] && + nla_len(tb[CGW_CS_CRC8]) == CGW_CS_CRC8_LEN) { + + struct cgw_csum_crc8 *c = (struct cgw_csum_crc8 *)\ + nla_data(tb[CGW_CS_CRC8]); + + err = cgw_chk_csum_parms(c->from_idx, c->to_idx, + c->result_idx); + if (err) + return err; + + nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], + CGW_CS_CRC8_LEN); + + /* + * select dedicated processing function to reduce + * runtime operations in receive hot path. + */ + if (c->from_idx < 0 || c->to_idx < 0 || + c->result_idx < 0) + mod->csumfunc.crc8 = cgw_csum_crc8_rel; + else if (c->from_idx <= c->to_idx) + mod->csumfunc.crc8 = cgw_csum_crc8_pos; + else + mod->csumfunc.crc8 = cgw_csum_crc8_neg; + } + + if (tb[CGW_CS_XOR] && + nla_len(tb[CGW_CS_XOR]) == CGW_CS_XOR_LEN) { + + struct cgw_csum_xor *c = (struct cgw_csum_xor *)\ + nla_data(tb[CGW_CS_XOR]); + + err = cgw_chk_csum_parms(c->from_idx, c->to_idx, + c->result_idx); + if (err) + return err; + + nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], + CGW_CS_XOR_LEN); + + /* + * select dedicated processing function to reduce + * runtime operations in receive hot path. + */ + if (c->from_idx < 0 || c->to_idx < 0 || + c->result_idx < 0) + mod->csumfunc.xor = cgw_csum_xor_rel; + else if (c->from_idx <= c->to_idx) + mod->csumfunc.xor = cgw_csum_xor_pos; + else + mod->csumfunc.xor = cgw_csum_xor_neg; + } + } + + if (gwtype == CGW_TYPE_CAN_CAN) { + + /* check CGW_TYPE_CAN_CAN specific attributes */ + + struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; + memset(ccgw, 0, sizeof(*ccgw)); + + /* check for can_filter in attributes */ + if (tb[CGW_FILTER] && + nla_len(tb[CGW_FILTER]) == sizeof(struct can_filter)) + nla_memcpy(&ccgw->filter, tb[CGW_FILTER], + sizeof(struct can_filter)); + + err = -ENODEV; + + /* specifying two interfaces is mandatory */ + if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF]) + return err; + + if (nla_len(tb[CGW_SRC_IF]) == sizeof(u32)) + nla_memcpy(&ccgw->src_idx, tb[CGW_SRC_IF], + sizeof(u32)); + + if (nla_len(tb[CGW_DST_IF]) == sizeof(u32)) + nla_memcpy(&ccgw->dst_idx, tb[CGW_DST_IF], + sizeof(u32)); + + /* both indices set to 0 for flushing all routing entries */ + if (!ccgw->src_idx && !ccgw->dst_idx) + return 0; + + /* only one index set to 0 is an error */ + if (!ccgw->src_idx || !ccgw->dst_idx) + return err; + } + + /* add the checks for other gwtypes here */ + + return 0; +} + +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct rtcanmsg *r; + struct cgw_job *gwj; + int err = 0; + + if (nlmsg_len(nlh) < sizeof(*r)) + return -EINVAL; + + r = nlmsg_data(nlh); + if (r->can_family != AF_CAN) + return -EPFNOSUPPORT; + + /* so far we only support CAN -> CAN routings */ + if (r->gwtype != CGW_TYPE_CAN_CAN) + return -EINVAL; + + gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); + if (!gwj) + return -ENOMEM; + + gwj->handled_frames = 0; + gwj->dropped_frames = 0; + gwj->flags = r->flags; + gwj->gwtype = r->gwtype; + + err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw); + if (err < 0) + goto out; + + err = -ENODEV; + + /* ifindex == 0 is not allowed for job creation */ + if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx) + goto out; + + gwj->src.dev = dev_get_by_index(&init_net, gwj->ccgw.src_idx); + + if (!gwj->src.dev) + goto out; + + /* check for CAN netdev not using header_ops - see gw_rcv() */ + if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops) + goto put_src_out; + + gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx); + + if (!gwj->dst.dev) + goto put_src_out; + + /* check for CAN netdev not using header_ops - see gw_rcv() */ + if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops) + goto put_src_dst_out; + + ASSERT_RTNL(); + + err = cgw_register_filter(gwj); + if (!err) + hlist_add_head_rcu(&gwj->list, &cgw_list); + +put_src_dst_out: + dev_put(gwj->dst.dev); +put_src_out: + dev_put(gwj->src.dev); +out: + if (err) + kmem_cache_free(cgw_cache, gwj); + + return err; +} + +static void cgw_remove_all_jobs(void) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + + ASSERT_RTNL(); + + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + } +} + +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct cgw_job *gwj = NULL; + struct hlist_node *n, *nx; + struct rtcanmsg *r; + struct cf_mod mod; + struct can_can_gw ccgw; + int err = 0; + + if (nlmsg_len(nlh) < sizeof(*r)) + return -EINVAL; + + r = nlmsg_data(nlh); + if (r->can_family != AF_CAN) + return -EPFNOSUPPORT; + + /* so far we only support CAN -> CAN routings */ + if (r->gwtype != CGW_TYPE_CAN_CAN) + return -EINVAL; + + err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw); + if (err < 0) + return err; + + /* two interface indices both set to 0 => remove all entries */ + if (!ccgw.src_idx && !ccgw.dst_idx) { + cgw_remove_all_jobs(); + return 0; + } + + err = -EINVAL; + + ASSERT_RTNL(); + + /* remove only the first matching entry */ + hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + + if (gwj->flags != r->flags) + continue; + + if (memcmp(&gwj->mod, &mod, sizeof(mod))) + continue; + + /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) + continue; + + hlist_del(&gwj->list); + cgw_unregister_filter(gwj); + kfree(gwj); + err = 0; + break; + } + + return err; +} + +static __init int cgw_module_init(void) +{ + printk(banner); + + cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), + 0, 0, NULL); + + if (!cgw_cache) + return -ENOMEM; + + /* set notifier */ + notifier.notifier_call = cgw_notifier; + register_netdevice_notifier(¬ifier); + + if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) { + unregister_netdevice_notifier(¬ifier); + kmem_cache_destroy(cgw_cache); + return -ENOBUFS; + } + + /* Only the first call to __rtnl_register can fail */ + __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL); + __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL); + + return 0; +} + +static __exit void cgw_module_exit(void) +{ + rtnl_unregister_all(PF_CAN); + + unregister_netdevice_notifier(¬ifier); + + rtnl_lock(); + cgw_remove_all_jobs(); + rtnl_unlock(); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ + + kmem_cache_destroy(cgw_cache); +} + +module_init(cgw_module_init); +module_exit(cgw_module_exit); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 132963a..2883ea0 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -232,6 +232,7 @@ void ceph_destroy_options(struct ceph_options *opt) ceph_crypto_key_destroy(opt->key); kfree(opt->key); } + kfree(opt->mon_addr); kfree(opt); } EXPORT_SYMBOL(ceph_destroy_options); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index c340e2e..9918e9e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2307,6 +2307,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false; + m->ack_stamp = 0; m->pool = NULL; /* middle */ diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index d5f2d97..1f4cb30 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -7,27 +7,37 @@ #include <linux/ceph/msgpool.h> -static void *alloc_fn(gfp_t gfp_mask, void *arg) +static void *msgpool_alloc(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - void *p; + struct ceph_msg *msg; - p = ceph_msg_new(0, pool->front_len, gfp_mask); - if (!p) - pr_err("msgpool %s alloc failed\n", pool->name); - return p; + msg = ceph_msg_new(0, pool->front_len, gfp_mask); + if (!msg) { + dout("msgpool_alloc %s failed\n", pool->name); + } else { + dout("msgpool_alloc %s %p\n", pool->name, msg); + msg->pool = pool; + } + return msg; } -static void free_fn(void *element, void *arg) +static void msgpool_free(void *element, void *arg) { - ceph_msg_put(element); + struct ceph_msgpool *pool = arg; + struct ceph_msg *msg = element; + + dout("msgpool_release %s %p\n", pool->name, msg); + msg->pool = NULL; + ceph_msg_put(msg); } int ceph_msgpool_init(struct ceph_msgpool *pool, int front_len, int size, bool blocking, const char *name) { + dout("msgpool %s init\n", name); pool->front_len = front_len; - pool->pool = mempool_create(size, alloc_fn, free_fn, pool); + pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; pool->name = name; @@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool, void ceph_msgpool_destroy(struct ceph_msgpool *pool) { + dout("msgpool %s destroy\n", pool->name); mempool_destroy(pool->pool); } struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { + struct ceph_msg *msg; + if (front_len > pool->front_len) { - pr_err("msgpool_get pool %s need front %d, pool size is %d\n", + dout("msgpool_get %s need front %d, pool size is %d\n", pool->name, front_len, pool->front_len); WARN_ON(1); @@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, return ceph_msg_new(0, front_len, GFP_NOFS); } - return mempool_alloc(pool->pool, GFP_NOFS); + msg = mempool_alloc(pool->pool, GFP_NOFS); + dout("msgpool_get %s %p\n", pool->name, msg); + return msg; } void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { + dout("msgpool_put %s %p\n", pool->name, msg); + /* reset msg front_len; user may have changed it */ msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); kref_init(&msg->kref); /* retake single ref */ + mempool_free(msg, pool->pool); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ce310ee..88ad8a2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); + INIT_LIST_HEAD(&req->r_req_lru_item); req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -685,6 +686,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) put_osd(osd); } +static void remove_all_osds(struct ceph_osd_client *osdc) +{ + dout("__remove_old_osds %p\n", osdc); + mutex_lock(&osdc->request_mutex); + while (!RB_EMPTY_ROOT(&osdc->osds)) { + struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), + struct ceph_osd, o_node); + __remove_osd(osdc, osd); + } + mutex_unlock(&osdc->request_mutex); +} + static void __move_osd_to_lru(struct ceph_osd_client *osdc, struct ceph_osd *osd) { @@ -701,14 +714,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd) list_del_init(&osd->o_osd_lru); } -static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) +static void remove_old_osds(struct ceph_osd_client *osdc) { struct ceph_osd *osd, *nosd; dout("__remove_old_osds %p\n", osdc); mutex_lock(&osdc->request_mutex); list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { - if (!remove_all && time_before(jiffies, osd->lru_ttl)) + if (time_before(jiffies, osd->lru_ttl)) break; __remove_osd(osdc, osd); } @@ -751,6 +764,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) struct rb_node *parent = NULL; struct ceph_osd *osd = NULL; + dout("__insert_osd %p osd%d\n", new, new->o_osd); while (*p) { parent = *p; osd = rb_entry(parent, struct ceph_osd, o_node); @@ -803,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc, { req->r_tid = ++osdc->last_tid; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); - INIT_LIST_HEAD(&req->r_req_lru_item); - dout("__register_request %p tid %lld\n", req, req->r_tid); __insert_request(osdc, req); ceph_osdc_get_request(req); osdc->num_requests++; - if (osdc->num_requests == 1) { dout(" first request, scheduling timeout\n"); __schedule_osd_timeout(osdc); @@ -1144,7 +1155,7 @@ static void handle_osds_timeout(struct work_struct *work) dout("osds timeout\n"); down_read(&osdc->map_sem); - remove_old_osds(osdc, 0); + remove_old_osds(osdc); up_read(&osdc->map_sem); schedule_delayed_work(&osdc->osds_timeout_work, @@ -1862,8 +1873,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = NULL; } - remove_old_osds(osdc, 1); - WARN_ON(!RB_EMPTY_ROOT(&osdc->osds)); + remove_all_osds(osdc); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e97c358..fd863fe7 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, struct ceph_pg_mapping *pg = NULL; int c; + dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new); while (*p) { parent = *p; pg = rb_entry(parent, struct ceph_pg_mapping, node); @@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, while (n) { pg = rb_entry(n, struct ceph_pg_mapping, node); c = pgid_cmp(pgid, pg->pgid); - if (c < 0) + if (c < 0) { n = n->rb_left; - else if (c > 0) + } else if (c > 0) { n = n->rb_right; - else + } else { + dout("__lookup_pg_mapping %llx got %p\n", + *(u64 *)&pgid, pg); return pg; + } } return NULL; } +static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) +{ + struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); + + if (pg) { + dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + rb_erase(&pg->node, root); + kfree(pg); + return 0; + } + dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + return -ENOENT; +} + /* * rbtree of pg pool info */ @@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, void *start = *p; int err = -EINVAL; u16 version; - struct rb_node *rbp; ceph_decode_16_safe(p, end, version, bad); if (version > CEPH_OSDMAP_INC_VERSION) { @@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_pg_temp */ - rbp = rb_first(&map->pg_temp); ceph_decode_32_safe(p, end, len, bad); while (len--) { struct ceph_pg_mapping *pg; @@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ceph_decode_copy(p, &pgid, sizeof(pgid)); pglen = ceph_decode_32(p); - /* remove any? */ - while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, - node)->pgid, pgid) <= 0) { - struct ceph_pg_mapping *cur = - rb_entry(rbp, struct ceph_pg_mapping, node); - - rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); - rb_erase(&cur->node, &map->pg_temp); - kfree(cur); - } - if (pglen) { /* insert */ ceph_decode_need(p, end, pglen*sizeof(u32), bad); @@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, pglen); + } else { + /* remove */ + __remove_pg_mapping(&map->pg_temp, pgid); } } - while (rbp) { - struct ceph_pg_mapping *cur = - rb_entry(rbp, struct ceph_pg_mapping, node); - - rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); - rb_erase(&cur->node, &map->pg_temp); - kfree(cur); - } /* ignore the rest */ *p = end; @@ -1046,10 +1044,25 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned poolid, ps, pps; + unsigned poolid, ps, pps, t; int preferred; + poolid = le32_to_cpu(pgid.pool); + ps = le16_to_cpu(pgid.ps); + preferred = (s16)le16_to_cpu(pgid.preferred); + + pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + if (!pool) + return NULL; + /* pg_temp? */ + if (preferred >= 0) + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num), + pool->lpgp_num_mask); + else + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), + pool->pgp_num_mask); + pgid.ps = cpu_to_le16(t); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1057,18 +1070,6 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); - preferred = (s16)le16_to_cpu(pgid.preferred); - - /* don't forcefeed bad device ids to crush */ - if (preferred >= osdmap->max_osd || - preferred >= osdmap->crush->max_devices) - preferred = -1; - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); - if (!pool) - return NULL; ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, pool->v.type, pool->v.size); if (ruleno < 0) { @@ -1078,6 +1079,11 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, return NULL; } + /* don't forcefeed bad device ids to crush */ + if (preferred >= osdmap->max_osd || + preferred >= osdmap->crush->max_devices) + preferred = -1; + if (preferred >= 0) pps = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpgp_num), diff --git a/net/core/Makefile b/net/core/Makefile index 8a04dd2..0d357b1 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 18ac112..6449bed 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -332,7 +332,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -418,7 +418,7 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -508,7 +508,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -594,7 +594,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, int err = 0; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b5..70ecb86 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -133,6 +133,9 @@ #include <linux/pci.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> +#include <linux/if_tunnel.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> #include "net-sysfs.h" @@ -1515,6 +1518,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); @@ -1947,9 +1958,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #ifdef CONFIG_HIGHMEM int i; if (!(dev->features & NETIF_F_HIGHDMA)) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (PageHighMem(skb_frag_page(frag))) return 1; + } } if (PCI_DMA_BUS_IS_PHYS) { @@ -1958,7 +1971,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (!pdev) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + dma_addr_t addr = page_to_phys(skb_frag_page(frag)); if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) return 1; } @@ -2519,25 +2533,31 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Returns a non-zero hash number on success - * and 0 on failure. + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. */ -__u32 __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; const struct ipv6hdr *ip6; const struct iphdr *ip; + const struct vlan_hdr *vlan; u8 ip_proto; - u32 addr1, addr2, ihl; + u32 addr1, addr2; + u16 proto; union { u32 v32; u16 v16[2]; } ports; nhoff = skb_network_offset(skb); + proto = skb->protocol; - switch (skb->protocol) { +again: + switch (proto) { case __constant_htons(ETH_P_IP): +ip: if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; @@ -2548,9 +2568,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; - ihl = ip->ihl; + nhoff += ip->ihl * 4; break; case __constant_htons(ETH_P_IPV6): +ipv6: if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; @@ -2558,20 +2579,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; - ihl = (40 >> 2); + nhoff += 40; break; + case __constant_htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff)) + goto done; + vlan = (const struct vlan_hdr *) (skb->data + nhoff); + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + goto again; + case __constant_htons(ETH_P_PPP_SES): + if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff)) + goto done; + proto = *((__be16 *) (skb->data + nhoff + + sizeof(struct pppoe_hdr))); + nhoff += PPPOE_SES_HLEN; + switch (proto) { + case __constant_htons(PPP_IP): + goto ip; + case __constant_htons(PPP_IPV6): + goto ipv6; + default: + goto done; + } default: goto done; } + switch (ip_proto) { + case IPPROTO_GRE: + if (pskb_may_pull(skb, nhoff + 16)) { + u8 *h = skb->data + nhoff; + __be16 flags = *(__be16 *)h; + + /* + * Only look inside GRE if version zero and no + * routing + */ + if (!(flags & (GRE_VERSION|GRE_ROUTING))) { + proto = *(__be16 *)(h + 2); + nhoff += 4; + if (flags & GRE_CSUM) + nhoff += 4; + if (flags & GRE_KEY) + nhoff += 4; + if (flags & GRE_SEQ) + nhoff += 4; + goto again; + } + } + break; + case IPPROTO_IPIP: + goto again; + default: + break; + } + ports.v32 = 0; poff = proto_ports_offset(ip_proto); if (poff >= 0) { - nhoff += ihl * 4 + poff; + nhoff += poff; if (pskb_may_pull(skb, nhoff + 4)) { ports.v32 = * (__force u32 *) (skb->data + nhoff); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); + skb->l4_rxhash = 1; } } @@ -2584,7 +2656,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) hash = 1; done: - return hash; + skb->rxhash = hash; } EXPORT_SYMBOL(__skb_get_rxhash); @@ -2598,10 +2670,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - u16 tcpu; - - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) { + if (next_cpu != RPS_NO_CPU) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -2629,16 +2698,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto out; old_rflow = rflow; rflow = &flow_table->flows[flow_id]; - rflow->cpu = next_cpu; rflow->filter = rc; if (old_rflow->filter == rflow->filter) old_rflow->filter = RPS_NO_FILTER; out: #endif rflow->last_qtail = - per_cpu(softnet_data, tcpu).input_queue_head; + per_cpu(softnet_data, next_cpu).input_queue_head; } + rflow->cpu = next_cpu; return rflow; } @@ -2673,13 +2742,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { if (map->len == 1 && - !rcu_dereference_raw(rxqueue->rps_flow_table)) { + !rcu_access_pointer(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; goto done; } - } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { + } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { goto done; } @@ -3094,8 +3163,8 @@ void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); - rcu_assign_pointer(dev->rx_handler, NULL); - rcu_assign_pointer(dev->rx_handler_data, NULL); + RCU_INIT_POINTER(dev->rx_handler, NULL); + RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); @@ -3187,10 +3256,9 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_do_receive(&skb)) { - ret = __netif_receive_skb(skb); - goto out; - } else if (unlikely(!skb)) + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) goto out; } @@ -3424,7 +3492,7 @@ pull: skb_shinfo(skb)->frags[0].size -= grow; if (unlikely(!skb_shinfo(skb)->frags[0].size)) { - put_page(skb_shinfo(skb)->frags[0].page); + skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -3488,10 +3556,9 @@ void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_shinfo(skb)->frags[0].page)) { + !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; + skb_frag_address(&skb_shinfo(skb)->frags[0]); NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; } } @@ -4489,9 +4556,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); - else { + if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ @@ -4502,10 +4567,10 @@ void __dev_set_rx_mode(struct net_device *dev) __dev_set_promiscuity(dev, -1); dev->uc_promisc = false; } - - if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); } + + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); } void dev_set_rx_mode(struct net_device *dev) @@ -4516,30 +4581,6 @@ void dev_set_rx_mode(struct net_device *dev) } /** - * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() - * @dev: device - * @cmd: memory area for ethtool_ops::get_settings() result - * - * The cmd arg is initialized properly (cleared and - * ethtool_cmd::cmd field set to ETHTOOL_GSET). - * - * Return device's ethtool_ops::get_settings() result value or - * -EOPNOTSUPP when device doesn't expose - * ethtool_ops::get_settings() operation. - */ -int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - memset(cmd, 0, sizeof(struct ethtool_cmd)); - cmd->cmd = ETHTOOL_GSET; - return dev->ethtool_ops->get_settings(dev, cmd); -} -EXPORT_SYMBOL(dev_ethtool_get_settings); - -/** * dev_get_flags - get flags reported to userspace * @dev: device * @@ -4855,7 +4896,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4863,7 +4904,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -5727,8 +5768,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); - WARN_ON(rcu_dereference_raw(dev->ip_ptr)); - WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); + WARN_ON(rcu_access_pointer(dev->ip_ptr)); + WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); if (dev->destructor) @@ -5932,7 +5973,7 @@ void free_netdev(struct net_device *dev) kfree(dev->_rx); #endif - kfree(rcu_dereference_raw(dev->ingress_queue)); + kfree(rcu_dereference_protected(dev->ingress_queue, 1)); /* Flush device addresses */ dev_addr_flush(dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index e2e6693..283d1b8 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global); * addresses that have no users left. The source device must be * locked by netif_tx_lock_bh. * - * This function is intended to be called from the dev->set_multicast_list - * or dev->set_rx_mode function of layered software devices. + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { diff --git a/net/core/dst.c b/net/core/dst.c index 14b33baf..d5e2c4c 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->_neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; if (neigh) { - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->_neighbour && dst->_neighbour->dev == dev) { - dst->_neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6cdba5f..f444817 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; - int err; + ASSERT_RTNL(); - if (!dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) return -EOPNOTSUPP; - err = dev->ethtool_ops->get_settings(dev, &cmd); + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(__ethtool_get_settings); + +static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +{ + int err; + struct ethtool_cmd cmd; + + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0..38be474 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && - r->target == rule->pref) { - BUG_ON(rtnl_dereference(r->ctarget) != NULL); + r->target == rule->pref && + rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; @@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { if (rtnl_dereference(tmp->ctarget) == rule) { - rcu_assign_pointer(tmp->ctarget, NULL); + RCU_INIT_POINTER(tmp->ctarget, NULL); ops->unresolved_rules++; } } @@ -545,7 +545,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags = rule->flags; if (rule->action == FR_ACT_GOTO && - rcu_dereference_raw(rule->ctarget) == NULL) + rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { diff --git a/net/core/filter.c b/net/core/filter.c index 36f975f..8fcc2d7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); ret = 0; } diff --git a/net/core/flow.c b/net/core/flow.c index bf32c33..555a456 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -222,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -230,11 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -246,9 +251,10 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h index 283c2b99..81e1ed7 100644 --- a/net/core/kmap_skb.h +++ b/net/core/kmap_skb.h @@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag) local_bh_disable(); #endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); + return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ); } static inline void kunmap_skb_frag(void *vaddr) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 357bd4e..c3519c6 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev) static bool linkwatch_urgent_event(struct net_device *dev) { - return netif_running(dev) && netif_carrier_ok(dev) && - qdisc_tx_changing(dev); + if (!netif_running(dev)) + return false; + + if (dev->ifindex != dev->iflink) + return true; + + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0..4344964 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); } +static void neigh_probe(struct neighbour *neigh) + __releases(neigh->lock) +{ + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb = skb_copy(skb, GFP_ATOMIC); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + kfree_skb(skb); +} + /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(unsigned long arg) @@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); - /* keep skb alive even if arp_queue overflows */ - if (skb) - skb = skb_copy(skb, GFP_ATOMIC); - write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - kfree_skb(skb); + neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); @@ -942,7 +948,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; - unsigned long now; + bool immediate_probe = false; write_lock_bh(&neigh->lock); @@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; - now = jiffies; - if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + unsigned long next, now = jiffies; + atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; - neigh->updated = jiffies; - neigh_add_timer(neigh, now + 1); + neigh->updated = now; + next = now + max(neigh->parms->retrans_time, HZ/2); + neigh_add_timer(neigh, next); + immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 1; } out_unlock_bh: - write_unlock_bh(&neigh->lock); + if (immediate_probe) + neigh_probe(neigh); + else + write_unlock(&neigh->lock); + local_bh_enable(); return rc; } EXPORT_SYMBOL(__neigh_event_send); @@ -1319,11 +1331,15 @@ static void neigh_proxy_process(unsigned long arg) if (tdif <= 0) { struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); - if (tbl->proxy_redo && netif_running(dev)) + if (tbl->proxy_redo && netif_running(dev)) { + rcu_read_lock(); tbl->proxy_redo(skb); - else + rcu_read_unlock(); + } else { kfree_skb(skb); + } dev_put(dev); } else if (!sched_next || tdif < sched_next) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1683e5d..7604a63 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); @@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); } @@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj) struct rps_dev_flow_table *flow_table; - map = rcu_dereference_raw(queue->rps_map); + map = rcu_dereference_protected(queue->rps_map, 1); if (map) { RCU_INIT_POINTER(queue->rps_map, NULL); kfree_rcu(map, rcu); } - flow_table = rcu_dereference_raw(queue->rps_flow_table); + flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); if (flow_table) { RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); @@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue, } if (nonempty) - rcu_assign_pointer(dev->xps_maps, new_dev_maps); + RCU_INIT_POINTER(dev->xps_maps, new_dev_maps); else { kfree(new_dev_maps); - rcu_assign_pointer(dev->xps_maps, NULL); + RCU_INIT_POINTER(dev->xps_maps, NULL); } if (dev_maps) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index adf84dd..f57d946 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb) if (skb_shared(skb)) goto out; - iph = (struct iphdr *)skb->data; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; + iph = (struct iphdr *)skb->data; if (iph->ihl < 5 || iph->version != 4) goto out; if (!pskb_may_pull(skb, iph->ihl*4)) goto out; + iph = (struct iphdr *)skb->data; if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) goto out; @@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb) if (pskb_trim_rcsum(skb, len)) goto out; + iph = (struct iphdr *)skb->data; if (iph->protocol != IPPROTO_UDP) goto out; @@ -760,7 +762,7 @@ int __netpoll_setup(struct netpoll *np) } /* last thing to do is link it to the net device structure */ - rcu_assign_pointer(ndev->npinfo, npinfo); + RCU_INIT_POINTER(ndev->npinfo, npinfo); return 0; @@ -901,7 +903,7 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - rcu_assign_pointer(np->dev->npinfo, NULL); + RCU_INIT_POINTER(np->dev->npinfo, NULL); /* avoid racing with NAPI reading npinfo */ synchronize_rcu_bh(); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e35a6fb..796044a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2602,8 +2602,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (!pkt_dev->page) break; } - skb_shinfo(skb)->frags[i].page = pkt_dev->page; - get_page(pkt_dev->page); + skb_frag_set_page(skb, i, pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ if (i == (frags - 1)) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 99d9e95..39f8dd6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1604,7 +1604,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev_net_set(dev, net); dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - dev->real_num_tx_queues = real_num_queues; if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); diff --git a/net/core/scm.c b/net/core/scm.c index 4c1ef02..ff52ad0 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) if (err) goto error; - if (pid_vnr(p->pid) != p->creds.pid) { + if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { struct pid *pid; err = -ESRCH; pid = find_get_pid(p->creds.pid); @@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->pid = pid; } - if ((p->cred->euid != p->creds.uid) || - (p->cred->egid != p->creds.gid)) { + if (!p->cred || + (p->cred->euid != p->creds.uid) || + (p->cred->egid != p->creds.gid)) { struct cred *cred; err = -ENOMEM; cred = prepare_creds(); @@ -192,8 +193,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) goto error; cred->uid = cred->euid = p->creds.uid; - cred->gid = cred->egid = p->creds.uid; - put_cred(p->cred); + cred->gid = cred->egid = p->creds.gid; + if (p->cred) + put_cred(p->cred); p->cred = cred; } break; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c new file mode 100644 index 0000000..45329d7 --- /dev/null +++ b/net/core/secure_seq.c @@ -0,0 +1,184 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/cryptohash.h> +#include <linux/module.h> +#include <linux/cache.h> +#include <linux/random.h> +#include <linux/hrtimer.h> +#include <linux/ktime.h> +#include <linux/string.h> + +#include <net/secure_seq.h> + +static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; + +static int __init net_secret_init(void) +{ + get_random_bytes(net_secret, sizeof(net_secret)); + return 0; +} +late_initcall(net_secret_init); + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. (period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +#endif + +#ifdef CONFIG_INET +__u32 secure_ip_id(__be32 daddr) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force __u32) daddr; + hash[1] = net_secret[13]; + hash[2] = net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_ipv6_id(const __be32 daddr[4]) +{ + __u32 hash[4]; + + memcpy(hash, daddr, 16); + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} +EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); +#endif + +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccp_sequence_number); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccpv6_sequence_number); +#endif +#endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2beda82..5b2c5f1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -326,7 +326,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_shinfo(skb)->nr_frags) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); } /* @@ -529,6 +529,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; + new->ooo_okay = old->ooo_okay; + new->l4_rxhash = old->l4_rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -611,8 +613,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -/* skb frags copy userspace buffers to kernel */ -static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +/* skb_copy_ubufs - copy userspace skb frags buffers to kernel + * @skb: the skb to modify + * @gfp_mask: allocation priority + * + * This must be called on SKBTX_DEV_ZEROCOPY skb. + * It will copy all frags into kernel and drop the reference + * to userspace pages. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + * + * Returns 0 on success or a negative error code on failure + * to allocate kernel memory to copy to. + */ +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int i; int num_frags = skb_shinfo(skb)->nr_frags; @@ -652,6 +667,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->frags[i - 1].page = head; head = (struct page *)head->private; } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } @@ -677,7 +694,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) return NULL; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } n = skb + 1; @@ -803,11 +819,10 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n = NULL; goto out; } - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); + skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } @@ -896,10 +911,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) goto nofrags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); @@ -1179,7 +1193,7 @@ drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -1348,7 +1362,7 @@ pull_pages: k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -1369,8 +1383,21 @@ pull_pages: } EXPORT_SYMBOL(__pskb_pull_tail); -/* Copy some data bits from skb to kernel buffer. */ - +/** + * skb_copy_bits - copy bits from skb to kernel buffer + * @skb: source skb + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy the specified number of bytes from the source skb to the + * destination buffer. + * + * CAUTION ! : + * If its prototype is ever changed, + * check arch/{*}/net/{*}.S files, + * since it is called from BPF assembly code. + */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); @@ -1594,7 +1621,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - if (__splice_segment(f->page, f->page_offset, f->size, + if (__splice_segment(skb_frag_page(f), + f->page_offset, f->size, offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -2139,7 +2167,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, * where splitting is expensive. * 2. Split is accurately. We make this. */ - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); skb_shinfo(skb1)->frags[0].page_offset += len - pos; skb_shinfo(skb1)->frags[0].size -= len - pos; skb_shinfo(skb)->frags[i].size = len - pos; @@ -2214,7 +2242,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) * commit all, so that we don't have to undo partial changes */ if (!to || - !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), + fragfrom->page_offset)) { merge = -1; } else { merge = to - 1; @@ -2261,7 +2290,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - get_page(fragfrom->page); + __skb_frag_ref(fragfrom); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; fragto->size = todo; @@ -2283,7 +2312,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragto = &skb_shinfo(tgt)->frags[merge]; fragto->size += fragfrom->size; - put_page(fragfrom->page); + __skb_frag_unref(fragfrom); } /* Reposition in the original skb */ @@ -2528,8 +2557,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, left = PAGE_SIZE - frag->page_offset; copy = (length > left)? left : length; - ret = getfrag(from, (page_address(frag->page) + - frag->page_offset + frag->size), + ret = getfrag(from, skb_frag_address(frag) + frag->size, offset, copy, 0, skb); if (ret < 0) return -EFAULT; @@ -2681,7 +2709,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); + __skb_frag_ref(frag); size = frag->size; if (pos < offset) { @@ -2904,7 +2932,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > len) copy = len; - sg_set_page(&sg[elt], frag->page, copy, + sg_set_page(&sg[elt], skb_frag_page(frag), copy, frag->page_offset+offset-start); elt++; if (!(len -= copy)) diff --git a/net/core/sock.c b/net/core/sock.c index bc745d0..83c462d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - rcu_assign_pointer(sk->sk_dst_cache, NULL); + RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } @@ -738,10 +738,7 @@ set_rcvbuf: /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ case SO_RXQ_OVFL: - if (valbool) - sock_set_flag(sk, SOCK_RXQ_OVFL); - else - sock_reset_flag(sk, SOCK_RXQ_OVFL); + sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; default: ret = -ENOPROTOOPT; @@ -1158,7 +1155,7 @@ static void __sk_free(struct sock *sk) atomic_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); } sock_disable_timestamp(sk, SOCK_TIMESTAMP); @@ -1533,7 +1530,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { struct page *page; - skb_frag_t *frag; page = alloc_pages(sk->sk_allocation, 0); if (!page) { @@ -1543,12 +1539,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, goto failure; } - frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = 0; - frag->size = (data_len >= PAGE_SIZE ? - PAGE_SIZE : - data_len); + __skb_fill_page_desc(skb, i, + page, 0, + (data_len >= PAGE_SIZE ? + PAGE_SIZE : + data_len)); data_len -= PAGE_SIZE; } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 25d717e..34e9664 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -78,7 +78,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, copy = end - offset; if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3cb56af..9bfbc1d 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1255,7 +1255,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) spin_lock(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { - if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + if (itr->ifindex == netdev->ifindex) { err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), &itr->app); if (err) { @@ -1412,7 +1412,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) goto dcb_unlock; list_for_each_entry(itr, &dcb_app_list, list) { - if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + if (itr->ifindex == netdev->ifindex) { struct nlattr *app_nest = nla_nest_start(skb, DCB_ATTR_APP); if (!app_nest) @@ -2050,7 +2050,7 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { prio = itr->app.priority; break; } @@ -2073,15 +2073,17 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) struct dcb_app_type *itr; struct dcb_app_type event; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, new, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and replace */ list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == new->selector && itr->app.protocol == new->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { if (new->priority) itr->app.priority = new->priority; else { @@ -2101,7 +2103,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) } memcpy(&entry->app, new, sizeof(*new)); - strncpy(entry->name, dev->name, IFNAMSIZ); + entry->ifindex = dev->ifindex; list_add(&entry->list, &dcb_app_list); } out: @@ -2127,7 +2129,7 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { prio |= 1 << itr->app.priority; } } @@ -2150,8 +2152,10 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) struct dcb_app_type event; int err = 0; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, new, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and abort if found */ @@ -2159,7 +2163,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) if (itr->app.selector == new->selector && itr->app.protocol == new->protocol && itr->app.priority == new->priority && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { err = -EEXIST; goto out; } @@ -2173,7 +2177,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) } memcpy(&entry->app, new, sizeof(*new)); - strncpy(entry->name, dev->name, IFNAMSIZ); + entry->ifindex = dev->ifindex; list_add(&entry->list, &dcb_app_list); out: spin_unlock(&dcb_lock); @@ -2194,8 +2198,10 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) struct dcb_app_type event; int err = -ENOENT; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, del, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and remove it. */ @@ -2203,7 +2209,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) if (itr->app.selector == del->selector && itr->app.protocol == del->protocol && itr->app.priority == del->priority && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { list_del(&itr->list); kfree(itr); err = 0; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 0462040..67164bb 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { - struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* @@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, + min_t(u32, val, DCCPF_ACK_RATIO_MAX)); +} - if (val == dp->dccps_l_ack_ratio) - return; +static void ccid2_check_l_ack_ratio(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("changing local ack ratio to %u\n", val); - dp->dccps_l_ack_ratio = val; + /* + * After a loss, idle period, application limited period, or RTO we + * need to check that the ack ratio is still less than the congestion + * window. Otherwise, we will send an entire congestion window of + * packets and got no response because we haven't sent ack ratio + * packets yet. + * If the ack ratio does need to be reduced, we reduce it to half of + * the congestion window (or 1 if that's zero) instead of to the + * congestion window. This prevents problems if one ack is lost. + */ + if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U); +} + +static void ccid2_change_l_seq_window(struct sock *sk, u64 val) +{ + dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, + clamp_val(val, DCCPF_SEQ_WMIN, + DCCPF_SEQ_WMAX)); } static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -187,6 +205,8 @@ static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) } hc->tx_cwnd_used = 0; hc->tx_cwnd_stamp = now; + + ccid2_check_l_ack_ratio(sk); } /* This borrows the code of tcp_cwnd_restart() */ @@ -205,6 +225,8 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; + + ccid2_check_l_ack_ratio(sk); } static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) @@ -405,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + struct dccp_sock *dp = dccp_sk(sk); + int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; + + if (hc->tx_cwnd < dp->dccps_l_seq_win && + r_seq_used < dp->dccps_r_seq_win) { + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; + } + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { hc->tx_cwnd += 1; - *maxincr -= 1; hc->tx_packets_acked = 0; } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; } + + /* + * Adjust the local sequence window and the ack ratio to allow about + * 5 times the number of packets in the network (RFC 4340 7.5.2) + */ + if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); + else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); + + if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); + else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); + /* * FIXME: RTT is sampled several times per acknowledgment (for each * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). @@ -441,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); + ccid2_check_l_ack_ratio(sk); } static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, @@ -494,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hc->tx_rpdupack >= NUMDUPACK) { hc->tx_rpdupack = -1; /* XXX lame */ hc->tx_rpseq = 0; - +#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ + /* + * FIXME: Ack Congestion Control is broken; in + * the current state instabilities occurred with + * Ack Ratios greater than 1; causing hang-ups + * and long RTO timeouts. This needs to be fixed + * before opening up dynamic changes. -- gerrit + */ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); +#endif } } } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index f585d33..18c9754 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -43,6 +43,12 @@ struct ccid2_seq { #define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 +/* + * Multiple of congestion window to keep the sequence window at + * (RFC 4340 7.5.2) + */ +#define CCID2_WIN_CHANGE_FACTOR 5 + /** * struct ccid2_hc_tx_sock - CCID2 TX half connection * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5fdb072..583490a 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk) return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } +extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); extern int dccp_feat_finalise_settings(struct dccp_sock *dp); extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 568def9..23cea0e 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,6 +12,7 @@ * ----------- * o Feature negotiation is coordinated with connection setup (as in TCP), wild * changes of parameters of an established connection are not supported. + * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } +/** + * dccp_feat_activate - Activate feature value on socket + * @sk: fully connected DCCP socket (after handshake is complete) + * @feat_num: feature to activate, one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @fval: the value (SP or NN) to activate, or NULL to use the default value + * For general use this function is preferable over __dccp_feat_activate(). + */ +static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, + dccp_feat_val const *fval) +{ + return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); +} + /* Test for "Req'd" feature (RFC 4340, 6.4) */ static inline int dccp_feat_must_be_understood(u8 feat_num) { @@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) return -1; - /* - * Enter CHANGING after transmitting the Change option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; + + if (skb->sk->sk_state == DCCP_OPEN && + (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) { + /* + * Confirms don't get retransmitted (6.6.3) once the + * connection is in state OPEN + */ + dccp_feat_list_pop(pos); + } else { + /* + * Enter CHANGING after transmitting the Change + * option (6.6.2). + */ + if (pos->state == FEAT_INITIALISING) + pos->state = FEAT_CHANGING; + } } return 0; } @@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 0, list, len); } +/** + * dccp_feat_nn_get - Query current/pending value of NN feature + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * For a known NN feature, returns value currently being negotiated, or + * current (confirmed) value if no negotiation is going on. + */ +u64 dccp_feat_nn_get(struct sock *sk, u8 feat) +{ + if (dccp_feat_type(feat) == FEAT_NN) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_feat_entry *entry; + + entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1); + if (entry != NULL) + return entry->val.nn; + + switch (feat) { + case DCCPF_ACK_RATIO: + return dp->dccps_l_ack_ratio; + case DCCPF_SEQUENCE_WINDOW: + return dp->dccps_l_seq_win; + } + } + DCCP_BUG("attempt to look up unsupported feature %u", feat); + return 0; +} +EXPORT_SYMBOL_GPL(dccp_feat_nn_get); + +/** + * dccp_feat_signal_nn_change - Update NN values for an established connection + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * @nn_val: the new value to use + * This function is used to communicate NN updates out-of-band. + */ +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + dccp_feat_val fval = { .nn = nn_val }; + struct dccp_feat_entry *entry; + + if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) + return 0; + + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + if (nn_val == dccp_feat_nn_get(sk, feat)) + return 0; /* already set or negotiation under way */ + + entry = dccp_feat_list_lookup(fn, feat, 1); + if (entry != NULL) { + dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n", + (unsigned long long)entry->val.nn, + (unsigned long long)nn_val); + dccp_feat_list_pop(entry); + } + + inet_csk_schedule_ack(sk); + return dccp_feat_push_change(fn, feat, 1, 0, &fval); +} +EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); /* * Tracking features whose value depend on the choice of CCID @@ -1187,6 +1277,100 @@ confirmation_failed: } /** + * dccp_feat_handle_nn_established - Fast-path reception of NN options + * @sk: socket of an established DCCP connection + * @mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) + * @feat: NN number, one of %dccp_feature_numbers + * @val: NN value + * @len: length of @val in bytes + * This function combines the functionality of change_recv/confirm_recv, with + * the following differences (reset codes are the same): + * - cleanup after receiving the Confirm; + * - values are directly activated after successful parsing; + * - deliberately restricted to NN features. + * The restriction to NN features is essential since SP features can have non- + * predictable outcomes (depending on the remote configuration), and are inter- + * dependent (CCIDs for instance cause further dependencies). + */ +static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, + u8 feat, u8 *val, u8 len) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + const bool local = (opt == DCCPO_CONFIRM_R); + struct dccp_feat_entry *entry; + u8 type = dccp_feat_type(feat); + dccp_feat_val fval; + + dccp_feat_print_opt(opt, feat, val, len, mandatory); + + /* Ignore non-mandatory unknown and non-NN features */ + if (type == FEAT_UNKNOWN) { + if (local && !mandatory) + return 0; + goto fast_path_unknown; + } else if (type != FEAT_NN) { + return 0; + } + + /* + * We don't accept empty Confirms, since in fast-path feature + * negotiation the values are enabled immediately after sending + * the Change option. + * Empty Changes on the other hand are invalid (RFC 4340, 6.1). + */ + if (len == 0 || len > sizeof(fval.nn)) + goto fast_path_unknown; + + if (opt == DCCPO_CHANGE_L) { + fval.nn = dccp_decode_value_var(val, len); + if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) + goto fast_path_unknown; + + if (dccp_feat_push_confirm(fn, feat, local, &fval) || + dccp_feat_activate(sk, feat, local, &fval)) + return DCCP_RESET_CODE_TOO_BUSY; + + /* set the `Ack Pending' flag to piggyback a Confirm */ + inet_csk_schedule_ack(sk); + + } else if (opt == DCCPO_CONFIRM_R) { + entry = dccp_feat_list_lookup(fn, feat, local); + if (entry == NULL || entry->state != FEAT_CHANGING) + return 0; + + fval.nn = dccp_decode_value_var(val, len); + /* + * Just ignore a value that doesn't match our current value. + * If the option changes twice within two RTTs, then at least + * one CONFIRM will be received for the old value after a + * new CHANGE was sent. + */ + if (fval.nn != entry->val.nn) + return 0; + + /* Only activate after receiving the Confirm option (6.6.1). */ + dccp_feat_activate(sk, feat, local, &fval); + + /* It has been confirmed - so remove the entry */ + dccp_feat_list_pop(entry); + + } else { + DCCP_WARN("Received illegal option %u\n", opt); + goto fast_path_failed; + } + return 0; + +fast_path_unknown: + if (!mandatory) + return dccp_push_empty_confirm(fn, feat, local); + +fast_path_failed: + return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; +} + +/** * dccp_feat_parse_options - Process Feature-Negotiation Options * @sk: for general use and used by the client during connection setup * @dreq: used by the server during connection setup @@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return dccp_feat_confirm_recv(fn, mandatory, opt, feat, val, len, server); } + break; + /* + * Support for exchanging NN options on an established connection. + */ + case DCCP_OPEN: + case DCCP_PARTOPEN: + return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, + val, len); } return 0; /* ignore FN options in all other states */ } diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e56a4e5..90b957d 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat); extern int dccp_insert_option_mandatory(struct sk_buff *skb); extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8c36adf..332639b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -26,6 +26,7 @@ #include <net/timewait_sock.h> #include <net/tcp_states.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "ackvec.h" #include "ccid.h" diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8dc4348..b74f761 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include <net/transp_v6.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "dccp.h" #include "ipv6.h" @@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) -{ - return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); -} - -static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 152975d..e742f90 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index ba4face..2ab16e1 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -388,7 +388,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) } ifa->ifa_next = dn_db->ifa_list; - rcu_assign_pointer(dn_db->ifa_list, ifa); + RCU_INIT_POINTER(dn_db->ifa_list, ifa); dn_ifaddr_notify(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); @@ -1093,7 +1093,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - rcu_assign_pointer(dev->dn_ptr, dn_db); + RCU_INIT_POINTER(dev->dn_ptr, dn_db); dn_db->dev = dev; init_timer(&dn_db->timer); @@ -1101,7 +1101,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - rcu_assign_pointer(dev->dn_ptr, NULL); + RCU_INIT_POINTER(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0a47b6c..56cf9b8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -301,7 +301,6 @@ static const struct net_device_ops dsa_netdev_ops = { .ndo_start_xmit = dsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -314,7 +313,6 @@ static const struct net_device_ops edsa_netdev_ops = { .ndo_start_xmit = edsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -327,7 +325,6 @@ static const struct net_device_ops trailer_netdev_ops = { .ndo_start_xmit = trailer_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 27997d3..a246836 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - dev->priv_flags = IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c new file mode 100644 index 0000000..19d6aef --- /dev/null +++ b/net/ieee802154/6lowpan.c @@ -0,0 +1,891 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define DEBUG + +#include <linux/bitops.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netdevice.h> +#include <net/af_ieee802154.h> +#include <net/ieee802154.h> +#include <net/ieee802154_netdev.h> +#include <net/ipv6.h> + +#include "6lowpan.h" + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = {0, 1, 64, 255}; + +static LIST_HEAD(lowpan_devices); + +/* + * Uncompression of linklocal: + * 0 -> 16 bytes from packet + * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet + * 2 -> 2 bytes from prefix - zeroes + 2 from packet + * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr + * + * NOTE: => the uncompress function does change 0xf to 0x10 + * NOTE: 0x00 => no-autoconfig => unspecified + */ +static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20}; + +/* + * Uncompression of ctx-based: + * 0 -> 0 bits from packet [unspecified / reserved] + * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet + * 2 -> 8 bytes from prefix - zeroes + 2 from packet + * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr + */ +static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80}; + +/* + * Uncompression of ctx-base + * 0 -> 0 bits from packet + * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet + * 2 -> 2 bytes from prefix - zeroes + 3 from packet + * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr + */ +static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21}; + +/* Link local prefix */ +static const u8 lowpan_llprefix[] = {0xfe, 0x80}; + +/* private device info */ +struct lowpan_dev_info { + struct net_device *real_dev; /* real WPAN device ptr */ + struct mutex dev_list_mtx; /* mutex for list ops */ +}; + +struct lowpan_dev_record { + struct net_device *ldev; + struct list_head list; +}; + +static inline struct +lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) +{ + return netdev_priv(dev); +} + +static inline void lowpan_address_flip(u8 *src, u8 *dest) +{ + int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) + (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; +} + +/* list of all 6lowpan devices, uses for package delivering */ +/* print data in line */ +static inline void lowpan_raw_dump_inline(const char *caller, char *msg, + unsigned char *buf, int len) +{ +#ifdef DEBUG + if (msg) + pr_debug("(%s) %s: ", caller, msg); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, + 16, 1, buf, len, false); +#endif /* DEBUG */ +} + +/* + * print data in a table format: + * + * addr: xx xx xx xx xx xx + * addr: xx xx xx xx xx xx + * ... + */ +static inline void lowpan_raw_dump_table(const char *caller, char *msg, + unsigned char *buf, int len) +{ +#ifdef DEBUG + if (msg) + pr_debug("(%s) %s:\n", caller, msg); + print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); +#endif /* DEBUG */ +} + +static u8 +lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr, + const unsigned char *lladdr) +{ + u8 val = 0; + + if (is_addr_mac_addr_based(ipaddr, lladdr)) + val = 3; /* 0-bits */ + else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { + /* compress IID to 16 bits xxxx::XXXX */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2); + *hc06_ptr += 2; + val = 2; /* 16-bits */ + } else { + /* do not compress IID => xxxx::IID */ + memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); + *hc06_ptr += 8; + val = 1; /* 64-bits */ + } + + return rol8(val, shift); +} + +static void +lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr) +{ + memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ALEN); + /* second bit-flip (Universe/Local) is done according RFC2464 */ + ipaddr->s6_addr[8] ^= 0x02; +} + +/* + * Uncompress addresses based on a prefix and a postfix with zeroes in + * between. If the postfix is zero in length it will use the link address + * to configure the IP address (autoconf style). + * pref_post_count takes a byte where the first nibble specify prefix count + * and the second postfix count (NOTE: 15/0xf => 16 bytes copy). + */ +static int +lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr, + u8 const *prefix, u8 pref_post_count, unsigned char *lladdr) +{ + u8 prefcount = pref_post_count >> 4; + u8 postcount = pref_post_count & 0x0f; + + /* full nibble 15 => 16 */ + prefcount = (prefcount == 15 ? 16 : prefcount); + postcount = (postcount == 15 ? 16 : postcount); + + if (lladdr) + lowpan_raw_dump_inline(__func__, "linklocal address", + lladdr, IEEE802154_ALEN); + if (prefcount > 0) + memcpy(ipaddr, prefix, prefcount); + + if (prefcount + postcount < 16) + memset(&ipaddr->s6_addr[prefcount], 0, + 16 - (prefcount + postcount)); + + if (postcount > 0) { + memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount); + skb_pull(skb, postcount); + } else if (prefcount > 0) { + if (lladdr == NULL) + return -EINVAL; + + /* no IID based configuration if no prefix and no data */ + lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr); + } + + pr_debug("(%s): uncompressing %d + %d => ", __func__, prefcount, + postcount); + lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16); + + return 0; +} + +static u8 lowpan_fetch_skb_u8(struct sk_buff *skb) +{ + u8 ret; + + ret = skb->data[0]; + skb_pull(skb, 1); + + return ret; +} + +static int lowpan_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned len) +{ + u8 tmp, iphc0, iphc1, *hc06_ptr; + struct ipv6hdr *hdr; + const u8 *saddr = _saddr; + const u8 *daddr = _daddr; + u8 *head; + struct ieee802154_addr sa, da; + + if (type != ETH_P_IPV6) + return 0; + /* TODO: + * if this package isn't ipv6 one, where should it be routed? + */ + head = kzalloc(100, GFP_KERNEL); + if (head == NULL) + return -ENOMEM; + + hdr = ipv6_hdr(skb); + hc06_ptr = head + 2; + + pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" + "\tnexthdr = 0x%02x\n\thop_lim = %d\n", __func__, + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit); + + lowpan_raw_dump_table(__func__, "raw skb network header dump", + skb_network_header(skb), sizeof(struct ipv6hdr)); + + if (!saddr) + saddr = dev->dev_addr; + + lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); + + /* + * As we copy some bit-length fields, in the IPHC encoding bytes, + * we sometimes use |= + * If the field is 0, and the current bit value in memory is 1, + * this does not work. We therefore reset the IPHC encoding here + */ + iphc0 = LOWPAN_DISPATCH_IPHC; + iphc1 = 0; + + /* TODO: context lookup */ + + lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); + + /* + * Traffic class, flow label + * If flow label is 0, compress it. If traffic class is 0, compress it + * We have to process both in the same time as the offset of traffic + * class depends on the presence of version and flow label + */ + + /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */ + tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); + tmp = ((tmp & 0x03) << 6) | (tmp >> 2); + + if (((hdr->flow_lbl[0] & 0x0F) == 0) && + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + /* flow label can be compressed */ + iphc0 |= LOWPAN_IPHC_FL_C; + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress (elide) all */ + iphc0 |= LOWPAN_IPHC_TC_C; + } else { + /* compress only the flow label */ + *hc06_ptr = tmp; + hc06_ptr += 1; + } + } else { + /* Flow label cannot be compressed */ + if ((hdr->priority == 0) && + ((hdr->flow_lbl[0] & 0xF0) == 0)) { + /* compress only traffic class */ + iphc0 |= LOWPAN_IPHC_TC_C; + *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); + memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2); + hc06_ptr += 3; + } else { + /* compress nothing */ + memcpy(hc06_ptr, &hdr, 4); + /* replace the top byte with new ECN | DSCP format */ + *hc06_ptr = tmp; + hc06_ptr += 4; + } + } + + /* NOTE: payload length is always compressed */ + + /* Next Header is compress if UDP */ + if (hdr->nexthdr == UIP_PROTO_UDP) + iphc0 |= LOWPAN_IPHC_NH_C; + +/* TODO: next header compression */ + + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + *hc06_ptr = hdr->nexthdr; + hc06_ptr += 1; + } + + /* + * Hop limit + * if 1: compress, encoding is 01 + * if 64: compress, encoding is 10 + * if 255: compress, encoding is 11 + * else do not compress + */ + switch (hdr->hop_limit) { + case 1: + iphc0 |= LOWPAN_IPHC_TTL_1; + break; + case 64: + iphc0 |= LOWPAN_IPHC_TTL_64; + break; + case 255: + iphc0 |= LOWPAN_IPHC_TTL_255; + break; + default: + *hc06_ptr = hdr->hop_limit; + break; + } + + /* source address compression */ + if (is_addr_unspecified(&hdr->saddr)) { + pr_debug("(%s): source address is unspecified, setting SAC\n", + __func__); + iphc1 |= LOWPAN_IPHC_SAC; + /* TODO: context lookup */ + } else if (is_addr_link_local(&hdr->saddr)) { + pr_debug("(%s): source address is link-local\n", __func__); + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr); + } else { + pr_debug("(%s): send the full source address\n", __func__); + memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + + /* destination address compression */ + if (is_addr_mcast(&hdr->daddr)) { + pr_debug("(%s): destination address is multicast", __func__); + iphc1 |= LOWPAN_IPHC_M; + if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { + pr_debug("compressed to 1 octet\n"); + iphc1 |= LOWPAN_IPHC_DAM_11; + /* use last byte */ + *hc06_ptr = hdr->daddr.s6_addr[15]; + hc06_ptr += 1; + } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { + pr_debug("compressed to 4 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_10; + /* second byte + the last three */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3); + hc06_ptr += 4; + } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { + pr_debug("compressed to 6 octets\n"); + iphc1 |= LOWPAN_IPHC_DAM_01; + /* second byte + the last five */ + *hc06_ptr = hdr->daddr.s6_addr[1]; + memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5); + hc06_ptr += 6; + } else { + pr_debug("using full address\n"); + iphc1 |= LOWPAN_IPHC_DAM_00; + memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16); + hc06_ptr += 16; + } + } else { + pr_debug("(%s): destination address is unicast: ", __func__); + /* TODO: context lookup */ + if (is_addr_link_local(&hdr->daddr)) { + pr_debug("destination address is link-local\n"); + iphc1 |= lowpan_compress_addr_64(&hc06_ptr, + LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr); + } else { + pr_debug("using full address\n"); + memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16); + hc06_ptr += 16; + } + } + + /* TODO: UDP header compression */ + /* TODO: Next Header compression */ + + head[0] = iphc0; + head[1] = iphc1; + + skb_pull(skb, sizeof(struct ipv6hdr)); + memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head); + + kfree(head); + + lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, + skb->len); + + /* + * NOTE1: I'm still unsure about the fact that compression and WPAN + * header are created here and not later in the xmit. So wait for + * an opinion of net maintainers. + */ + /* + * NOTE2: to be absolutely correct, we must derive PANid information + * from MAC subif of the 'dev' and 'real_dev' network devices, but + * this isn't implemented in mainline yet, so currently we assign 0xff + */ + { + /* prepare wpan address data */ + sa.addr_type = IEEE802154_ADDR_LONG; + sa.pan_id = 0xff; + + da.addr_type = IEEE802154_ADDR_LONG; + da.pan_id = 0xff; + + memcpy(&(da.hwaddr), daddr, 8); + memcpy(&(sa.hwaddr), saddr, 8); + + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, + type, (void *)&da, (void *)&sa, skb->len); + } +} + +static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr) +{ + struct sk_buff *new; + struct lowpan_dev_record *entry; + int stat = NET_RX_SUCCESS; + + new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), + GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb_push(new, sizeof(struct ipv6hdr)); + skb_reset_network_header(new); + skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr)); + + new->protocol = htons(ETH_P_IPV6); + new->pkt_type = PACKET_HOST; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &lowpan_devices, list) + if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) { + skb = skb_copy(new, GFP_ATOMIC); + if (!skb) { + stat = -ENOMEM; + break; + } + + skb->dev = entry->ldev; + stat = netif_rx(skb); + } + rcu_read_unlock(); + + kfree_skb(new); + + return stat; +} + +static int +lowpan_process_data(struct sk_buff *skb) +{ + struct ipv6hdr hdr; + u8 tmp, iphc0, iphc1, num_context = 0; + u8 *_saddr, *_daddr; + int err; + + lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data, + skb->len); + /* at least two bytes will be used for the encoding */ + if (skb->len < 2) + goto drop; + iphc0 = lowpan_fetch_skb_u8(skb); + iphc1 = lowpan_fetch_skb_u8(skb); + + _saddr = mac_cb(skb)->sa.hwaddr; + _daddr = mac_cb(skb)->da.hwaddr; + + pr_debug("(%s): iphc0 = %02x, iphc1 = %02x\n", __func__, iphc0, iphc1); + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) { + pr_debug("(%s): CID flag is set, increase header with one\n", + __func__); + if (!skb->len) + goto drop; + num_context = lowpan_fetch_skb_u8(skb); + } + + hdr.version = 6; + + /* Traffic Class and Flow Label */ + switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { + /* + * Traffic Class and FLow Label carried in-line + * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + */ + case 0: /* 00b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + memcpy(&hdr.flow_lbl, &skb->data[0], 3); + skb_pull(skb, 3); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | + (hdr.flow_lbl[0] & 0x0f); + break; + /* + * Traffic class carried in-line + * ECN + DSCP (1 byte), Flow Label is elided + */ + case 1: /* 10b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + hdr.priority = ((tmp >> 2) & 0x0f); + hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + hdr.flow_lbl[1] = 0; + hdr.flow_lbl[2] = 0; + break; + /* + * Flow Label carried in-line + * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + */ + case 2: /* 01b */ + if (!skb->len) + goto drop; + tmp = lowpan_fetch_skb_u8(skb); + hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30); + memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); + skb_pull(skb, 2); + break; + /* Traffic Class and Flow Label are elided */ + case 3: /* 11b */ + hdr.priority = 0; + hdr.flow_lbl[0] = 0; + hdr.flow_lbl[1] = 0; + hdr.flow_lbl[2] = 0; + break; + default: + break; + } + + /* Next Header */ + if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + /* Next header is carried inline */ + if (!skb->len) + goto drop; + hdr.nexthdr = lowpan_fetch_skb_u8(skb); + pr_debug("(%s): NH flag is set, next header is carried " + "inline: %02x\n", __func__, hdr.nexthdr); + } + + /* Hop Limit */ + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + else { + if (!skb->len) + goto drop; + hdr.hop_limit = lowpan_fetch_skb_u8(skb); + } + + /* Extract SAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; + + /* Source address uncompression */ + pr_debug("(%s): source address stateless compression\n", __func__); + err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix, + lowpan_unc_llconf[tmp], skb->data); + if (err) + goto drop; + + /* Extract DAM to the tmp variable */ + tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; + + /* check for Multicast Compression */ + if (iphc1 & LOWPAN_IPHC_M) { + if (iphc1 & LOWPAN_IPHC_DAC) { + pr_debug("(%s): destination address context-based " + "multicast compression\n", __func__); + /* TODO: implement this */ + } else { + u8 prefix[] = {0xff, 0x02}; + + pr_debug("(%s): destination address non-context-based" + " multicast compression\n", __func__); + if (0 < tmp && tmp < 3) { + if (!skb->len) + goto drop; + else + prefix[1] = lowpan_fetch_skb_u8(skb); + } + + err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix, + lowpan_unc_mxconf[tmp], NULL); + if (err) + goto drop; + } + } else { + pr_debug("(%s): destination address stateless compression\n", + __func__); + err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix, + lowpan_unc_llconf[tmp], skb->data); + if (err) + goto drop; + } + + /* TODO: UDP header parse */ + + /* Not fragmented package */ + hdr.payload_len = htons(skb->len); + + pr_debug("(%s): skb headroom size = %d, data length = %d\n", __func__, + skb_headroom(skb), skb->len); + + pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t" + "nexthdr = 0x%02x\n\thop_lim = %d\n", __func__, hdr.version, + ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit); + + lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, + sizeof(hdr)); + return lowpan_skb_deliver(skb, &hdr); +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int lowpan_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (netif_running(dev)) + return -EBUSY; + + /* TODO: validate addr */ + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + + return 0; +} + +static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + int err = 0; + + pr_debug("(%s): package xmit\n", __func__); + + skb->dev = lowpan_dev_info(dev)->real_dev; + if (skb->dev == NULL) { + pr_debug("(%s) ERROR: no real wpan device found\n", __func__); + dev_kfree_skb(skb); + } else + err = dev_queue_xmit(skb); + + return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); +} + +static void lowpan_dev_free(struct net_device *dev) +{ + dev_put(lowpan_dev_info(dev)->real_dev); + free_netdev(dev); +} + +static struct header_ops lowpan_header_ops = { + .create = lowpan_header_create, +}; + +static const struct net_device_ops lowpan_netdev_ops = { + .ndo_start_xmit = lowpan_xmit, + .ndo_set_mac_address = lowpan_set_address, +}; + +static void lowpan_setup(struct net_device *dev) +{ + pr_debug("(%s)\n", __func__); + + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + dev->type = ARPHRD_IEEE802154; + dev->features = NETIF_F_NO_CSUM; + /* Frame Control + Sequence Number + Address fields + Security Header */ + dev->hard_header_len = 2 + 1 + 20 + 14; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = 1281; + dev->tx_queue_len = 0; + dev->flags = IFF_NOARP | IFF_BROADCAST; + dev->watchdog_timeo = 0; + + dev->netdev_ops = &lowpan_netdev_ops; + dev->header_ops = &lowpan_header_ops; + dev->destructor = lowpan_dev_free; +} + +static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + pr_debug("(%s)\n", __func__); + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) + return -EINVAL; + } + return 0; +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (!netif_running(dev)) + goto drop; + + if (dev->type != ARPHRD_IEEE802154) + goto drop; + + /* check that it's our buffer */ + if ((skb->data[0] & 0xe0) == 0x60) + lowpan_process_data(skb); + + return NET_RX_SUCCESS; + +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int lowpan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_device *real_dev; + struct lowpan_dev_record *entry; + + pr_debug("(%s)\n", __func__); + + if (!tb[IFLA_LINK]) + return -EINVAL; + /* find and hold real wpan device */ + real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev) + return -ENODEV; + + lowpan_dev_info(dev)->real_dev = real_dev; + mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); + + entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); + if (!entry) { + dev_put(real_dev); + lowpan_dev_info(dev)->real_dev = NULL; + return -ENOMEM; + } + + entry->ldev = dev; + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + INIT_LIST_HEAD(&entry->list); + list_add_tail(&entry->list, &lowpan_devices); + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + register_netdevice(dev); + + return 0; +} + +static void lowpan_dellink(struct net_device *dev, struct list_head *head) +{ + struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); + struct net_device *real_dev = lowpan_dev->real_dev; + struct lowpan_dev_record *entry; + struct lowpan_dev_record *tmp; + + ASSERT_RTNL(); + + mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx); + list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) { + if (entry->ldev == dev) { + list_del(&entry->list); + kfree(entry); + } + } + mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx); + + mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx); + + unregister_netdevice_queue(dev, head); + + dev_put(real_dev); +} + +static struct rtnl_link_ops lowpan_link_ops __read_mostly = { + .kind = "lowpan", + .priv_size = sizeof(struct lowpan_dev_info), + .setup = lowpan_setup, + .newlink = lowpan_newlink, + .dellink = lowpan_dellink, + .validate = lowpan_validate, +}; + +static inline int __init lowpan_netlink_init(void) +{ + return rtnl_link_register(&lowpan_link_ops); +} + +static inline void __init lowpan_netlink_fini(void) +{ + rtnl_link_unregister(&lowpan_link_ops); +} + +static struct packet_type lowpan_packet_type = { + .type = __constant_htons(ETH_P_IEEE802154), + .func = lowpan_rcv, +}; + +static int __init lowpan_init_module(void) +{ + int err = 0; + + pr_debug("(%s)\n", __func__); + + err = lowpan_netlink_init(); + if (err < 0) + goto out; + + dev_add_pack(&lowpan_packet_type); +out: + return err; +} + +static void __exit lowpan_cleanup_module(void) +{ + pr_debug("(%s)\n", __func__); + + lowpan_netlink_fini(); + + dev_remove_pack(&lowpan_packet_type); +} + +module_init(lowpan_init_module); +module_exit(lowpan_cleanup_module); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("lowpan"); diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h new file mode 100644 index 0000000..5d8cf80 --- /dev/null +++ b/net/ieee802154/6lowpan.h @@ -0,0 +1,212 @@ +/* + * Copyright 2011, Siemens AG + * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> + */ + +/* + * Based on patches from Jon Smirl <jonsmirl@gmail.com> + * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Jon's code is based on 6lowpan implementation for Contiki which is: + * Copyright (c) 2008, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __6LOWPAN_H__ +#define __6LOWPAN_H__ + +/* need to know address length to manipulate with it */ +#define IEEE802154_ALEN 8 + +#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */ +#define UIP_IPH_LEN 40 /* ipv6 fixed header size */ +#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */ +#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */ + +/* + * ipv6 address based on mac + * second bit-flip (Universe/Local) is done according RFC2464 + */ +#define is_addr_mac_addr_based(a, m) \ + ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \ + (((a)->s6_addr[9]) == (m)[1]) && \ + (((a)->s6_addr[10]) == (m)[2]) && \ + (((a)->s6_addr[11]) == (m)[3]) && \ + (((a)->s6_addr[12]) == (m)[4]) && \ + (((a)->s6_addr[13]) == (m)[5]) && \ + (((a)->s6_addr[14]) == (m)[6]) && \ + (((a)->s6_addr[15]) == (m)[7])) + +/* ipv6 address is unspecified */ +#define is_addr_unspecified(a) \ + ((((a)->s6_addr32[0]) == 0) && \ + (((a)->s6_addr32[1]) == 0) && \ + (((a)->s6_addr32[2]) == 0) && \ + (((a)->s6_addr32[3]) == 0)) + +/* compare ipv6 addresses prefixes */ +#define ipaddr_prefixcmp(addr1, addr2, length) \ + (memcmp(addr1, addr2, length >> 3) == 0) + +/* local link, i.e. FE80::/10 */ +#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE) + +/* + * check whether we can compress the IID to 16 bits, + * it's possible for unicast adresses with first 49 bits are zero only. + */ +#define lowpan_is_iid_16_bit_compressable(a) \ + ((((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + ((((a)->s6_addr[14]) & 0x80) == 0)) + +/* multicast address */ +#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) + +/* check whether the 112-bit gid of the multicast address is mappable to: */ + +/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */ +#define lowpan_is_mcast_addr_compressable(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0) && \ + ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2))) + +/* 48 bits, FFXX::00XX:XXXX:XXXX */ +#define lowpan_is_mcast_addr_compressable48(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0)) + +/* 32 bits, FFXX::00XX:XXXX */ +#define lowpan_is_mcast_addr_compressable32(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr[12]) == 0)) + +/* 8 bits, FF02::00XX */ +#define lowpan_is_mcast_addr_compressable8(a) \ + ((((a)->s6_addr[1]) == 2) && \ + (((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0)) + +#define lowpan_is_addr_broadcast(a) \ + ((((a)[0]) == 0xFF) && \ + (((a)[1]) == 0xFF) && \ + (((a)[2]) == 0xFF) && \ + (((a)[3]) == 0xFF) && \ + (((a)[4]) == 0xFF) && \ + (((a)[5]) == 0xFF) && \ + (((a)[6]) == 0xFF) && \ + (((a)[7]) == 0xFF)) + +#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */ +#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */ +#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */ +#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */ +#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */ + +/* + * Values of fields within the IPHC encoding first byte + * (C stands for compressed and I for inline) + */ +#define LOWPAN_IPHC_TF 0x18 + +#define LOWPAN_IPHC_FL_C 0x10 +#define LOWPAN_IPHC_TC_C 0x08 +#define LOWPAN_IPHC_NH_C 0x04 +#define LOWPAN_IPHC_TTL_1 0x01 +#define LOWPAN_IPHC_TTL_64 0x02 +#define LOWPAN_IPHC_TTL_255 0x03 +#define LOWPAN_IPHC_TTL_I 0x00 + + +/* Values of fields within the IPHC encoding second byte */ +#define LOWPAN_IPHC_CID 0x80 + +#define LOWPAN_IPHC_SAC 0x40 +#define LOWPAN_IPHC_SAM_00 0x00 +#define LOWPAN_IPHC_SAM_01 0x10 +#define LOWPAN_IPHC_SAM_10 0x20 +#define LOWPAN_IPHC_SAM 0x30 + +#define LOWPAN_IPHC_SAM_BIT 4 + +#define LOWPAN_IPHC_M 0x08 +#define LOWPAN_IPHC_DAC 0x04 +#define LOWPAN_IPHC_DAM_00 0x00 +#define LOWPAN_IPHC_DAM_01 0x01 +#define LOWPAN_IPHC_DAM_10 0x02 +#define LOWPAN_IPHC_DAM_11 0x03 + +#define LOWPAN_IPHC_DAM_BIT 0 +/* + * LOWPAN_UDP encoding (works together with IPHC) + */ +#define LOWPAN_NHC_UDP_MASK 0xF8 +#define LOWPAN_NHC_UDP_ID 0xF0 +#define LOWPAN_NHC_UDP_CHECKSUMC 0x04 +#define LOWPAN_NHC_UDP_CHECKSUMI 0x00 + +/* values for port compression, _with checksum_ ie bit 5 set to 0 */ +#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */ +#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline, + dest = 0xF0 + 8 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline, + dest = 16 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */ + +#endif /* __6LOWPAN_H__ */ diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 1c1de97..7dee650 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -10,3 +10,9 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. + +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on IEEE802154 && IPV6 + ---help--- + IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 5761185..d7716d6 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,3 +1,5 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o -af_802154-y := af_ieee802154.o raw.o dgram.o +obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o + +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o +af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b745d4..dd2b947 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; if (addr->sin_family != AF_INET) { + /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) + * only if s_addr is INADDR_ANY. + */ err = -EAFNOSUPPORT; - goto out; + if (addr->sin_family != AF_UNSPEC || + addr->sin_addr.s_addr != htonl(INADDR_ANY)) + goto out; } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bc19bd0..c6b5092 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev) ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ - rcu_assign_pointer(dev->ip_ptr, in_dev); + RCU_INIT_POINTER(dev->ip_ptr, in_dev); out: return in_dev; out_kfree: @@ -291,7 +291,7 @@ static void inetdev_destroy(struct in_device *in_dev) inet_free_ifa(ifa); } - rcu_assign_pointer(dev->ip_ptr, NULL); + RCU_INIT_POINTER(dev->ip_ptr, NULL); devinet_sysctl_unregister(in_dev); neigh_parms_release(&arp_tbl, in_dev->arp_parms); @@ -1175,7 +1175,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_REGISTER: printk(KERN_DEBUG "inetdev_event: bug\n"); - rcu_assign_pointer(dev->ip_ptr, NULL); + RCU_INIT_POINTER(dev->ip_ptr, NULL); break; case NETDEV_UP: if (!inetdev_valid_mtu(dev->mtu)) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35..80106d8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }; /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); + kfree(fi); +} void free_fib_info(struct fib_info *fi) { @@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi) } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); - kfree_rcu(fi, rcu); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index de9e297..89d6f71 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -204,7 +204,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -/* Same as rcu_assign_pointer +/* Same as RCU_INIT_POINTER * but that macro() assumes that value is a pointer. */ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) @@ -528,7 +528,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * if (n) node_set_parent(n, tn); - rcu_assign_pointer(tn->child[i], n); + RCU_INIT_POINTER(tn->child[i], n); } #define MAX_WORK 10 @@ -1014,7 +1014,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) tp = node_parent((struct rt_trie_node *) tn); if (!tp) - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1026,7 +1026,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1163,7 +1163,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)tn); } else { - rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); + RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn); tp = tn; } } @@ -1621,7 +1621,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) put_child(t, (struct tnode *)tp, cindex, NULL); trie_rebalance(t, tp); } else - rcu_assign_pointer(t->trie, NULL); + RCU_INIT_POINTER(t->trie, NULL); free_leaf(l); } diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index dbfc21d..8cb1ebb 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -34,7 +34,7 @@ int gre_add_protocol(const struct gre_protocol *proto, u8 version) if (gre_proto[version]) goto err_out_unlock; - rcu_assign_pointer(gre_proto[version], proto); + RCU_INIT_POINTER(gre_proto[version], proto); spin_unlock(&gre_proto_lock); return 0; @@ -54,7 +54,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) if (rcu_dereference_protected(gre_proto[version], lockdep_is_held(&gre_proto_lock)) != proto) goto err_out_unlock; - rcu_assign_pointer(gre_proto[version], NULL); + RCU_INIT_POINTER(gre_proto[version], NULL); spin_unlock(&gre_proto_lock); synchronize_rcu(); return 0; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f1d27f6..c7472ef 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) break; for (i=0; i<nsrcs; i++) { /* skip inactive filters */ - if (pmc->sfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; @@ -1009,7 +1009,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. We will get multicast token leakage, when IFF_MULTICAST - is changed. This check should be done in dev->set_multicast_list + is changed. This check should be done in ndo_set_rx_mode routine. Something sort of: if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } --ANK @@ -1242,7 +1242,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - rcu_assign_pointer(in_dev->mc_list, im); + RCU_INIT_POINTER(in_dev->mc_list, im); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; for (j=0; j<i; j++) - (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); + (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; @@ -1813,7 +1813,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - rcu_assign_pointer(inet->mc_list, iml); + RCU_INIT_POINTER(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1835,7 +1835,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, psf->sl_count, psf->sl_addr, 0); - rcu_assign_pointer(iml->sflist, NULL); + RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); kfree_rcu(psf, rcu); @@ -2000,7 +2000,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } - rcu_assign_pointer(pmc->sflist, newpsl); + RCU_INIT_POINTER(pmc->sflist, newpsl); psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ @@ -2103,7 +2103,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - rcu_assign_pointer(pmc->sflist, newpsl); + RCU_INIT_POINTER(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a..984ec65 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#include <net/secure_seq.h> #include <net/ip.h> /* diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index ef7ae60..8e6be5a 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -433,7 +433,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, if (!lro_mgr->get_frag_header || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, (void *)&tcph, &flags, priv)) { - mac_hdr = page_address(frags->page) + frags->page_offset; + mac_hdr = skb_frag_address(frags); goto out1; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e382138..86f13c67 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include <linux/net.h> #include <net/ip.h> #include <net/inetpeer.h> +#include <net/secure_seq.h> /* * Theory of operations. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ccaaa85..ae3bb14 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); + skb_dst_force(newskb); netif_rx_ni(newskb); return 0; } @@ -204,9 +205,15 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); @@ -982,13 +989,13 @@ alloc_new_skb: if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; - if (page != frag->page) { + if (page != skb_frag_page(frag)) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } - get_page(page); skb_fill_page_desc(skb, i, page, off, 0); + skb_frag_ref(skb, i); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -1008,7 +1015,8 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { + if (getfrag(from, skb_frag_address(frag)+frag->size, + offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ab0c9ef..8905e92 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct inet_sock *inet = inet_sk(sk); int val; @@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; @@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ip_getsockopt); - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 378b20b..065effd 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - rcu_assign_pointer(*tp, t->next); + RCU_INIT_POINTER(*tp, t->next); break; } } @@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); + RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); + RCU_INIT_POINTER(*tp, t); } static struct ip_tunnel * ipip_tunnel_locate(struct net *net, @@ -301,7 +301,7 @@ static void ipip_tunnel_uninit(struct net_device *dev) struct ipip_net *ipn = net_generic(net, ipip_net_id); if (dev == ipn->fb_tunnel_dev) - rcu_assign_pointer(ipn->tunnels_wc[0], NULL); + RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); else ipip_tunnel_unlink(ipn, netdev_priv(dev)); dev_put(dev); @@ -791,7 +791,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return -ENOMEM; dev_hold(dev); - rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); + RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 58e8791..6164e98 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1176,7 +1176,7 @@ static void mrtsock_destruct(struct sock *sk) ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - rcu_assign_pointer(mrt->mroute_sk, NULL); + RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt); } } @@ -1203,7 +1203,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENOENT; if (optname != MRT_INIT) { - if (sk != rcu_dereference_raw(mrt->mroute_sk) && + if (sk != rcu_access_pointer(mrt->mroute_sk) && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1224,13 +1224,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { - rcu_assign_pointer(mrt->mroute_sk, sk); + RCU_INIT_POINTER(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; } rtnl_unlock(); return ret; case MRT_DONE: - if (sk != rcu_dereference_raw(mrt->mroute_sk)) + if (sk != rcu_access_pointer(mrt->mroute_sk)) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 2e97e3e..929b27b 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = 0; + __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; unsigned int hh_len; - if (!skb->sk && addr_type != RTN_LOCAL) { - if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(net, saddr); - if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) - flags |= FLOWI_FLAG_ANYSRC; - else - saddr = 0; - } + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. @@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d9..e59aabd 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; @@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 703f366f..7b22382 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -70,14 +70,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_amanda_fini(void) { - rcu_assign_pointer(nf_nat_amanda_hook, NULL); + RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); synchronize_rcu(); } static int __init nf_nat_amanda_init(void) { BUG_ON(nf_nat_amanda_hook != NULL); - rcu_assign_pointer(nf_nat_amanda_hook, help); + RCU_INIT_POINTER(nf_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3346de5..447bc5c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -514,7 +514,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) ret = -EBUSY; goto out; } - rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); + RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); out: spin_unlock_bh(&nf_nat_lock); return ret; @@ -525,7 +525,7 @@ EXPORT_SYMBOL(nf_nat_protocol_register); void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) { spin_lock_bh(&nf_nat_lock); - rcu_assign_pointer(nf_nat_protos[proto->protonum], + RCU_INIT_POINTER(nf_nat_protos[proto->protonum], &nf_nat_unknown_protocol); spin_unlock_bh(&nf_nat_lock); synchronize_rcu(); @@ -736,10 +736,10 @@ static int __init nf_nat_init(void) /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); for (i = 0; i < MAX_IP_NAT_PROTO; i++) - rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); - rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); - rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); - rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); + RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); + RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ @@ -748,12 +748,12 @@ static int __init nf_nat_init(void) l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); BUG_ON(nf_nat_seq_adjust_hook != NULL); - rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); - rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); BUG_ON(nf_ct_nat_offset != NULL); - rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); + RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); return 0; cleanup_extend: @@ -766,9 +766,9 @@ static void __exit nf_nat_cleanup(void) unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); - rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); - rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); - rcu_assign_pointer(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); synchronize_net(); } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index dc73abb..e462a95 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -113,14 +113,14 @@ out: static void __exit nf_nat_ftp_fini(void) { - rcu_assign_pointer(nf_nat_ftp_hook, NULL); + RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); synchronize_rcu(); } static int __init nf_nat_ftp_init(void) { BUG_ON(nf_nat_ftp_hook != NULL); - rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); + RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 790f316..b9a1136 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -581,30 +581,30 @@ static int __init init(void) BUG_ON(nat_callforwarding_hook != NULL); BUG_ON(nat_q931_hook != NULL); - rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); - rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); - rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); - rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); - rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); - rcu_assign_pointer(nat_t120_hook, nat_t120); - rcu_assign_pointer(nat_h245_hook, nat_h245); - rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); - rcu_assign_pointer(nat_q931_hook, nat_q931); + RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr); + RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr); + RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr); + RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp); + RCU_INIT_POINTER(nat_t120_hook, nat_t120); + RCU_INIT_POINTER(nat_h245_hook, nat_h245); + RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); + RCU_INIT_POINTER(nat_q931_hook, nat_q931); return 0; } /****************************************************************************/ static void __exit fini(void) { - rcu_assign_pointer(set_h245_addr_hook, NULL); - rcu_assign_pointer(set_h225_addr_hook, NULL); - rcu_assign_pointer(set_sig_addr_hook, NULL); - rcu_assign_pointer(set_ras_addr_hook, NULL); - rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); - rcu_assign_pointer(nat_t120_hook, NULL); - rcu_assign_pointer(nat_h245_hook, NULL); - rcu_assign_pointer(nat_callforwarding_hook, NULL); - rcu_assign_pointer(nat_q931_hook, NULL); + RCU_INIT_POINTER(set_h245_addr_hook, NULL); + RCU_INIT_POINTER(set_h225_addr_hook, NULL); + RCU_INIT_POINTER(set_sig_addr_hook, NULL); + RCU_INIT_POINTER(set_ras_addr_hook, NULL); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL); + RCU_INIT_POINTER(nat_t120_hook, NULL); + RCU_INIT_POINTER(nat_h245_hook, NULL); + RCU_INIT_POINTER(nat_callforwarding_hook, NULL); + RCU_INIT_POINTER(nat_q931_hook, NULL); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 535e1a8..979ae16 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -75,14 +75,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_irc_fini(void) { - rcu_assign_pointer(nf_nat_irc_hook, NULL); + RCU_INIT_POINTER(nf_nat_irc_hook, NULL); synchronize_rcu(); } static int __init nf_nat_irc_init(void) { BUG_ON(nf_nat_irc_hook != NULL); - rcu_assign_pointer(nf_nat_irc_hook, help); + RCU_INIT_POINTER(nf_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 4c06003..3e8284b 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -282,25 +282,25 @@ static int __init nf_nat_helper_pptp_init(void) nf_nat_need_gre(); BUG_ON(nf_nat_pptp_hook_outbound != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); BUG_ON(nf_nat_pptp_hook_inbound != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); BUG_ON(nf_nat_pptp_hook_expectfn != NULL); - rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected); return 0; } static void __exit nf_nat_helper_pptp_fini(void) { - rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL); - rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf..f52d41e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include <linux/ip.h> #include <linux/netfilter.h> +#include <net/secure_seq.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_rule.h> diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index e40cf78..78844d9 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -528,13 +528,13 @@ err1: static void __exit nf_nat_sip_fini(void) { - rcu_assign_pointer(nf_nat_sip_hook, NULL); - rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); - rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); - rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); synchronize_rcu(); } @@ -547,13 +547,13 @@ static int __init nf_nat_sip_init(void) BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); - rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); - rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); - rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); - rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); - rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); - rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); - rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); + RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip); + RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); + RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect); + RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); + RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); + RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); + RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 076b7c8..d1cb412 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1310,7 +1310,7 @@ static int __init nf_nat_snmp_basic_init(void) int ret = 0; BUG_ON(nf_nat_snmp_hook != NULL); - rcu_assign_pointer(nf_nat_snmp_hook, help); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); ret = nf_conntrack_helper_register(&snmp_trap_helper); if (ret < 0) { @@ -1322,7 +1322,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { - rcu_assign_pointer(nf_nat_snmp_hook, NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index a6e606e..9290048 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -284,7 +284,7 @@ static int __init nf_nat_standalone_init(void) #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); - rcu_assign_pointer(ip_nat_decode_session, nat_decode_session); + RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); #endif ret = nf_nat_rule_init(); if (ret < 0) { @@ -302,7 +302,7 @@ static int __init nf_nat_standalone_init(void) nf_nat_rule_cleanup(); cleanup_decode_session: #ifdef CONFIG_XFRM - rcu_assign_pointer(ip_nat_decode_session, NULL); + RCU_INIT_POINTER(ip_nat_decode_session, NULL); synchronize_net(); #endif return ret; @@ -313,7 +313,7 @@ static void __exit nf_nat_standalone_fini(void) nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); nf_nat_rule_cleanup(); #ifdef CONFIG_XFRM - rcu_assign_pointer(ip_nat_decode_session, NULL); + RCU_INIT_POINTER(ip_nat_decode_session, NULL); synchronize_net(); #endif /* Conntrack caches are unregistered in nf_conntrack_cleanup */ diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 7274a43..a2901bf 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -36,14 +36,14 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_tftp_fini(void) { - rcu_assign_pointer(nf_nat_tftp_hook, NULL); + RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); synchronize_rcu(); } static int __init nf_nat_tftp_init(void) { BUG_ON(nf_nat_tftp_hook != NULL); - rcu_assign_pointer(nf_nat_tftp_hook, help); + RCU_INIT_POINTER(nf_nat_tftp_hook, help); return 0; } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b14ec7d..4bfad5d 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), + SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), + SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1457acb..61714bd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + daddr, saddr, 0, 0); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1730689..26c77e1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include <linux/sysctl.h> #endif #include <net/atmclip.h> +#include <net/secure_seq.h> #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) @@ -119,7 +120,6 @@ static int ip_rt_max_size; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; @@ -323,7 +323,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) + if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) continue; rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); @@ -349,7 +349,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, do { if (--st->bucket < 0) return NULL; - } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); + } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); } @@ -721,7 +721,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1, { return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_iif ^ rt2->rt_iif)) == 0); + (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0); } static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) @@ -730,8 +730,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | - (rt1->rt_oif ^ rt2->rt_oif) | - (rt1->rt_iif ^ rt2->rt_iif)) == 0; + (rt1->rt_route_iif ^ rt2->rt_route_iif) | + (rt1->rt_oif ^ rt2->rt_oif)) == 0; } static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) @@ -760,7 +760,7 @@ static void rt_do_flush(struct net *net, int process_context) if (process_context && need_resched()) cond_resched(); - rth = rcu_dereference_raw(rt_hash_table[i].chain); + rth = rcu_access_pointer(rt_hash_table[i].chain); if (!rth) continue; @@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(dst_get_neighbour(&rt->dst)); - dst_set_neighbour(&rt->dst, NULL); - rt->rt_gateway = peer->redirect_learned.a4; - rt_bind_neighbour(rt); - n = dst_get_neighbour(&rt->dst); + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); @@ -2317,8 +2319,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | - (rth->rt_iif ^ iif) | - rth->rt_oif | + (rth->rt_route_iif ^ iif) | (rth->rt_key_tos ^ tos)) == 0 && rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && @@ -3119,13 +3120,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .procname = "gc_interval", - .data = &ip_rt_gc_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 92bb943..3bc5c8f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; - bool ecn_ok; + bool ecn_ok = false; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febca..4c0da24 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -524,7 +524,7 @@ EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } @@ -540,7 +540,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPHDR_ACK; + tcb->tcp_flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -830,7 +830,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -1074,7 +1074,7 @@ new_segment: } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; } - if (tp->ecn_flags&TCP_ECN_OK) + if (tp->ecn_flags & TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; + if (tp->ecn_flags & TCP_ECN_SEEN) + info->tcpi_options |= TCPI_OPT_ECN_SEEN; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); @@ -2857,26 +2859,25 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { int cpu; + for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); - if (p) { - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - kfree(p); - } + struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); + + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); } free_percpu(pool); } void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *pool = NULL; + struct tcp_md5sig_pool __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2889,30 +2890,24 @@ void tcp_free_md5sig_pool(void) } EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool * __percpu * +static struct tcp_md5sig_pool __percpu * __tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; - pool = alloc_percpu(struct tcp_md5sig_pool *); + pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) return NULL; for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - goto out_free; - *per_cpu_ptr(pool, cpu) = p; - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); if (!hash || IS_ERR(hash)) goto out_free; - p->md5_desc.tfm = hash; + per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } return pool; out_free: @@ -2920,9 +2915,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; int alloc = 0; retry: @@ -2941,7 +2936,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); @@ -2974,7 +2969,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; local_bh_disable(); @@ -2985,7 +2980,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *this_cpu_ptr(p); + return this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -3035,7 +3030,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - sg_set_page(&sg, f->page, f->size, f->page_offset); + struct page *page = skb_frag_page(f); + sg_set_page(&sg, page, f->size, f->page_offset); if (crypto_hash_update(desc, &sg, f->size)) return 1; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ea0d218..81cae64 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -217,16 +217,25 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; } -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb) { - if (tp->ecn_flags & TCP_ECN_OK) { - if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + if (!(tp->ecn_flags & TCP_ECN_OK)) + return; + + switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { + case INET_ECN_NOT_ECT: /* Funny extension: if ECT is not set on a segment, - * it is surely retransmit. It is not in ECN RFC, - * but Linux follows this rule. */ - else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) + * and we already seen ECT on a previous segment, + * it is probably a retransmit. + */ + if (tp->ecn_flags & TCP_ECN_SEEN) tcp_enter_quickack_mode((struct sock *)tp); + break; + case INET_ECN_CE: + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + /* fallinto */ + default: + tp->ecn_flags |= TCP_ECN_SEEN; } } @@ -1124,7 +1133,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, return 0; /* ...Then it's D-SACK, and must reside below snd_una completely */ - if (!after(end_seq, tp->snd_una)) + if (after(end_seq, tp->snd_una)) return 0; if (!before(start_seq, tp->undo_marker)) @@ -1389,9 +1398,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!pcount); - /* Tweak before seqno plays */ - if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && - !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) + if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; TCP_SKB_CB(prev)->end_seq += shifted; @@ -1440,7 +1447,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); @@ -2830,9 +2837,13 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - /* Do not moderate cwnd if it's already undone in cwr or recovery */ - if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { - tp->snd_cwnd = tp->snd_ssthresh; + + /* Do not moderate cwnd if it's already undone in cwr or recovery. */ + if (tp->undo_marker) { + if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + else /* PRR */ + tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2950,6 +2961,38 @@ void tcp_simple_retransmit(struct sock *sk) } EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + * 1) If the packets in flight is larger than ssthresh, PRR spreads the + * cwnd reductions across a full RTT. + * 2) If packets in flight is lower than ssthresh (such as due to excess + * losses and/or application stalls), do not perform any further cwnd + * reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, + int fast_rexmit, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + + if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { + u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + + tp->prior_cwnd - 1; + sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = min_t(int, delta, + max_t(int, tp->prr_delivered - tp->prr_out, + newly_acked_sacked) + 1); + } + + sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2961,7 +3004,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit); * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, + int newly_acked_sacked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3111,13 +3155,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; + tp->prior_cwnd = tp->snd_cwnd; + tp->prr_delivered = 0; + tp->prr_out = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); fast_rexmit = 1; } if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_down(sk, flag); + tp->prr_delivered += newly_acked_sacked; + tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag); tcp_xmit_retransmit_queue(sk); } @@ -3298,7 +3346,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->flags & TCPHDR_SYN)) { + if (!(scb->tcp_flags & TCPHDR_SYN)) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3632,6 +3680,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets; + int prior_sacked = tp->sacked_out; + int newly_acked_sacked = 0; int frto_cwnd = 0; /* If the ack is older than previous acks @@ -3703,6 +3753,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + newly_acked_sacked = (prior_packets - prior_sacked) - + (tp->packets_out - tp->sacked_out); + if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); /* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3715,7 +3768,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - flag); + newly_acked_sacked, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e6..48da7cc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,6 +72,7 @@ #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/netdma.h> +#include <net/secure_seq.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -807,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static void syn_flood_warning(const struct sk_buff *skb) +/* + * Return 1 if a syncookie should be sent + */ +int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) { - const char *msg; + const char *msg = "Dropping request"; + int want_cookie = 0; + struct listen_sock *lopt; + + #ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) + if (sysctl_tcp_syncookies) { msg = "Sending cookies"; - else + want_cookie = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else #endif - msg = "Dropping request"; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - pr_info("TCP: Possible SYN flooding on port %d. %s.\n", - ntohs(tcp_hdr(skb)->dest), msg); + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. " + " Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; } +EXPORT_SYMBOL(tcp_syn_flood_action); /* * Save and compile IPv4 options into the request_sock if needed. @@ -908,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } sk_nocaps_add(sk, NETIF_F_GSO_MASK); } - if (tcp_alloc_md5sig_pool(sk) == NULL) { + + md5sig = tp->md5sig_info; + if (md5sig->entries4 == 0 && + tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } - md5sig = tp->md5sig_info; if (md5sig->alloced4 == md5sig->entries4) { keys = kmalloc((sizeof(*keys) * (md5sig->entries4 + 1)), GFP_ATOMIC); if (!keys) { kfree(newkey); - tcp_free_md5sig_pool(); + if (md5sig->entries4 == 0) + tcp_free_md5sig_pool(); return -ENOMEM; } @@ -963,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) kfree(tp->md5sig_info->keys4); tp->md5sig_info->keys4 = NULL; tp->md5sig_info->alloced4 = 0; + tcp_free_md5sig_pool(); } else if (tp->md5sig_info->entries4 != i) { /* Need to do some manipulation */ memmove(&tp->md5sig_info->keys4[i], @@ -970,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) (tp->md5sig_info->entries4 - i) * sizeof(struct tcp4_md5sig_key)); } - tcp_free_md5sig_pool(); return 0; } } @@ -1234,11 +1256,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ -#endif /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1249,14 +1267,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - want_cookie = 1; - } else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); + if (!want_cookie) + goto drop; } /* Accept backlog is full. If we have already queued enough @@ -1302,9 +1315,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { @@ -1577,7 +1588,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1594,7 +1605,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1602,7 +1613,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1669,7 +1680,7 @@ int tcp_v4_rcv(struct sk_buff *skb) skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = iph->tos; + TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 882e0b0..dde6b57 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -297,9 +297,9 @@ static u16 tcp_select_window(struct sock *sk) /* Packet ECN state for a SYN-ACK */ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; } /* Packet ECN state for a SYN. */ @@ -309,7 +309,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = 0; if (sysctl_tcp_ecn == 1) { - TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } } @@ -356,7 +356,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - TCP_SKB_CB(skb)->flags = flags; + TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->gso_segs = 1; @@ -826,7 +826,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); - if (unlikely(tcb->flags & TCPHDR_SYN)) + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); else tcp_options_size = tcp_established_options(sk, skb, &opts, @@ -850,9 +850,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | - tcb->flags); + tcb->tcp_flags); - if (unlikely(tcb->flags & TCPHDR_SYN)) { + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { /* RFC1323: The window in SYN & SYN/ACK segments * is never scaled. */ @@ -875,7 +875,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } tcp_options_write((__be32 *)(th + 1), tp, &opts); - if (likely((tcb->flags & TCPHDR_SYN) == 0)) + if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); #ifdef CONFIG_TCP_MD5SIG @@ -889,7 +889,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, icsk->icsk_af_ops->send_check(sk, skb); - if (likely(tcb->flags & TCPHDR_ACK)) + if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) @@ -1032,9 +1032,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); - TCP_SKB_CB(buff)->flags = flags; + flags = TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); + TCP_SKB_CB(buff)->tcp_flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { @@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -1340,7 +1340,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, u32 in_flight, cwnd; /* Don't be strict about the congestion window for the final FIN. */ - if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) + if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && + tcp_skb_pcount(skb) == 1) return 1; in_flight = tcp_packets_in_flight(tp); @@ -1409,7 +1410,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, * Nagle can be ignored during F-RTO too (see RFC4138). */ if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return 1; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1497,9 +1498,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); - TCP_SKB_CB(buff)->flags = flags; + flags = TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); + TCP_SKB_CB(buff)->tcp_flags = flags; /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; @@ -1530,7 +1531,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) u32 send_win, cong_win, limit, in_flight; int win_divisor; - if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto send_now; if (icsk->icsk_ca_state != TCP_CA_Open) @@ -1657,7 +1658,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; - TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; + TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; nskb->ip_summed = skb->ip_summed; @@ -1677,11 +1678,11 @@ static int tcp_mtu_probe(struct sock *sk) if (skb->len <= copy) { /* We've eaten all the data from this skb. * Throw it away. */ - TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; + TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); } else { - TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & + TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & ~(TCPHDR_FIN|TCPHDR_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); @@ -1796,11 +1797,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, tcp_event_new_data_sent(sk, skb); tcp_minshall_update(tp, mss_now, skb); - sent_pkts++; + sent_pkts += tcp_skb_pcount(skb); if (push_one) break; } + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += sent_pkts; if (likely(sent_pkts)) { tcp_cwnd_validate(sk); @@ -1985,7 +1988,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; /* Merge over control information. This moves PSH/FIN etc. over */ - TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags; + TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; /* All done, get rid of second SKB and account for it so * packet counting does not break. @@ -2033,7 +2036,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!sysctl_tcp_retrans_collapse) return; - if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; tcp_for_write_queue_from_safe(skb, tmp, sk) { @@ -2125,12 +2128,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * since it is cheap to do so and saves bytes on the network. */ if (skb->len > 0 && - (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && + (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { /* Reuse, even though it does some unnecessary work */ tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1, - TCP_SKB_CB(skb)->flags); + TCP_SKB_CB(skb)->tcp_flags); skb->ip_summed = CHECKSUM_NONE; } } @@ -2294,6 +2297,9 @@ begin_fwd: return; NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) + tp->prr_out += tcp_skb_pcount(skb); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, @@ -2317,7 +2323,7 @@ void tcp_send_fin(struct sock *sk) mss_now = tcp_current_mss(sk); if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { @@ -2379,11 +2385,11 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { + if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; } - if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { + if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) @@ -2397,7 +2403,7 @@ int tcp_send_synack(struct sock *sk) skb = nskb; } - TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -2794,13 +2800,13 @@ int tcp_write_wakeup(struct sock *sk) if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || skb->len > mss) { seg_size = min(seg_size, mss); - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; if (tcp_fragment(sk, skb, seg_size, mss)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a193..ebaa96b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1267,7 +1267,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - sock_rps_save_rxhash(sk, 0); + sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1355,7 +1355,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); rc = ip_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -1461,10 +1461,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto drop; if (sk_rcvqueues_full(sk, skb)) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a55500c..e39239e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -374,8 +374,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) "%s(): cannot allocate memory for statistics; dev=%s.\n", __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); + dev_put(dev); + kfree(ndev); return NULL; } @@ -428,7 +428,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ndev->tstamp = jiffies; addrconf_sysctl_register(ndev); /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, ndev); + RCU_INIT_POINTER(dev->ip6_ptr, ndev); /* Join all-node multicast group */ ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (dst_get_neighbour(&rt->dst) == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; @@ -824,12 +824,13 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i { struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; - unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age; + unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age; unsigned long regen_advance; int tmp_plen; int ret = 0; int max_addresses; u32 addr_flags; + unsigned long now = jiffies; write_lock(&idev->lock); if (ift) { @@ -874,7 +875,7 @@ retry: goto out; } memcpy(&addr.s6_addr[8], idev->rndid, 8); - age = (jiffies - ifp->tstamp) / HZ; + age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); @@ -884,7 +885,6 @@ retry: idev->cnf.max_desync_factor); tmp_plen = ifp->prefix_len; max_addresses = idev->cnf.max_addresses; - tmp_cstamp = ifp->cstamp; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); @@ -929,7 +929,7 @@ retry: ift->ifpub = ifp; ift->valid_lft = tmp_valid_lft; ift->prefered_lft = tmp_prefered_lft; - ift->cstamp = tmp_cstamp; + ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -1999,25 +1999,50 @@ ok: #ifdef CONFIG_IPV6_PRIVACY read_lock_bh(&in6_dev->lock); /* update all temporary addresses in the list */ - list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { - /* - * When adjusting the lifetimes of an existing - * temporary address, only lower the lifetimes. - * Implementations must not increase the - * lifetimes of an existing temporary address - * when processing a Prefix Information Option. - */ + list_for_each_entry(ift, &in6_dev->tempaddr_list, + tmp_list) { + int age, max_valid, max_prefered; + if (ifp != ift->ifpub) continue; + /* + * RFC 4941 section 3.3: + * If a received option will extend the lifetime + * of a public address, the lifetimes of + * temporary addresses should be extended, + * subject to the overall constraint that no + * temporary addresses should ever remain + * "valid" or "preferred" for a time longer than + * (TEMP_VALID_LIFETIME) or + * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), + * respectively. + */ + age = (now - ift->cstamp) / HZ; + max_valid = in6_dev->cnf.temp_valid_lft - age; + if (max_valid < 0) + max_valid = 0; + + max_prefered = in6_dev->cnf.temp_prefered_lft - + in6_dev->cnf.max_desync_factor - + age; + if (max_prefered < 0) + max_prefered = 0; + + if (valid_lft > max_valid) + valid_lft = max_valid; + + if (prefered_lft > max_prefered) + prefered_lft = max_prefered; + spin_lock(&ift->lock); flags = ift->flags; - if (ift->valid_lft > valid_lft && - ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) - ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; - if (ift->prefered_lft > prefered_lft && - ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) - ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; + ift->valid_lft = valid_lft; + ift->prefered_lft = prefered_lft; + ift->tstamp = now; + if (prefered_lft > 0) + ift->flags &= ~IFA_F_DEPRECATED; + spin_unlock(&ift->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ift); @@ -2025,9 +2050,11 @@ ok: if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { /* - * When a new public address is created as described in [ADDRCONF], - * also create a new temporary address. Also create a temporary - * address if it's enabled but no temporary address currently exists. + * When a new public address is created as + * described in [ADDRCONF], also create a new + * temporary address. Also create a temporary + * address if it's enabled but no temporary + * address currently exists. */ read_unlock_bh(&in6_dev->lock); ipv6_create_tempaddr(ifp, NULL); @@ -2706,7 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) idev->dead = 1; /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, NULL); + RCU_INIT_POINTER(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -2969,12 +2996,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) ipv6_ifa_notify(RTM_NEWADDR, ifp); - /* If added prefix is link local and forwarding is off, - start sending router solicitations. + /* If added prefix is link local and we are prepared to process + router advertisements, start sending router solicitations. */ - if ((ifp->idev->cnf.forwarding == 0 || - ifp->idev->cnf.forwarding == 2) && + if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) || + ifp->idev->cnf.accept_ra == 2) && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 1656033..b46e9f8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -33,6 +33,11 @@ #include <linux/errqueue.h> #include <asm/uaccess.h> +static inline int ipv6_mapped_addr_any(const struct in6_addr *a) +{ + return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0)); +} + int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; @@ -102,10 +107,12 @@ ipv4_connected: ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); - if (ipv6_addr_any(&np->saddr)) + if (ipv6_addr_any(&np->saddr) || + ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&np->rcv_saddr) || + ipv6_mapped_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); if (sk->sk_prot->rehash) @@ -592,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct net *net, +int datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) @@ -651,7 +658,8 @@ int datagram_send_ctl(struct net *net, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(net, &src_info->ipi6_addr, + if (!inet_sk(sk)->transparent && + !ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 79a485e..1318de4 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -273,12 +273,12 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; #endif - struct dst_entry *dst; + struct dst_entry *dst = skb_dst(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -289,9 +289,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) dstbuf = opt->dst1; #endif - dst = dst_clone(skb_dst(skb)); if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { - dst_release(dst); skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -304,7 +302,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); - dst_release(dst); return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1190041..2b59154 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -490,7 +490,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) goto out_dst_release; } - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), @@ -500,19 +501,16 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); - goto out_put; + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + len + sizeof(struct icmp6hdr)); } - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); - -out_put: - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); } - EXPORT_SYMBOL(icmpv6_send); static void icmpv6_echo_reply(struct sk_buff *skb) @@ -569,7 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; @@ -583,13 +581,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); - goto out_put; + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + skb->len + sizeof(struct icmp6hdr)); } - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); - -out_put: - if (likely(idev != NULL)) - in6_dev_put(idev); dst_release(dst); out: icmpv6_xmit_unlock(sk); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8c..2916200 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -211,6 +211,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) struct flowi6 fl6; struct dst_entry *dst; struct in6_addr *final_p, final; + int res; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; @@ -241,12 +242,14 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) __inet6_csk_dst_store(sk, dst, NULL, NULL); } - skb_dst_set(skb, dst_clone(dst)); + rcu_read_lock(); + skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ ipv6_addr_copy(&fl6.daddr, &np->daddr); - return ip6_xmit(sk, skb, &fl6, np->opt); + res = ip6_xmit(sk, skb, &fl6, np->opt); + rcu_read_unlock(); + return res; } - EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b531972..73f1a00 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -20,6 +20,7 @@ #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> +#include <net/secure_seq.h> #include <net/ip.h> int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 54a4678..320d91d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f3caf1b..5430394 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo } static struct ip6_flowlabel * -fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, - int optlen, int *err_p) +fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, + char __user *optval, int optlen, int *err_p) { struct ip6_flowlabel *fl = NULL; int olen; @@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, + err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; @@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; - fl = fl_create(net, &freq, optval, optlen, &err); + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (fl == NULL) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32e5339..835c04b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ + rcu_read_lock(); n = dst_get_neighbour(*dst); if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif @@ -1471,13 +1480,13 @@ alloc_new_skb: if (page && (left = PAGE_SIZE - off) > 0) { if (copy >= left) copy = left; - if (page != frag->page) { + if (page != skb_frag_page(frag)) { if (i == MAX_SKB_FRAGS) { err = -EMSGSIZE; goto error; } - get_page(page); skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + skb_frag_ref(skb, i); frag = &skb_shinfo(skb)->frags[i]; } } else if(i < MAX_SKB_FRAGS) { @@ -1497,7 +1506,8 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { + if (getfrag(from, skb_frag_address(frag)+frag->size, + offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0bc9888..bdc15c9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -218,8 +218,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); - rcu_assign_pointer(t->next , rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); + RCU_INIT_POINTER(t->next , rtnl_dereference(*tp)); + RCU_INIT_POINTER(*tp, t); } /** @@ -237,7 +237,7 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - rcu_assign_pointer(*tp, t->next); + RCU_INIT_POINTER(*tp, t->next); break; } } @@ -350,7 +350,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) - rcu_assign_pointer(ip6n->tnls_wc[0], NULL); + RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); ip6_tnl_dst_reset(t); @@ -889,7 +889,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; - struct dst_entry *dst; + struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); @@ -897,19 +897,20 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int err = -1; int pkt_len; - if ((dst = ip6_tnl_dst_check(t)) != NULL) - dst_hold(dst); - else { - dst = ip6_route_output(net, NULL, fl6); + if (!fl6->flowi6_mark) + dst = ip6_tnl_dst_check(t); + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); - if (dst->error) + if (ndst->error) goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - dst = NULL; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; goto tx_err_link_failure; } + dst = ndst; } tdev = dst->dev; @@ -955,8 +956,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(dst)); - + if (fl6->flowi6_mark) { + skb_dst_set(skb, dst); + ndst = NULL; + } else { + skb_dst_set_noref(skb, dst); + } skb->transport_header = skb->network_header; proto = fl6->flowi6_proto; @@ -987,13 +992,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, stats->tx_errors++; stats->tx_aborted_errors++; } - ip6_tnl_dst_store(t, dst); + if (ndst) + ip6_tnl_dst_store(t, ndst); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(dst); + dst_release(ndst); return err; } @@ -1020,9 +1026,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) dsfield = ipv4_get_dsfield(iph); - if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { @@ -1069,10 +1077,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); - if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); - if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { @@ -1439,7 +1449,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - rcu_assign_pointer(ip6n->tnls_wc[0], t); + RCU_INIT_POINTER(ip6n->tnls_wc[0], t); return 0; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 705c828..def0538 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -2052,8 +2054,10 @@ int ip6_mr_input(struct sk_buff *skb) int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9cb191e..2fbda5f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, + retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; @@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct ipv6_pinfo *np = inet6_sk(sk); int len; @@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; lock_sock(sk); skb = np->pktoptions; @@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if(level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { @@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ipv6_getsockopt); - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e6ebcd..ee7839f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, break; for (i=0; i<nsrcs; i++) { /* skip inactive filters */ - if (pmc->mca_sfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9da6e02..1f52dd2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -533,7 +533,8 @@ void ndisc_send_skb(struct sk_buff *skb, skb_dst_set(skb, dst); - idev = in6_dev_get(dst->dev); + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, @@ -543,8 +544,7 @@ void ndisc_send_skb(struct sk_buff *skb, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); @@ -1039,7 +1039,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) if (skb->len < sizeof(*rs_msg)) return; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (!idev) { if (net_ratelimit()) ND_PRINTK1("ICMP6 RS: can't find in6 device\n"); @@ -1080,7 +1080,7 @@ static void ndisc_recv_rs(struct sk_buff *skb) neigh_release(neigh); } out: - in6_dev_put(idev); + return; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) @@ -1179,7 +1179,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) * set the RA_RECV flag in the interface */ - in6_dev = in6_dev_get(skb->dev); + in6_dev = __in6_dev_get(skb->dev); if (in6_dev == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: can't find inet6 device for %s.\n", @@ -1188,7 +1188,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (!ndisc_parse_options(opt, optlen, &ndopts)) { - in6_dev_put(in6_dev); ND_PRINTK2(KERN_WARNING "ICMP6 RA: invalid ND options\n"); return; @@ -1255,7 +1254,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() failed to add default route.\n", __func__); - in6_dev_put(in6_dev); return; } @@ -1265,7 +1263,6 @@ static void ndisc_router_discovery(struct sk_buff *skb) "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); dst_release(&rt->dst); - in6_dev_put(in6_dev); return; } neigh->flags |= NTF_ROUTER; @@ -1422,7 +1419,6 @@ out: dst_release(&rt->dst); else if (neigh) neigh_release(neigh); - in6_dev_put(in6_dev); } static void ndisc_redirect_rcv(struct sk_buff *skb) @@ -1481,13 +1477,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - in6_dev = in6_dev_get(skb->dev); + in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) { - in6_dev_put(in6_dev); + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) return; - } /* RFC2461 8.1: * The IP source address of the Redirect MUST be the same as the current @@ -1497,7 +1491,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: invalid ND options\n"); - in6_dev_put(in6_dev); return; } if (ndopts.nd_opts_tgt_lladdr) { @@ -1506,7 +1499,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) if (!lladdr) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: invalid link-layer address length\n"); - in6_dev_put(in6_dev); return; } } @@ -1518,7 +1510,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) on_link); neigh_release(neigh); } - in6_dev_put(in6_dev); } void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, @@ -1651,7 +1642,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, csum_partial(icmph, len, 0)); skb_dst_set(buff, dst); - idev = in6_dev_get(dst->dev); + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); @@ -1660,8 +1652,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; release: diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 2493948..e63c397 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; @@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6a79f308..3486f62 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -130,14 +130,14 @@ static mh_filter_t __rcu *mh_filter __read_mostly; int rawv6_mh_filter_register(mh_filter_t filter) { - rcu_assign_pointer(mh_filter, filter); + RCU_INIT_POINTER(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); int rawv6_mh_filter_unregister(mh_filter_t filter) { - rcu_assign_pointer(mh_filter, NULL); + RCU_INIT_POINTER(mh_filter, NULL); synchronize_rcu(); return 0; } @@ -372,9 +372,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_unlock(&raw_v6_hashinfo.lock); } -static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) +static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { - if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) && + if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); @@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8987da..fb545ed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; + if (!(rt->dst.flags & DST_HOST)) + return NULL; + if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); @@ -241,7 +244,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, { struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); - memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + if (rt != NULL) + memset(&rt->rt6i_table, 0, + sizeof(*rt) - sizeof(struct dst_entry)); return rt; } @@ -252,6 +257,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct inet_peer *peer = rt->rt6i_peer; + if (!(rt->dst.flags & DST_HOST)) + dst_destroy_metrics_generic(dst); + if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -364,7 +372,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -373,8 +381,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -387,8 +397,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -412,8 +425,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = dst_get_neighbour(&rt->dst); + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -430,6 +446,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -714,9 +731,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, ipv6_addr_copy(&rt->rt6i_gateway, daddr); } - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -766,10 +781,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct rt6_info *rt = ip6_rt_copy(ort, daddr); if (rt) { - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst))); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -803,7 +816,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1069,12 +1082,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, neigh = NULL; } - rt->rt6i_idev = idev; + rt->dst.flags |= DST_HOST; + rt->dst.output = ip6_output; dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - rt->dst.output = ip6_output; + + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; @@ -1252,6 +1268,14 @@ int ip6_route_add(struct fib6_config *cfg) if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; + if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { + u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!metrics) { + err = -ENOMEM; + goto out; + } + dst_init_metrics(&rt->dst, metrics, 0); + } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1587,7 +1611,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour(&rt->dst)) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1598,9 +1622,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_dst.plen = 128; - nrt->dst.flags |= DST_HOST; - ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); @@ -1682,7 +1703,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -1745,9 +1766,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt) { rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; + rt->dst.flags |= DST_HOST; ipv6_addr_copy(&rt->rt6i_dst.addr, dest); - rt->rt6i_dst.plen = ort->rt6i_dst.plen; + rt->rt6i_dst.plen = 128; dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; @@ -2326,6 +2348,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2414,8 +2437,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (dst_get_neighbour(&rt->dst)) - NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2608,12 +2634,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif + rcu_read_lock(); n = dst_get_neighbour(&rt->dst); if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07bf108..a7a1860 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { - rcu_assign_pointer(*tp, t->next); + RCU_INIT_POINTER(*tp, t->next); break; } } @@ -192,8 +192,8 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); + RCU_INIT_POINTER(t->next, rtnl_dereference(*tp)); + RCU_INIT_POINTER(*tp, t); } static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) @@ -391,7 +391,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) p->addr = a->addr; p->flags = a->flags; t->prl_count++; - rcu_assign_pointer(t->prl, p); + RCU_INIT_POINTER(t->prl, p); out: return err; } @@ -474,7 +474,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { - rcu_assign_pointer(sitn->tunnels_wc[0], NULL); + RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { ipip6_tunnel_unlink(sitn, netdev_priv(dev)); ipip6_tunnel_del_prl(netdev_priv(dev), NULL); @@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; + if (tos == 1) + tos = ipv6_get_dsfield(iph6); + /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; @@ -1173,7 +1176,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; dev_hold(dev); - rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); + RCU_INIT_POINTER(sitn->tunnels_wc[0], tunnel); return 0; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 89d5bf8..ac83896 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -165,7 +165,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - bool ecn_ok; + bool ecn_ok = false; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 78aa534..5357902 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -61,6 +61,7 @@ #include <net/timewait_sock.h> #include <net/netdma.h> #include <net/inet_common.h> +#include <net/secure_seq.h> #include <asm/uaccess.h> @@ -530,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, return tcp_v6_send_synack(sk, req, rvp); } -static inline void syn_flood_warning(struct sk_buff *skb) -{ -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest)); - else -#endif - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Dropping request.\n", ntohs(tcp_hdr(skb)->dest)); -} - static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree_skb(inet6_rsk(req)->pktopts); @@ -604,7 +591,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, } sk_nocaps_add(sk, NETIF_F_GSO_MASK); } - if (tcp_alloc_md5sig_pool(sk) == NULL) { + if (tp->md5sig_info->entries6 == 0 && + tcp_alloc_md5sig_pool(sk) == NULL) { kfree(newkey); return -ENOMEM; } @@ -613,8 +601,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); if (!keys) { - tcp_free_md5sig_pool(); kfree(newkey); + if (tp->md5sig_info->entries6 == 0) + tcp_free_md5sig_pool(); return -ENOMEM; } @@ -660,6 +649,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) kfree(tp->md5sig_info->keys6); tp->md5sig_info->keys6 = NULL; tp->md5sig_info->alloced6 = 0; + tcp_free_md5sig_pool(); } else { /* shrink the database */ if (tp->md5sig_info->entries6 != i) @@ -668,7 +658,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) (tp->md5sig_info->entries6 - i) * sizeof (tp->md5sig_info->keys6[0])); } - tcp_free_md5sig_pool(); return 0; } } @@ -1178,11 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 -#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1191,14 +1176,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - want_cookie = 1; - else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); + if (!want_cookie) + goto drop; } if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) @@ -1248,9 +1228,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { @@ -1407,6 +1385,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1471,6 +1451,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ @@ -1627,7 +1608,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1649,7 +1630,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1657,7 +1638,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; @@ -1741,7 +1722,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->when = 0; - TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr); + TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29213b5..f4ca0a5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -509,7 +509,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int is_udplite = IS_UDPLITE(sk); if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; @@ -533,7 +533,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { + if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto drop; } @@ -1090,8 +1090,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index e8d5f44..d14152e 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -50,7 +50,7 @@ static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_set_multicast_list = irlan_eth_set_multicast_list, + .ndo_set_rx_mode = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index d0b70da..2615ffc 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -40,9 +40,9 @@ extern int sysctl_slot_timeout; extern int sysctl_fast_poll_increase; extern char sysctl_devname[]; extern int sysctl_max_baud_rate; -extern int sysctl_min_tx_turn_time; -extern int sysctl_max_tx_data_size; -extern int sysctl_max_tx_window; +extern unsigned int sysctl_min_tx_turn_time; +extern unsigned int sysctl_max_tx_data_size; +extern unsigned int sysctl_max_tx_window; extern int sysctl_max_noreply_time; extern int sysctl_warn_noreply_time; extern int sysctl_lap_keepalive_time; diff --git a/net/irda/qos.c b/net/irda/qos.c index 1b51bcf..4369f7f 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12; * Default is 10us which means using the unmodified value given by the * peer except if it's 0 (0 is likely a bug in the other stack). */ -unsigned sysctl_min_tx_turn_time = 10; +unsigned int sysctl_min_tx_turn_time = 10; /* * Maximum data size to be used in transmission in payload of LAP frame. * There is a bit of confusion in the IrDA spec : @@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10; * bytes frames or all negotiated frame sizes, but you can use the sysctl * to play with this value anyway. * Jean II */ -unsigned sysctl_max_tx_data_size = 2042; +unsigned int sysctl_max_tx_data_size = 2042; /* * Maximum transmit window, i.e. number of LAP frames between turn-around. * This allow to override what the peer told us. Some peers are buggy and * don't always support what they tell us. * Jean II */ -unsigned sysctl_max_tx_window = 7; +unsigned int sysctl_max_tx_window = 7; static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get); static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig index 16ce9cd..497fbe7 100644 --- a/net/iucv/Kconfig +++ b/net/iucv/Kconfig @@ -1,15 +1,17 @@ config IUCV - tristate "IUCV support (S390 - z/VM only)" depends on S390 + def_tristate y if S390 + prompt "IUCV support (S390 - z/VM only)" help Select this option if you want to use inter-user communication under VM or VIF. If you run on z/VM, say "Y" to enable a fast communication link between VM guests. config AFIUCV - tristate "AF_IUCV support (S390 - z/VM only)" - depends on IUCV + depends on S390 + def_tristate m if QETH_L3 || IUCV + prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)" help - Select this option if you want to use inter-user communication under - VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast - communication link between VM guests. + Select this option if you want to use AF_IUCV socket applications + based on z/VM inter-user communication vehicle or based on + HiperSockets. diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index e2013e4..c39f3a4 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -27,10 +27,9 @@ #include <asm/cpcmd.h> #include <linux/kmod.h> -#include <net/iucv/iucv.h> #include <net/iucv/af_iucv.h> -#define VERSION "1.1" +#define VERSION "1.2" static char iucv_userid[80]; @@ -42,6 +41,8 @@ static struct proto iucv_proto = { .obj_size = sizeof(struct iucv_sock), }; +static struct iucv_interface *pr_iucv; + /* special AF_IUCV IPRM messages */ static const u8 iprm_shutdown[8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; @@ -90,6 +91,12 @@ do { \ static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); +static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + struct sk_buff *skb, u8 flags); +static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); + /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); @@ -165,7 +172,7 @@ static int afiucv_pm_freeze(struct device *dev) case IUCV_CLOSING: case IUCV_CONNECTED: if (iucv->path) { - err = iucv_path_sever(iucv->path, NULL); + err = pr_iucv->path_sever(iucv->path, NULL); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -229,7 +236,7 @@ static const struct dev_pm_ops afiucv_pm_ops = { static struct device_driver af_iucv_driver = { .owner = THIS_MODULE, .name = "afiucv", - .bus = &iucv_bus, + .bus = NULL, .pm = &afiucv_pm_ops, }; @@ -294,7 +301,11 @@ static inline int iucv_below_msglim(struct sock *sk) if (sk->sk_state != IUCV_CONNECTED) return 1; - return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + if (iucv->transport == AF_IUCV_TRANS_IUCV) + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + else + return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) && + (atomic_read(&iucv->pendings) <= 0)); } /** @@ -312,6 +323,79 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_unlock(); } +/** + * afiucv_hs_send() - send a message through HiperSockets transport + */ +static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, + struct sk_buff *skb, u8 flags) +{ + struct net *net = sock_net(sock); + struct iucv_sock *iucv = iucv_sk(sock); + struct af_iucv_trans_hdr *phs_hdr; + struct sk_buff *nskb; + int err, confirm_recv = 0; + + memset(skb->head, 0, ETH_HLEN); + phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb, + sizeof(struct af_iucv_trans_hdr)); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr)); + + phs_hdr->magic = ETH_P_AF_IUCV; + phs_hdr->version = 1; + phs_hdr->flags = flags; + if (flags == AF_IUCV_FLAG_SYN) + phs_hdr->window = iucv->msglimit; + else if ((flags == AF_IUCV_FLAG_WIN) || !flags) { + confirm_recv = atomic_read(&iucv->msg_recv); + phs_hdr->window = confirm_recv; + if (confirm_recv) + phs_hdr->flags = phs_hdr->flags | AF_IUCV_FLAG_WIN; + } + memcpy(phs_hdr->destUserID, iucv->dst_user_id, 8); + memcpy(phs_hdr->destAppName, iucv->dst_name, 8); + memcpy(phs_hdr->srcUserID, iucv->src_user_id, 8); + memcpy(phs_hdr->srcAppName, iucv->src_name, 8); + ASCEBC(phs_hdr->destUserID, sizeof(phs_hdr->destUserID)); + ASCEBC(phs_hdr->destAppName, sizeof(phs_hdr->destAppName)); + ASCEBC(phs_hdr->srcUserID, sizeof(phs_hdr->srcUserID)); + ASCEBC(phs_hdr->srcAppName, sizeof(phs_hdr->srcAppName)); + if (imsg) + memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message)); + + rcu_read_lock(); + skb->dev = dev_get_by_index_rcu(net, sock->sk_bound_dev_if); + rcu_read_unlock(); + if (!skb->dev) + return -ENODEV; + if (!(skb->dev->flags & IFF_UP)) + return -ENETDOWN; + if (skb->len > skb->dev->mtu) { + if (sock->sk_type == SOCK_SEQPACKET) + return -EMSGSIZE; + else + skb_trim(skb, skb->dev->mtu); + } + skb->protocol = ETH_P_AF_IUCV; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + skb_queue_tail(&iucv->send_skb_q, nskb); + err = dev_queue_xmit(skb); + if (err) { + skb_unlink(nskb, &iucv->send_skb_q); + kfree_skb(nskb); + } else { + atomic_sub(confirm_recv, &iucv->msg_recv); + WARN_ON(atomic_read(&iucv->msg_recv) < 0); + } + return err; +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -380,6 +464,8 @@ static void iucv_sock_close(struct sock *sk) unsigned char user_data[16]; struct iucv_sock *iucv = iucv_sk(sk); unsigned long timeo; + int err, blen; + struct sk_buff *skb; iucv_sock_clear_timer(sk); lock_sock(sk); @@ -390,6 +476,20 @@ static void iucv_sock_close(struct sock *sk) break; case IUCV_CONNECTED: + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + /* send fin */ + blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + skb = sock_alloc_send_skb(sk, blen, 1, &err); + if (skb) { + skb_reserve(skb, + sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN); + err = afiucv_hs_send(NULL, sk, skb, + AF_IUCV_FLAG_FIN); + } + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + } case IUCV_DISCONN: sk->sk_state = IUCV_CLOSING; sk->sk_state_change(sk); @@ -412,7 +512,7 @@ static void iucv_sock_close(struct sock *sk) low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); ASCEBC(user_data, sizeof(user_data)); - iucv_path_sever(iucv->path, user_data); + pr_iucv->path_sever(iucv->path, user_data); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -444,23 +544,33 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) { struct sock *sk; + struct iucv_sock *iucv; sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); if (!sk) return NULL; + iucv = iucv_sk(sk); sock_init_data(sock, sk); - INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); - spin_lock_init(&iucv_sk(sk)->accept_q_lock); - skb_queue_head_init(&iucv_sk(sk)->send_skb_q); - INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list); - spin_lock_init(&iucv_sk(sk)->message_q.lock); - skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); - iucv_sk(sk)->send_tag = 0; - iucv_sk(sk)->flags = 0; - iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; - iucv_sk(sk)->path = NULL; - memset(&iucv_sk(sk)->src_user_id , 0, 32); + INIT_LIST_HEAD(&iucv->accept_q); + spin_lock_init(&iucv->accept_q_lock); + skb_queue_head_init(&iucv->send_skb_q); + INIT_LIST_HEAD(&iucv->message_q.list); + spin_lock_init(&iucv->message_q.lock); + skb_queue_head_init(&iucv->backlog_skb_q); + iucv->send_tag = 0; + atomic_set(&iucv->pendings, 0); + iucv->flags = 0; + iucv->msglimit = 0; + atomic_set(&iucv->msg_sent, 0); + atomic_set(&iucv->msg_recv, 0); + iucv->path = NULL; + iucv->sk_txnotify = afiucv_hs_callback_txnotify; + memset(&iucv->src_user_id , 0, 32); + if (pr_iucv) + iucv->transport = AF_IUCV_TRANS_IUCV; + else + iucv->transport = AF_IUCV_TRANS_HIPER; sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -591,7 +701,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; struct iucv_sock *iucv; - int err; + int err = 0; + struct net_device *dev; + char uid[9]; /* Verify the input sockaddr */ if (!addr || addr->sa_family != AF_IUCV) @@ -610,19 +722,46 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EADDRINUSE; goto done_unlock; } - if (iucv->path) { - err = 0; + if (iucv->path) goto done_unlock; - } /* Bind the socket */ - memcpy(iucv->src_name, sa->siucv_name, 8); - /* Copy the user id */ - memcpy(iucv->src_user_id, iucv_userid, 8); - sk->sk_state = IUCV_BOUND; - err = 0; + if (pr_iucv) + if (!memcmp(sa->siucv_user_id, iucv_userid, 8)) + goto vm_bind; /* VM IUCV transport */ + /* try hiper transport */ + memcpy(uid, sa->siucv_user_id, sizeof(uid)); + ASCEBC(uid, 8); + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if (!memcmp(dev->perm_addr, uid, 8)) { + memcpy(iucv->src_name, sa->siucv_name, 8); + memcpy(iucv->src_user_id, sa->siucv_user_id, 8); + sock->sk->sk_bound_dev_if = dev->ifindex; + sk->sk_state = IUCV_BOUND; + iucv->transport = AF_IUCV_TRANS_HIPER; + if (!iucv->msglimit) + iucv->msglimit = IUCV_HIPER_MSGLIM_DEFAULT; + rcu_read_unlock(); + goto done_unlock; + } + } + rcu_read_unlock(); +vm_bind: + if (pr_iucv) { + /* use local userid for backward compat */ + memcpy(iucv->src_name, sa->siucv_name, 8); + memcpy(iucv->src_user_id, iucv_userid, 8); + sk->sk_state = IUCV_BOUND; + iucv->transport = AF_IUCV_TRANS_IUCV; + if (!iucv->msglimit) + iucv->msglimit = IUCV_QUEUELEN_DEFAULT; + goto done_unlock; + } + /* found no dev to bind */ + err = -ENODEV; done_unlock: /* Release the socket list lock */ write_unlock_bh(&iucv_sk_list.lock); @@ -658,45 +797,44 @@ static int iucv_sock_autobind(struct sock *sk) memcpy(&iucv->src_name, name, 8); + if (!iucv->msglimit) + iucv->msglimit = IUCV_QUEUELEN_DEFAULT; + return err; } -/* Connect an unconnected socket */ -static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, - int alen, int flags) +static int afiucv_hs_connect(struct socket *sock) { - struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; - struct iucv_sock *iucv; - unsigned char user_data[16]; - int err; - - if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) - return -EINVAL; - - if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) - return -EBADFD; - - if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) - return -EINVAL; + struct sk_buff *skb; + int blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + int err = 0; - if (sk->sk_state == IUCV_OPEN) { - err = iucv_sock_autobind(sk); - if (unlikely(err)) - return err; + /* send syn */ + skb = sock_alloc_send_skb(sk, blen, 1, &err); + if (!skb) { + err = -ENOMEM; + goto done; } + skb->dev = NULL; + skb_reserve(skb, blen); + err = afiucv_hs_send(NULL, sk, skb, AF_IUCV_FLAG_SYN); +done: + return err; +} - lock_sock(sk); - - /* Set the destination information */ - memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8); - memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8); +static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + unsigned char user_data[16]; + int err; high_nmcpy(user_data, sa->siucv_name); - low_nmcpy(user_data, iucv_sk(sk)->src_name); + low_nmcpy(user_data, iucv->src_name); ASCEBC(user_data, sizeof(user_data)); - iucv = iucv_sk(sk); /* Create path. */ iucv->path = iucv_path_alloc(iucv->msglimit, IUCV_IPRMDATA, GFP_KERNEL); @@ -704,8 +842,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, err = -ENOMEM; goto done; } - err = iucv_path_connect(iucv->path, &af_iucv_handler, - sa->siucv_user_id, NULL, user_data, sk); + err = pr_iucv->path_connect(iucv->path, &af_iucv_handler, + sa->siucv_user_id, NULL, user_data, + sk); if (err) { iucv_path_free(iucv->path); iucv->path = NULL; @@ -724,21 +863,62 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, err = -ECONNREFUSED; break; } - goto done; } +done: + return err; +} - if (sk->sk_state != IUCV_CONNECTED) { +/* Connect an unconnected socket */ +static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, + int alen, int flags) +{ + struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; + struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); + int err; + + if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + return -EINVAL; + + if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) + return -EBADFD; + + if (sk->sk_state == IUCV_OPEN && + iucv->transport == AF_IUCV_TRANS_HIPER) + return -EBADFD; /* explicit bind required */ + + if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET) + return -EINVAL; + + if (sk->sk_state == IUCV_OPEN) { + err = iucv_sock_autobind(sk); + if (unlikely(err)) + return err; + } + + lock_sock(sk); + + /* Set the destination information */ + memcpy(iucv->dst_user_id, sa->siucv_user_id, 8); + memcpy(iucv->dst_name, sa->siucv_name, 8); + + if (iucv->transport == AF_IUCV_TRANS_HIPER) + err = afiucv_hs_connect(sock); + else + err = afiucv_path_connect(sock, addr); + if (err) + goto done; + + if (sk->sk_state != IUCV_CONNECTED) err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, IUCV_DISCONN), sock_sndtimeo(sk, flags & O_NONBLOCK)); - } - if (sk->sk_state == IUCV_DISCONN) { + if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_CLOSED) err = -ECONNREFUSED; - } - if (err) { - iucv_path_sever(iucv->path, NULL); + if (err && iucv->transport == AF_IUCV_TRANS_IUCV) { + pr_iucv->path_sever(iucv->path, NULL); iucv_path_free(iucv->path); iucv->path = NULL; } @@ -833,20 +1013,21 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, { struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); addr->sa_family = AF_IUCV; *len = sizeof(struct sockaddr_iucv); if (peer) { - memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8); - memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8); + memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8); + memcpy(siucv->siucv_name, iucv->dst_name, 8); } else { - memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8); - memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8); + memcpy(siucv->siucv_user_id, iucv->src_user_id, 8); + memcpy(siucv->siucv_name, iucv->src_name, 8); } memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port)); memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); - memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); + memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); return 0; } @@ -871,7 +1052,7 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, memcpy(prmdata, (void *) skb->data, skb->len); prmdata[7] = 0xff - (u8) skb->len; - return iucv_message_send(path, msg, IUCV_IPRMDATA, 0, + return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0, (void *) prmdata, 8); } @@ -960,9 +1141,16 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - skb = sock_alloc_send_skb(sk, len, noblock, &err); + if (iucv->transport == AF_IUCV_TRANS_HIPER) + skb = sock_alloc_send_skb(sk, + len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN, + noblock, &err); + else + skb = sock_alloc_send_skb(sk, len, noblock, &err); if (!skb) goto out; + if (iucv->transport == AF_IUCV_TRANS_HIPER) + skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN); if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { err = -EFAULT; goto fail; @@ -983,6 +1171,15 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + atomic_inc(&iucv->msg_sent); + err = afiucv_hs_send(&txmsg, sk, skb, 0); + if (err) { + atomic_dec(&iucv->msg_sent); + goto fail; + } + goto release; + } skb_queue_tail(&iucv->send_skb_q, skb); if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) @@ -999,13 +1196,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, /* this error should never happen since the * IUCV_IPRMDATA path flag is set... sever path */ if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); + pr_iucv->path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, + err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0, (void *) skb->data, skb->len); if (err) { if (err == 3) { @@ -1023,6 +1220,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } +release: release_sock(sk); return len; @@ -1095,8 +1293,9 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, skb->len = 0; } } else { - rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA, - skb->data, len, NULL); + rc = pr_iucv->message_receive(path, msg, + msg->flags & IUCV_IPRMDATA, + skb->data, len, NULL); if (rc) { kfree_skb(skb); return; @@ -1110,7 +1309,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, kfree_skb(skb); skb = NULL; if (rc) { - iucv_path_sever(path, NULL); + pr_iucv->path_sever(path, NULL); return; } skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); @@ -1154,7 +1353,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int copied, rlen; - struct sk_buff *skb, *rskb, *cskb; + struct sk_buff *skb, *rskb, *cskb, *sskb; + int blen; int err = 0; if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && @@ -1179,7 +1379,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, rlen, len); cskb = skb; - if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { + if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) { if (!(flags & MSG_PEEK)) skb_queue_head(&sk->sk_receive_queue, skb); return -EFAULT; @@ -1217,6 +1417,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } kfree_skb(skb); + atomic_inc(&iucv->msg_recv); /* Queue backlog skbs */ spin_lock_bh(&iucv->message_q.lock); @@ -1233,6 +1434,24 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb_queue_empty(&iucv->backlog_skb_q)) { if (!list_empty(&iucv->message_q.list)) iucv_process_message_q(sk); + if (atomic_read(&iucv->msg_recv) >= + iucv->msglimit / 2) { + /* send WIN to peer */ + blen = sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN; + sskb = sock_alloc_send_skb(sk, blen, 1, &err); + if (sskb) { + skb_reserve(sskb, + sizeof(struct af_iucv_trans_hdr) + + ETH_HLEN); + err = afiucv_hs_send(NULL, sk, sskb, + AF_IUCV_FLAG_WIN); + } + if (err) { + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + } + } } spin_unlock_bh(&iucv->message_q.lock); } @@ -1327,8 +1546,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { txmsg.class = 0; txmsg.tag = 0; - err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, - (void *) iprm_shutdown, 8); + err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA, + 0, (void *) iprm_shutdown, 8); if (err) { switch (err) { case 1: @@ -1345,7 +1564,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) } if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { - err = iucv_path_quiesce(iucv_sk(sk)->path, NULL); + err = pr_iucv->path_quiesce(iucv->path, NULL); if (err) err = -ENOTCONN; @@ -1372,7 +1591,7 @@ static int iucv_sock_release(struct socket *sock) /* Unregister with IUCV base support */ if (iucv_sk(sk)->path) { - iucv_path_sever(iucv_sk(sk)->path, NULL); + pr_iucv->path_sever(iucv_sk(sk)->path, NULL); iucv_path_free(iucv_sk(sk)->path); iucv_sk(sk)->path = NULL; } @@ -1514,14 +1733,14 @@ static int iucv_callback_connreq(struct iucv_path *path, high_nmcpy(user_data, iucv->dst_name); ASCEBC(user_data, sizeof(user_data)); if (sk->sk_state != IUCV_LISTEN) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } /* Check for backlog size */ if (sk_acceptq_is_full(sk)) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } @@ -1529,7 +1748,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Create the new socket */ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); if (!nsk) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); goto fail; } @@ -1553,9 +1772,9 @@ static int iucv_callback_connreq(struct iucv_path *path, /* set message limit for path based on msglimit of accepting socket */ niucv->msglimit = iucv->msglimit; path->msglim = iucv->msglimit; - err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); + err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { - err = iucv_path_sever(path, user_data); + err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); iucv_sock_kill(nsk); goto fail; @@ -1589,7 +1808,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) int len; if (sk->sk_shutdown & RCV_SHUTDOWN) { - iucv_message_reject(path, msg); + pr_iucv->message_reject(path, msg); return; } @@ -1692,6 +1911,389 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16]) bh_unlock_sock(sk); } +/***************** HiperSockets transport callbacks ********************/ +static void afiucv_swap_src_dest(struct sk_buff *skb) +{ + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + char tmpID[8]; + char tmpName[8]; + + ASCEBC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID)); + ASCEBC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); + ASCEBC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID)); + ASCEBC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName)); + memcpy(tmpID, trans_hdr->srcUserID, 8); + memcpy(tmpName, trans_hdr->srcAppName, 8); + memcpy(trans_hdr->srcUserID, trans_hdr->destUserID, 8); + memcpy(trans_hdr->srcAppName, trans_hdr->destAppName, 8); + memcpy(trans_hdr->destUserID, tmpID, 8); + memcpy(trans_hdr->destAppName, tmpName, 8); + skb_push(skb, ETH_HLEN); + memset(skb->data, 0, ETH_HLEN); +} + +/** + * afiucv_hs_callback_syn - react on received SYN + **/ +static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) +{ + struct sock *nsk; + struct iucv_sock *iucv, *niucv; + struct af_iucv_trans_hdr *trans_hdr; + int err; + + iucv = iucv_sk(sk); + trans_hdr = (struct af_iucv_trans_hdr *)skb->data; + if (!iucv) { + /* no sock - connection refused */ + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; + err = dev_queue_xmit(skb); + goto out; + } + + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + bh_lock_sock(sk); + if ((sk->sk_state != IUCV_LISTEN) || + sk_acceptq_is_full(sk) || + !nsk) { + /* error on server socket - connection refused */ + if (nsk) + sk_free(nsk); + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN; + err = dev_queue_xmit(skb); + bh_unlock_sock(sk); + goto out; + } + + niucv = iucv_sk(nsk); + iucv_sock_init(nsk, sk); + niucv->transport = AF_IUCV_TRANS_HIPER; + niucv->msglimit = iucv->msglimit; + if (!trans_hdr->window) + niucv->msglimit_peer = IUCV_HIPER_MSGLIM_DEFAULT; + else + niucv->msglimit_peer = trans_hdr->window; + memcpy(niucv->dst_name, trans_hdr->srcAppName, 8); + memcpy(niucv->dst_user_id, trans_hdr->srcUserID, 8); + memcpy(niucv->src_name, iucv->src_name, 8); + memcpy(niucv->src_user_id, iucv->src_user_id, 8); + nsk->sk_bound_dev_if = sk->sk_bound_dev_if; + afiucv_swap_src_dest(skb); + trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK; + trans_hdr->window = niucv->msglimit; + /* if receiver acks the xmit connection is established */ + err = dev_queue_xmit(skb); + if (!err) { + iucv_accept_enqueue(sk, nsk); + nsk->sk_state = IUCV_CONNECTED; + sk->sk_data_ready(sk, 1); + } else + iucv_sock_kill(nsk); + bh_unlock_sock(sk); + +out: + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_synack() - react on received SYN-ACK + **/ +static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + + if (!iucv) + goto out; + if (sk->sk_state != IUCV_BOUND) + goto out; + bh_lock_sock(sk); + iucv->msglimit_peer = trans_hdr->window; + sk->sk_state = IUCV_CONNECTED; + sk->sk_state_change(sk); + bh_unlock_sock(sk); +out: + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_synfin() - react on received SYN_FIN + **/ +static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (!iucv) + goto out; + if (sk->sk_state != IUCV_BOUND) + goto out; + bh_lock_sock(sk); + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + bh_unlock_sock(sk); +out: + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_fin() - react on received FIN + **/ +static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + /* other end of connection closed */ + if (iucv) { + bh_lock_sock(sk); + if (!list_empty(&iucv->accept_q)) + sk->sk_state = IUCV_SEVERED; + else + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + bh_unlock_sock(sk); + } + kfree_skb(skb); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_win() - react on received WIN + **/ +static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + struct af_iucv_trans_hdr *trans_hdr = + (struct af_iucv_trans_hdr *)skb->data; + + if (!iucv) + return NET_RX_SUCCESS; + + if (sk->sk_state != IUCV_CONNECTED) + return NET_RX_SUCCESS; + + atomic_sub(trans_hdr->window, &iucv->msg_sent); + iucv_sock_wake_msglim(sk); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_callback_rx() - react on received data + **/ +static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (!iucv) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + if (sk->sk_state != IUCV_CONNECTED) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + /* write stuff from iucv_msg to skb cb */ + if (skb->len <= sizeof(struct af_iucv_trans_hdr)) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + spin_lock(&iucv->message_q.lock); + if (skb_queue_empty(&iucv->backlog_skb_q)) { + if (sock_queue_rcv_skb(sk, skb)) { + /* handle rcv queue full */ + skb_queue_tail(&iucv->backlog_skb_q, skb); + } + } else + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); + spin_unlock(&iucv->message_q.lock); + return NET_RX_SUCCESS; +} + +/** + * afiucv_hs_rcv() - base function for arriving data through HiperSockets + * transport + * called from netif RX softirq + **/ +static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct hlist_node *node; + struct sock *sk; + struct iucv_sock *iucv; + struct af_iucv_trans_hdr *trans_hdr; + char nullstring[8]; + int err = 0; + + skb_pull(skb, ETH_HLEN); + trans_hdr = (struct af_iucv_trans_hdr *)skb->data; + EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); + EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID)); + EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName)); + EBCASC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID)); + memset(nullstring, 0, sizeof(nullstring)); + iucv = NULL; + sk = NULL; + read_lock(&iucv_sk_list.lock); + sk_for_each(sk, node, &iucv_sk_list.head) { + if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { + if ((!memcmp(&iucv_sk(sk)->src_name, + trans_hdr->destAppName, 8)) && + (!memcmp(&iucv_sk(sk)->src_user_id, + trans_hdr->destUserID, 8)) && + (!memcmp(&iucv_sk(sk)->dst_name, nullstring, 8)) && + (!memcmp(&iucv_sk(sk)->dst_user_id, + nullstring, 8))) { + iucv = iucv_sk(sk); + break; + } + } else { + if ((!memcmp(&iucv_sk(sk)->src_name, + trans_hdr->destAppName, 8)) && + (!memcmp(&iucv_sk(sk)->src_user_id, + trans_hdr->destUserID, 8)) && + (!memcmp(&iucv_sk(sk)->dst_name, + trans_hdr->srcAppName, 8)) && + (!memcmp(&iucv_sk(sk)->dst_user_id, + trans_hdr->srcUserID, 8))) { + iucv = iucv_sk(sk); + break; + } + } + } + read_unlock(&iucv_sk_list.lock); + if (!iucv) + sk = NULL; + + /* no sock + how should we send with no sock + 1) send without sock no send rc checking? + 2) introduce default sock to handle this cases + + SYN -> send SYN|ACK in good case, send SYN|FIN in bad case + data -> send FIN + SYN|ACK, SYN|FIN, FIN -> no action? */ + + switch (trans_hdr->flags) { + case AF_IUCV_FLAG_SYN: + /* connect request */ + err = afiucv_hs_callback_syn(sk, skb); + break; + case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK): + /* connect request confirmed */ + err = afiucv_hs_callback_synack(sk, skb); + break; + case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN): + /* connect request refused */ + err = afiucv_hs_callback_synfin(sk, skb); + break; + case (AF_IUCV_FLAG_FIN): + /* close request */ + err = afiucv_hs_callback_fin(sk, skb); + break; + case (AF_IUCV_FLAG_WIN): + err = afiucv_hs_callback_win(sk, skb); + if (skb->len > sizeof(struct af_iucv_trans_hdr)) + err = afiucv_hs_callback_rx(sk, skb); + else + kfree(skb); + break; + case 0: + /* plain data frame */ + err = afiucv_hs_callback_rx(sk, skb); + break; + default: + ; + } + + return err; +} + +/** + * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets + * transport + **/ +static void afiucv_hs_callback_txnotify(struct sk_buff *skb, + enum iucv_tx_notify n) +{ + struct sock *isk = skb->sk; + struct sock *sk = NULL; + struct iucv_sock *iucv = NULL; + struct sk_buff_head *list; + struct sk_buff *list_skb; + struct sk_buff *this = NULL; + unsigned long flags; + struct hlist_node *node; + + read_lock(&iucv_sk_list.lock); + sk_for_each(sk, node, &iucv_sk_list.head) + if (sk == isk) { + iucv = iucv_sk(sk); + break; + } + read_unlock(&iucv_sk_list.lock); + + if (!iucv) + return; + + bh_lock_sock(sk); + list = &iucv->send_skb_q; + list_skb = list->next; + if (skb_queue_empty(list)) + goto out_unlock; + + spin_lock_irqsave(&list->lock, flags); + while (list_skb != (struct sk_buff *)list) { + if (skb_shinfo(list_skb) == skb_shinfo(skb)) { + this = list_skb; + switch (n) { + case TX_NOTIFY_OK: + __skb_unlink(this, list); + iucv_sock_wake_msglim(sk); + kfree_skb(this); + break; + case TX_NOTIFY_PENDING: + atomic_inc(&iucv->pendings); + break; + case TX_NOTIFY_DELAYED_OK: + __skb_unlink(this, list); + atomic_dec(&iucv->pendings); + if (atomic_read(&iucv->pendings) <= 0) + iucv_sock_wake_msglim(sk); + kfree_skb(this); + break; + case TX_NOTIFY_UNREACHABLE: + case TX_NOTIFY_DELAYED_UNREACHABLE: + case TX_NOTIFY_TPQFULL: /* not yet used */ + case TX_NOTIFY_GENERALERROR: + case TX_NOTIFY_DELAYED_GENERALERROR: + __skb_unlink(this, list); + kfree_skb(this); + if (!list_empty(&iucv->accept_q)) + sk->sk_state = IUCV_SEVERED; + else + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); + break; + } + break; + } + list_skb = list_skb->next; + } + spin_unlock_irqrestore(&list->lock, flags); + +out_unlock: + bh_unlock_sock(sk); +} static const struct proto_ops iucv_sock_ops = { .family = PF_IUCV, .owner = THIS_MODULE, @@ -1718,71 +2320,104 @@ static const struct net_proto_family iucv_sock_family_ops = { .create = iucv_sock_create, }; -static int __init afiucv_init(void) +static struct packet_type iucv_packet_type = { + .type = cpu_to_be16(ETH_P_AF_IUCV), + .func = afiucv_hs_rcv, +}; + +static int afiucv_iucv_init(void) { int err; - if (!MACHINE_IS_VM) { - pr_err("The af_iucv module cannot be loaded" - " without z/VM\n"); - err = -EPROTONOSUPPORT; - goto out; - } - cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); - if (unlikely(err)) { - WARN_ON(err); - err = -EPROTONOSUPPORT; - goto out; - } - - err = iucv_register(&af_iucv_handler, 0); + err = pr_iucv->iucv_register(&af_iucv_handler, 0); if (err) goto out; - err = proto_register(&iucv_proto, 0); - if (err) - goto out_iucv; - err = sock_register(&iucv_sock_family_ops); - if (err) - goto out_proto; /* establish dummy device */ + af_iucv_driver.bus = pr_iucv->bus; err = driver_register(&af_iucv_driver); if (err) - goto out_sock; + goto out_iucv; af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); if (!af_iucv_dev) { err = -ENOMEM; goto out_driver; } dev_set_name(af_iucv_dev, "af_iucv"); - af_iucv_dev->bus = &iucv_bus; - af_iucv_dev->parent = iucv_root; + af_iucv_dev->bus = pr_iucv->bus; + af_iucv_dev->parent = pr_iucv->root; af_iucv_dev->release = (void (*)(struct device *))kfree; af_iucv_dev->driver = &af_iucv_driver; err = device_register(af_iucv_dev); if (err) goto out_driver; - return 0; out_driver: driver_unregister(&af_iucv_driver); +out_iucv: + pr_iucv->iucv_unregister(&af_iucv_handler, 0); +out: + return err; +} + +static int __init afiucv_init(void) +{ + int err; + + if (MACHINE_IS_VM) { + cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); + if (unlikely(err)) { + WARN_ON(err); + err = -EPROTONOSUPPORT; + goto out; + } + + pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv"); + if (!pr_iucv) { + printk(KERN_WARNING "iucv_if lookup failed\n"); + memset(&iucv_userid, 0, sizeof(iucv_userid)); + } + } else { + memset(&iucv_userid, 0, sizeof(iucv_userid)); + pr_iucv = NULL; + } + + err = proto_register(&iucv_proto, 0); + if (err) + goto out; + err = sock_register(&iucv_sock_family_ops); + if (err) + goto out_proto; + + if (pr_iucv) { + err = afiucv_iucv_init(); + if (err) + goto out_sock; + } + dev_add_pack(&iucv_packet_type); + return 0; + out_sock: sock_unregister(PF_IUCV); out_proto: proto_unregister(&iucv_proto); -out_iucv: - iucv_unregister(&af_iucv_handler, 0); out: + if (pr_iucv) + symbol_put(iucv_if); return err; } static void __exit afiucv_exit(void) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); + if (pr_iucv) { + device_unregister(af_iucv_dev); + driver_unregister(&af_iucv_driver); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); + symbol_put(iucv_if); + } + dev_remove_pack(&iucv_packet_type); sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); - iucv_unregister(&af_iucv_handler, 0); } module_init(afiucv_init); @@ -1793,3 +2428,4 @@ MODULE_DESCRIPTION("IUCV Sockets ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_IUCV); + diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 075a380..403be43 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1974,6 +1974,27 @@ out: return rc; } +struct iucv_interface iucv_if = { + .message_receive = iucv_message_receive, + .__message_receive = __iucv_message_receive, + .message_reply = iucv_message_reply, + .message_reject = iucv_message_reject, + .message_send = iucv_message_send, + .__message_send = __iucv_message_send, + .message_send2way = iucv_message_send2way, + .message_purge = iucv_message_purge, + .path_accept = iucv_path_accept, + .path_connect = iucv_path_connect, + .path_quiesce = iucv_path_quiesce, + .path_resume = iucv_path_resume, + .path_sever = iucv_path_sever, + .iucv_register = iucv_register, + .iucv_unregister = iucv_unregister, + .bus = NULL, + .root = NULL, +}; +EXPORT_SYMBOL(iucv_if); + /** * iucv_init * @@ -2038,6 +2059,8 @@ static int __init iucv_init(void) rc = bus_register(&iucv_bus); if (rc) goto out_reboot; + iucv_if.root = iucv_root; + iucv_if.bus = &iucv_bus; return 0; out_reboot: diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 956b7e4..8d0324b 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -139,7 +139,8 @@ out: return lapb; } -int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks) +int lapb_register(struct net_device *dev, + const struct lapb_register_struct *callbacks) { struct lapb_cb *lapb; int rc = LAPB_BADTOKEN; @@ -158,7 +159,7 @@ int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks goto out; lapb->dev = dev; - lapb->callbacks = *callbacks; + lapb->callbacks = callbacks; __lapb_insert_cb(lapb); @@ -380,32 +381,32 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb) void lapb_connect_confirmation(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.connect_confirmation) - lapb->callbacks.connect_confirmation(lapb->dev, reason); + if (lapb->callbacks->connect_confirmation) + lapb->callbacks->connect_confirmation(lapb->dev, reason); } void lapb_connect_indication(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.connect_indication) - lapb->callbacks.connect_indication(lapb->dev, reason); + if (lapb->callbacks->connect_indication) + lapb->callbacks->connect_indication(lapb->dev, reason); } void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.disconnect_confirmation) - lapb->callbacks.disconnect_confirmation(lapb->dev, reason); + if (lapb->callbacks->disconnect_confirmation) + lapb->callbacks->disconnect_confirmation(lapb->dev, reason); } void lapb_disconnect_indication(struct lapb_cb *lapb, int reason) { - if (lapb->callbacks.disconnect_indication) - lapb->callbacks.disconnect_indication(lapb->dev, reason); + if (lapb->callbacks->disconnect_indication) + lapb->callbacks->disconnect_indication(lapb->dev, reason); } int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) { - if (lapb->callbacks.data_indication) - return lapb->callbacks.data_indication(lapb->dev, skb); + if (lapb->callbacks->data_indication) + return lapb->callbacks->data_indication(lapb->dev, skb); kfree_skb(skb); return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ @@ -415,8 +416,8 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) { int used = 0; - if (lapb->callbacks.data_transmit) { - lapb->callbacks.data_transmit(lapb->dev, skb); + if (lapb->callbacks->data_transmit) { + lapb->callbacks->data_transmit(lapb->dev, skb); used = 1; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 0cde8df..97f3358 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -69,7 +69,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (!tid_rx) return; - rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); + RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", @@ -325,7 +325,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_SUCCESS; /* activate it for RX */ - rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); + RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); if (timeout) mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1309bb9..55ee5a3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -63,7 +63,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); else if (type == NL80211_IFTYPE_STATION && params && params->use_4addr >= 0) sdata->u.mgd.use_4addr = params->use_4addr; @@ -557,7 +557,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.dtim_period = new->dtim_period; - rcu_assign_pointer(sdata->u.ap.beacon, new); + RCU_INIT_POINTER(sdata->u.ap.beacon, new); synchronize_rcu(); @@ -612,7 +612,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) if (!old) return -ENOENT; - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old); @@ -904,7 +904,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -EBUSY; } - rcu_assign_pointer(vlansdata->u.vlan.sta, sta); + RCU_INIT_POINTER(vlansdata->u.vlan.sta, sta); } sta->sdata = vlansdata; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 2da3040..ede9a8b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -84,7 +84,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, drv_reset_tsf(local, sdata); skb = ifibss->skb; - rcu_assign_pointer(ifibss->presp, NULL); + RCU_INIT_POINTER(ifibss->presp, NULL); synchronize_rcu(); skb->data = skb->head; skb->len = 0; @@ -184,7 +184,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, *pos++ = 0; /* U-APSD no in use */ } - rcu_assign_pointer(ifibss->presp, skb); + RCU_INIT_POINTER(ifibss->presp, skb); sdata->vif.bss_conf.beacon_int = beacon_int; sdata->vif.bss_conf.basic_rates = basic_rates; @@ -995,7 +995,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree(sdata->u.ibss.ie); skb = rcu_dereference_protected(sdata->u.ibss.presp, lockdep_is_held(&sdata->u.ibss.mtx)); - rcu_assign_pointer(sdata->u.ibss.presp, NULL); + RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ef741e8..30d7355 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -456,7 +456,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BEACON_ENABLED); /* remove beacon */ - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old_beacon); @@ -643,7 +643,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_subif_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, @@ -687,7 +687,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_monitor_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_select_queue = ieee80211_monitor_select_queue, diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 332b5ff1..7f54c50 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1168,6 +1168,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(void) { /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_raw(mesh_paths), true); - mesh_table_free(rcu_dereference_raw(mpp_paths), true); + mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); + mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 076593b..58b1c2b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -73,7 +73,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, if (!s) return -ENOENT; if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], + RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], s->hnext); return 0; } @@ -83,7 +83,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, s = rcu_dereference_protected(s->hnext, lockdep_is_held(&local->sta_lock)); if (rcu_access_pointer(s->hnext)) { - rcu_assign_pointer(s->hnext, sta->hnext); + RCU_INIT_POINTER(s->hnext, sta->hnext); return 0; } @@ -232,7 +232,7 @@ static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); + RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } static void sta_unblock(struct work_struct *wk) @@ -906,7 +906,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) local->sta_generation++; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); if (sta->uploaded) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 899b71c..3346829 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -37,7 +37,7 @@ int nf_register_afinfo(const struct nf_afinfo *afinfo) err = mutex_lock_interruptible(&afinfo_mutex); if (err < 0) return err; - rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo); + RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); mutex_unlock(&afinfo_mutex); return 0; } @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(nf_register_afinfo); void nf_unregister_afinfo(const struct nf_afinfo *afinfo) { mutex_lock(&afinfo_mutex); - rcu_assign_pointer(nf_afinfo[afinfo->family], NULL); + RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL); mutex_unlock(&afinfo_mutex); synchronize_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index be43fd8..5290ac3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3679,7 +3679,7 @@ int __net_init ip_vs_control_net_init(struct net *net) int idx; struct netns_ipvs *ipvs = net_ipvs(net); - ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + rwlock_init(&ipvs->rs_lock); /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) @@ -3771,6 +3771,7 @@ err_sock: void ip_vs_control_cleanup(void) { EnterFunction(2); + unregister_netdevice_notifier(&ip_vs_dst_notifier); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f7af8b8..5acfaf5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -779,7 +779,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (exp->helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) - rcu_assign_pointer(help->helper, exp->helper); + RCU_INIT_POINTER(help->helper, exp->helper); } #ifdef CONFIG_NF_CONNTRACK_MARK @@ -1317,7 +1317,7 @@ static void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup(struct net *net) { if (net_eq(net, &init_net)) - rcu_assign_pointer(ip_ct_attach, NULL); + RCU_INIT_POINTER(ip_ct_attach, NULL); /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module @@ -1327,7 +1327,7 @@ void nf_conntrack_cleanup(struct net *net) nf_conntrack_cleanup_net(net); if (net_eq(net, &init_net)) { - rcu_assign_pointer(nf_ct_destroy, NULL); + RCU_INIT_POINTER(nf_ct_destroy, NULL); nf_conntrack_cleanup_init_net(); } } @@ -1576,11 +1576,11 @@ int nf_conntrack_init(struct net *net) if (net_eq(net, &init_net)) { /* For use by REJECT target */ - rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); - rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); + RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); /* Howto get NAT offsets */ - rcu_assign_pointer(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); } return 0; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 63a1b91..3add994 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -94,7 +94,7 @@ int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) ret = -EBUSY; goto out_unlock; } - rcu_assign_pointer(nf_conntrack_event_cb, new); + RCU_INIT_POINTER(nf_conntrack_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -112,7 +112,7 @@ void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) notify = rcu_dereference_protected(nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - rcu_assign_pointer(nf_conntrack_event_cb, NULL); + RCU_INIT_POINTER(nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -129,7 +129,7 @@ int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) ret = -EBUSY; goto out_unlock; } - rcu_assign_pointer(nf_expect_event_cb, new); + RCU_INIT_POINTER(nf_expect_event_cb, new); mutex_unlock(&nf_ct_ecache_mutex); return ret; @@ -147,7 +147,7 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) notify = rcu_dereference_protected(nf_expect_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); BUG_ON(notify != new); - rcu_assign_pointer(nf_expect_event_cb, NULL); + RCU_INIT_POINTER(nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 05ecdc2..4605c94 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -169,7 +169,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type) before updating alloc_size */ type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) + type->len; - rcu_assign_pointer(nf_ct_ext_types[type->id], type); + RCU_INIT_POINTER(nf_ct_ext_types[type->id], type); update_alloc_size(type); out: mutex_unlock(&nf_ct_ext_type_mutex); @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(nf_ct_extend_register); void nf_ct_extend_unregister(struct nf_ct_ext_type *type) { mutex_lock(&nf_ct_ext_type_mutex); - rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); + RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 1bdfea3..93c4bdb 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -131,7 +131,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); goto out; } @@ -145,7 +145,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, memset(&help->help, 0, sizeof(help->help)); } - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); out: return ret; } @@ -162,7 +162,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, lockdep_is_held(&nf_conntrack_lock) ) == me) { nf_conntrack_event(IPCT_HELPER, ct); - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); } return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7dec88a..e58aa9b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1125,7 +1125,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help && help->helper) { /* we had a helper before ... */ nf_ct_remove_expectations(ct); - rcu_assign_pointer(help->helper, NULL); + RCU_INIT_POINTER(help->helper, NULL); } return 0; @@ -1163,7 +1163,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return -EOPNOTSUPP; } - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); return 0; } @@ -1386,7 +1386,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } /* not in hash table yet so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); + RCU_INIT_POINTER(help->helper, helper); } } else { /* try an implicit helper assignation */ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 2fd4565..31d56b2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb, break; case PPTP_WAN_ERROR_NOTIFY: + case PPTP_SET_LINK_INFO: case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 37bf943..8235b86 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK_PERM && opsize == TCPOLEN_SACK_PERM) @@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, BUG_ON(ptr == NULL); /* Fast path for timestamp-only option */ - if (length == TCPOLEN_TSTAMP_ALIGNED*4 + if (length == TCPOLEN_TSTAMP_ALIGNED && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) @@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK && opsize >= (TCPOLEN_SACK_BASE diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 20714ed..ce0c406 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -55,7 +55,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) llog = rcu_dereference_protected(nf_loggers[pf], lockdep_is_held(&nf_log_mutex)); if (llog == NULL) - rcu_assign_pointer(nf_loggers[pf], logger); + RCU_INIT_POINTER(nf_loggers[pf], logger); } mutex_unlock(&nf_log_mutex); @@ -74,7 +74,7 @@ void nf_log_unregister(struct nf_logger *logger) c_logger = rcu_dereference_protected(nf_loggers[i], lockdep_is_held(&nf_log_mutex)); if (c_logger == logger) - rcu_assign_pointer(nf_loggers[i], NULL); + RCU_INIT_POINTER(nf_loggers[i], NULL); list_del(&logger->list[i]); } mutex_unlock(&nf_log_mutex); @@ -92,7 +92,7 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[pf], logger); + RCU_INIT_POINTER(nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); return 0; } @@ -103,7 +103,7 @@ void nf_log_unbind_pf(u_int8_t pf) if (pf >= ARRAY_SIZE(nf_loggers)) return; mutex_lock(&nf_log_mutex); - rcu_assign_pointer(nf_loggers[pf], NULL); + RCU_INIT_POINTER(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); } EXPORT_SYMBOL(nf_log_unbind_pf); @@ -250,7 +250,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); return -ENOENT; } - rcu_assign_pointer(nf_loggers[tindex], logger); + RCU_INIT_POINTER(nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5b466cd..99ffd28 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -40,7 +40,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) else if (old) ret = -EBUSY; else { - rcu_assign_pointer(queue_handler[pf], qh); + RCU_INIT_POINTER(queue_handler[pf], qh); ret = 0; } mutex_unlock(&queue_handler_mutex); @@ -65,7 +65,7 @@ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) return -EINVAL; } - rcu_assign_pointer(queue_handler[pf], NULL); + RCU_INIT_POINTER(queue_handler[pf], NULL); mutex_unlock(&queue_handler_mutex); synchronize_rcu(); @@ -84,7 +84,7 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) queue_handler[pf], lockdep_is_held(&queue_handler_mutex) ) == qh) - rcu_assign_pointer(queue_handler[pf], NULL); + RCU_INIT_POINTER(queue_handler[pf], NULL); } mutex_unlock(&queue_handler_mutex); @@ -312,6 +312,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } break; case NF_STOLEN: + break; default: kfree_skb(skb); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 1905976..c879c1a 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) nfnl_unlock(); return -EBUSY; } - rcu_assign_pointer(subsys_table[n->subsys_id], n); + RCU_INIT_POINTER(subsys_table[n->subsys_id], n); nfnl_unlock(); return 0; @@ -210,7 +210,7 @@ static int __net_init nfnetlink_net_init(struct net *net) if (!nfnl) return -ENOMEM; net->nfnl_stash = nfnl; - rcu_assign_pointer(net->nfnl, nfnl); + RCU_INIT_POINTER(net->nfnl, nfnl); return 0; } @@ -219,7 +219,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) struct net *net; list_for_each_entry(net, net_exit_list, exit_list) - rcu_assign_pointer(net->nfnl, NULL); + RCU_INIT_POINTER(net->nfnl, NULL); synchronize_net(); list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->nfnl_stash); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 00bd475..a80b0cb 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[]) return NULL; vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); - verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; + if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) return NULL; return vhdr; } diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 76a0831..ed0db15 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; - int ret = false; + int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) if (!est1) goto err1; + est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(info->name2); if (!est2) goto err2; - } else - est2 = NULL; - + } info->est1 = est1; info->est2 = est2; @@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) err2: xt_rateest_put(est1); err1: - return -EINVAL; + return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index ea750e9..d2732fc 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -1,8 +1,6 @@ # # Makefile for the NetLabel subsystem. # -# Feb 9, 2006, Paul Moore <paul.moore@hp.com> -# # base objects obj-y := netlabel_user.o netlabel_kapi.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index c051913..96b749d 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 2b9644e..fdbc1d2 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index dd53a36..6bf8783 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index af7f335..d24d774 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 2aa975e5..3f905e5 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ @@ -282,7 +282,7 @@ int __init netlbl_domhsh_init(u32 size) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); spin_lock(&netlbl_domhsh_lock); - rcu_assign_pointer(netlbl_domhsh, hsh_tbl); + RCU_INIT_POINTER(netlbl_domhsh, hsh_tbl); spin_unlock(&netlbl_domhsh_lock); return 0; @@ -330,7 +330,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); } else { INIT_LIST_HEAD(&entry->list); - rcu_assign_pointer(netlbl_domhsh_def, entry); + RCU_INIT_POINTER(netlbl_domhsh_def, entry); } if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { @@ -451,7 +451,7 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, if (entry != rcu_dereference(netlbl_domhsh_def)) list_del_rcu(&entry->list); else - rcu_assign_pointer(netlbl_domhsh_def, NULL); + RCU_INIT_POINTER(netlbl_domhsh_def, NULL); } else ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 0261dda..bfcc0f7 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b528dd9..9c24de10 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -5,7 +5,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ @@ -341,11 +341,11 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - return -ENOMEM; + goto out_entry; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_domain; } if (addr == NULL && mask == NULL) { @@ -354,13 +354,13 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_addrmap; INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_addrinfo; addrinfo->type_def.cipsov4 = doi_def; addrinfo->type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; @@ -374,7 +374,7 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, entry->type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; - goto cfg_cipsov4_map_add_failure; + goto out_addrmap; } ret_val = netlbl_domhsh_add(entry, audit_info); @@ -384,11 +384,15 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, return 0; cfg_cipsov4_map_add_failure: - cipso_v4_doi_putdef(doi_def); + kfree(addrinfo); +out_addrinfo: + kfree(addrmap); +out_addrmap: kfree(entry->domain); +out_domain: kfree(entry); - kfree(addrmap); - kfree(addrinfo); +out_entry: + cipso_v4_doi_putdef(doi_def); return ret_val; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index dff8a08..bfa5558 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 8db37f4..5a9f31c 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index f1ecf84..e251c2c 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ @@ -354,7 +354,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) INIT_LIST_HEAD(&iface->list); if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) goto add_iface_failure; - rcu_assign_pointer(netlbl_unlhsh_def, iface); + RCU_INIT_POINTER(netlbl_unlhsh_def, iface); } spin_unlock(&netlbl_unlhsh_lock); @@ -621,7 +621,7 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) if (iface->ifindex > 0) list_del_rcu(&iface->list); else - rcu_assign_pointer(netlbl_unlhsh_def, NULL); + RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); @@ -1449,7 +1449,7 @@ int __init netlbl_unlabel_init(u32 size) rcu_read_lock(); spin_lock(&netlbl_unlhsh_lock); - rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); + RCU_INIT_POINTER(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); rcu_read_unlock(); diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 0bc8dc3..700af49 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index a3fd75a..9fae63f 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index f4fc4c9..8196978 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore <paul.moore@hp.com> + * Author: Paul Moore <paul@paul-moore.com> * */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db02..1201b6d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) { + if (NULL == siocb->scm) siocb->scm = &scm; - memset(&scm, 0, sizeof(scm)); - } + err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; @@ -1578,7 +1577,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); if (!new) return -ENOMEM; - old = rcu_dereference_raw(tbl->listeners); + old = rcu_dereference_protected(tbl->listeners, 1); memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); rcu_assign_pointer(tbl->listeners, new); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c698cec..7b5f032 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -40,6 +40,10 @@ * byte arrays at the end of sockaddr_ll * and packet_mreq. * Johann Baudy : Added TX RING. + * Chetan Loke : Implemented TPACKET_V3 block abstraction + * layer. + * Copyright (C) 2011, <lokec@ccs.neu.edu> + * * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,9 +165,56 @@ struct packet_mreq_max { unsigned char mr_address[MAX_ADDR_LEN]; }; -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, +static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring); + +#define V3_ALIGNMENT (8) + +#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) + +#define BLK_PLUS_PRIV(sz_of_priv) \ + (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. + */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +#define PGV_FROM_VMALLOC 1 struct pgv { char *buffer; }; @@ -179,12 +230,44 @@ struct packet_ring_buffer { unsigned int pg_vec_pages; unsigned int pg_vec_len; + struct tpacket_kbdq_core prb_bdqc; atomic_t pending; }; +#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) +#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) +#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) +#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len) +#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) + struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static void *packet_previous_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status); +static void packet_increment_head(struct packet_ring_buffer *buff); +static int prb_curr_blk_in_use(struct tpacket_kbdq_core *, + struct tpacket_block_desc *); +static void *prb_dispatch_next_block(struct tpacket_kbdq_core *, + struct packet_sock *); +static void prb_retire_current_block(struct tpacket_kbdq_core *, + struct packet_sock *, unsigned int status); +static int prb_queue_frozen(struct tpacket_kbdq_core *); +static void prb_open_block(struct tpacket_kbdq_core *, + struct tpacket_block_desc *); +static void prb_retire_rx_blk_timer_expired(unsigned long); +static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); +static void prb_init_blk_timer(struct packet_sock *, + struct tpacket_kbdq_core *, + void (*func) (unsigned long)); +static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); +static void prb_clear_rxhash(struct tpacket_kbdq_core *, + struct tpacket3_hdr *); +static void prb_fill_vlan_info(struct tpacket_kbdq_core *, + struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); struct packet_fanout; @@ -193,6 +276,7 @@ struct packet_sock { struct sock sk; struct packet_fanout *fanout; struct tpacket_stats stats; + union tpacket_stats_u stats_u; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; int copy_thresh; @@ -242,6 +326,15 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) +#define GET_PBLOCK_DESC(x, bid) \ + ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) +#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ + ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) +#define GET_NEXT_PRB_BLK_NUM(x) \ + (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ + ((x)->kactive_blk_num+1) : 0) + static inline struct packet_sock *pkt_sk(struct sock *sk) { return (struct packet_sock *)sk; @@ -325,8 +418,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) h.h2->tp_status = status; flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; + case TPACKET_V3: default: - pr_err("TPACKET version not supported\n"); + WARN(1, "TPACKET version not supported.\n"); BUG(); } @@ -351,8 +445,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame) case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); return h.h2->tp_status; + case TPACKET_V3: default: - pr_err("TPACKET version not supported\n"); + WARN(1, "TPACKET version not supported.\n"); BUG(); return 0; } @@ -389,6 +484,670 @@ static inline void *packet_current_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, rb->head, status); } +static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) +{ + del_timer_sync(&pkc->retire_blk_timer); +} + +static void prb_shutdown_retire_blk_timer(struct packet_sock *po, + int tx_ring, + struct sk_buff_head *rb_queue) +{ + struct tpacket_kbdq_core *pkc; + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + + spin_lock(&rb_queue->lock); + pkc->delete_blk_timer = 1; + spin_unlock(&rb_queue->lock); + + prb_del_retire_blk_timer(pkc); +} + +static void prb_init_blk_timer(struct packet_sock *po, + struct tpacket_kbdq_core *pkc, + void (*func) (unsigned long)) +{ + init_timer(&pkc->retire_blk_timer); + pkc->retire_blk_timer.data = (long)po; + pkc->retire_blk_timer.function = func; + pkc->retire_blk_timer.expires = jiffies; +} + +static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring) +{ + struct tpacket_kbdq_core *pkc; + + if (tx_ring) + BUG(); + + pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; + prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); +} + +static int prb_calc_retire_blk_tmo(struct packet_sock *po, + int blk_size_in_bytes) +{ + struct net_device *dev; + unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; + struct ethtool_cmd ecmd; + int err; + + rtnl_lock(); + dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); + if (unlikely(!dev)) { + rtnl_unlock(); + return DEFAULT_PRB_RETIRE_TOV; + } + err = __ethtool_get_settings(dev, &ecmd); + rtnl_unlock(); + if (!err) { + switch (ecmd.speed) { + case SPEED_10000: + msec = 1; + div = 10000/1000; + break; + case SPEED_1000: + msec = 1; + div = 1000/1000; + break; + /* + * If the link speed is so slow you don't really + * need to worry about perf anyways + */ + case SPEED_100: + case SPEED_10: + default: + return DEFAULT_PRB_RETIRE_TOV; + } + } + + mbits = (blk_size_in_bytes * 8) / (1024 * 1024); + + if (div) + mbits /= div; + + tmo = mbits * msec; + + if (div) + return tmo+1; + return tmo; +} + +static void prb_init_ft_ops(struct tpacket_kbdq_core *p1, + union tpacket_req_u *req_u) +{ + p1->feature_req_word = req_u->req3.tp_feature_req_word; +} + +static void init_prb_bdqc(struct packet_sock *po, + struct packet_ring_buffer *rb, + struct pgv *pg_vec, + union tpacket_req_u *req_u, int tx_ring) +{ + struct tpacket_kbdq_core *p1 = &rb->prb_bdqc; + struct tpacket_block_desc *pbd; + + memset(p1, 0x0, sizeof(*p1)); + + p1->knxt_seq_num = 1; + p1->pkbdq = pg_vec; + pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; + p1->pkblk_start = (char *)pg_vec[0].buffer; + p1->kblk_size = req_u->req3.tp_block_size; + p1->knum_blocks = req_u->req3.tp_block_nr; + p1->hdrlen = po->tp_hdrlen; + p1->version = po->tp_version; + p1->last_kactive_blk_num = 0; + po->stats_u.stats3.tp_freeze_q_cnt = 0; + if (req_u->req3.tp_retire_blk_tov) + p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; + else + p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, + req_u->req3.tp_block_size); + p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); + p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + + prb_init_ft_ops(p1, req_u); + prb_setup_retire_blk_timer(po, tx_ring); + prb_open_block(p1, pbd); +} + +/* Do NOT update the last_blk_num first. + * Assumes sk_buff_head lock is held. + */ +static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) +{ + mod_timer(&pkc->retire_blk_timer, + jiffies + pkc->tov_in_jiffies); + pkc->last_kactive_blk_num = pkc->kactive_blk_num; +} + +/* + * Timer logic: + * 1) We refresh the timer only when we open a block. + * By doing this we don't waste cycles refreshing the timer + * on packet-by-packet basis. + * + * With a 1MB block-size, on a 1Gbps line, it will take + * i) ~8 ms to fill a block + ii) memcpy etc. + * In this cut we are not accounting for the memcpy time. + * + * So, if the user sets the 'tmo' to 10ms then the timer + * will never fire while the block is still getting filled + * (which is what we want). However, the user could choose + * to close a block early and that's fine. + * + * But when the timer does fire, we check whether or not to refresh it. + * Since the tmo granularity is in msecs, it is not too expensive + * to refresh the timer, lets say every '8' msecs. + * Either the user can set the 'tmo' or we can derive it based on + * a) line-speed and b) block-size. + * prb_calc_retire_blk_tmo() calculates the tmo. + * + */ +static void prb_retire_rx_blk_timer_expired(unsigned long data) +{ + struct packet_sock *po = (struct packet_sock *)data; + struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc; + unsigned int frozen; + struct tpacket_block_desc *pbd; + + spin_lock(&po->sk.sk_receive_queue.lock); + + frozen = prb_queue_frozen(pkc); + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + if (unlikely(pkc->delete_blk_timer)) + goto out; + + /* We only need to plug the race when the block is partially filled. + * tpacket_rcv: + * lock(); increment BLOCK_NUM_PKTS; unlock() + * copy_bits() is in progress ... + * timer fires on other cpu: + * we can't retire the current block because copy_bits + * is in progress. + * + */ + if (BLOCK_NUM_PKTS(pbd)) { + while (atomic_read(&pkc->blk_fill_in_prog)) { + /* Waiting for skb_copy_bits to finish... */ + cpu_relax(); + } + } + + if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { + if (!frozen) { + prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); + if (!prb_dispatch_next_block(pkc, po)) + goto refresh_timer; + else + goto out; + } else { + /* Case 1. Queue was frozen because user-space was + * lagging behind. + */ + if (prb_curr_blk_in_use(pkc, pbd)) { + /* + * Ok, user-space is still behind. + * So just refresh the timer. + */ + goto refresh_timer; + } else { + /* Case 2. queue was frozen,user-space caught up, + * now the link went idle && the timer fired. + * We don't have a block to close.So we open this + * block and restart the timer. + * opening a block thaws the queue,restarts timer + * Thawing/timer-refresh is a side effect. + */ + prb_open_block(pkc, pbd); + goto out; + } + } + } + +refresh_timer: + _prb_refresh_rx_retire_blk_timer(pkc); + +out: + spin_unlock(&po->sk.sk_receive_queue.lock); +} + +static inline void prb_flush_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1, __u32 status) +{ + /* Flush everything minus the block header */ + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + u8 *start, *end; + + start = (u8 *)pbd1; + + /* Skip the block header(we know header WILL fit in 4K) */ + start += PAGE_SIZE; + + end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); + for (; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); + + smp_wmb(); +#endif + + /* Now update the block status. */ + + BLOCK_STATUS(pbd1) = status; + + /* Flush the block header */ + +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 + start = (u8 *)pbd1; + flush_dcache_page(pgv_to_page(start)); + + smp_wmb(); +#endif +} + +/* + * Side effect: + * + * 1) flush the block + * 2) Increment active_blk_num + * + * Note:We DONT refresh the timer on purpose. + * Because almost always the next block will be opened. + */ +static void prb_close_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1, + struct packet_sock *po, unsigned int stat) +{ + __u32 status = TP_STATUS_USER | stat; + + struct tpacket3_hdr *last_pkt; + struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + + if (po->stats.tp_drops) + status |= TP_STATUS_LOSING; + + last_pkt = (struct tpacket3_hdr *)pkc1->prev; + last_pkt->tp_next_offset = 0; + + /* Get the ts of the last pkt */ + if (BLOCK_NUM_PKTS(pbd1)) { + h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; + h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; + } else { + /* Ok, we tmo'd - so get the current time */ + struct timespec ts; + getnstimeofday(&ts); + h1->ts_last_pkt.ts_sec = ts.tv_sec; + h1->ts_last_pkt.ts_nsec = ts.tv_nsec; + } + + smp_wmb(); + + /* Flush the block */ + prb_flush_block(pkc1, pbd1, status); + + pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); +} + +static inline void prb_thaw_queue(struct tpacket_kbdq_core *pkc) +{ + pkc->reset_pending_on_curr_blk = 0; +} + +/* + * Side effect of opening a block: + * + * 1) prb_queue is thawed. + * 2) retire_blk_timer is refreshed. + * + */ +static void prb_open_block(struct tpacket_kbdq_core *pkc1, + struct tpacket_block_desc *pbd1) +{ + struct timespec ts; + struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; + + smp_rmb(); + + if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + getnstimeofday(&ts); + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = (char *)(pkc1->pkblk_start + + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); + + smp_wmb(); + + return; + } + + WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", + pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); + dump_stack(); + BUG(); +} + +/* + * Queue freeze logic: + * 1) Assume tp_block_nr = 8 blocks. + * 2) At time 't0', user opens Rx ring. + * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 + * 4) user-space is either sleeping or processing block '0'. + * 5) tpacket_rcv is currently filling block '7', since there is no space left, + * it will close block-7,loop around and try to fill block '0'. + * call-flow: + * __packet_lookup_frame_in_block + * prb_retire_current_block() + * prb_dispatch_next_block() + * |->(BLOCK_STATUS == USER) evaluates to true + * 5.1) Since block-0 is currently in-use, we just freeze the queue. + * 6) Now there are two cases: + * 6.1) Link goes idle right after the queue is frozen. + * But remember, the last open_block() refreshed the timer. + * When this timer expires,it will refresh itself so that we can + * re-open block-0 in near future. + * 6.2) Link is busy and keeps on receiving packets. This is a simple + * case and __packet_lookup_frame_in_block will check if block-0 + * is free and can now be re-used. + */ +static inline void prb_freeze_queue(struct tpacket_kbdq_core *pkc, + struct packet_sock *po) +{ + pkc->reset_pending_on_curr_blk = 1; + po->stats_u.stats3.tp_freeze_q_cnt++; +} + +#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) + +/* + * If the next block is free then we will dispatch it + * and return a good offset. + * Else, we will freeze the queue. + * So, caller must check the return value. + */ +static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, + struct packet_sock *po) +{ + struct tpacket_block_desc *pbd; + + smp_rmb(); + + /* 1. Get current block num */ + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* 2. If this block is currently in_use then freeze the queue */ + if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { + prb_freeze_queue(pkc, po); + return NULL; + } + + /* + * 3. + * open this block and return the offset where the first packet + * needs to get stored. + */ + prb_open_block(pkc, pbd); + return (void *)pkc->nxt_offset; +} + +static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, + struct packet_sock *po, unsigned int status) +{ + struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* retire/close the current block */ + if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { + /* + * Plug the case where copy_bits() is in progress on + * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't + * have space to copy the pkt in the current block and + * called prb_retire_current_block() + * + * We don't need to worry about the TMO case because + * the timer-handler already handled this case. + */ + if (!(status & TP_STATUS_BLK_TMO)) { + while (atomic_read(&pkc->blk_fill_in_prog)) { + /* Waiting for skb_copy_bits to finish... */ + cpu_relax(); + } + } + prb_close_block(pkc, pbd, po, status); + return; + } + + WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); + dump_stack(); + BUG(); +} + +static inline int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, + struct tpacket_block_desc *pbd) +{ + return TP_STATUS_USER & BLOCK_STATUS(pbd); +} + +static inline int prb_queue_frozen(struct tpacket_kbdq_core *pkc) +{ + return pkc->reset_pending_on_curr_blk; +} + +static inline void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) +{ + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); + atomic_dec(&pkc->blk_fill_in_prog); +} + +static inline void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb); +} + +static inline void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + ppd->hv1.tp_rxhash = 0; +} + +static inline void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + if (vlan_tx_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + ppd->tp_status = TP_STATUS_VLAN_VALID; + } else { + ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + } +} + +static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, + struct tpacket3_hdr *ppd) +{ + prb_fill_vlan_info(pkc, ppd); + + if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) + prb_fill_rxhash(pkc, ppd); + else + prb_clear_rxhash(pkc, ppd); +} + +static inline void prb_fill_curr_block(char *curr, + struct tpacket_kbdq_core *pkc, + struct tpacket_block_desc *pbd, + unsigned int len) +{ + struct tpacket3_hdr *ppd; + + ppd = (struct tpacket3_hdr *)curr; + ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); + pkc->prev = curr; + pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); + BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); + BLOCK_NUM_PKTS(pbd) += 1; + atomic_inc(&pkc->blk_fill_in_prog); + prb_run_all_ft_ops(pkc, ppd); +} + +/* Assumes caller has the sk->rx_queue.lock */ +static void *__packet_lookup_frame_in_block(struct packet_sock *po, + struct sk_buff *skb, + int status, + unsigned int len + ) +{ + struct tpacket_kbdq_core *pkc; + struct tpacket_block_desc *pbd; + char *curr, *end; + + pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring)); + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + + /* Queue is frozen when user space is lagging behind */ + if (prb_queue_frozen(pkc)) { + /* + * Check if that last block which caused the queue to freeze, + * is still in_use by user-space. + */ + if (prb_curr_blk_in_use(pkc, pbd)) { + /* Can't record this packet */ + return NULL; + } else { + /* + * Ok, the block was released by user-space. + * Now let's open that block. + * opening a block also thaws the queue. + * Thawing is a side effect. + */ + prb_open_block(pkc, pbd); + } + } + + smp_mb(); + curr = pkc->nxt_offset; + pkc->skb = skb; + end = (char *) ((char *)pbd + pkc->kblk_size); + + /* first try the current block */ + if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { + prb_fill_curr_block(curr, pkc, pbd, len); + return (void *)curr; + } + + /* Ok, close the current block */ + prb_retire_current_block(pkc, po, 0); + + /* Now, try to dispatch the next block */ + curr = (char *)prb_dispatch_next_block(pkc, po); + if (curr) { + pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); + prb_fill_curr_block(curr, pkc, pbd, len); + return (void *)curr; + } + + /* + * No free blocks are available.user_space hasn't caught up yet. + * Queue was just frozen and now this packet will get dropped. + */ + return NULL; +} + +static inline void *packet_current_rx_frame(struct packet_sock *po, + struct sk_buff *skb, + int status, unsigned int len) +{ + char *curr = NULL; + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + curr = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, status); + return curr; + case TPACKET_V3: + return __packet_lookup_frame_in_block(po, skb, status, len); + default: + WARN(1, "TPACKET version not supported\n"); + BUG(); + return 0; + } +} + +static inline void *prb_lookup_block(struct packet_sock *po, + struct packet_ring_buffer *rb, + unsigned int previous, + int status) +{ + struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + + if (status != BLOCK_STATUS(pbd)) + return NULL; + return pbd; +} + +static inline int prb_previous_blk_num(struct packet_ring_buffer *rb) +{ + unsigned int prev; + if (rb->prb_bdqc.kactive_blk_num) + prev = rb->prb_bdqc.kactive_blk_num-1; + else + prev = rb->prb_bdqc.knum_blocks-1; + return prev; +} + +/* Assumes caller has held the rx_queue.lock */ +static inline void *__prb_previous_block(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + unsigned int previous = prb_previous_blk_num(rb); + return prb_lookup_block(po, rb, previous, status); +} + +static inline void *packet_previous_rx_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + if (po->tp_version <= TPACKET_V2) + return packet_previous_frame(po, rb, status); + + return __prb_previous_block(po, rb, status); +} + +static inline void packet_increment_rx_head(struct packet_sock *po, + struct packet_ring_buffer *rb) +{ + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + return packet_increment_head(rb); + case TPACKET_V3: + default: + WARN(1, "TPACKET version not supported.\n"); + BUG(); + return; + } +} + static inline void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, int status) @@ -961,7 +1720,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: - po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); + spin_lock(&sk->sk_receive_queue.lock); + po->stats.tp_drops++; + atomic_inc(&sk->sk_drops); + spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -982,12 +1744,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, union { struct tpacket_hdr *h1; struct tpacket2_hdr *h2; + struct tpacket3_hdr *h3; void *raw; } h; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; - unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; + unsigned long status = TP_STATUS_USER; unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; struct timeval tv; @@ -1033,37 +1796,46 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, po->tp_reserve; macoff = netoff - maclen; } - - if (macoff + snaplen > po->rx_ring.frame_size) { - if (po->copy_thresh && - atomic_read(&sk->sk_rmem_alloc) + skb->truesize < - (unsigned)sk->sk_rcvbuf) { - if (skb_shared(skb)) { - copy_skb = skb_clone(skb, GFP_ATOMIC); - } else { - copy_skb = skb_get(skb); - skb_head = skb->data; + if (po->tp_version <= TPACKET_V2) { + if (macoff + snaplen > po->rx_ring.frame_size) { + if (po->copy_thresh && + atomic_read(&sk->sk_rmem_alloc) + skb->truesize + < (unsigned)sk->sk_rcvbuf) { + if (skb_shared(skb)) { + copy_skb = skb_clone(skb, GFP_ATOMIC); + } else { + copy_skb = skb_get(skb); + skb_head = skb->data; + } + if (copy_skb) + skb_set_owner_r(copy_skb, sk); } - if (copy_skb) - skb_set_owner_r(copy_skb, sk); + snaplen = po->rx_ring.frame_size - macoff; + if ((int)snaplen < 0) + snaplen = 0; } - snaplen = po->rx_ring.frame_size - macoff; - if ((int)snaplen < 0) - snaplen = 0; } - spin_lock(&sk->sk_receive_queue.lock); - h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); + h.raw = packet_current_rx_frame(po, skb, + TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto ring_is_full; - packet_increment_head(&po->rx_ring); + if (po->tp_version <= TPACKET_V2) { + packet_increment_rx_head(po, &po->rx_ring); + /* + * LOSING will be reported till you read the stats, + * because it's COR - Clear On Read. + * Anyways, moving it for V1/V2 only as V3 doesn't need this + * at packet level. + */ + if (po->stats.tp_drops) + status |= TP_STATUS_LOSING; + } po->stats.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } - if (!po->stats.tp_drops) - status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); skb_copy_bits(skb, 0, h.raw + macoff, snaplen); @@ -1114,6 +1886,29 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; + case TPACKET_V3: + /* tp_nxt_offset,vlan are already populated above. + * So DONT clear those fields here + */ + h.h3->tp_status |= status; + h.h3->tp_len = skb->len; + h.h3->tp_snaplen = snaplen; + h.h3->tp_mac = macoff; + h.h3->tp_net = netoff; + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) + ts = ktime_to_timespec(skb->tstamp); + else + getnstimeofday(&ts); + h.h3->tp_sec = ts.tv_sec; + h.h3->tp_nsec = ts.tv_nsec; + hdrlen = sizeof(*h.h3); + break; default: BUG(); } @@ -1134,13 +1929,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, { u8 *start, *end; - end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); - for (start = h.raw; start < end; start += PAGE_SIZE) - flush_dcache_page(pgv_to_page(start)); + if (po->tp_version <= TPACKET_V2) { + end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + + macoff + snaplen); + for (start = h.raw; start < end; start += PAGE_SIZE) + flush_dcache_page(pgv_to_page(start)); + } smp_wmb(); } #endif - __packet_set_status(po, h.raw, status); + if (po->tp_version <= TPACKET_V2) + __packet_set_status(po, h.raw, status); + else + prb_clear_blk_fill_status(&po->rx_ring); sk->sk_data_ready(sk, 0); @@ -1167,8 +1968,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) struct packet_sock *po = pkt_sk(skb->sk); void *ph; - BUG_ON(skb == NULL); - if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); @@ -1631,7 +2430,7 @@ static int packet_release(struct socket *sock) struct sock *sk = sock->sk; struct packet_sock *po; struct net *net; - struct tpacket_req req; + union tpacket_req_u req_u; if (!sk) return 0; @@ -1654,13 +2453,13 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req, 0, sizeof(req)); + memset(&req_u, 0, sizeof(req_u)); if (po->rx_ring.pg_vec) - packet_set_ring(sk, &req, 1, 0); + packet_set_ring(sk, &req_u, 1, 0); if (po->tx_ring.pg_vec) - packet_set_ring(sk, &req, 1, 1); + packet_set_ring(sk, &req_u, 1, 1); fanout_release(sk); @@ -2280,15 +3079,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_RX_RING: case PACKET_TX_RING: { - struct tpacket_req req; + union tpacket_req_u req_u; + int len; - if (optlen < sizeof(req)) + switch (po->tp_version) { + case TPACKET_V1: + case TPACKET_V2: + len = sizeof(req_u.req); + break; + case TPACKET_V3: + default: + len = sizeof(req_u.req3); + break; + } + if (optlen < len) return -EINVAL; if (pkt_sk(sk)->has_vnet_hdr) return -EINVAL; - if (copy_from_user(&req, optval, sizeof(req))) + if (copy_from_user(&req_u.req, optval, len)) return -EFAULT; - return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); + return packet_set_ring(sk, &req_u, 0, + optname == PACKET_TX_RING); } case PACKET_COPY_THRESH: { @@ -2315,6 +3126,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv switch (val) { case TPACKET_V1: case TPACKET_V2: + case TPACKET_V3: po->tp_version = val; return 0; default: @@ -2424,6 +3236,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct packet_sock *po = pkt_sk(sk); void *data; struct tpacket_stats st; + union tpacket_stats_u st_u; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -2436,15 +3249,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case PACKET_STATISTICS: - if (len > sizeof(struct tpacket_stats)) - len = sizeof(struct tpacket_stats); + if (po->tp_version == TPACKET_V3) { + len = sizeof(struct tpacket_stats_v3); + } else { + if (len > sizeof(struct tpacket_stats)) + len = sizeof(struct tpacket_stats); + } spin_lock_bh(&sk->sk_receive_queue.lock); - st = po->stats; + if (po->tp_version == TPACKET_V3) { + memcpy(&st_u.stats3, &po->stats, + sizeof(struct tpacket_stats)); + st_u.stats3.tp_freeze_q_cnt = + po->stats_u.stats3.tp_freeze_q_cnt; + st_u.stats3.tp_packets += po->stats.tp_drops; + data = &st_u.stats3; + } else { + st = po->stats; + st.tp_packets += st.tp_drops; + data = &st; + } memset(&po->stats, 0, sizeof(st)); spin_unlock_bh(&sk->sk_receive_queue.lock); - st.tp_packets += st.tp_drops; - - data = &st; break; case PACKET_AUXDATA: if (len > sizeof(int)) @@ -2485,6 +3310,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case TPACKET_V2: val = sizeof(struct tpacket2_hdr); break; + case TPACKET_V3: + val = sizeof(struct tpacket3_hdr); + break; default: return -EINVAL; } @@ -2641,7 +3469,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock, spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { - if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) + if (!packet_previous_rx_frame(po, &po->rx_ring, + TP_STATUS_KERNEL)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -2760,7 +3589,7 @@ out_free_pgvec: goto out; } -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, +static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, int closing, int tx_ring) { struct pgv *pg_vec = NULL; @@ -2769,7 +3598,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; - int err; + int err = -EINVAL; + /* Added to avoid minimal code churn */ + struct tpacket_req *req = &req_u->req; + + /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ + if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { + WARN(1, "Tx-ring is not supported.\n"); + goto out; + } rb = tx_ring ? &po->tx_ring : &po->rx_ring; rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; @@ -2795,6 +3632,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, case TPACKET_V2: po->tp_hdrlen = TPACKET2_HDRLEN; break; + case TPACKET_V3: + po->tp_hdrlen = TPACKET3_HDRLEN; + break; } err = -EINVAL; @@ -2820,6 +3660,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, pg_vec = alloc_pg_vec(req, order); if (unlikely(!pg_vec)) goto out; + switch (po->tp_version) { + case TPACKET_V3: + /* Transmit path is not supported. We checked + * it above but just being paranoid + */ + if (!tx_ring) + init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring); + break; + default: + break; + } } /* Done */ else { @@ -2872,7 +3723,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - + if (closing && (po->tp_version > TPACKET_V2)) { + /* Because we don't support block-based V3 on tx-ring */ + if (!tx_ring) + prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue); + } release_sock(sk); if (pg_vec) diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index c6fffd9..bf10ea8 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -480,7 +480,7 @@ int __init_or_module phonet_proto_register(unsigned int protocol, if (proto_tab[protocol]) err = -EBUSY; else - rcu_assign_pointer(proto_tab[protocol], pp); + RCU_INIT_POINTER(proto_tab[protocol], pp); mutex_unlock(&proto_tab_lock); return err; @@ -491,7 +491,7 @@ void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); - rcu_assign_pointer(proto_tab[protocol], NULL); + RCU_INIT_POINTER(proto_tab[protocol], NULL); mutex_unlock(&proto_tab_lock); synchronize_rcu(); proto_unregister(pp->prot); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index d2df8f3..c582761 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -276,7 +276,7 @@ static void phonet_route_autodel(struct net_device *dev) mutex_lock(&pnn->routes.lock); for (i = 0; i < 64; i++) if (dev == pnn->routes.table[i]) { - rcu_assign_pointer(pnn->routes.table[i], NULL); + RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } mutex_unlock(&pnn->routes.lock); @@ -390,7 +390,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr) daddr = daddr >> 2; mutex_lock(&routes->lock); if (routes->table[daddr] == NULL) { - rcu_assign_pointer(routes->table[daddr], dev); + RCU_INIT_POINTER(routes->table[daddr], dev); dev_hold(dev); err = 0; } @@ -406,7 +406,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr) daddr = daddr >> 2; mutex_lock(&routes->lock); if (dev == routes->table[daddr]) - rcu_assign_pointer(routes->table[daddr], NULL); + RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; mutex_unlock(&routes->lock); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ab07711..676d18d 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -679,7 +679,7 @@ int pn_sock_bind_res(struct sock *sk, u8 res) mutex_lock(&resource_mutex); if (pnres.sk[res] == NULL) { sock_hold(sk); - rcu_assign_pointer(pnres.sk[res], sk); + RCU_INIT_POINTER(pnres.sk[res], sk); ret = 0; } mutex_unlock(&resource_mutex); @@ -695,7 +695,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res) mutex_lock(&resource_mutex); if (pnres.sk[res] == sk) { - rcu_assign_pointer(pnres.sk[res], NULL); + RCU_INIT_POINTER(pnres.sk[res], NULL); ret = 0; } mutex_unlock(&resource_mutex); @@ -714,7 +714,7 @@ void pn_sock_unbind_all_res(struct sock *sk) mutex_lock(&resource_mutex); for (res = 0; res < 256; res++) { if (pnres.sk[res] == sk) { - rcu_assign_pointer(pnres.sk[res], NULL); + RCU_INIT_POINTER(pnres.sk[res], NULL); match++; } } diff --git a/net/rds/Kconfig b/net/rds/Kconfig index ec753b3..4cf6dc7 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -9,6 +9,7 @@ config RDS config RDS_RDMA tristate "RDS over Infiniband and iWARP" + select LLIST depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS ---help--- Allow RDS to use Infiniband and iWARP as a transport. diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 819c35a..e8fdb17 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -33,10 +33,10 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/rculist.h> +#include <linux/llist.h> #include "rds.h" #include "ib.h" -#include "xlist.h" static DEFINE_PER_CPU(unsigned long, clean_list_grace); #define CLEAN_LIST_BUSY_BIT 0 @@ -49,7 +49,7 @@ struct rds_ib_mr { struct rds_ib_mr_pool *pool; struct ib_fmr *fmr; - struct xlist_head xlist; + struct llist_node llnode; /* unmap_list is for freeing */ struct list_head unmap_list; @@ -71,9 +71,9 @@ struct rds_ib_mr_pool { atomic_t item_count; /* total # of MRs */ atomic_t dirty_count; /* # dirty of MRs */ - struct xlist_head drop_list; /* MRs that have reached their max_maps limit */ - struct xlist_head free_list; /* unused MRs */ - struct xlist_head clean_list; /* global unused & unamapped MRs */ + struct llist_head drop_list; /* MRs that have reached their max_maps limit */ + struct llist_head free_list; /* unused MRs */ + struct llist_head clean_list; /* global unused & unamapped MRs */ wait_queue_head_t flush_wait; atomic_t free_pinned; /* memory pinned by free MRs */ @@ -220,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) if (!pool) return ERR_PTR(-ENOMEM); - INIT_XLIST_HEAD(&pool->free_list); - INIT_XLIST_HEAD(&pool->drop_list); - INIT_XLIST_HEAD(&pool->clean_list); + init_llist_head(&pool->free_list); + init_llist_head(&pool->drop_list); + init_llist_head(&pool->clean_list); mutex_init(&pool->flush_lock); init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); @@ -260,26 +260,18 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) kfree(pool); } -static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl, - struct rds_ib_mr **ibmr_ret) -{ - struct xlist_head *ibmr_xl; - ibmr_xl = xlist_del_head_fast(xl); - *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist); -} - static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) { struct rds_ib_mr *ibmr = NULL; - struct xlist_head *ret; + struct llist_node *ret; unsigned long *flag; preempt_disable(); flag = &__get_cpu_var(clean_list_grace); set_bit(CLEAN_LIST_BUSY_BIT, flag); - ret = xlist_del_head(&pool->clean_list); + ret = llist_del_first(&pool->clean_list); if (ret) - ibmr = list_entry(ret, struct rds_ib_mr, xlist); + ibmr = llist_entry(ret, struct rds_ib_mr, llnode); clear_bit(CLEAN_LIST_BUSY_BIT, flag); preempt_enable(); @@ -529,46 +521,44 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr } /* - * given an xlist of mrs, put them all into the list_head for more processing + * given an llist of mrs, put them all into the list_head for more processing */ -static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list) +static void llist_append_to_list(struct llist_head *llist, struct list_head *list) { struct rds_ib_mr *ibmr; - struct xlist_head splice; - struct xlist_head *cur; - struct xlist_head *next; - - splice.next = NULL; - xlist_splice(xlist, &splice); - cur = splice.next; - while (cur) { - next = cur->next; - ibmr = list_entry(cur, struct rds_ib_mr, xlist); + struct llist_node *node; + struct llist_node *next; + + node = llist_del_all(llist); + while (node) { + next = node->next; + ibmr = llist_entry(node, struct rds_ib_mr, llnode); list_add_tail(&ibmr->unmap_list, list); - cur = next; + node = next; } } /* - * this takes a list head of mrs and turns it into an xlist of clusters. - * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for - * reuse. + * this takes a list head of mrs and turns it into linked llist nodes + * of clusters. Each cluster has linked llist nodes of + * MR_CLUSTER_SIZE mrs that are ready for reuse. */ -static void list_append_to_xlist(struct rds_ib_mr_pool *pool, - struct list_head *list, struct xlist_head *xlist, - struct xlist_head **tail_ret) +static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, + struct list_head *list, + struct llist_node **nodes_head, + struct llist_node **nodes_tail) { struct rds_ib_mr *ibmr; - struct xlist_head *cur_mr = xlist; - struct xlist_head *tail_mr = NULL; + struct llist_node *cur = NULL; + struct llist_node **next = nodes_head; list_for_each_entry(ibmr, list, unmap_list) { - tail_mr = &ibmr->xlist; - tail_mr->next = NULL; - cur_mr->next = tail_mr; - cur_mr = tail_mr; + cur = &ibmr->llnode; + *next = cur; + next = &cur->next; } - *tail_ret = tail_mr; + *next = NULL; + *nodes_tail = cur; } /* @@ -581,8 +571,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr, *next; - struct xlist_head clean_xlist; - struct xlist_head *clean_tail; + struct llist_node *clean_nodes; + struct llist_node *clean_tail; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); unsigned long unpinned = 0; @@ -603,7 +593,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, prepare_to_wait(&pool->flush_wait, &wait, TASK_UNINTERRUPTIBLE); - if (xlist_empty(&pool->clean_list)) + if (llist_empty(&pool->clean_list)) schedule(); ibmr = rds_ib_reuse_fmr(pool); @@ -628,10 +618,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, /* Get the list of all MRs to be dropped. Ordering matters - * we want to put drop_list ahead of free_list. */ - xlist_append_to_list(&pool->drop_list, &unmap_list); - xlist_append_to_list(&pool->free_list, &unmap_list); + llist_append_to_list(&pool->drop_list, &unmap_list); + llist_append_to_list(&pool->free_list, &unmap_list); if (free_all) - xlist_append_to_list(&pool->clean_list, &unmap_list); + llist_append_to_list(&pool->clean_list, &unmap_list); free_goal = rds_ib_flush_goal(pool, free_all); @@ -663,22 +653,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, if (!list_empty(&unmap_list)) { /* we have to make sure that none of the things we're about * to put on the clean list would race with other cpus trying - * to pull items off. The xlist would explode if we managed to + * to pull items off. The llist would explode if we managed to * remove something from the clean list and then add it back again - * while another CPU was spinning on that same item in xlist_del_head. + * while another CPU was spinning on that same item in llist_del_first. * - * This is pretty unlikely, but just in case wait for an xlist grace period + * This is pretty unlikely, but just in case wait for an llist grace period * here before adding anything back into the clean list. */ wait_clean_list_grace(); - list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail); + list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) - refill_local(pool, &clean_xlist, ibmr_ret); + *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - /* refill_local may have emptied our list */ - if (!xlist_empty(&clean_xlist)) - xlist_add(clean_xlist.next, clean_tail, &pool->clean_list); + /* more than one entry in llist nodes */ + if (clean_nodes->next) + llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); } @@ -711,9 +701,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate) /* Return it to the pool's free list */ if (ibmr->remap_count >= pool->fmr_attr.max_maps) - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list); + llist_add(&ibmr->llnode, &pool->drop_list); else - xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list); + llist_add(&ibmr->llnode, &pool->free_list); atomic_add(ibmr->sg_len, &pool->free_pinned); atomic_inc(&pool->dirty_count); diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 8b77edb..4e1de17 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list); + struct list_head *kill_list, + int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) @@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) * will be destroyed by the unmap function. */ if (!list_empty(&unmap_list)) { - ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); + ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, + &kill_list, &unpinned); /* If we've been asked to destroy all MRs, move those * that were simply cleaned to the kill list */ if (free_all) @@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) spin_unlock_irqrestore(&pool->list_lock, flags); } + atomic_sub(unpinned, &pool->free_pinned); atomic_sub(ncleaned, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); @@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list) + struct list_head *kill_list, + int *unpinned) { struct rds_iw_mapping *mapping, *next; unsigned int ncleaned = 0; @@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, spin_lock_irqsave(&pool->list_lock, flags); list_for_each_entry_safe(mapping, next, unmap_list, m_list) { + *unpinned += mapping->m_sg.len; list_move(&mapping->m_list, &laundered); ncleaned++; } diff --git a/net/rds/xlist.h b/net/rds/xlist.h deleted file mode 100644 index e6b5190..0000000 --- a/net/rds/xlist.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _LINUX_XLIST_H -#define _LINUX_XLIST_H - -#include <linux/stddef.h> -#include <linux/poison.h> -#include <linux/prefetch.h> -#include <asm/system.h> - -struct xlist_head { - struct xlist_head *next; -}; - -static inline void INIT_XLIST_HEAD(struct xlist_head *list) -{ - list->next = NULL; -} - -static inline int xlist_empty(struct xlist_head *head) -{ - return head->next == NULL; -} - -static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail, - struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - - while (1) { - cur = head->next; - tail->next = cur; - check = cmpxchg(&head->next, cur, new); - if (check == cur) - break; - } -} - -static inline struct xlist_head *xlist_del_head(struct xlist_head *head) -{ - struct xlist_head *cur; - struct xlist_head *check; - struct xlist_head *next; - - while (1) { - cur = head->next; - if (!cur) - goto out; - - next = cur->next; - check = cmpxchg(&head->next, cur, next); - if (check == cur) - goto out; - } -out: - return cur; -} - -static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head) -{ - struct xlist_head *cur; - - cur = head->next; - if (!cur) - return NULL; - - head->next = cur->next; - return cur; -} - -static inline void xlist_splice(struct xlist_head *list, - struct xlist_head *head) -{ - struct xlist_head *cur; - - WARN_ON(head->next); - cur = xchg(&list->next, NULL); - head->next = cur; -} - -#endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 102fc21..e051398 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - dev_queue_xmit(skb2); - err = 0; + err = dev_queue_xmit(skb2); out: if (err) { diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index be4505e..b014279 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, struct rsvp_filter *f, **fp; struct rsvp_session *s, **sp; struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS-1]; + struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_RSVP_MAX + 1]; struct tcf_exts e; unsigned int h1, h2; @@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; @@ -449,8 +449,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (f->handle != handle && handle) goto errout2; - if (tb[TCA_RSVP_CLASSID-1]) { - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); tcf_bind_filter(tp, &f->res, base); } @@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (handle) goto errout2; - if (tb[TCA_RSVP_DST-1] == NULL) + if (tb[TCA_RSVP_DST] == NULL) goto errout2; err = -ENOBUFS; @@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, goto errout2; h2 = 16; - if (tb[TCA_RSVP_SRC-1]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src)); + if (tb[TCA_RSVP_SRC]) { + memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); h2 = hash_src(f->src); } - if (tb[TCA_RSVP_PINFO-1]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO-1]); + if (tb[TCA_RSVP_PINFO]) { + pinfo = nla_data(tb[TCA_RSVP_PINFO]); f->spi = pinfo->spi; f->tunnelhdr = pinfo->tunnelhdr; } - if (tb[TCA_RSVP_CLASSID-1]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - dst = nla_data(tb[TCA_RSVP_DST-1]); + dst = nla_data(tb[TCA_RSVP_DST]); h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); err = -ENOMEM; @@ -642,8 +642,7 @@ nla_put_failure: return -1; } -static struct tcf_proto_ops RSVP_OPS = { - .next = NULL, +static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, .init = rsvp_init, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2a318f2..b5d56a2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -112,7 +112,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) { struct Qdisc *qdisc = q->queues[prio]; - struct sk_buff *skb = qdisc->dequeue(qdisc); + struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); sch->q.qlen--; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 0a833d0..e83c272 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -287,6 +287,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) u32 r, slot, salt, sfbhash; int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + if (unlikely(sch->q.qlen >= q->limit)) { + sch->qstats.overlimits++; + q->stats.queuedrop++; + goto drop; + } + if (q->rehash_interval > 0) { unsigned long limit = q->rehash_time + q->rehash_interval; @@ -332,12 +338,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) slot ^= 1; sfb_skb_cb(skb)->hashes[slot] = 0; - if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { + if (unlikely(minqlen >= q->max)) { sch->qstats.overlimits++; - if (minqlen >= q->max) - q->stats.bucketdrop++; - else - q->stats.queuedrop++; + q->stats.bucketdrop++; goto drop; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4536ee6..4f5510e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* Return Congestion Notification only if we dropped a packet * from this flow. */ - return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS; + if (qlen != slot->qlen) + return NET_XMIT_CN; + + /* As we dropped a packet, better let upper stack know this */ + qdisc_tree_decrease_qlen(sch, 1); + return NET_XMIT_SUCCESS; } static struct sk_buff * diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dc16b90..152b5b3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -282,6 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; + asoc->new_transport = NULL; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a6d27bf..14c2b06 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -917,6 +917,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * current cwnd). */ if (!list_empty(&q->retransmit)) { + if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED) + goto sctp_flush_out; if (transport == asoc->peer.retran_path) goto retran; @@ -989,6 +991,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) ((new_transport->state == SCTP_INACTIVE) || (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; + if (new_transport->state == SCTP_UNCONFIRMED) + continue; /* Change packets if necessary. */ if (new_transport != transport) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 81db4e3..0121e0a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3015,6 +3015,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, /* Start the heartbeat timer. */ if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) sctp_transport_hold(peer); + asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: /* ADDIP 4.3 D7) If a request is received to delete the diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 167c880..76388b0 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; + + case SCTP_CMD_SET_ASOC: + asoc = cmd->obj.asoc; + break; + default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 49b847b..891f5db 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); + /* Restore association pointer to provide SCTP command interpeter + * with a valid context in case it needs to manipulate + * the queues */ + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, + SCTP_ASOC((struct sctp_association *)asoc)); + return retval; nomem: @@ -3612,6 +3618,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, */ asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); + if (asoc->new_transport) { + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, + commands); + ((struct sctp_association *)asoc)->new_transport = NULL; + } return SCTP_DISPOSITION_CONSUME; } diff --git a/net/socket.c b/net/socket.c index b1cbbcd..2877647 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) +struct used_address { + struct sockaddr_storage name; + unsigned int name_len; +}; + static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned flags, int nosec) + struct msghdr *msg_sys, unsigned flags, + struct used_address *used_address) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1953,8 +1959,30 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) msg_sys->msg_flags |= MSG_DONTWAIT; - err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, - total_len); + /* + * If this is sendmmsg() and current destination address is same as + * previously succeeded address, omit asking LSM's decision. + * used_address->name_len is initialized to UINT_MAX so that the first + * destination address never matches. + */ + if (used_address && msg_sys->msg_name && + used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg_sys->msg_name, + used_address->name_len)) { + err = sock_sendmsg_nosec(sock, msg_sys, total_len); + goto out_freectl; + } + err = sock_sendmsg(sock, msg_sys, total_len); + /* + * If this is sendmmsg() and sending to current destination address was + * successful, remember it. + */ + if (used_address && err >= 0) { + used_address->name_len = msg_sys->msg_namelen; + if (msg_sys->msg_name) + memcpy(&used_address->name, msg_sys->msg_name, + used_address->name_len); + } out_freectl: if (ctl_buf != ctl) @@ -1979,7 +2007,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (!sock) goto out; - err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); + err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); fput_light(sock->file, fput_needed); out: @@ -1998,6 +2026,10 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct mmsghdr __user *entry; struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; + struct used_address used_address; + + if (vlen > UIO_MAXIOV) + vlen = UIO_MAXIOV; datagrams = 0; @@ -2005,27 +2037,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; - err = sock_error(sock->sk); - if (err) - goto out_put; - + used_address.name_len = UINT_MAX; entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; + err = 0; while (datagrams < vlen) { - /* - * No need to ask LSM for more than the first datagram. - */ if (MSG_CMSG_COMPAT & flags) { err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { err = __sys_sendmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = put_user(err, &entry->msg_len); @@ -2037,29 +2064,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, ++datagrams; } -out_put: fput_light(sock->file, fput_needed); - if (err == 0) - return datagrams; - - if (datagrams != 0) { - /* - * We may send less entries than requested (vlen) if the - * sock is non blocking... - */ - if (err != -EAGAIN) { - /* - * ... or if sendmsg returns an error after we - * send some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - + /* We only return an error if no datagrams were able to be sent */ + if (datagrams != 0) return datagrams; - } return err; } @@ -2463,7 +2472,7 @@ int sock_register(const struct net_proto_family *ops) lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - rcu_assign_pointer(net_families[ops->family], ops); + RCU_INIT_POINTER(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); @@ -2491,7 +2500,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - rcu_assign_pointer(net_families[family], NULL); + RCU_INIT_POINTER(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 364eb45..d413275 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -122,7 +122,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) return; gss_get_ctx(ctx); - rcu_assign_pointer(gss_cred->gc_ctx, ctx); + RCU_INIT_POINTER(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); smp_mb__before_clear_bit(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); @@ -970,7 +970,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = gss_cred->gc_ctx; - rcu_assign_pointer(gss_cred->gc_ctx, NULL); + RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9b6a4d1..f4385e4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport + * @xprt: pointer to the target transport * * This prevents mixing the payload of separate requests, and prevents * transport connects from colliding with writes. No congestion control diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 759b318..28908f5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -39,6 +39,7 @@ #include "link.h" #include "port.h" #include "bcast.h" +#include "name_distr.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ @@ -298,14 +299,9 @@ static void bclink_send_nack(struct tipc_node *n_ptr) msg_set_bcgap_to(msg, n_ptr->bclink.gap_to); msg_set_bcast_tag(msg, tipc_own_tag); - if (tipc_bearer_send(&bcbearer->bearer, buf, NULL)) { - bcl->stats.sent_nacks++; - buf_discard(buf); - } else { - tipc_bearer_schedule(bcl->b_ptr, bcl); - bcl->proto_msg_queue = buf; - bcl->stats.bearer_congs++; - } + tipc_bearer_send(&bcbearer->bearer, buf, NULL); + bcl->stats.sent_nacks++; + buf_discard(buf); /* * Ensure we doesn't send another NACK msg to the node @@ -426,20 +422,28 @@ int tipc_bclink_send_msg(struct sk_buff *buf) void tipc_bclink_recv_pkt(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct tipc_node *node = tipc_node_find(msg_prevnode(msg)); + struct tipc_node *node; u32 next_in; u32 seqno; struct sk_buff *deferred; - if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported || - (msg_mc_netid(msg) != tipc_net_id))) { - buf_discard(buf); - return; - } + /* Screen out unwanted broadcast messages */ + + if (msg_mc_netid(msg) != tipc_net_id) + goto exit; + + node = tipc_node_find(msg_prevnode(msg)); + if (unlikely(!node)) + goto exit; + + tipc_node_lock(node); + if (unlikely(!node->bclink.supported)) + goto unlock; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { + if (msg_type(msg) != STATE_MSG) + goto unlock; if (msg_destnode(msg) == tipc_own_addr) { - tipc_node_lock(node); tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); spin_lock_bh(&bc_lock); @@ -449,18 +453,18 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) msg_bcgap_to(msg)); spin_unlock_bh(&bc_lock); } else { + tipc_node_unlock(node); tipc_bclink_peek_nack(msg_destnode(msg), msg_bcast_tag(msg), msg_bcgap_after(msg), msg_bcgap_to(msg)); } - buf_discard(buf); - return; + goto exit; } - tipc_node_lock(node); + /* Handle in-sequence broadcast message */ + receive: - deferred = node->bclink.deferred_head; next_in = mod(node->bclink.last_in + 1); seqno = msg_seqno(msg); @@ -474,7 +478,10 @@ receive: } if (likely(msg_isdata(msg))) { tipc_node_unlock(node); - tipc_port_recv_mcast(buf, NULL); + if (likely(msg_mcast(msg))) + tipc_port_recv_mcast(buf, NULL); + else + buf_discard(buf); } else if (msg_user(msg) == MSG_BUNDLER) { bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); @@ -487,18 +494,22 @@ receive: bcl->stats.recv_fragmented++; tipc_node_unlock(node); tipc_net_route_msg(buf); + } else if (msg_user(msg) == NAME_DISTRIBUTOR) { + tipc_node_unlock(node); + tipc_named_recv(buf); } else { tipc_node_unlock(node); - tipc_net_route_msg(buf); + buf_discard(buf); } + buf = NULL; + tipc_node_lock(node); + deferred = node->bclink.deferred_head; if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) { - tipc_node_lock(node); buf = deferred; msg = buf_msg(buf); node->bclink.deferred_head = deferred->next; goto receive; } - return; } else if (less(next_in, seqno)) { u32 gap_after = node->bclink.gap_after; u32 gap_to = node->bclink.gap_to; @@ -513,6 +524,7 @@ receive: else if (less(gap_after, seqno) && less(seqno, gap_to)) node->bclink.gap_to = seqno; } + buf = NULL; if (bclink_ack_allowed(node->bclink.nack_sync)) { if (gap_to != gap_after) bclink_send_nack(node); @@ -520,9 +532,11 @@ receive: } } else { bcl->stats.duplicates++; - buf_discard(buf); } +unlock: tipc_node_unlock(node); +exit: + buf_discard(buf); } u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) @@ -535,10 +549,11 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) /** * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer * - * Send through as many bearers as necessary to reach all nodes - * that support TIPC multicasting. + * Send packet over as many bearers as necessary to reach all nodes + * that have joined the broadcast link. * - * Returns 0 if packet sent successfully, non-zero if not + * Returns 0 (packet sent successfully) under all circumstances, + * since the broadcast link's pseudo-bearer never blocks */ static int tipc_bcbearer_send(struct sk_buff *buf, @@ -547,7 +562,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { int bp_index; - /* Prepare buffer for broadcasting (if first time trying to send it) */ + /* + * Prepare broadcast link message for reliable transmission, + * if first time trying to send it; + * preparation is skipped for broadcast link protocol messages + * since they are sent in an unreliable manner and don't need it + */ if (likely(!msg_non_seq(buf_msg(buf)))) { struct tipc_msg *msg; @@ -596,18 +616,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf, } if (bcbearer->remains_new.count == 0) - return 0; + break; /* all targets reached */ bcbearer->remains = bcbearer->remains_new; } - /* - * Unable to reach all targets (indicate success, since currently - * there isn't code in place to properly block & unblock the - * pseudo-bearer used by the broadcast link) - */ - - return TIPC_OK; + return 0; } /** @@ -667,27 +681,6 @@ void tipc_bcbearer_sort(void) spin_unlock_bh(&bc_lock); } -/** - * tipc_bcbearer_push - resolve bearer congestion - * - * Forces bclink to push out any unsent packets, until all packets are gone - * or congestion reoccurs. - * No locks set when function called - */ - -void tipc_bcbearer_push(void) -{ - struct tipc_bearer *b_ptr; - - spin_lock_bh(&bc_lock); - b_ptr = &bcbearer->bearer; - if (b_ptr->blocked) { - b_ptr->blocked = 0; - tipc_bearer_lock_push(b_ptr); - } - spin_unlock_bh(&bc_lock); -} - int tipc_bclink_stats(char *buf, const u32 buf_size) { @@ -764,7 +757,7 @@ int tipc_bclink_init(void) bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); if (!bcbearer || !bclink) { - warn("Multicast link creation failed, no memory\n"); + warn("Broadcast link creation failed, no memory\n"); kfree(bcbearer); bcbearer = NULL; kfree(bclink); @@ -775,7 +768,7 @@ int tipc_bclink_init(void) INIT_LIST_HEAD(&bcbearer->bearer.cong_links); bcbearer->bearer.media = &bcbearer->media; bcbearer->media.send_msg = tipc_bcbearer_send; - sprintf(bcbearer->media.name, "tipc-multicast"); + sprintf(bcbearer->media.name, "tipc-broadcast"); bcl = &bclink->link; INIT_LIST_HEAD(&bcl->waiting_ports); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 500c97f..06740da 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,6 +101,5 @@ int tipc_bclink_stats(char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(void); int tipc_bclink_set_queue_limits(u32 limit); void tipc_bcbearer_sort(void); -void tipc_bcbearer_push(void); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 85eba9c..e2202de 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -385,13 +385,9 @@ static int bearer_push(struct tipc_bearer *b_ptr) void tipc_bearer_lock_push(struct tipc_bearer *b_ptr) { - int res; - spin_lock_bh(&b_ptr->lock); - res = bearer_push(b_ptr); + bearer_push(b_ptr); spin_unlock_bh(&b_ptr->lock); - if (res) - tipc_bcbearer_push(); } @@ -608,6 +604,7 @@ int tipc_block_bearer(const char *name) info("Blocking bearer <%s>\n", name); spin_lock_bh(&b_ptr->lock); b_ptr->blocked = 1; + list_splice_init(&b_ptr->cong_links, &b_ptr->links); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { struct tipc_node *n_ptr = l_ptr->owner; @@ -635,6 +632,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr) spin_lock_bh(&b_ptr->lock); b_ptr->blocked = 1; b_ptr->media->disable_bearer(b_ptr); + list_splice_init(&b_ptr->cong_links, &b_ptr->links); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 5ad70ef..d696f9e 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -39,8 +39,8 @@ #include "bcast.h" -#define MAX_BEARERS 8 -#define MAX_MEDIA 4 +#define MAX_BEARERS 2 +#define MAX_MEDIA 2 /* * Identifiers of supported TIPC media types diff --git a/net/tipc/config.h b/net/tipc/config.h index 443159a..80da6eb 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -65,7 +65,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *req_tlv_area, int req_tlv_space, int headroom); -void tipc_cfg_link_event(u32 addr, char *name, int up); int tipc_cfg_init(void); void tipc_cfg_stop(void); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 0987933..f2fb96e 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -159,12 +159,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) } tipc_node_lock(n_ptr); - /* Don't talk to neighbor during cleanup after last session */ - if (n_ptr->cleanup_required) { - tipc_node_unlock(n_ptr); - return; - } - link = n_ptr->links[b_ptr->identity]; /* Create a link endpoint for this bearer, if necessary */ diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index b69092e..e728d4c 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -2,7 +2,7 @@ * net/tipc/eth_media.c: Ethernet bearer support for TIPC * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2008, 2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,7 +37,7 @@ #include "core.h" #include "bearer.h" -#define MAX_ETH_BEARERS 2 +#define MAX_ETH_BEARERS MAX_BEARERS #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI #define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL #define ETH_LINK_WINDOW TIPC_DEF_LINK_WIN @@ -144,31 +144,27 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) /* Find device with specified name */ + read_lock(&dev_base_lock); for_each_netdev(&init_net, pdev) { if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; + dev_hold(dev); break; } } + read_unlock(&dev_base_lock); if (!dev) return -ENODEV; - /* Find Ethernet bearer for device (or create one) */ - - while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev)) - eb_ptr++; - if (eb_ptr == stop) - return -EDQUOT; - if (!eb_ptr->dev) { - eb_ptr->dev = dev; - eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); - eb_ptr->tipc_packet_type.dev = dev; - eb_ptr->tipc_packet_type.func = recv_msg; - eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; - INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - dev_hold(dev); - dev_add_pack(&eb_ptr->tipc_packet_type); - } + /* Create Ethernet bearer for device */ + + eb_ptr->dev = dev; + eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); + eb_ptr->tipc_packet_type.dev = dev; + eb_ptr->tipc_packet_type.func = recv_msg; + eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; + INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); + dev_add_pack(&eb_ptr->tipc_packet_type); /* Associate TIPC bearer with Ethernet bearer */ diff --git a/net/tipc/link.c b/net/tipc/link.c index f89570c..ae98a72 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -332,15 +332,16 @@ struct link *tipc_link_create(struct tipc_node *n_ptr, l_ptr->addr = peer; if_name = strchr(b_ptr->name, ':') + 1; - sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", + sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), tipc_node(tipc_own_addr), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); - /* note: peer i/f is appended to link name by reset/activate */ + /* note: peer i/f name is updated by reset/activate message */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; + l_ptr->peer_session = INVALID_SESSION; l_ptr->b_ptr = b_ptr; link_set_supervision_props(l_ptr, b_ptr->media->tolerance); l_ptr->state = RESET_UNKNOWN; @@ -536,9 +537,6 @@ void tipc_link_stop(struct link *l_ptr) l_ptr->proto_msg_queue = NULL; } -/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */ -#define link_send_event(fcn, l_ptr, up) do { } while (0) - void tipc_link_reset(struct link *l_ptr) { struct sk_buff *buf; @@ -596,10 +594,6 @@ void tipc_link_reset(struct link *l_ptr) l_ptr->fsm_msg_cnt = 0; l_ptr->stale_count = 0; link_reset_statistics(l_ptr); - - link_send_event(tipc_cfg_link_event, l_ptr, 0); - if (!in_own_cluster(l_ptr->addr)) - link_send_event(tipc_disc_link_event, l_ptr, 0); } @@ -608,9 +602,6 @@ static void link_activate(struct link *l_ptr) l_ptr->next_in_no = l_ptr->stats.recv_info = 1; tipc_node_link_up(l_ptr->owner, l_ptr); tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); - link_send_event(tipc_cfg_link_event, l_ptr, 1); - if (!in_own_cluster(l_ptr->addr)) - link_send_event(tipc_disc_link_event, l_ptr, 1); } /** @@ -985,6 +976,51 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector) } /* + * tipc_link_send_names - send name table entries to new neighbor + * + * Send routine for bulk delivery of name table messages when contact + * with a new neighbor occurs. No link congestion checking is performed + * because name table messages *must* be delivered. The messages must be + * small enough not to require fragmentation. + * Called without any locks held. + */ + +void tipc_link_send_names(struct list_head *message_list, u32 dest) +{ + struct tipc_node *n_ptr; + struct link *l_ptr; + struct sk_buff *buf; + struct sk_buff *temp_buf; + + if (list_empty(message_list)) + return; + + read_lock_bh(&tipc_net_lock); + n_ptr = tipc_node_find(dest); + if (n_ptr) { + tipc_node_lock(n_ptr); + l_ptr = n_ptr->active_links[0]; + if (l_ptr) { + /* convert circular list to linear list */ + ((struct sk_buff *)message_list->prev)->next = NULL; + link_add_chain_to_outqueue(l_ptr, + (struct sk_buff *)message_list->next, 0); + tipc_link_push_queue(l_ptr); + INIT_LIST_HEAD(message_list); + } + tipc_node_unlock(n_ptr); + } + read_unlock_bh(&tipc_net_lock); + + /* discard the messages if they couldn't be sent */ + + list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) { + list_del((struct list_head *)buf); + buf_discard(buf); + } +} + +/* * link_send_buf_fast: Entry for data messages where the * destination link is known and the header is complete, * inclusive total message length. Very time critical. @@ -1031,9 +1067,6 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) u32 selector = msg_origport(buf_msg(buf)) & 1; u32 dummy; - if (destnode == tipc_own_addr) - return tipc_port_recv_msg(buf); - read_lock_bh(&tipc_net_lock); n_ptr = tipc_node_find(destnode); if (likely(n_ptr)) { @@ -1658,19 +1691,12 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) continue; } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(msg) && (msg_destnode(msg) != tipc_own_addr))) goto cont; - /* Discard non-routeable messages destined for another node */ - - if (unlikely(!msg_isdata(msg) && - (msg_destnode(msg) != tipc_own_addr))) { - if ((msg_user(msg) != CONN_MANAGER) && - (msg_user(msg) != MSG_FRAGMENTER)) - goto cont; - } - /* Locate neighboring node that sent message */ n_ptr = tipc_node_find(msg_prevnode(msg)); @@ -1678,17 +1704,24 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) goto cont; tipc_node_lock(n_ptr); - /* Don't talk to neighbor during cleanup after last session */ + /* Locate unicast link endpoint that should handle message */ - if (n_ptr->cleanup_required) { + l_ptr = n_ptr->links[b_ptr->identity]; + if (unlikely(!l_ptr)) { tipc_node_unlock(n_ptr); goto cont; } - /* Locate unicast link endpoint that should handle message */ + /* Verify that communication with node is currently allowed */ - l_ptr = n_ptr->links[b_ptr->identity]; - if (unlikely(!l_ptr)) { + if ((n_ptr->block_setup & WAIT_PEER_DOWN) && + msg_user(msg) == LINK_PROTOCOL && + (msg_type(msg) == RESET_MSG || + msg_type(msg) == ACTIVATE_MSG) && + !msg_redundant_link(msg)) + n_ptr->block_setup &= ~WAIT_PEER_DOWN; + + if (n_ptr->block_setup) { tipc_node_unlock(n_ptr); goto cont; } @@ -1923,6 +1956,12 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, if (link_blocked(l_ptr)) return; + + /* Abort non-RESET send if communication with node is prohibited */ + + if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG)) + return; + msg_set_type(msg, msg_typ); msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in)); @@ -2051,9 +2090,19 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) case RESET_MSG: if (!link_working_unknown(l_ptr) && (l_ptr->peer_session != INVALID_SESSION)) { - if (msg_session(msg) == l_ptr->peer_session) - break; /* duplicate: ignore */ + if (less_eq(msg_session(msg), l_ptr->peer_session)) + break; /* duplicate or old reset: ignore */ + } + + if (!msg_redundant_link(msg) && (link_working_working(l_ptr) || + link_working_unknown(l_ptr))) { + /* + * peer has lost contact -- don't allow peer's links + * to reactivate before we recognize loss & clean up + */ + l_ptr->owner->block_setup = WAIT_NODE_DOWN; } + /* fall thru' */ case ACTIVATE_MSG: /* Update link settings according other endpoint's values */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 74fbeca..e56cb53 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -223,6 +223,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); void tipc_link_reset(struct link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); +void tipc_link_send_names(struct list_head *message_list, u32 dest); int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); int tipc_link_send_sections_fast(struct tipc_port *sender, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index cd356e5..b7ca1bd 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -173,18 +173,40 @@ void tipc_named_withdraw(struct publication *publ) * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(unsigned long node) +void tipc_named_node_up(unsigned long nodearg) { + struct tipc_node *n_ptr; + struct link *l_ptr; struct publication *publ; struct distr_item *item = NULL; struct sk_buff *buf = NULL; + struct list_head message_list; + u32 node = (u32)nodearg; u32 left = 0; u32 rest; - u32 max_item_buf; + u32 max_item_buf = 0; + + /* compute maximum amount of publication data to send per message */ + + read_lock_bh(&tipc_net_lock); + n_ptr = tipc_node_find(node); + if (n_ptr) { + tipc_node_lock(n_ptr); + l_ptr = n_ptr->active_links[0]; + if (l_ptr) + max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) / + ITEM_SIZE) * ITEM_SIZE; + tipc_node_unlock(n_ptr); + } + read_unlock_bh(&tipc_net_lock); + if (!max_item_buf) + return; + + /* create list of publication messages, then send them as a unit */ + + INIT_LIST_HEAD(&message_list); read_lock_bh(&tipc_nametbl_lock); - max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE; - max_item_buf *= ITEM_SIZE; rest = publ_cnt * ITEM_SIZE; list_for_each_entry(publ, &publ_root, local_list) { @@ -202,13 +224,14 @@ void tipc_named_node_up(unsigned long node) item++; left -= ITEM_SIZE; if (!left) { - msg_set_link_selector(buf_msg(buf), node); - tipc_link_send(buf, node, node); + list_add_tail((struct list_head *)buf, &message_list); buf = NULL; } } exit: read_unlock_bh(&tipc_nametbl_lock); + + tipc_link_send_names(&message_list, (u32)node); } /** diff --git a/net/tipc/net.c b/net/tipc/net.c index 68b3dd6..fafef6c 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -141,17 +141,6 @@ void tipc_net_route_msg(struct sk_buff *buf) return; msg = buf_msg(buf); - msg_incr_reroute_cnt(msg); - if (msg_reroute_cnt(msg) > 6) { - if (msg_errcode(msg)) { - buf_discard(buf); - } else { - tipc_reject_msg(buf, msg_destport(msg) ? - TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME); - } - return; - } - /* Handle message for this node */ dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); if (tipc_in_scope(dnode, tipc_own_addr)) { diff --git a/net/tipc/node.c b/net/tipc/node.c index 2d106ef..27b4bb0 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -112,6 +112,7 @@ struct tipc_node *tipc_node_create(u32 addr) break; } list_add_tail(&n_ptr->list, &temp_node->list); + n_ptr->block_setup = WAIT_PEER_DOWN; tipc_num_nodes++; @@ -312,7 +313,7 @@ static void node_established_contact(struct tipc_node *n_ptr) } } -static void node_cleanup_finished(unsigned long node_addr) +static void node_name_purge_complete(unsigned long node_addr) { struct tipc_node *n_ptr; @@ -320,7 +321,7 @@ static void node_cleanup_finished(unsigned long node_addr) n_ptr = tipc_node_find(node_addr); if (n_ptr) { tipc_node_lock(n_ptr); - n_ptr->cleanup_required = 0; + n_ptr->block_setup &= ~WAIT_NAMES_GONE; tipc_node_unlock(n_ptr); } read_unlock_bh(&tipc_net_lock); @@ -331,28 +332,32 @@ static void node_lost_contact(struct tipc_node *n_ptr) char addr_string[16]; u32 i; - /* Clean up broadcast reception remains */ - n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0; - while (n_ptr->bclink.deferred_head) { - struct sk_buff *buf = n_ptr->bclink.deferred_head; - n_ptr->bclink.deferred_head = buf->next; - buf_discard(buf); - } - if (n_ptr->bclink.defragm) { - buf_discard(n_ptr->bclink.defragm); - n_ptr->bclink.defragm = NULL; - } + info("Lost contact with %s\n", + tipc_addr_string_fill(addr_string, n_ptr->addr)); + + /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.supported) { + n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0; + while (n_ptr->bclink.deferred_head) { + struct sk_buff *buf = n_ptr->bclink.deferred_head; + n_ptr->bclink.deferred_head = buf->next; + buf_discard(buf); + } + + if (n_ptr->bclink.defragm) { + buf_discard(n_ptr->bclink.defragm); + n_ptr->bclink.defragm = NULL; + } + + tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr); tipc_bclink_acknowledge(n_ptr, mod(n_ptr->bclink.acked + 10000)); - tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr); if (n_ptr->addr < tipc_own_addr) tipc_own_tag--; - } - info("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); + n_ptr->bclink.supported = 0; + } /* Abort link changeover */ for (i = 0; i < MAX_BEARERS; i++) { @@ -367,10 +372,10 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Notify subscribers */ tipc_nodesub_notify(n_ptr); - /* Prevent re-contact with node until all cleanup is done */ + /* Prevent re-contact with node until cleanup is done */ - n_ptr->cleanup_required = 1; - tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr); + n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE; + tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr); } struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) diff --git a/net/tipc/node.h b/net/tipc/node.h index 5c61afc..4f15cb4 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,6 +42,12 @@ #include "net.h" #include "bearer.h" +/* Flags used to block (re)establishment of contact with a neighboring node */ + +#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */ +#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */ +#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */ + /** * struct tipc_node - TIPC node structure * @addr: network address of node @@ -52,7 +58,7 @@ * @active_links: pointers to active links to node * @links: pointers to all links to node * @working_links: number of working links to node (both active and standby) - * @cleanup_required: non-zero if cleaning up after a prior loss of contact + * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node * @permit_changeover: non-zero if node has redundant links to this system * @bclink: broadcast-related info @@ -77,7 +83,7 @@ struct tipc_node { struct link *links[MAX_BEARERS]; int link_cnt; int working_links; - int cleanup_required; + int block_setup; int permit_changeover; struct { int supported; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index adb2eff..9440a3d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -49,7 +49,7 @@ struct tipc_sock { struct sock sk; struct tipc_port *p; struct tipc_portid peer_name; - long conn_timeout; + unsigned int conn_timeout; }; #define tipc_sk(sk) ((struct tipc_sock *)(sk)) @@ -231,7 +231,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; tipc_sk(sk)->p = tp_ptr; - tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); + tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; spin_unlock_bh(tp_ptr->lock); @@ -525,6 +525,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; int needs_conn; + long timeout_val; int res = -EINVAL; if (unlikely(!dest)) @@ -564,6 +565,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, reject_rx_queue(sk); } + timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + do { if (dest->addrtype == TIPC_ADDR_NAME) { res = dest_name_check(dest, m); @@ -600,16 +603,14 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, sock->state = SS_CONNECTING; break; } - if (m->msg_flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout_val <= 0L) { + res = timeout_val ? timeout_val : -EWOULDBLOCK; break; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - !tport->congested); + timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), + !tport->congested, timeout_val); lock_sock(sk); - if (res) - break; } while (1); exit: @@ -636,6 +637,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; + long timeout_val; int res; /* Handle implied connection establishment */ @@ -650,6 +652,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (iocb) lock_sock(sk); + timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + do { if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) @@ -663,16 +667,14 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, total_len); if (likely(res != -ELINKCONG)) break; - if (m->msg_flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout_val <= 0L) { + res = timeout_val ? timeout_val : -EWOULDBLOCK; break; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!tport->congested || !tport->connected)); + timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), + (!tport->congested || !tport->connected), timeout_val); lock_sock(sk); - if (res) - break; } while (1); if (iocb) @@ -1369,7 +1371,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, struct msghdr m = {NULL,}; struct sk_buff *buf; struct tipc_msg *msg; - long timeout; + unsigned int timeout; int res; lock_sock(sk); @@ -1434,7 +1436,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, res = wait_event_interruptible_timeout(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state != SS_CONNECTING)), - timeout ? timeout : MAX_SCHEDULE_TIMEOUT); + timeout ? (long)msecs_to_jiffies(timeout) + : MAX_SCHEDULE_TIMEOUT); lock_sock(sk); if (res > 0) { @@ -1480,9 +1483,7 @@ static int listen(struct socket *sock, int len) lock_sock(sk); - if (sock->state == SS_READY) - res = -EOPNOTSUPP; - else if (sock->state != SS_UNCONNECTED) + if (sock->state != SS_UNCONNECTED) res = -EINVAL; else { sock->state = SS_LISTENING; @@ -1510,10 +1511,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) lock_sock(sk); - if (sock->state == SS_READY) { - res = -EOPNOTSUPP; - goto exit; - } if (sock->state != SS_LISTENING) { res = -EINVAL; goto exit; @@ -1696,7 +1693,7 @@ static int setsockopt(struct socket *sock, res = tipc_set_portunreturnable(tport->ref, value); break; case TIPC_CONN_TIMEOUT: - tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value); + tipc_sk(sk)->conn_timeout = value; /* no need to set "res", since already 0 at this point */ break; default: @@ -1752,7 +1749,7 @@ static int getsockopt(struct socket *sock, res = tipc_portunreturnable(tport->ref, &value); break; case TIPC_CONN_TIMEOUT: - value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout); + value = tipc_sk(sk)->conn_timeout; /* no need to set "res", since already 0 at this point */ break; case TIPC_NODE_RECVQ_DEPTH: @@ -1790,11 +1787,11 @@ static const struct proto_ops msg_ops = { .bind = bind, .connect = connect, .socketpair = sock_no_socketpair, - .accept = accept, + .accept = sock_no_accept, .getname = get_name, .poll = poll, .ioctl = sock_no_ioctl, - .listen = listen, + .listen = sock_no_listen, .shutdown = shutdown, .setsockopt = setsockopt, .getsockopt = getsockopt, diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6cf7268..1983717 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -151,7 +151,7 @@ void tipc_subscr_report_overlap(struct subscription *sub, if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; - sub->event_cb(sub, found_lower, found_upper, event, port_ref, node); + subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } /** @@ -365,7 +365,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, subscr_terminate(subscriber); return NULL; } - sub->event_cb = subscr_send_event; INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->server_ref = subscriber->port_ref; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 45d89bf..4b06ef6 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -39,16 +39,11 @@ struct subscription; -typedef void (*tipc_subscr_event) (struct subscription *sub, - u32 found_lower, u32 found_upper, - u32 event, u32 port_ref, u32 node); - /** * struct subscription - TIPC network topology subscription object * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription - * @event_cb: routine invoked when a subscription event is detected * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscription_list: adjacent subscriptions in subscriber's subscription list @@ -61,7 +56,6 @@ struct subscription { struct tipc_name_seq seq; u32 timeout; u32 filter; - tipc_subscr_event event_cb; struct timer_list timer; struct list_head nameseq_list; struct list_head subscription_list; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ec68e1c..466fbcc 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); - UNIXCB(skb).cred = get_cred(scm->cred); + if (scm->cred) + UNIXCB(skb).cred = get_cred(scm->cred); UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1392,6 +1394,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen } /* + * Some apps rely on write() giving SCM_CREDENTIALS + * We include credentials if source or destination socket + * asserted SOCK_PASSCRED. + */ +static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, + const struct sock *other) +{ + if (UNIXCB(skb).cred) + return; + if (test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + UNIXCB(skb).pid = get_pid(task_tgid(current)); + UNIXCB(skb).cred = get_current_cred(); + } +} + +/* * Send AF_UNIX data. */ @@ -1538,6 +1558,7 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; @@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 58064d9..791ab2e 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -462,8 +462,8 @@ static struct xfrm_algo_desc ealg_list[] = { .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 288 } }, }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a026b0e..54a0dc2e 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,6 +212,11 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; + if (async && x->repl->check(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } + x->repl->advance(x, seq); x->curlft.bytes += skb->len; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index fc91ad7..f781b9a 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -70,26 +70,29 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) while ((scratch += len, dlen -= len) > 0) { skb_frag_t *frag; + struct page *page; err = -EMSGSIZE; if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) goto out; frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; - frag->page = alloc_page(GFP_ATOMIC); + page = alloc_page(GFP_ATOMIC); err = -ENOMEM; - if (!frag->page) + if (!page) goto out; + __skb_frag_set_page(frag, page); + len = PAGE_SIZE; if (dlen < len) len = dlen; - memcpy(page_address(frag->page), scratch, len); - frag->page_offset = 0; frag->size = len; + memcpy(skb_frag_address(frag), scratch, len); + skb->truesize += len; skb->data_len += len; skb->len += len; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 94fdcc7..552df27 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1349,14 +1349,16 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) BUG(); } xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); - memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry)); - xfrm_policy_put_afinfo(afinfo); - if (likely(xdst)) + if (likely(xdst)) { + memset(&xdst->u.rt6.rt6i_table, 0, + sizeof(*xdst) - sizeof(struct dst_entry)); xdst->flo.ops = &xfrm_bundle_fc_ops; - else + } else xdst = ERR_PTR(-ENOBUFS); + xfrm_policy_put_afinfo(afinfo); + return xdst; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0256b8a..d0a42df 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2927,7 +2927,7 @@ static int __net_init xfrm_user_net_init(struct net *net) if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ - rcu_assign_pointer(net->xfrm.nlsk, nlsk); + RCU_INIT_POINTER(net->xfrm.nlsk, nlsk); return 0; } @@ -2935,7 +2935,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) - rcu_assign_pointer(net->xfrm.nlsk, NULL); + RCU_INIT_POINTER(net->xfrm.nlsk, NULL); synchronize_net(); list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->xfrm.nlsk_stash); |