summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c4
-rw-r--r--net/802/stp.c4
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_core.c2
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/9p/trans_virtio.c17
-rw-r--r--net/atm/br2684.c9
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/aggregation.c293
-rw-r--r--net/batman-adv/aggregation.h46
-rw-r--r--net/batman-adv/bat_iv_ogm.c1170
-rw-r--r--net/batman-adv/bat_ogm.h35
-rw-r--r--net/batman-adv/bat_sysfs.c2
-rw-r--r--net/batman-adv/bitarray.c6
-rw-r--r--net/batman-adv/gateway_client.c10
-rw-r--r--net/batman-adv/hard-interface.c88
-rw-r--r--net/batman-adv/hard-interface.h1
-rw-r--r--net/batman-adv/hash.h25
-rw-r--r--net/batman-adv/main.c4
-rw-r--r--net/batman-adv/main.h8
-rw-r--r--net/batman-adv/originator.c21
-rw-r--r--net/batman-adv/packet.h19
-rw-r--r--net/batman-adv/routing.c669
-rw-r--r--net/batman-adv/routing.h17
-rw-r--r--net/batman-adv/send.c313
-rw-r--r--net/batman-adv/send.h9
-rw-r--r--net/batman-adv/soft-interface.c44
-rw-r--r--net/batman-adv/translation-table.c199
-rw-r--r--net/batman-adv/translation-table.h21
-rw-r--r--net/batman-adv/types.h5
-rw-r--r--net/batman-adv/unicast.c6
-rw-r--r--net/batman-adv/unicast.h2
-rw-r--r--net/batman-adv/vis.c10
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bridge/br_device.c7
-rw-r--r--net/bridge/br_fdb.c23
-rw-r--r--net/bridge/br_if.c53
-rw-r--r--net/bridge/br_input.c33
-rw-r--r--net/bridge/br_multicast.c21
-rw-r--r--net/bridge/br_notify.c7
-rw-r--r--net/bridge/br_private.h7
-rw-r--r--net/bridge/br_sysfs_br.c34
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/caif/caif_dev.c6
-rw-r--r--net/caif/cfcnfg.c38
-rw-r--r--net/caif/cfctrl.c23
-rw-r--r--net/caif/cfdbgl.c7
-rw-r--r--net/caif/cfdgml.c7
-rw-r--r--net/caif/cffrml.c7
-rw-r--r--net/caif/cfmuxl.c6
-rw-r--r--net/caif/cfrfml.c7
-rw-r--r--net/caif/cfserl.c7
-rw-r--r--net/caif/cfsrvl.c8
-rw-r--r--net/caif/cfutill.c7
-rw-r--r--net/caif/cfveil.c7
-rw-r--r--net/caif/cfvidl.c7
-rw-r--r--net/can/Kconfig11
-rw-r--r--net/can/Makefile3
-rw-r--r--net/can/af_can.c6
-rw-r--r--net/can/bcm.c53
-rw-r--r--net/can/gw.c959
-rw-r--r--net/ceph/ceph_common.c1
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/msgpool.c40
-rw-r--r--net/ceph/osd_client.c26
-rw-r--r--net/ceph/osdmap.c84
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c171
-rw-r--r--net/core/dev_addr_lists.c4
-rw-r--r--net/core/dst.c15
-rw-r--r--net/core/ethtool.c20
-rw-r--r--net/core/fib_rules.c8
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/flow.c36
-rw-r--r--net/core/kmap_skb.h2
-rw-r--r--net/core/link_watch.c9
-rw-r--r--net/core/neighbour.c48
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/netpoll.c8
-rw-r--r--net/core/pktgen.c3
-rw-r--r--net/core/rtnetlink.c1
-rw-r--r--net/core/scm.c12
-rw-r--r--net/core/secure_seq.c184
-rw-r--r--net/core/skbuff.c70
-rw-r--r--net/core/sock.c21
-rw-r--r--net/core/user_dma.c2
-rw-r--r--net/dcb/dcbnl.c30
-rw-r--r--net/dccp/ccids/ccid2.c84
-rw-r--r--net/dccp/ccids/ccid2.h6
-rw-r--r--net/dccp/dccp.h1
-rw-r--r--net/dccp/feat.c202
-rw-r--r--net/dccp/feat.h1
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c9
-rw-r--r--net/dccp/proto.c1
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/dsa/slave.c3
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ieee802154/6lowpan.c891
-rw-r--r--net/ieee802154/6lowpan.h212
-rw-r--r--net/ieee802154/Kconfig6
-rw-r--r--net/ieee802154/Makefile8
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/fib_semantics.c10
-rw-r--r--net/ipv4/fib_trie.c12
-rw-r--r--net/ipv4/gre.c4
-rw-r--r--net/ipv4/igmp.c16
-rw-r--r--net/ipv4/inet_hashtables.c1
-rw-r--r--net/ipv4/inet_lro.c2
-rw-r--r--net/ipv4/inetpeer.c1
-rw-r--r--net/ipv4/ip_output.c18
-rw-r--r--net/ipv4/ip_sockglue.c9
-rw-r--r--net/ipv4/ipip.c10
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/netfilter.c18
-rw-r--r--net/ipv4/netfilter/ip_queue.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c24
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c36
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c4
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c38
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c56
-rw-r--r--net/ipv4/tcp_input.c91
-rw-r--r--net/ipv4/tcp_ipv4.c69
-rw-r--r--net/ipv4/tcp_output.c72
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv6/addrconf.c87
-rw-r--r--net/ipv6/datagram.c16
-rw-r--r--net/ipv6/exthdrs.c7
-rw-r--r--net/ipv6/icmp.c25
-rw-r--r--net/ipv6/inet6_connection_sock.c9
-rw-r--r--net/ipv6/inet6_hashtables.c1
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_flowlabel.c8
-rw-r--r--net/ipv6/ip6_output.c20
-rw-r--r--net/ipv6/ip6_tunnel.c54
-rw-r--r--net/ipv6/ip6mr.c8
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/ndisc.c31
-rw-r--r--net/ipv6/netfilter/ip6_queue.c12
-rw-r--r--net/ipv6/raw.c12
-rw-r--r--net/ipv6/route.c72
-rw-r--r--net/ipv6/sit.c15
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c51
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/irda/irlan/irlan_eth.c2
-rw-r--r--net/irda/irsysctl.c6
-rw-r--r--net/irda/qos.c6
-rw-r--r--net/iucv/Kconfig14
-rw-r--r--net/iucv/af_iucv.c868
-rw-r--r--net/iucv/iucv.c23
-rw-r--r--net/lapb/lapb_iface.c29
-rw-r--r--net/mac80211/agg-rx.c4
-rw-r--r--net/mac80211/cfg.c8
-rw-r--r--net/mac80211/ibss.c6
-rw-r--r--net/mac80211/iface.c6
-rw-r--r--net/mac80211/mesh_pathtbl.c4
-rw-r--r--net/mac80211/sta_info.c8
-rw-r--r--net/netfilter/core.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c12
-rw-r--r--net/netfilter/nf_conntrack_ecache.c8
-rw-r--r--net/netfilter/nf_conntrack_extend.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c6
-rw-r--r--net/netfilter/nf_conntrack_pptp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c6
-rw-r--r--net/netfilter/nf_log.c10
-rw-r--r--net/netfilter/nf_queue.c7
-rw-r--r--net/netfilter/nfnetlink.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c4
-rw-r--r--net/netfilter/xt_rateest.c9
-rw-r--r--net/netlabel/Makefile2
-rw-r--r--net/netlabel/netlabel_addrlist.c2
-rw-r--r--net/netlabel/netlabel_addrlist.h2
-rw-r--r--net/netlabel/netlabel_cipso_v4.c2
-rw-r--r--net/netlabel/netlabel_cipso_v4.h2
-rw-r--r--net/netlabel/netlabel_domainhash.c8
-rw-r--r--net/netlabel/netlabel_domainhash.h2
-rw-r--r--net/netlabel/netlabel_kapi.c22
-rw-r--r--net/netlabel/netlabel_mgmt.c2
-rw-r--r--net/netlabel/netlabel_mgmt.h2
-rw-r--r--net/netlabel/netlabel_unlabeled.c8
-rw-r--r--net/netlabel/netlabel_unlabeled.h2
-rw-r--r--net/netlabel/netlabel_user.c2
-rw-r--r--net/netlabel/netlabel_user.h2
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/packet/af_packet.c953
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/phonet/pn_dev.c6
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/Kconfig1
-rw-r--r--net/rds/ib_rdma.c112
-rw-r--r--net/rds/iw_rdma.c13
-rw-r--r--net/rds/xlist.h80
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/cls_rsvp.h27
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_sfb.c13
-rw-r--r--net/sched/sch_sfq.c7
-rw-r--r--net/sctp/associola.c1
-rw-r--r--net/sctp/outqueue.c4
-rw-r--r--net/sctp/sm_make_chunk.c1
-rw-r--r--net/sctp/sm_sideeffect.c5
-rw-r--r--net/sctp/sm_statefuns.c11
-rw-r--r--net/socket.c79
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c4
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/tipc/bcast.c111
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/bearer.h4
-rw-r--r--net/tipc/config.h1
-rw-r--r--net/tipc/discover.c6
-rw-r--r--net/tipc/eth_media.c32
-rw-r--r--net/tipc/link.c111
-rw-r--r--net/tipc/link.h1
-rw-r--r--net/tipc/name_distr.c35
-rw-r--r--net/tipc/net.c11
-rw-r--r--net/tipc/node.c45
-rw-r--r--net/tipc/node.h10
-rw-r--r--net/tipc/socket.c51
-rw-r--r--net/tipc/subscr.c3
-rw-r--r--net/tipc/subscr.h6
-rw-r--r--net/unix/af_unix.c24
-rw-r--r--net/xfrm/xfrm_algo.c4
-rw-r--r--net/xfrm/xfrm_input.c5
-rw-r--r--net/xfrm/xfrm_ipcomp.c11
-rw-r--r--net/xfrm/xfrm_policy.c10
-rw-r--r--net/xfrm/xfrm_user.c4
247 files changed, 7627 insertions, 3086 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 1610295..070bf44 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -553,7 +553,7 @@ static void garp_release_port(struct net_device *dev)
if (rtnl_dereference(port->applicants[i]))
return;
}
- rcu_assign_pointer(dev->garp_port, NULL);
+ RCU_INIT_POINTER(dev->garp_port, NULL);
kfree_rcu(port, rcu);
}
@@ -605,7 +605,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
ASSERT_RTNL();
- rcu_assign_pointer(port->applicants[appl->type], NULL);
+ RCU_INIT_POINTER(port->applicants[appl->type], NULL);
/* Delete timer and generate a final TRANSMIT_PDU event to flush out
* all pending messages before the applicant is gone. */
diff --git a/net/802/stp.c b/net/802/stp.c
index 978c30b..0e136ef 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -88,9 +88,9 @@ void stp_proto_unregister(const struct stp_proto *proto)
{
mutex_lock(&stp_proto_mutex);
if (is_zero_ether_addr(proto->group_address))
- rcu_assign_pointer(stp_proto, NULL);
+ RCU_INIT_POINTER(stp_proto, NULL);
else
- rcu_assign_pointer(garp_protos[proto->group_address[5] -
+ RCU_INIT_POINTER(garp_protos[proto->group_address[5] -
GARP_ADDR_MIN], NULL);
synchronize_rcu();
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8970ba1..5471628 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -133,7 +133,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
if (grp->nr_vlans == 0) {
vlan_gvrp_uninit_applicant(real_dev);
- rcu_assign_pointer(real_dev->vlgrp, NULL);
+ RCU_INIT_POINTER(real_dev->vlgrp, NULL);
/* Free the group, after all cpu's are done. */
call_rcu(&grp->rcu, vlan_rcu_free);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 5f27f8e..f1f2f7b 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
if (unlikely(!skb))
goto err_free;
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
return skb;
err_free:
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9d40a07..c8cf939 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -610,7 +610,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev,
struct ethtool_cmd *cmd)
{
const struct vlan_dev_info *vlan = vlan_dev_info(dev);
- return dev_ethtool_get_settings(vlan->real_dev, cmd);
+
+ return __ethtool_get_settings(vlan->real_dev, cmd);
}
static void vlan_ethtool_get_drvinfo(struct net_device *dev,
@@ -674,7 +675,6 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = vlan_dev_set_mac_address,
.ndo_set_rx_mode = vlan_dev_set_rx_mode,
- .ndo_set_multicast_list = vlan_dev_set_rx_mode,
.ndo_change_rx_flags = vlan_dev_change_rx_flags,
.ndo_do_ioctl = vlan_dev_ioctl,
.ndo_neigh_setup = vlan_dev_neigh_setup,
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 175b513..e317583 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
int in, out, inp, outp;
struct virtio_chan *chan = client->trans;
- char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
unsigned long flags;
size_t pdata_off = 0;
struct trans_rpage_info *rpinfo = NULL;
@@ -346,7 +345,8 @@ req_retry_pinned:
* Arrange in such a way that server places header in the
* alloced memory and payload onto the user buffer.
*/
- inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+ inp = pack_sg_list(chan->sg, out,
+ VIRTQUEUE_NUM, req->rc->sdata, 11);
/*
* Running executables in the filesystem may result in
* a read request with kernel buffer as opposed to user buffer.
@@ -366,8 +366,8 @@ req_retry_pinned:
}
in += inp;
} else {
- in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
- req->rc->capacity);
+ in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
+ req->rc->sdata, req->rc->capacity);
}
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
@@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = {
.close = p9_virtio_close,
.request = p9_virtio_request,
.cancel = p9_virtio_cancel,
- .maxsize = PAGE_SIZE*VIRTQUEUE_NUM,
+
+ /*
+ * We leave one entry for input and one entry for response
+ * headers. We also skip one more entry to accomodate, address
+ * that are not at page boundary, that can result in an extra
+ * page in zero copy.
+ */
+ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
.pref = P9_TRANS_PREF_PAYLOAD_SEP,
.def = 0,
.owner = THIS_MODULE,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 2252c20..d07223c 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
if (brdev->payload == p_bridged) {
skb_push(skb, 2);
memset(skb->data, 0, 2);
- } else { /* p_routed */
- skb_pull(skb, ETH_HLEN);
}
}
skb_debug(skb);
@@ -560,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
spin_unlock_irqrestore(&rq->lock, flags);
skb_queue_walk_safe(&queue, skb, tmp) {
- struct net_device *dev = skb->dev;
+ struct net_device *dev;
+
+ br2684_push(atmvcc, skb);
+ dev = skb->dev;
dev->stats.rx_bytes -= skb->len;
dev->stats.rx_packets--;
-
- br2684_push(atmvcc, skb);
}
/* initialize netdev carrier state */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 215c9fa..f1964ca 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -643,7 +643,7 @@ static const struct net_device_ops lec_netdev_ops = {
.ndo_start_xmit = lec_start_xmit,
.ndo_change_mtu = lec_change_mtu,
.ndo_tx_timeout = lec_tx_timeout,
- .ndo_set_multicast_list = lec_set_multicast_list,
+ .ndo_set_rx_mode = lec_set_multicast_list,
};
static const unsigned char lec_ctrl_magic[] = {
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 2de93d0..ce68611 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -19,8 +19,8 @@
#
obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
-batman-adv-y += aggregation.o
batman-adv-y += bat_debugfs.o
+batman-adv-y += bat_iv_ogm.o
batman-adv-y += bat_sysfs.o
batman-adv-y += bitarray.o
batman-adv-y += gateway_client.o
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
deleted file mode 100644
index 69467fe..0000000
--- a/net/batman-adv/aggregation.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- *
- */
-
-#include "main.h"
-#include "translation-table.h"
-#include "aggregation.h"
-#include "send.h"
-#include "routing.h"
-#include "hard-interface.h"
-
-/* return true if new_packet can be aggregated with forw_packet */
-static bool can_aggregate_with(const struct batman_packet *new_batman_packet,
- struct bat_priv *bat_priv,
- int packet_len,
- unsigned long send_time,
- bool directlink,
- const struct hard_iface *if_incoming,
- const struct forw_packet *forw_packet)
-{
- struct batman_packet *batman_packet =
- (struct batman_packet *)forw_packet->skb->data;
- int aggregated_bytes = forw_packet->packet_len + packet_len;
- struct hard_iface *primary_if = NULL;
- bool res = false;
-
- /**
- * we can aggregate the current packet to this aggregated packet
- * if:
- *
- * - the send time is within our MAX_AGGREGATION_MS time
- * - the resulting packet wont be bigger than
- * MAX_AGGREGATION_BYTES
- */
-
- if (time_before(send_time, forw_packet->send_time) &&
- time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
- forw_packet->send_time) &&
- (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
-
- /**
- * check aggregation compatibility
- * -> direct link packets are broadcasted on
- * their interface only
- * -> aggregate packet if the current packet is
- * a "global" packet as well as the base
- * packet
- */
-
- primary_if = primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- /* packets without direct link flag and high TTL
- * are flooded through the net */
- if ((!directlink) &&
- (!(batman_packet->flags & DIRECTLINK)) &&
- (batman_packet->ttl != 1) &&
-
- /* own packets originating non-primary
- * interfaces leave only that interface */
- ((!forw_packet->own) ||
- (forw_packet->if_incoming == primary_if))) {
- res = true;
- goto out;
- }
-
- /* if the incoming packet is sent via this one
- * interface only - we still can aggregate */
- if ((directlink) &&
- (new_batman_packet->ttl == 1) &&
- (forw_packet->if_incoming == if_incoming) &&
-
- /* packets from direct neighbors or
- * own secondary interface packets
- * (= secondary interface packets in general) */
- (batman_packet->flags & DIRECTLINK ||
- (forw_packet->own &&
- forw_packet->if_incoming != primary_if))) {
- res = true;
- goto out;
- }
- }
-
-out:
- if (primary_if)
- hardif_free_ref(primary_if);
- return res;
-}
-
-/* create a new aggregated packet and add this packet to it */
-static void new_aggregated_packet(const unsigned char *packet_buff,
- int packet_len, unsigned long send_time,
- bool direct_link,
- struct hard_iface *if_incoming,
- int own_packet)
-{
- struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- struct forw_packet *forw_packet_aggr;
- unsigned char *skb_buff;
-
- if (!atomic_inc_not_zero(&if_incoming->refcount))
- return;
-
- /* own packet should always be scheduled */
- if (!own_packet) {
- if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "batman packet queue full\n");
- goto out;
- }
- }
-
- forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
- if (!forw_packet_aggr) {
- if (!own_packet)
- atomic_inc(&bat_priv->batman_queue_left);
- goto out;
- }
-
- if ((atomic_read(&bat_priv->aggregated_ogms)) &&
- (packet_len < MAX_AGGREGATION_BYTES))
- forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
- sizeof(struct ethhdr));
- else
- forw_packet_aggr->skb = dev_alloc_skb(packet_len +
- sizeof(struct ethhdr));
-
- if (!forw_packet_aggr->skb) {
- if (!own_packet)
- atomic_inc(&bat_priv->batman_queue_left);
- kfree(forw_packet_aggr);
- goto out;
- }
- skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
-
- INIT_HLIST_NODE(&forw_packet_aggr->list);
-
- skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
- forw_packet_aggr->packet_len = packet_len;
- memcpy(skb_buff, packet_buff, packet_len);
-
- forw_packet_aggr->own = own_packet;
- forw_packet_aggr->if_incoming = if_incoming;
- forw_packet_aggr->num_packets = 0;
- forw_packet_aggr->direct_link_flags = NO_FLAGS;
- forw_packet_aggr->send_time = send_time;
-
- /* save packet direct link flag status */
- if (direct_link)
- forw_packet_aggr->direct_link_flags |= 1;
-
- /* add new packet to packet list */
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
- hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
- /* start timer for this packet */
- INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
- send_outstanding_bat_packet);
- queue_delayed_work(bat_event_workqueue,
- &forw_packet_aggr->delayed_work,
- send_time - jiffies);
-
- return;
-out:
- hardif_free_ref(if_incoming);
-}
-
-/* aggregate a new packet into the existing aggregation */
-static void aggregate(struct forw_packet *forw_packet_aggr,
- const unsigned char *packet_buff, int packet_len,
- bool direct_link)
-{
- unsigned char *skb_buff;
-
- skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
- memcpy(skb_buff, packet_buff, packet_len);
- forw_packet_aggr->packet_len += packet_len;
- forw_packet_aggr->num_packets++;
-
- /* save packet direct link flag status */
- if (direct_link)
- forw_packet_aggr->direct_link_flags |=
- (1 << forw_packet_aggr->num_packets);
-}
-
-void add_bat_packet_to_list(struct bat_priv *bat_priv,
- unsigned char *packet_buff, int packet_len,
- struct hard_iface *if_incoming, int own_packet,
- unsigned long send_time)
-{
- /**
- * _aggr -> pointer to the packet we want to aggregate with
- * _pos -> pointer to the position in the queue
- */
- struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
- struct hlist_node *tmp_node;
- struct batman_packet *batman_packet =
- (struct batman_packet *)packet_buff;
- bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0;
-
- /* find position for the packet in the forward queue */
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
- /* own packets are not to be aggregated */
- if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
- hlist_for_each_entry(forw_packet_pos, tmp_node,
- &bat_priv->forw_bat_list, list) {
- if (can_aggregate_with(batman_packet,
- bat_priv,
- packet_len,
- send_time,
- direct_link,
- if_incoming,
- forw_packet_pos)) {
- forw_packet_aggr = forw_packet_pos;
- break;
- }
- }
- }
-
- /* nothing to aggregate with - either aggregation disabled or no
- * suitable aggregation packet found */
- if (!forw_packet_aggr) {
- /* the following section can run without the lock */
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
- /**
- * if we could not aggregate this packet with one of the others
- * we hold it back for a while, so that it might be aggregated
- * later on
- */
- if ((!own_packet) &&
- (atomic_read(&bat_priv->aggregated_ogms)))
- send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
-
- new_aggregated_packet(packet_buff, packet_len,
- send_time, direct_link,
- if_incoming, own_packet);
- } else {
- aggregate(forw_packet_aggr,
- packet_buff, packet_len,
- direct_link);
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
- }
-}
-
-/* unpack the aggregated packets and process them one by one */
-void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
- unsigned char *packet_buff, int packet_len,
- struct hard_iface *if_incoming)
-{
- struct batman_packet *batman_packet;
- int buff_pos = 0;
- unsigned char *tt_buff;
-
- batman_packet = (struct batman_packet *)packet_buff;
-
- do {
- /* network to host order for our 32bit seqno and the
- orig_interval */
- batman_packet->seqno = ntohl(batman_packet->seqno);
- batman_packet->tt_crc = ntohs(batman_packet->tt_crc);
-
- tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
-
- receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming);
-
- buff_pos += BAT_PACKET_LEN +
- tt_len(batman_packet->tt_num_changes);
-
- batman_packet = (struct batman_packet *)
- (packet_buff + buff_pos);
- } while (aggregated_packet(buff_pos, packet_len,
- batman_packet->tt_num_changes));
-}
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
deleted file mode 100644
index 216337b..0000000
--- a/net/batman-adv/aggregation.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
- *
- * Marek Lindner, Simon Wunderlich
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
- *
- */
-
-#ifndef _NET_BATMAN_ADV_AGGREGATION_H_
-#define _NET_BATMAN_ADV_AGGREGATION_H_
-
-#include "main.h"
-
-/* is there another aggregated packet here? */
-static inline int aggregated_packet(int buff_pos, int packet_len,
- int tt_num_changes)
-{
- int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes *
- sizeof(struct tt_change));
-
- return (next_buff_pos <= packet_len) &&
- (next_buff_pos <= MAX_AGGREGATION_BYTES);
-}
-
-void add_bat_packet_to_list(struct bat_priv *bat_priv,
- unsigned char *packet_buff, int packet_len,
- struct hard_iface *if_incoming, int own_packet,
- unsigned long send_time);
-void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
- unsigned char *packet_buff, int packet_len,
- struct hard_iface *if_incoming);
-
-#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
new file mode 100644
index 0000000..3512e25
--- /dev/null
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -0,0 +1,1170 @@
+/*
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "bat_ogm.h"
+#include "translation-table.h"
+#include "ring_buffer.h"
+#include "originator.h"
+#include "routing.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+#include "hard-interface.h"
+#include "send.h"
+
+void bat_ogm_init(struct hard_iface *hard_iface)
+{
+ struct batman_ogm_packet *batman_ogm_packet;
+
+ hard_iface->packet_len = BATMAN_OGM_LEN;
+ hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
+
+ batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
+ batman_ogm_packet->packet_type = BAT_OGM;
+ batman_ogm_packet->version = COMPAT_VERSION;
+ batman_ogm_packet->flags = NO_FLAGS;
+ batman_ogm_packet->ttl = 2;
+ batman_ogm_packet->tq = TQ_MAX_VALUE;
+ batman_ogm_packet->tt_num_changes = 0;
+ batman_ogm_packet->ttvn = 0;
+}
+
+void bat_ogm_init_primary(struct hard_iface *hard_iface)
+{
+ struct batman_ogm_packet *batman_ogm_packet;
+
+ batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
+ batman_ogm_packet->flags = PRIMARIES_FIRST_HOP;
+ batman_ogm_packet->ttl = TTL;
+}
+
+void bat_ogm_update_mac(struct hard_iface *hard_iface)
+{
+ struct batman_ogm_packet *batman_ogm_packet;
+
+ batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
+ memcpy(batman_ogm_packet->orig,
+ hard_iface->net_dev->dev_addr, ETH_ALEN);
+ memcpy(batman_ogm_packet->prev_sender,
+ hard_iface->net_dev->dev_addr, ETH_ALEN);
+}
+
+/* when do we schedule our own ogm to be sent */
+static unsigned long bat_ogm_emit_send_time(const struct bat_priv *bat_priv)
+{
+ return jiffies + msecs_to_jiffies(
+ atomic_read(&bat_priv->orig_interval) -
+ JITTER + (random32() % 2*JITTER));
+}
+
+/* when do we schedule a ogm packet to be sent */
+static unsigned long bat_ogm_fwd_send_time(void)
+{
+ return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
+}
+
+/* apply hop penalty for a normal link */
+static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv)
+{
+ int hop_penalty = atomic_read(&bat_priv->hop_penalty);
+ return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE);
+}
+
+/* is there another aggregated packet here? */
+static int bat_ogm_aggr_packet(int buff_pos, int packet_len,
+ int tt_num_changes)
+{
+ int next_buff_pos = buff_pos + BATMAN_OGM_LEN + tt_len(tt_num_changes);
+
+ return (next_buff_pos <= packet_len) &&
+ (next_buff_pos <= MAX_AGGREGATION_BYTES);
+}
+
+/* send a batman ogm to a given interface */
+static void bat_ogm_send_to_if(struct forw_packet *forw_packet,
+ struct hard_iface *hard_iface)
+{
+ struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ char *fwd_str;
+ uint8_t packet_num;
+ int16_t buff_pos;
+ struct batman_ogm_packet *batman_ogm_packet;
+ struct sk_buff *skb;
+
+ if (hard_iface->if_status != IF_ACTIVE)
+ return;
+
+ packet_num = 0;
+ buff_pos = 0;
+ batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data;
+
+ /* adjust all flags and log packets */
+ while (bat_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
+ batman_ogm_packet->tt_num_changes)) {
+
+ /* we might have aggregated direct link packets with an
+ * ordinary base packet */
+ if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
+ (forw_packet->if_incoming == hard_iface))
+ batman_ogm_packet->flags |= DIRECTLINK;
+ else
+ batman_ogm_packet->flags &= ~DIRECTLINK;
+
+ fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ?
+ "Sending own" :
+ "Forwarding"));
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d,"
+ " IDF %s, ttvn %d) on interface %s [%pM]\n",
+ fwd_str, (packet_num > 0 ? "aggregated " : ""),
+ batman_ogm_packet->orig,
+ ntohl(batman_ogm_packet->seqno),
+ batman_ogm_packet->tq, batman_ogm_packet->ttl,
+ (batman_ogm_packet->flags & DIRECTLINK ?
+ "on" : "off"),
+ batman_ogm_packet->ttvn, hard_iface->net_dev->name,
+ hard_iface->net_dev->dev_addr);
+
+ buff_pos += BATMAN_OGM_LEN +
+ tt_len(batman_ogm_packet->tt_num_changes);
+ packet_num++;
+ batman_ogm_packet = (struct batman_ogm_packet *)
+ (forw_packet->skb->data + buff_pos);
+ }
+
+ /* create clone because function is called more than once */
+ skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
+ if (skb)
+ send_skb_packet(skb, hard_iface, broadcast_addr);
+}
+
+/* send a batman ogm packet */
+void bat_ogm_emit(struct forw_packet *forw_packet)
+{
+ struct hard_iface *hard_iface;
+ struct net_device *soft_iface;
+ struct bat_priv *bat_priv;
+ struct hard_iface *primary_if = NULL;
+ struct batman_ogm_packet *batman_ogm_packet;
+ unsigned char directlink;
+
+ batman_ogm_packet = (struct batman_ogm_packet *)
+ (forw_packet->skb->data);
+ directlink = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0);
+
+ if (!forw_packet->if_incoming) {
+ pr_err("Error - can't forward packet: incoming iface not "
+ "specified\n");
+ goto out;
+ }
+
+ soft_iface = forw_packet->if_incoming->soft_iface;
+ bat_priv = netdev_priv(soft_iface);
+
+ if (forw_packet->if_incoming->if_status != IF_ACTIVE)
+ goto out;
+
+ primary_if = primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+
+ /* multihomed peer assumed */
+ /* non-primary OGMs are only broadcasted on their interface */
+ if ((directlink && (batman_ogm_packet->ttl == 1)) ||
+ (forw_packet->own && (forw_packet->if_incoming != primary_if))) {
+
+ /* FIXME: what about aggregated packets ? */
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "%s packet (originator %pM, seqno %d, TTL %d) "
+ "on interface %s [%pM]\n",
+ (forw_packet->own ? "Sending own" : "Forwarding"),
+ batman_ogm_packet->orig,
+ ntohl(batman_ogm_packet->seqno),
+ batman_ogm_packet->ttl,
+ forw_packet->if_incoming->net_dev->name,
+ forw_packet->if_incoming->net_dev->dev_addr);
+
+ /* skb is only used once and than forw_packet is free'd */
+ send_skb_packet(forw_packet->skb, forw_packet->if_incoming,
+ broadcast_addr);
+ forw_packet->skb = NULL;
+
+ goto out;
+ }
+
+ /* broadcast on every interface */
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+ if (hard_iface->soft_iface != soft_iface)
+ continue;
+
+ bat_ogm_send_to_if(forw_packet, hard_iface);
+ }
+ rcu_read_unlock();
+
+out:
+ if (primary_if)
+ hardif_free_ref(primary_if);
+}
+
+/* return true if new_packet can be aggregated with forw_packet */
+static bool bat_ogm_can_aggregate(const struct batman_ogm_packet
+ *new_batman_ogm_packet,
+ struct bat_priv *bat_priv,
+ int packet_len, unsigned long send_time,
+ bool directlink,
+ const struct hard_iface *if_incoming,
+ const struct forw_packet *forw_packet)
+{
+ struct batman_ogm_packet *batman_ogm_packet;
+ int aggregated_bytes = forw_packet->packet_len + packet_len;
+ struct hard_iface *primary_if = NULL;
+ bool res = false;
+
+ batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data;
+
+ /**
+ * we can aggregate the current packet to this aggregated packet
+ * if:
+ *
+ * - the send time is within our MAX_AGGREGATION_MS time
+ * - the resulting packet wont be bigger than
+ * MAX_AGGREGATION_BYTES
+ */
+
+ if (time_before(send_time, forw_packet->send_time) &&
+ time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
+ forw_packet->send_time) &&
+ (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
+
+ /**
+ * check aggregation compatibility
+ * -> direct link packets are broadcasted on
+ * their interface only
+ * -> aggregate packet if the current packet is
+ * a "global" packet as well as the base
+ * packet
+ */
+
+ primary_if = primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ goto out;
+
+ /* packets without direct link flag and high TTL
+ * are flooded through the net */
+ if ((!directlink) &&
+ (!(batman_ogm_packet->flags & DIRECTLINK)) &&
+ (batman_ogm_packet->ttl != 1) &&
+
+ /* own packets originating non-primary
+ * interfaces leave only that interface */
+ ((!forw_packet->own) ||
+ (forw_packet->if_incoming == primary_if))) {
+ res = true;
+ goto out;
+ }
+
+ /* if the incoming packet is sent via this one
+ * interface only - we still can aggregate */
+ if ((directlink) &&
+ (new_batman_ogm_packet->ttl == 1) &&
+ (forw_packet->if_incoming == if_incoming) &&
+
+ /* packets from direct neighbors or
+ * own secondary interface packets
+ * (= secondary interface packets in general) */
+ (batman_ogm_packet->flags & DIRECTLINK ||
+ (forw_packet->own &&
+ forw_packet->if_incoming != primary_if))) {
+ res = true;
+ goto out;
+ }
+ }
+
+out:
+ if (primary_if)
+ hardif_free_ref(primary_if);
+ return res;
+}
+
+/* create a new aggregated packet and add this packet to it */
+static void bat_ogm_aggregate_new(const unsigned char *packet_buff,
+ int packet_len, unsigned long send_time,
+ bool direct_link,
+ struct hard_iface *if_incoming,
+ int own_packet)
+{
+ struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct forw_packet *forw_packet_aggr;
+ unsigned char *skb_buff;
+
+ if (!atomic_inc_not_zero(&if_incoming->refcount))
+ return;
+
+ /* own packet should always be scheduled */
+ if (!own_packet) {
+ if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "batman packet queue full\n");
+ goto out;
+ }
+ }
+
+ forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
+ if (!forw_packet_aggr) {
+ if (!own_packet)
+ atomic_inc(&bat_priv->batman_queue_left);
+ goto out;
+ }
+
+ if ((atomic_read(&bat_priv->aggregated_ogms)) &&
+ (packet_len < MAX_AGGREGATION_BYTES))
+ forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
+ sizeof(struct ethhdr));
+ else
+ forw_packet_aggr->skb = dev_alloc_skb(packet_len +
+ sizeof(struct ethhdr));
+
+ if (!forw_packet_aggr->skb) {
+ if (!own_packet)
+ atomic_inc(&bat_priv->batman_queue_left);
+ kfree(forw_packet_aggr);
+ goto out;
+ }
+ skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
+
+ INIT_HLIST_NODE(&forw_packet_aggr->list);
+
+ skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
+ forw_packet_aggr->packet_len = packet_len;
+ memcpy(skb_buff, packet_buff, packet_len);
+
+ forw_packet_aggr->own = own_packet;
+ forw_packet_aggr->if_incoming = if_incoming;
+ forw_packet_aggr->num_packets = 0;
+ forw_packet_aggr->direct_link_flags = NO_FLAGS;
+ forw_packet_aggr->send_time = send_time;
+
+ /* save packet direct link flag status */
+ if (direct_link)
+ forw_packet_aggr->direct_link_flags |= 1;
+
+ /* add new packet to packet list */
+ spin_lock_bh(&bat_priv->forw_bat_list_lock);
+ hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
+ spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ /* start timer for this packet */
+ INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
+ send_outstanding_bat_ogm_packet);
+ queue_delayed_work(bat_event_workqueue,
+ &forw_packet_aggr->delayed_work,
+ send_time - jiffies);
+
+ return;
+out:
+ hardif_free_ref(if_incoming);
+}
+
+/* aggregate a new packet into the existing ogm packet */
+static void bat_ogm_aggregate(struct forw_packet *forw_packet_aggr,
+ const unsigned char *packet_buff,
+ int packet_len, bool direct_link)
+{
+ unsigned char *skb_buff;
+
+ skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
+ memcpy(skb_buff, packet_buff, packet_len);
+ forw_packet_aggr->packet_len += packet_len;
+ forw_packet_aggr->num_packets++;
+
+ /* save packet direct link flag status */
+ if (direct_link)
+ forw_packet_aggr->direct_link_flags |=
+ (1 << forw_packet_aggr->num_packets);
+}
+
+static void bat_ogm_queue_add(struct bat_priv *bat_priv,
+ unsigned char *packet_buff,
+ int packet_len, struct hard_iface *if_incoming,
+ int own_packet, unsigned long send_time)
+{
+ /**
+ * _aggr -> pointer to the packet we want to aggregate with
+ * _pos -> pointer to the position in the queue
+ */
+ struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
+ struct hlist_node *tmp_node;
+ struct batman_ogm_packet *batman_ogm_packet;
+ bool direct_link;
+
+ batman_ogm_packet = (struct batman_ogm_packet *)packet_buff;
+ direct_link = batman_ogm_packet->flags & DIRECTLINK ? 1 : 0;
+
+ /* find position for the packet in the forward queue */
+ spin_lock_bh(&bat_priv->forw_bat_list_lock);
+ /* own packets are not to be aggregated */
+ if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
+ hlist_for_each_entry(forw_packet_pos, tmp_node,
+ &bat_priv->forw_bat_list, list) {
+ if (bat_ogm_can_aggregate(batman_ogm_packet,
+ bat_priv, packet_len,
+ send_time, direct_link,
+ if_incoming,
+ forw_packet_pos)) {
+ forw_packet_aggr = forw_packet_pos;
+ break;
+ }
+ }
+ }
+
+ /* nothing to aggregate with - either aggregation disabled or no
+ * suitable aggregation packet found */
+ if (!forw_packet_aggr) {
+ /* the following section can run without the lock */
+ spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ /**
+ * if we could not aggregate this packet with one of the others
+ * we hold it back for a while, so that it might be aggregated
+ * later on
+ */
+ if ((!own_packet) &&
+ (atomic_read(&bat_priv->aggregated_ogms)))
+ send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
+
+ bat_ogm_aggregate_new(packet_buff, packet_len,
+ send_time, direct_link,
+ if_incoming, own_packet);
+ } else {
+ bat_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len,
+ direct_link);
+ spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+ }
+}
+
+static void bat_ogm_forward(struct orig_node *orig_node,
+ const struct ethhdr *ethhdr,
+ struct batman_ogm_packet *batman_ogm_packet,
+ int directlink, struct hard_iface *if_incoming)
+{
+ struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct neigh_node *router;
+ uint8_t in_tq, in_ttl, tq_avg = 0;
+ uint8_t tt_num_changes;
+
+ if (batman_ogm_packet->ttl <= 1) {
+ bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n");
+ return;
+ }
+
+ router = orig_node_get_router(orig_node);
+
+ in_tq = batman_ogm_packet->tq;
+ in_ttl = batman_ogm_packet->ttl;
+ tt_num_changes = batman_ogm_packet->tt_num_changes;
+
+ batman_ogm_packet->ttl--;
+ memcpy(batman_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
+
+ /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast
+ * of our best tq value */
+ if (router && router->tq_avg != 0) {
+
+ /* rebroadcast ogm of best ranking neighbor as is */
+ if (!compare_eth(router->addr, ethhdr->h_source)) {
+ batman_ogm_packet->tq = router->tq_avg;
+
+ if (router->last_ttl)
+ batman_ogm_packet->ttl = router->last_ttl - 1;
+ }
+
+ tq_avg = router->tq_avg;
+ }
+
+ if (router)
+ neigh_node_free_ref(router);
+
+ /* apply hop penalty */
+ batman_ogm_packet->tq = hop_penalty(batman_ogm_packet->tq, bat_priv);
+
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Forwarding packet: tq_orig: %i, tq_avg: %i, "
+ "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n",
+ in_tq, tq_avg, batman_ogm_packet->tq, in_ttl - 1,
+ batman_ogm_packet->ttl);
+
+ batman_ogm_packet->seqno = htonl(batman_ogm_packet->seqno);
+ batman_ogm_packet->tt_crc = htons(batman_ogm_packet->tt_crc);
+
+ /* switch of primaries first hop flag when forwarding */
+ batman_ogm_packet->flags &= ~PRIMARIES_FIRST_HOP;
+ if (directlink)
+ batman_ogm_packet->flags |= DIRECTLINK;
+ else
+ batman_ogm_packet->flags &= ~DIRECTLINK;
+
+ bat_ogm_queue_add(bat_priv, (unsigned char *)batman_ogm_packet,
+ BATMAN_OGM_LEN + tt_len(tt_num_changes),
+ if_incoming, 0, bat_ogm_fwd_send_time());
+}
+
+void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes)
+{
+ struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+ struct batman_ogm_packet *batman_ogm_packet;
+ struct hard_iface *primary_if;
+ int vis_server;
+
+ vis_server = atomic_read(&bat_priv->vis_mode);
+ primary_if = primary_if_get_selected(bat_priv);
+
+ batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
+
+ /* change sequence number to network order */
+ batman_ogm_packet->seqno =
+ htonl((uint32_t)atomic_read(&hard_iface->seqno));
+
+ batman_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn);
+ batman_ogm_packet->tt_crc = htons((uint16_t)
+ atomic_read(&bat_priv->tt_crc));
+ if (tt_num_changes >= 0)
+ batman_ogm_packet->tt_num_changes = tt_num_changes;
+
+ if (vis_server == VIS_TYPE_SERVER_SYNC)
+ batman_ogm_packet->flags |= VIS_SERVER;
+ else
+ batman_ogm_packet->flags &= ~VIS_SERVER;
+
+ if ((hard_iface == primary_if) &&
+ (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
+ batman_ogm_packet->gw_flags =
+ (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
+ else
+ batman_ogm_packet->gw_flags = NO_FLAGS;
+
+ atomic_inc(&hard_iface->seqno);
+
+ slide_own_bcast_window(hard_iface);
+ bat_ogm_queue_add(bat_priv, hard_iface->packet_buff,
+ hard_iface->packet_len, hard_iface, 1,
+ bat_ogm_emit_send_time(bat_priv));
+
+ if (primary_if)
+ hardif_free_ref(primary_if);
+}
+
+static void bat_ogm_orig_update(struct bat_priv *bat_priv,
+ struct orig_node *orig_node,
+ const struct ethhdr *ethhdr,
+ const struct batman_ogm_packet
+ *batman_ogm_packet,
+ struct hard_iface *if_incoming,
+ const unsigned char *tt_buff, int is_duplicate)
+{
+ struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+ struct neigh_node *router = NULL;
+ struct orig_node *orig_node_tmp;
+ struct hlist_node *node;
+ uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
+
+ bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
+ "Searching and updating originator entry of received packet\n");
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_neigh_node, node,
+ &orig_node->neigh_list, list) {
+ if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
+ (tmp_neigh_node->if_incoming == if_incoming) &&
+ atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
+ if (neigh_node)
+ neigh_node_free_ref(neigh_node);
+ neigh_node = tmp_neigh_node;
+ continue;
+ }
+
+ if (is_duplicate)
+ continue;
+
+ spin_lock_bh(&tmp_neigh_node->tq_lock);
+ ring_buffer_set(tmp_neigh_node->tq_recv,
+ &tmp_neigh_node->tq_index, 0);
+ tmp_neigh_node->tq_avg =
+ ring_buffer_avg(tmp_neigh_node->tq_recv);
+ spin_unlock_bh(&tmp_neigh_node->tq_lock);
+ }
+
+ if (!neigh_node) {
+ struct orig_node *orig_tmp;
+
+ orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
+ if (!orig_tmp)
+ goto unlock;
+
+ neigh_node = create_neighbor(orig_node, orig_tmp,
+ ethhdr->h_source, if_incoming);
+
+ orig_node_free_ref(orig_tmp);
+ if (!neigh_node)
+ goto unlock;
+ } else
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Updating existing last-hop neighbor of originator\n");
+
+ rcu_read_unlock();
+
+ orig_node->flags = batman_ogm_packet->flags;
+ neigh_node->last_valid = jiffies;
+
+ spin_lock_bh(&neigh_node->tq_lock);
+ ring_buffer_set(neigh_node->tq_recv,
+ &neigh_node->tq_index,
+ batman_ogm_packet->tq);
+ neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv);
+ spin_unlock_bh(&neigh_node->tq_lock);
+
+ if (!is_duplicate) {
+ orig_node->last_ttl = batman_ogm_packet->ttl;
+ neigh_node->last_ttl = batman_ogm_packet->ttl;
+ }
+
+ bonding_candidate_add(orig_node, neigh_node);
+
+ /* if this neighbor already is our next hop there is nothing
+ * to change */
+ router = orig_node_get_router(orig_node);
+ if (router == neigh_node)
+ goto update_tt;
+
+ /* if this neighbor does not offer a better TQ we won't consider it */
+ if (router && (router->tq_avg > neigh_node->tq_avg))
+ goto update_tt;
+
+ /* if the TQ is the same and the link not more symmetric we
+ * won't consider it either */
+ if (router && (neigh_node->tq_avg == router->tq_avg)) {
+ orig_node_tmp = router->orig_node;
+ spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
+ bcast_own_sum_orig =
+ orig_node_tmp->bcast_own_sum[if_incoming->if_num];
+ spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
+
+ orig_node_tmp = neigh_node->orig_node;
+ spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
+ bcast_own_sum_neigh =
+ orig_node_tmp->bcast_own_sum[if_incoming->if_num];
+ spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
+
+ if (bcast_own_sum_orig >= bcast_own_sum_neigh)
+ goto update_tt;
+ }
+
+ update_route(bat_priv, orig_node, neigh_node);
+
+update_tt:
+ /* I have to check for transtable changes only if the OGM has been
+ * sent through a primary interface */
+ if (((batman_ogm_packet->orig != ethhdr->h_source) &&
+ (batman_ogm_packet->ttl > 2)) ||
+ (batman_ogm_packet->flags & PRIMARIES_FIRST_HOP))
+ tt_update_orig(bat_priv, orig_node, tt_buff,
+ batman_ogm_packet->tt_num_changes,
+ batman_ogm_packet->ttvn,
+ batman_ogm_packet->tt_crc);
+
+ if (orig_node->gw_flags != batman_ogm_packet->gw_flags)
+ gw_node_update(bat_priv, orig_node,
+ batman_ogm_packet->gw_flags);
+
+ orig_node->gw_flags = batman_ogm_packet->gw_flags;
+
+ /* restart gateway selection if fast or late switching was enabled */
+ if ((orig_node->gw_flags) &&
+ (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
+ (atomic_read(&bat_priv->gw_sel_class) > 2))
+ gw_check_election(bat_priv, orig_node);
+
+ goto out;
+
+unlock:
+ rcu_read_unlock();
+out:
+ if (neigh_node)
+ neigh_node_free_ref(neigh_node);
+ if (router)
+ neigh_node_free_ref(router);
+}
+
+static int bat_ogm_calc_tq(struct orig_node *orig_node,
+ struct orig_node *orig_neigh_node,
+ struct batman_ogm_packet *batman_ogm_packet,
+ struct hard_iface *if_incoming)
+{
+ struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
+ struct hlist_node *node;
+ uint8_t total_count;
+ uint8_t orig_eq_count, neigh_rq_count, tq_own;
+ int tq_asym_penalty, ret = 0;
+
+ /* find corresponding one hop neighbor */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_neigh_node, node,
+ &orig_neigh_node->neigh_list, list) {
+
+ if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig))
+ continue;
+
+ if (tmp_neigh_node->if_incoming != if_incoming)
+ continue;
+
+ if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ continue;
+
+ neigh_node = tmp_neigh_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ if (!neigh_node)
+ neigh_node = create_neighbor(orig_neigh_node,
+ orig_neigh_node,
+ orig_neigh_node->orig,
+ if_incoming);
+
+ if (!neigh_node)
+ goto out;
+
+ /* if orig_node is direct neighbor update neigh_node last_valid */
+ if (orig_node == orig_neigh_node)
+ neigh_node->last_valid = jiffies;
+
+ orig_node->last_valid = jiffies;
+
+ /* find packet count of corresponding one hop neighbor */
+ spin_lock_bh(&orig_node->ogm_cnt_lock);
+ orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
+ neigh_rq_count = neigh_node->real_packet_count;
+ spin_unlock_bh(&orig_node->ogm_cnt_lock);
+
+ /* pay attention to not get a value bigger than 100 % */
+ total_count = (orig_eq_count > neigh_rq_count ?
+ neigh_rq_count : orig_eq_count);
+
+ /* if we have too few packets (too less data) we set tq_own to zero */
+ /* if we receive too few packets it is not considered bidirectional */
+ if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
+ (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
+ tq_own = 0;
+ else
+ /* neigh_node->real_packet_count is never zero as we
+ * only purge old information when getting new
+ * information */
+ tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
+
+ /*
+ * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
+ * affect the nearly-symmetric links only a little, but
+ * punishes asymmetric links more. This will give a value
+ * between 0 and TQ_MAX_VALUE
+ */
+ tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
+ (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
+ (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
+ (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
+ (TQ_LOCAL_WINDOW_SIZE *
+ TQ_LOCAL_WINDOW_SIZE *
+ TQ_LOCAL_WINDOW_SIZE);
+
+ batman_ogm_packet->tq = ((batman_ogm_packet->tq * tq_own
+ * tq_asym_penalty) /
+ (TQ_MAX_VALUE * TQ_MAX_VALUE));
+
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "bidirectional: "
+ "orig = %-15pM neigh = %-15pM => own_bcast = %2i, "
+ "real recv = %2i, local tq: %3i, asym_penalty: %3i, "
+ "total tq: %3i\n",
+ orig_node->orig, orig_neigh_node->orig, total_count,
+ neigh_rq_count, tq_own, tq_asym_penalty, batman_ogm_packet->tq);
+
+ /* if link has the minimum required transmission quality
+ * consider it bidirectional */
+ if (batman_ogm_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
+ ret = 1;
+
+out:
+ if (neigh_node)
+ neigh_node_free_ref(neigh_node);
+ return ret;
+}
+
+/* processes a batman packet for all interfaces, adjusts the sequence number and
+ * finds out whether it is a duplicate.
+ * returns:
+ * 1 the packet is a duplicate
+ * 0 the packet has not yet been received
+ * -1 the packet is old and has been received while the seqno window
+ * was protected. Caller should drop it.
+ */
+static int bat_ogm_update_seqnos(const struct ethhdr *ethhdr,
+ const struct batman_ogm_packet
+ *batman_ogm_packet,
+ const struct hard_iface *if_incoming)
+{
+ struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct orig_node *orig_node;
+ struct neigh_node *tmp_neigh_node;
+ struct hlist_node *node;
+ int is_duplicate = 0;
+ int32_t seq_diff;
+ int need_update = 0;
+ int set_mark, ret = -1;
+
+ orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig);
+ if (!orig_node)
+ return 0;
+
+ spin_lock_bh(&orig_node->ogm_cnt_lock);
+ seq_diff = batman_ogm_packet->seqno - orig_node->last_real_seqno;
+
+ /* signalize caller that the packet is to be dropped. */
+ if (window_protected(bat_priv, seq_diff,
+ &orig_node->batman_seqno_reset))
+ goto out;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_neigh_node, node,
+ &orig_node->neigh_list, list) {
+
+ is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
+ orig_node->last_real_seqno,
+ batman_ogm_packet->seqno);
+
+ if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
+ (tmp_neigh_node->if_incoming == if_incoming))
+ set_mark = 1;
+ else
+ set_mark = 0;
+
+ /* if the window moved, set the update flag. */
+ need_update |= bit_get_packet(bat_priv,
+ tmp_neigh_node->real_bits,
+ seq_diff, set_mark);
+
+ tmp_neigh_node->real_packet_count =
+ bit_packet_count(tmp_neigh_node->real_bits);
+ }
+ rcu_read_unlock();
+
+ if (need_update) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "updating last_seqno: old %d, new %d\n",
+ orig_node->last_real_seqno, batman_ogm_packet->seqno);
+ orig_node->last_real_seqno = batman_ogm_packet->seqno;
+ }
+
+ ret = is_duplicate;
+
+out:
+ spin_unlock_bh(&orig_node->ogm_cnt_lock);
+ orig_node_free_ref(orig_node);
+ return ret;
+}
+
+static void bat_ogm_process(const struct ethhdr *ethhdr,
+ struct batman_ogm_packet *batman_ogm_packet,
+ const unsigned char *tt_buff,
+ struct hard_iface *if_incoming)
+{
+ struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+ struct hard_iface *hard_iface;
+ struct orig_node *orig_neigh_node, *orig_node;
+ struct neigh_node *router = NULL, *router_router = NULL;
+ struct neigh_node *orig_neigh_router = NULL;
+ int has_directlink_flag;
+ int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
+ int is_broadcast = 0, is_bidirectional, is_single_hop_neigh;
+ int is_duplicate;
+ uint32_t if_incoming_seqno;
+
+ /* Silently drop when the batman packet is actually not a
+ * correct packet.
+ *
+ * This might happen if a packet is padded (e.g. Ethernet has a
+ * minimum frame length of 64 byte) and the aggregation interprets
+ * it as an additional length.
+ *
+ * TODO: A more sane solution would be to have a bit in the
+ * batman_ogm_packet to detect whether the packet is the last
+ * packet in an aggregation. Here we expect that the padding
+ * is always zero (or not 0x01)
+ */
+ if (batman_ogm_packet->packet_type != BAT_OGM)
+ return;
+
+ /* could be changed by schedule_own_packet() */
+ if_incoming_seqno = atomic_read(&if_incoming->seqno);
+
+ has_directlink_flag = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0);
+
+ is_single_hop_neigh = (compare_eth(ethhdr->h_source,
+ batman_ogm_packet->orig) ? 1 : 0);
+
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Received BATMAN packet via NB: %pM, IF: %s [%pM] "
+ "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, "
+ "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n",
+ ethhdr->h_source, if_incoming->net_dev->name,
+ if_incoming->net_dev->dev_addr, batman_ogm_packet->orig,
+ batman_ogm_packet->prev_sender, batman_ogm_packet->seqno,
+ batman_ogm_packet->ttvn, batman_ogm_packet->tt_crc,
+ batman_ogm_packet->tt_num_changes, batman_ogm_packet->tq,
+ batman_ogm_packet->ttl, batman_ogm_packet->version,
+ has_directlink_flag);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
+ if (hard_iface->if_status != IF_ACTIVE)
+ continue;
+
+ if (hard_iface->soft_iface != if_incoming->soft_iface)
+ continue;
+
+ if (compare_eth(ethhdr->h_source,
+ hard_iface->net_dev->dev_addr))
+ is_my_addr = 1;
+
+ if (compare_eth(batman_ogm_packet->orig,
+ hard_iface->net_dev->dev_addr))
+ is_my_orig = 1;
+
+ if (compare_eth(batman_ogm_packet->prev_sender,
+ hard_iface->net_dev->dev_addr))
+ is_my_oldorig = 1;
+
+ if (is_broadcast_ether_addr(ethhdr->h_source))
+ is_broadcast = 1;
+ }
+ rcu_read_unlock();
+
+ if (batman_ogm_packet->version != COMPAT_VERSION) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: incompatible batman version (%i)\n",
+ batman_ogm_packet->version);
+ return;
+ }
+
+ if (is_my_addr) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: received my own broadcast (sender: %pM"
+ ")\n",
+ ethhdr->h_source);
+ return;
+ }
+
+ if (is_broadcast) {
+ bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
+ "ignoring all packets with broadcast source addr (sender: %pM"
+ ")\n", ethhdr->h_source);
+ return;
+ }
+
+ if (is_my_orig) {
+ unsigned long *word;
+ int offset;
+
+ orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
+ if (!orig_neigh_node)
+ return;
+
+ /* neighbor has to indicate direct link and it has to
+ * come via the corresponding interface */
+ /* save packet seqno for bidirectional check */
+ if (has_directlink_flag &&
+ compare_eth(if_incoming->net_dev->dev_addr,
+ batman_ogm_packet->orig)) {
+ offset = if_incoming->if_num * NUM_WORDS;
+
+ spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
+ word = &(orig_neigh_node->bcast_own[offset]);
+ bit_mark(word,
+ if_incoming_seqno -
+ batman_ogm_packet->seqno - 2);
+ orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
+ bit_packet_count(word);
+ spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
+ }
+
+ bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
+ "originator packet from myself (via neighbor)\n");
+ orig_node_free_ref(orig_neigh_node);
+ return;
+ }
+
+ if (is_my_oldorig) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: ignoring all rebroadcast echos (sender: "
+ "%pM)\n", ethhdr->h_source);
+ return;
+ }
+
+ orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig);
+ if (!orig_node)
+ return;
+
+ is_duplicate = bat_ogm_update_seqnos(ethhdr, batman_ogm_packet,
+ if_incoming);
+
+ if (is_duplicate == -1) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: packet within seqno protection time "
+ "(sender: %pM)\n", ethhdr->h_source);
+ goto out;
+ }
+
+ if (batman_ogm_packet->tq == 0) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: originator packet with tq equal 0\n");
+ goto out;
+ }
+
+ router = orig_node_get_router(orig_node);
+ if (router)
+ router_router = orig_node_get_router(router->orig_node);
+
+ /* avoid temporary routing loops */
+ if (router && router_router &&
+ (compare_eth(router->addr, batman_ogm_packet->prev_sender)) &&
+ !(compare_eth(batman_ogm_packet->orig,
+ batman_ogm_packet->prev_sender)) &&
+ (compare_eth(router->addr, router_router->addr))) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: ignoring all rebroadcast packets that "
+ "may make me loop (sender: %pM)\n", ethhdr->h_source);
+ goto out;
+ }
+
+ /* if sender is a direct neighbor the sender mac equals
+ * originator mac */
+ orig_neigh_node = (is_single_hop_neigh ?
+ orig_node :
+ get_orig_node(bat_priv, ethhdr->h_source));
+ if (!orig_neigh_node)
+ goto out;
+
+ orig_neigh_router = orig_node_get_router(orig_neigh_node);
+
+ /* drop packet if sender is not a direct neighbor and if we
+ * don't route towards it */
+ if (!is_single_hop_neigh && (!orig_neigh_router)) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: OGM via unknown neighbor!\n");
+ goto out_neigh;
+ }
+
+ is_bidirectional = bat_ogm_calc_tq(orig_node, orig_neigh_node,
+ batman_ogm_packet, if_incoming);
+
+ bonding_save_primary(orig_node, orig_neigh_node, batman_ogm_packet);
+
+ /* update ranking if it is not a duplicate or has the same
+ * seqno and similar ttl as the non-duplicate */
+ if (is_bidirectional &&
+ (!is_duplicate ||
+ ((orig_node->last_real_seqno == batman_ogm_packet->seqno) &&
+ (orig_node->last_ttl - 3 <= batman_ogm_packet->ttl))))
+ bat_ogm_orig_update(bat_priv, orig_node, ethhdr,
+ batman_ogm_packet, if_incoming,
+ tt_buff, is_duplicate);
+
+ /* is single hop (direct) neighbor */
+ if (is_single_hop_neigh) {
+
+ /* mark direct link on incoming interface */
+ bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet,
+ 1, if_incoming);
+
+ bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
+ "rebroadcast neighbor packet with direct link flag\n");
+ goto out_neigh;
+ }
+
+ /* multihop originator */
+ if (!is_bidirectional) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: not received via bidirectional link\n");
+ goto out_neigh;
+ }
+
+ if (is_duplicate) {
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Drop packet: duplicate packet received\n");
+ goto out_neigh;
+ }
+
+ bat_dbg(DBG_BATMAN, bat_priv,
+ "Forwarding packet: rebroadcast originator packet\n");
+ bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet, 0, if_incoming);
+
+out_neigh:
+ if ((orig_neigh_node) && (!is_single_hop_neigh))
+ orig_node_free_ref(orig_neigh_node);
+out:
+ if (router)
+ neigh_node_free_ref(router);
+ if (router_router)
+ neigh_node_free_ref(router_router);
+ if (orig_neigh_router)
+ neigh_node_free_ref(orig_neigh_router);
+
+ orig_node_free_ref(orig_node);
+}
+
+void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff,
+ int packet_len, struct hard_iface *if_incoming)
+{
+ struct batman_ogm_packet *batman_ogm_packet;
+ int buff_pos = 0;
+ unsigned char *tt_buff;
+
+ batman_ogm_packet = (struct batman_ogm_packet *)packet_buff;
+
+ /* unpack the aggregated packets and process them one by one */
+ do {
+ /* network to host order for our 32bit seqno and the
+ orig_interval */
+ batman_ogm_packet->seqno = ntohl(batman_ogm_packet->seqno);
+ batman_ogm_packet->tt_crc = ntohs(batman_ogm_packet->tt_crc);
+
+ tt_buff = packet_buff + buff_pos + BATMAN_OGM_LEN;
+
+ bat_ogm_process(ethhdr, batman_ogm_packet,
+ tt_buff, if_incoming);
+
+ buff_pos += BATMAN_OGM_LEN +
+ tt_len(batman_ogm_packet->tt_num_changes);
+
+ batman_ogm_packet = (struct batman_ogm_packet *)
+ (packet_buff + buff_pos);
+ } while (bat_ogm_aggr_packet(buff_pos, packet_len,
+ batman_ogm_packet->tt_num_changes));
+}
diff --git a/net/batman-adv/bat_ogm.h b/net/batman-adv/bat_ogm.h
new file mode 100644
index 0000000..69329c1
--- /dev/null
+++ b/net/batman-adv/bat_ogm.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_OGM_H_
+#define _NET_BATMAN_ADV_OGM_H_
+
+#include "main.h"
+
+void bat_ogm_init(struct hard_iface *hard_iface);
+void bat_ogm_init_primary(struct hard_iface *hard_iface);
+void bat_ogm_update_mac(struct hard_iface *hard_iface);
+void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes);
+void bat_ogm_emit(struct forw_packet *forw_packet);
+void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff,
+ int packet_len, struct hard_iface *if_incoming);
+
+#endif /* _NET_BATMAN_ADV_OGM_H_ */
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
index cd15deb..b8a7414 100644
--- a/net/batman-adv/bat_sysfs.c
+++ b/net/batman-adv/bat_sysfs.c
@@ -380,6 +380,7 @@ static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr,
BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu);
+BAT_ATTR_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode);
static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode);
BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL);
@@ -396,6 +397,7 @@ static struct bat_attribute *mesh_attrs[] = {
&bat_attr_aggregated_ogms,
&bat_attr_bonding,
&bat_attr_fragmentation,
+ &bat_attr_ap_isolation,
&bat_attr_vis_mode,
&bat_attr_gw_mode,
&bat_attr_orig_interval,
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index c1f4bfc..0be9ff3 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -97,12 +97,12 @@ static void bit_shift(unsigned long *seq_bits, int32_t n)
(seq_bits[i - word_num - 1] >>
(WORD_BIT_SIZE-word_offset));
/* and the upper part of the right half and shift it left to
- * it's position */
+ * its position */
/* for our example that would be: word[0] = 9800 + 0076 =
* 9876 */
}
- /* now for our last word, i==word_num, we only have the it's "left"
- * half. that's the 1000 word in our example.*/
+ /* now for our last word, i==word_num, we only have its "left" half.
+ * that's the 1000 word in our example.*/
seq_bits[i] = (seq_bits[i - word_num] << word_offset);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 056180e..619fb73 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -532,14 +532,14 @@ static bool is_type_dhcprequest(struct sk_buff *skb, int header_len)
pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1;
/* Access the dhcp option lists. Each entry is made up by:
- * - octect 1: option type
- * - octect 2: option data len (only if type != 255 and 0)
- * - octect 3: option data */
+ * - octet 1: option type
+ * - octet 2: option data len (only if type != 255 and 0)
+ * - octet 3: option data */
while (*p != 255 && !ret) {
- /* p now points to the first octect: option type */
+ /* p now points to the first octet: option type */
if (*p == 53) {
/* type 53 is the message type option.
- * Jump the len octect and go to the data octect */
+ * Jump the len octet and go to the data octet */
if (pkt_len < 2)
goto out;
p += 2;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index db7aacf..7704df4 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -28,6 +28,7 @@
#include "bat_sysfs.h"
#include "originator.h"
#include "hash.h"
+#include "bat_ogm.h"
#include <linux/if_arp.h>
@@ -131,7 +132,6 @@ static void primary_if_select(struct bat_priv *bat_priv,
struct hard_iface *new_hard_iface)
{
struct hard_iface *curr_hard_iface;
- struct batman_packet *batman_packet;
ASSERT_RTNL();
@@ -147,10 +147,7 @@ static void primary_if_select(struct bat_priv *bat_priv,
if (!new_hard_iface)
return;
- batman_packet = (struct batman_packet *)(new_hard_iface->packet_buff);
- batman_packet->flags = PRIMARIES_FIRST_HOP;
- batman_packet->ttl = TTL;
-
+ bat_ogm_init_primary(new_hard_iface);
primary_if_update_addr(bat_priv);
}
@@ -162,14 +159,6 @@ static bool hardif_is_iface_up(const struct hard_iface *hard_iface)
return false;
}
-static void update_mac_addresses(struct hard_iface *hard_iface)
-{
- memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig,
- hard_iface->net_dev->dev_addr, ETH_ALEN);
- memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender,
- hard_iface->net_dev->dev_addr, ETH_ALEN);
-}
-
static void check_known_mac_addr(const struct net_device *net_dev)
{
const struct hard_iface *hard_iface;
@@ -244,12 +233,12 @@ static void hardif_activate_interface(struct hard_iface *hard_iface)
bat_priv = netdev_priv(hard_iface->soft_iface);
- update_mac_addresses(hard_iface);
+ bat_ogm_update_mac(hard_iface);
hard_iface->if_status = IF_TO_BE_ACTIVATED;
/**
* the first active interface becomes our primary interface or
- * the next active interface after the old primay interface was removed
+ * the next active interface after the old primary interface was removed
*/
primary_if = primary_if_get_selected(bat_priv);
if (!primary_if)
@@ -283,7 +272,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
const char *iface_name)
{
struct bat_priv *bat_priv;
- struct batman_packet *batman_packet;
struct net_device *soft_iface;
int ret;
@@ -318,8 +306,8 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
- hard_iface->packet_len = BAT_PACKET_LEN;
- hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
+
+ bat_ogm_init(hard_iface);
if (!hard_iface->packet_buff) {
bat_err(hard_iface->soft_iface, "Can't add interface packet "
@@ -328,15 +316,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
goto err;
}
- batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
- batman_packet->packet_type = BAT_PACKET;
- batman_packet->version = COMPAT_VERSION;
- batman_packet->flags = NO_FLAGS;
- batman_packet->ttl = 2;
- batman_packet->tq = TQ_MAX_VALUE;
- batman_packet->tt_num_changes = 0;
- batman_packet->ttvn = 0;
-
hard_iface->if_num = bat_priv->num_ifaces;
bat_priv->num_ifaces++;
hard_iface->if_status = IF_INACTIVE;
@@ -381,7 +360,7 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
hard_iface->net_dev->name);
/* begin scheduling originator messages on that interface */
- schedule_own_packet(hard_iface);
+ schedule_bat_ogm(hard_iface);
out:
return 0;
@@ -455,11 +434,8 @@ static struct hard_iface *hardif_add_interface(struct net_device *net_dev)
dev_hold(net_dev);
hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC);
- if (!hard_iface) {
- pr_err("Can't add interface (%s): out of memory\n",
- net_dev->name);
+ if (!hard_iface)
goto release_dev;
- }
ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev);
if (ret)
@@ -551,7 +527,7 @@ static int hard_if_event(struct notifier_block *this,
goto hardif_put;
check_known_mac_addr(hard_iface->net_dev);
- update_mac_addresses(hard_iface);
+ bat_ogm_update_mac(hard_iface);
bat_priv = netdev_priv(hard_iface->soft_iface);
primary_if = primary_if_get_selected(bat_priv);
@@ -573,14 +549,14 @@ out:
return NOTIFY_DONE;
}
-/* receive a packet with the batman ethertype coming on a hard
+/* incoming packets with the batman ethertype received on any active hard
* interface */
static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
struct net_device *orig_dev)
{
struct bat_priv *bat_priv;
- struct batman_packet *batman_packet;
+ struct batman_ogm_packet *batman_ogm_packet;
struct hard_iface *hard_iface;
int ret;
@@ -612,22 +588,22 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
if (hard_iface->if_status != IF_ACTIVE)
goto err_free;
- batman_packet = (struct batman_packet *)skb->data;
+ batman_ogm_packet = (struct batman_ogm_packet *)skb->data;
- if (batman_packet->version != COMPAT_VERSION) {
+ if (batman_ogm_packet->version != COMPAT_VERSION) {
bat_dbg(DBG_BATMAN, bat_priv,
"Drop packet: incompatible batman version (%i)\n",
- batman_packet->version);
+ batman_ogm_packet->version);
goto err_free;
}
/* all receive handlers return whether they received or reused
* the supplied skb. if not, we have to free the skb. */
- switch (batman_packet->packet_type) {
+ switch (batman_ogm_packet->packet_type) {
/* batman originator packet */
- case BAT_PACKET:
- ret = recv_bat_packet(skb, hard_iface);
+ case BAT_OGM:
+ ret = recv_bat_ogm_packet(skb, hard_iface);
break;
/* batman icmp packet */
@@ -681,6 +657,36 @@ err_out:
return NET_RX_DROP;
}
+/* This function returns true if the interface represented by ifindex is a
+ * 802.11 wireless device */
+bool is_wifi_iface(int ifindex)
+{
+ struct net_device *net_device = NULL;
+ bool ret = false;
+
+ if (ifindex == NULL_IFINDEX)
+ goto out;
+
+ net_device = dev_get_by_index(&init_net, ifindex);
+ if (!net_device)
+ goto out;
+
+#ifdef CONFIG_WIRELESS_EXT
+ /* pre-cfg80211 drivers have to implement WEXT, so it is possible to
+ * check for wireless_handlers != NULL */
+ if (net_device->wireless_handlers)
+ ret = true;
+ else
+#endif
+ /* cfg80211 drivers have to set ieee80211_ptr */
+ if (net_device->ieee80211_ptr)
+ ret = true;
+out:
+ if (net_device)
+ dev_put(net_device);
+ return ret;
+}
+
struct notifier_block hard_if_notifier = {
.notifier_call = hard_if_event,
};
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 442eacb..67f78d1 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -42,6 +42,7 @@ void hardif_remove_interfaces(void);
int hardif_min_mtu(struct net_device *soft_iface);
void update_min_mtu(struct net_device *soft_iface);
void hardif_free_rcu(struct rcu_head *rcu);
+bool is_wifi_iface(int ifindex);
static inline void hardif_free_ref(struct hard_iface *hard_iface)
{
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index dd5c9fd..d20aa71 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -76,19 +76,30 @@ static inline void hash_delete(struct hashtable_t *hash,
hash_destroy(hash);
}
-/* adds data to the hashtable. returns 0 on success, -1 on error */
+/**
+ * hash_add - adds data to the hashtable
+ * @hash: storage hash table
+ * @compare: callback to determine if 2 hash elements are identical
+ * @choose: callback calculating the hash index
+ * @data: data passed to the aforementioned callbacks as argument
+ * @data_node: to be added element
+ *
+ * Returns 0 on success, 1 if the element already is in the hash
+ * and -1 on error.
+ */
+
static inline int hash_add(struct hashtable_t *hash,
hashdata_compare_cb compare,
hashdata_choose_cb choose,
const void *data, struct hlist_node *data_node)
{
- int index;
+ int index, ret = -1;
struct hlist_head *head;
struct hlist_node *node;
spinlock_t *list_lock; /* spinlock to protect write access */
if (!hash)
- goto err;
+ goto out;
index = choose(data, hash->size);
head = &hash->table[index];
@@ -99,6 +110,7 @@ static inline int hash_add(struct hashtable_t *hash,
if (!compare(node, data))
continue;
+ ret = 1;
goto err_unlock;
}
rcu_read_unlock();
@@ -108,12 +120,13 @@ static inline int hash_add(struct hashtable_t *hash,
hlist_add_head_rcu(data_node, head);
spin_unlock_bh(list_lock);
- return 0;
+ ret = 0;
+ goto out;
err_unlock:
rcu_read_unlock();
-err:
- return -1;
+out:
+ return ret;
}
/* removes data from hash, if found. returns pointer do data on success, so you
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b0f9068..fb87bdc 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -107,7 +107,7 @@ int mesh_init(struct net_device *soft_iface)
if (tt_init(bat_priv) < 1)
goto err;
- tt_local_add(soft_iface, soft_iface->dev_addr);
+ tt_local_add(soft_iface, soft_iface->dev_addr, NULL_IFINDEX);
if (vis_init(bat_priv) < 1)
goto err;
@@ -117,8 +117,6 @@ int mesh_init(struct net_device *soft_iface)
goto end;
err:
- pr_err("Unable to allocate memory for mesh information structures: "
- "out of mem ?\n");
mesh_free(soft_iface);
return -1;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index a6df61a..964ad4d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -28,7 +28,7 @@
#define DRIVER_DEVICE "batman-adv"
#ifndef SOURCE_VERSION
-#define SOURCE_VERSION "2011.3.0"
+#define SOURCE_VERSION "2011.4.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -44,7 +44,7 @@
#define PURGE_TIMEOUT 200
#define TT_LOCAL_TIMEOUT 3600 /* in seconds */
#define TT_CLIENT_ROAM_TIMEOUT 600
-/* sliding packet range of received originator messages in squence numbers
+/* sliding packet range of received originator messages in sequence numbers
* (should be a multiple of our word size) */
#define TQ_LOCAL_WINDOW_SIZE 64
#define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */
@@ -62,6 +62,8 @@
#define NO_FLAGS 0
+#define NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */
+
#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE)
#define LOG_BUF_LEN 8192 /* has to be a power of 2 */
@@ -133,7 +135,7 @@ enum dbg_level {
#include <linux/mutex.h> /* mutex */
#include <linux/module.h> /* needed by all modules */
#include <linux/netdevice.h> /* netdevice */
-#include <linux/etherdevice.h> /* ethernet address classifaction */
+#include <linux/etherdevice.h> /* ethernet address classification */
#include <linux/if_ether.h> /* ethernet header */
#include <linux/poll.h> /* poll_table */
#include <linux/kthread.h> /* kernel threads */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index f3c3f62..0e5b772 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -252,7 +252,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr)
hash_added = hash_add(bat_priv->orig_hash, compare_orig,
choose_orig, orig_node, &orig_node->hash_entry);
- if (hash_added < 0)
+ if (hash_added != 0)
goto free_bcast_own_sum;
return orig_node;
@@ -336,8 +336,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
} else {
if (purge_orig_neighbors(bat_priv, orig_node,
&best_neigh_node)) {
- update_routes(bat_priv, orig_node,
- best_neigh_node);
+ update_route(bat_priv, orig_node, best_neigh_node);
}
}
@@ -493,10 +492,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS,
GFP_ATOMIC);
- if (!data_ptr) {
- pr_err("Can't resize orig: out of memory\n");
+ if (!data_ptr)
return -1;
- }
memcpy(data_ptr, orig_node->bcast_own,
(max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS);
@@ -504,10 +501,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
orig_node->bcast_own = data_ptr;
data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
- if (!data_ptr) {
- pr_err("Can't resize orig: out of memory\n");
+ if (!data_ptr)
return -1;
- }
memcpy(data_ptr, orig_node->bcast_own_sum,
(max_if_num - 1) * sizeof(uint8_t));
@@ -562,10 +557,8 @@ static int orig_node_del_if(struct orig_node *orig_node,
chunk_size = sizeof(unsigned long) * NUM_WORDS;
data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
- if (!data_ptr) {
- pr_err("Can't resize orig: out of memory\n");
+ if (!data_ptr)
return -1;
- }
/* copy first part */
memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size);
@@ -583,10 +576,8 @@ free_bcast_own:
goto free_own_sum;
data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
- if (!data_ptr) {
- pr_err("Can't resize orig: out of memory\n");
+ if (!data_ptr)
return -1;
- }
memcpy(data_ptr, orig_node->bcast_own_sum,
del_if_num * sizeof(uint8_t));
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index b76b4be..4d9e54c 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -25,14 +25,14 @@
#define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */
enum bat_packettype {
- BAT_PACKET = 0x01,
- BAT_ICMP = 0x02,
- BAT_UNICAST = 0x03,
- BAT_BCAST = 0x04,
- BAT_VIS = 0x05,
+ BAT_OGM = 0x01,
+ BAT_ICMP = 0x02,
+ BAT_UNICAST = 0x03,
+ BAT_BCAST = 0x04,
+ BAT_VIS = 0x05,
BAT_UNICAST_FRAG = 0x06,
- BAT_TT_QUERY = 0x07,
- BAT_ROAM_ADV = 0x08
+ BAT_TT_QUERY = 0x07,
+ BAT_ROAM_ADV = 0x08
};
/* this file is included by batctl which needs these defines */
@@ -84,12 +84,13 @@ enum tt_query_flags {
enum tt_client_flags {
TT_CLIENT_DEL = 1 << 0,
TT_CLIENT_ROAM = 1 << 1,
+ TT_CLIENT_WIFI = 1 << 2,
TT_CLIENT_NOPURGE = 1 << 8,
TT_CLIENT_NEW = 1 << 9,
TT_CLIENT_PENDING = 1 << 10
};
-struct batman_packet {
+struct batman_ogm_packet {
uint8_t packet_type;
uint8_t version; /* batman version field */
uint8_t ttl;
@@ -104,7 +105,7 @@ struct batman_packet {
uint16_t tt_crc;
} __packed;
-#define BAT_PACKET_LEN sizeof(struct batman_packet)
+#define BATMAN_OGM_LEN sizeof(struct batman_ogm_packet)
struct icmp_packet {
uint8_t packet_type;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 0f32c81..f961cc5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -22,18 +22,14 @@
#include "main.h"
#include "routing.h"
#include "send.h"
-#include "hash.h"
#include "soft-interface.h"
#include "hard-interface.h"
#include "icmp_socket.h"
#include "translation-table.h"
#include "originator.h"
-#include "ring_buffer.h"
#include "vis.h"
-#include "aggregation.h"
-#include "gateway_common.h"
-#include "gateway_client.h"
#include "unicast.h"
+#include "bat_ogm.h"
void slide_own_bcast_window(struct hard_iface *hard_iface)
{
@@ -64,69 +60,9 @@ void slide_own_bcast_window(struct hard_iface *hard_iface)
}
}
-static void update_transtable(struct bat_priv *bat_priv,
- struct orig_node *orig_node,
- const unsigned char *tt_buff,
- uint8_t tt_num_changes, uint8_t ttvn,
- uint16_t tt_crc)
-{
- uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
- bool full_table = true;
-
- /* the ttvn increased by one -> we can apply the attached changes */
- if (ttvn - orig_ttvn == 1) {
- /* the OGM could not contain the changes because they were too
- * many to fit in one frame or because they have already been
- * sent TT_OGM_APPEND_MAX times. In this case send a tt
- * request */
- if (!tt_num_changes) {
- full_table = false;
- goto request_table;
- }
-
- tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn,
- (struct tt_change *)tt_buff);
-
- /* Even if we received the crc into the OGM, we prefer
- * to recompute it to spot any possible inconsistency
- * in the global table */
- orig_node->tt_crc = tt_global_crc(bat_priv, orig_node);
-
- /* The ttvn alone is not enough to guarantee consistency
- * because a single value could repesent different states
- * (due to the wrap around). Thus a node has to check whether
- * the resulting table (after applying the changes) is still
- * consistent or not. E.g. a node could disconnect while its
- * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case
- * checking the CRC value is mandatory to detect the
- * inconsistency */
- if (orig_node->tt_crc != tt_crc)
- goto request_table;
-
- /* Roaming phase is over: tables are in sync again. I can
- * unset the flag */
- orig_node->tt_poss_change = false;
- } else {
- /* if we missed more than one change or our tables are not
- * in sync anymore -> request fresh tt data */
- if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) {
-request_table:
- bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. "
- "Need to retrieve the correct information "
- "(ttvn: %u last_ttvn: %u crc: %u last_crc: "
- "%u num_changes: %u)\n", orig_node->orig, ttvn,
- orig_ttvn, tt_crc, orig_node->tt_crc,
- tt_num_changes);
- send_tt_request(bat_priv, orig_node, ttvn, tt_crc,
- full_table);
- return;
- }
- }
-}
-
-static void update_route(struct bat_priv *bat_priv,
- struct orig_node *orig_node,
- struct neigh_node *neigh_node)
+static void _update_route(struct bat_priv *bat_priv,
+ struct orig_node *orig_node,
+ struct neigh_node *neigh_node)
{
struct neigh_node *curr_router;
@@ -170,8 +106,8 @@ static void update_route(struct bat_priv *bat_priv,
neigh_node_free_ref(curr_router);
}
-void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
- struct neigh_node *neigh_node)
+void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node,
+ struct neigh_node *neigh_node)
{
struct neigh_node *router = NULL;
@@ -181,116 +117,13 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
router = orig_node_get_router(orig_node);
if (router != neigh_node)
- update_route(bat_priv, orig_node, neigh_node);
+ _update_route(bat_priv, orig_node, neigh_node);
out:
if (router)
neigh_node_free_ref(router);
}
-static int is_bidirectional_neigh(struct orig_node *orig_node,
- struct orig_node *orig_neigh_node,
- struct batman_packet *batman_packet,
- struct hard_iface *if_incoming)
-{
- struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
- struct hlist_node *node;
- uint8_t total_count;
- uint8_t orig_eq_count, neigh_rq_count, tq_own;
- int tq_asym_penalty, ret = 0;
-
- /* find corresponding one hop neighbor */
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_neigh_node, node,
- &orig_neigh_node->neigh_list, list) {
-
- if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig))
- continue;
-
- if (tmp_neigh_node->if_incoming != if_incoming)
- continue;
-
- if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
- continue;
-
- neigh_node = tmp_neigh_node;
- break;
- }
- rcu_read_unlock();
-
- if (!neigh_node)
- neigh_node = create_neighbor(orig_neigh_node,
- orig_neigh_node,
- orig_neigh_node->orig,
- if_incoming);
-
- if (!neigh_node)
- goto out;
-
- /* if orig_node is direct neighbour update neigh_node last_valid */
- if (orig_node == orig_neigh_node)
- neigh_node->last_valid = jiffies;
-
- orig_node->last_valid = jiffies;
-
- /* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_node->ogm_cnt_lock);
- orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
- neigh_rq_count = neigh_node->real_packet_count;
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
-
- /* pay attention to not get a value bigger than 100 % */
- total_count = (orig_eq_count > neigh_rq_count ?
- neigh_rq_count : orig_eq_count);
-
- /* if we have too few packets (too less data) we set tq_own to zero */
- /* if we receive too few packets it is not considered bidirectional */
- if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
- (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
- tq_own = 0;
- else
- /* neigh_node->real_packet_count is never zero as we
- * only purge old information when getting new
- * information */
- tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
-
- /*
- * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
- * affect the nearly-symmetric links only a little, but
- * punishes asymmetric links more. This will give a value
- * between 0 and TQ_MAX_VALUE
- */
- tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
- (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
- (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
- (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
- (TQ_LOCAL_WINDOW_SIZE *
- TQ_LOCAL_WINDOW_SIZE *
- TQ_LOCAL_WINDOW_SIZE);
-
- batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) /
- (TQ_MAX_VALUE * TQ_MAX_VALUE));
-
- bat_dbg(DBG_BATMAN, bat_priv,
- "bidirectional: "
- "orig = %-15pM neigh = %-15pM => own_bcast = %2i, "
- "real recv = %2i, local tq: %3i, asym_penalty: %3i, "
- "total tq: %3i\n",
- orig_node->orig, orig_neigh_node->orig, total_count,
- neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq);
-
- /* if link has the minimum required transmission quality
- * consider it bidirectional */
- if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
- ret = 1;
-
-out:
- if (neigh_node)
- neigh_node_free_ref(neigh_node);
- return ret;
-}
-
/* caller must hold the neigh_list_lock */
void bonding_candidate_del(struct orig_node *orig_node,
struct neigh_node *neigh_node)
@@ -308,8 +141,8 @@ out:
return;
}
-static void bonding_candidate_add(struct orig_node *orig_node,
- struct neigh_node *neigh_node)
+void bonding_candidate_add(struct orig_node *orig_node,
+ struct neigh_node *neigh_node)
{
struct hlist_node *node;
struct neigh_node *tmp_neigh_node, *router = NULL;
@@ -379,162 +212,23 @@ out:
}
/* copy primary address for bonding */
-static void bonding_save_primary(const struct orig_node *orig_node,
- struct orig_node *orig_neigh_node,
- const struct batman_packet *batman_packet)
+void bonding_save_primary(const struct orig_node *orig_node,
+ struct orig_node *orig_neigh_node,
+ const struct batman_ogm_packet *batman_ogm_packet)
{
- if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
+ if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP))
return;
memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN);
}
-static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
- const struct ethhdr *ethhdr,
- const struct batman_packet *batman_packet,
- struct hard_iface *if_incoming,
- const unsigned char *tt_buff, int is_duplicate)
-{
- struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
- struct neigh_node *router = NULL;
- struct orig_node *orig_node_tmp;
- struct hlist_node *node;
- uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
-
- bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
- "Searching and updating originator entry of received packet\n");
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_neigh_node, node,
- &orig_node->neigh_list, list) {
- if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
- (tmp_neigh_node->if_incoming == if_incoming) &&
- atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
- if (neigh_node)
- neigh_node_free_ref(neigh_node);
- neigh_node = tmp_neigh_node;
- continue;
- }
-
- if (is_duplicate)
- continue;
-
- spin_lock_bh(&tmp_neigh_node->tq_lock);
- ring_buffer_set(tmp_neigh_node->tq_recv,
- &tmp_neigh_node->tq_index, 0);
- tmp_neigh_node->tq_avg =
- ring_buffer_avg(tmp_neigh_node->tq_recv);
- spin_unlock_bh(&tmp_neigh_node->tq_lock);
- }
-
- if (!neigh_node) {
- struct orig_node *orig_tmp;
-
- orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
- if (!orig_tmp)
- goto unlock;
-
- neigh_node = create_neighbor(orig_node, orig_tmp,
- ethhdr->h_source, if_incoming);
-
- orig_node_free_ref(orig_tmp);
- if (!neigh_node)
- goto unlock;
- } else
- bat_dbg(DBG_BATMAN, bat_priv,
- "Updating existing last-hop neighbor of originator\n");
-
- rcu_read_unlock();
-
- orig_node->flags = batman_packet->flags;
- neigh_node->last_valid = jiffies;
-
- spin_lock_bh(&neigh_node->tq_lock);
- ring_buffer_set(neigh_node->tq_recv,
- &neigh_node->tq_index,
- batman_packet->tq);
- neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv);
- spin_unlock_bh(&neigh_node->tq_lock);
-
- if (!is_duplicate) {
- orig_node->last_ttl = batman_packet->ttl;
- neigh_node->last_ttl = batman_packet->ttl;
- }
-
- bonding_candidate_add(orig_node, neigh_node);
-
- /* if this neighbor already is our next hop there is nothing
- * to change */
- router = orig_node_get_router(orig_node);
- if (router == neigh_node)
- goto update_tt;
-
- /* if this neighbor does not offer a better TQ we won't consider it */
- if (router && (router->tq_avg > neigh_node->tq_avg))
- goto update_tt;
-
- /* if the TQ is the same and the link not more symetric we
- * won't consider it either */
- if (router && (neigh_node->tq_avg == router->tq_avg)) {
- orig_node_tmp = router->orig_node;
- spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
- bcast_own_sum_orig =
- orig_node_tmp->bcast_own_sum[if_incoming->if_num];
- spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
-
- orig_node_tmp = neigh_node->orig_node;
- spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
- bcast_own_sum_neigh =
- orig_node_tmp->bcast_own_sum[if_incoming->if_num];
- spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
-
- if (bcast_own_sum_orig >= bcast_own_sum_neigh)
- goto update_tt;
- }
-
- update_routes(bat_priv, orig_node, neigh_node);
-
-update_tt:
- /* I have to check for transtable changes only if the OGM has been
- * sent through a primary interface */
- if (((batman_packet->orig != ethhdr->h_source) &&
- (batman_packet->ttl > 2)) ||
- (batman_packet->flags & PRIMARIES_FIRST_HOP))
- update_transtable(bat_priv, orig_node, tt_buff,
- batman_packet->tt_num_changes,
- batman_packet->ttvn,
- batman_packet->tt_crc);
-
- if (orig_node->gw_flags != batman_packet->gw_flags)
- gw_node_update(bat_priv, orig_node, batman_packet->gw_flags);
-
- orig_node->gw_flags = batman_packet->gw_flags;
-
- /* restart gateway selection if fast or late switching was enabled */
- if ((orig_node->gw_flags) &&
- (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
- (atomic_read(&bat_priv->gw_sel_class) > 2))
- gw_check_election(bat_priv, orig_node);
-
- goto out;
-
-unlock:
- rcu_read_unlock();
-out:
- if (neigh_node)
- neigh_node_free_ref(neigh_node);
- if (router)
- neigh_node_free_ref(router);
-}
-
/* checks whether the host restarted and is in the protection time.
* returns:
* 0 if the packet is to be accepted
* 1 if the packet is to be ignored.
*/
-static int window_protected(struct bat_priv *bat_priv,
- int32_t seq_num_diff,
- unsigned long *last_reset)
+int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff,
+ unsigned long *last_reset)
{
if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE)
|| (seq_num_diff >= EXPECTED_SEQNO_RANGE)) {
@@ -552,330 +246,12 @@ static int window_protected(struct bat_priv *bat_priv,
return 0;
}
-/* processes a batman packet for all interfaces, adjusts the sequence number and
- * finds out whether it is a duplicate.
- * returns:
- * 1 the packet is a duplicate
- * 0 the packet has not yet been received
- * -1 the packet is old and has been received while the seqno window
- * was protected. Caller should drop it.
- */
-static int count_real_packets(const struct ethhdr *ethhdr,
- const struct batman_packet *batman_packet,
- const struct hard_iface *if_incoming)
-{
- struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- struct orig_node *orig_node;
- struct neigh_node *tmp_neigh_node;
- struct hlist_node *node;
- int is_duplicate = 0;
- int32_t seq_diff;
- int need_update = 0;
- int set_mark, ret = -1;
-
- orig_node = get_orig_node(bat_priv, batman_packet->orig);
- if (!orig_node)
- return 0;
-
- spin_lock_bh(&orig_node->ogm_cnt_lock);
- seq_diff = batman_packet->seqno - orig_node->last_real_seqno;
-
- /* signalize caller that the packet is to be dropped. */
- if (window_protected(bat_priv, seq_diff,
- &orig_node->batman_seqno_reset))
- goto out;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_neigh_node, node,
- &orig_node->neigh_list, list) {
-
- is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
- orig_node->last_real_seqno,
- batman_packet->seqno);
-
- if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
- (tmp_neigh_node->if_incoming == if_incoming))
- set_mark = 1;
- else
- set_mark = 0;
-
- /* if the window moved, set the update flag. */
- need_update |= bit_get_packet(bat_priv,
- tmp_neigh_node->real_bits,
- seq_diff, set_mark);
-
- tmp_neigh_node->real_packet_count =
- bit_packet_count(tmp_neigh_node->real_bits);
- }
- rcu_read_unlock();
-
- if (need_update) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "updating last_seqno: old %d, new %d\n",
- orig_node->last_real_seqno, batman_packet->seqno);
- orig_node->last_real_seqno = batman_packet->seqno;
- }
-
- ret = is_duplicate;
-
-out:
- spin_unlock_bh(&orig_node->ogm_cnt_lock);
- orig_node_free_ref(orig_node);
- return ret;
-}
-
-void receive_bat_packet(const struct ethhdr *ethhdr,
- struct batman_packet *batman_packet,
- const unsigned char *tt_buff,
- struct hard_iface *if_incoming)
-{
- struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- struct hard_iface *hard_iface;
- struct orig_node *orig_neigh_node, *orig_node;
- struct neigh_node *router = NULL, *router_router = NULL;
- struct neigh_node *orig_neigh_router = NULL;
- int has_directlink_flag;
- int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
- int is_broadcast = 0, is_bidirectional, is_single_hop_neigh;
- int is_duplicate;
- uint32_t if_incoming_seqno;
-
- /* Silently drop when the batman packet is actually not a
- * correct packet.
- *
- * This might happen if a packet is padded (e.g. Ethernet has a
- * minimum frame length of 64 byte) and the aggregation interprets
- * it as an additional length.
- *
- * TODO: A more sane solution would be to have a bit in the
- * batman_packet to detect whether the packet is the last
- * packet in an aggregation. Here we expect that the padding
- * is always zero (or not 0x01)
- */
- if (batman_packet->packet_type != BAT_PACKET)
- return;
-
- /* could be changed by schedule_own_packet() */
- if_incoming_seqno = atomic_read(&if_incoming->seqno);
-
- has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0);
-
- is_single_hop_neigh = (compare_eth(ethhdr->h_source,
- batman_packet->orig) ? 1 : 0);
-
- bat_dbg(DBG_BATMAN, bat_priv,
- "Received BATMAN packet via NB: %pM, IF: %s [%pM] "
- "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, "
- "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n",
- ethhdr->h_source, if_incoming->net_dev->name,
- if_incoming->net_dev->dev_addr, batman_packet->orig,
- batman_packet->prev_sender, batman_packet->seqno,
- batman_packet->ttvn, batman_packet->tt_crc,
- batman_packet->tt_num_changes, batman_packet->tq,
- batman_packet->ttl, batman_packet->version,
- has_directlink_flag);
-
- rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
- if (hard_iface->if_status != IF_ACTIVE)
- continue;
-
- if (hard_iface->soft_iface != if_incoming->soft_iface)
- continue;
-
- if (compare_eth(ethhdr->h_source,
- hard_iface->net_dev->dev_addr))
- is_my_addr = 1;
-
- if (compare_eth(batman_packet->orig,
- hard_iface->net_dev->dev_addr))
- is_my_orig = 1;
-
- if (compare_eth(batman_packet->prev_sender,
- hard_iface->net_dev->dev_addr))
- is_my_oldorig = 1;
-
- if (is_broadcast_ether_addr(ethhdr->h_source))
- is_broadcast = 1;
- }
- rcu_read_unlock();
-
- if (batman_packet->version != COMPAT_VERSION) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: incompatible batman version (%i)\n",
- batman_packet->version);
- return;
- }
-
- if (is_my_addr) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: received my own broadcast (sender: %pM"
- ")\n",
- ethhdr->h_source);
- return;
- }
-
- if (is_broadcast) {
- bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
- "ignoring all packets with broadcast source addr (sender: %pM"
- ")\n", ethhdr->h_source);
- return;
- }
-
- if (is_my_orig) {
- unsigned long *word;
- int offset;
-
- orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
- if (!orig_neigh_node)
- return;
-
- /* neighbor has to indicate direct link and it has to
- * come via the corresponding interface */
- /* save packet seqno for bidirectional check */
- if (has_directlink_flag &&
- compare_eth(if_incoming->net_dev->dev_addr,
- batman_packet->orig)) {
- offset = if_incoming->if_num * NUM_WORDS;
-
- spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
- word = &(orig_neigh_node->bcast_own[offset]);
- bit_mark(word,
- if_incoming_seqno - batman_packet->seqno - 2);
- orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
- bit_packet_count(word);
- spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
- }
-
- bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
- "originator packet from myself (via neighbor)\n");
- orig_node_free_ref(orig_neigh_node);
- return;
- }
-
- if (is_my_oldorig) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: ignoring all rebroadcast echos (sender: "
- "%pM)\n", ethhdr->h_source);
- return;
- }
-
- orig_node = get_orig_node(bat_priv, batman_packet->orig);
- if (!orig_node)
- return;
-
- is_duplicate = count_real_packets(ethhdr, batman_packet, if_incoming);
-
- if (is_duplicate == -1) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: packet within seqno protection time "
- "(sender: %pM)\n", ethhdr->h_source);
- goto out;
- }
-
- if (batman_packet->tq == 0) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: originator packet with tq equal 0\n");
- goto out;
- }
-
- router = orig_node_get_router(orig_node);
- if (router)
- router_router = orig_node_get_router(router->orig_node);
-
- /* avoid temporary routing loops */
- if (router && router_router &&
- (compare_eth(router->addr, batman_packet->prev_sender)) &&
- !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) &&
- (compare_eth(router->addr, router_router->addr))) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: ignoring all rebroadcast packets that "
- "may make me loop (sender: %pM)\n", ethhdr->h_source);
- goto out;
- }
-
- /* if sender is a direct neighbor the sender mac equals
- * originator mac */
- orig_neigh_node = (is_single_hop_neigh ?
- orig_node :
- get_orig_node(bat_priv, ethhdr->h_source));
- if (!orig_neigh_node)
- goto out;
-
- orig_neigh_router = orig_node_get_router(orig_neigh_node);
-
- /* drop packet if sender is not a direct neighbor and if we
- * don't route towards it */
- if (!is_single_hop_neigh && (!orig_neigh_router)) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: OGM via unknown neighbor!\n");
- goto out_neigh;
- }
-
- is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node,
- batman_packet, if_incoming);
-
- bonding_save_primary(orig_node, orig_neigh_node, batman_packet);
-
- /* update ranking if it is not a duplicate or has the same
- * seqno and similar ttl as the non-duplicate */
- if (is_bidirectional &&
- (!is_duplicate ||
- ((orig_node->last_real_seqno == batman_packet->seqno) &&
- (orig_node->last_ttl - 3 <= batman_packet->ttl))))
- update_orig(bat_priv, orig_node, ethhdr, batman_packet,
- if_incoming, tt_buff, is_duplicate);
-
- /* is single hop (direct) neighbor */
- if (is_single_hop_neigh) {
-
- /* mark direct link on incoming interface */
- schedule_forward_packet(orig_node, ethhdr, batman_packet,
- 1, if_incoming);
-
- bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
- "rebroadcast neighbor packet with direct link flag\n");
- goto out_neigh;
- }
-
- /* multihop originator */
- if (!is_bidirectional) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: not received via bidirectional link\n");
- goto out_neigh;
- }
-
- if (is_duplicate) {
- bat_dbg(DBG_BATMAN, bat_priv,
- "Drop packet: duplicate packet received\n");
- goto out_neigh;
- }
-
- bat_dbg(DBG_BATMAN, bat_priv,
- "Forwarding packet: rebroadcast originator packet\n");
- schedule_forward_packet(orig_node, ethhdr, batman_packet,
- 0, if_incoming);
-
-out_neigh:
- if ((orig_neigh_node) && (!is_single_hop_neigh))
- orig_node_free_ref(orig_neigh_node);
-out:
- if (router)
- neigh_node_free_ref(router);
- if (router_router)
- neigh_node_free_ref(router_router);
- if (orig_neigh_router)
- neigh_node_free_ref(orig_neigh_router);
-
- orig_node_free_ref(orig_node);
-}
-
-int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
+int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
{
struct ethhdr *ethhdr;
/* drop packet if it has not necessary minimum size */
- if (unlikely(!pskb_may_pull(skb, sizeof(struct batman_packet))))
+ if (unlikely(!pskb_may_pull(skb, BATMAN_OGM_LEN)))
return NET_RX_DROP;
ethhdr = (struct ethhdr *)skb_mac_header(skb);
@@ -898,10 +274,7 @@ int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
ethhdr = (struct ethhdr *)skb_mac_header(skb);
- receive_aggr_bat_packet(ethhdr,
- skb->data,
- skb_headlen(skb),
- hard_iface);
+ bat_ogm_receive(ethhdr, skb->data, skb_headlen(skb), hard_iface);
kfree_skb(skb);
return NET_RX_SUCCESS;
@@ -1243,7 +616,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if)
}
break;
case TT_RESPONSE:
- /* packet needs to be linearised to access the TT changes */
+ /* packet needs to be linearized to access the TT changes */
if (skb_linearize(skb) < 0)
goto out;
@@ -1300,7 +673,7 @@ int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if)
roam_adv_packet->client);
tt_global_add(bat_priv, orig_node, roam_adv_packet->client,
- atomic_read(&orig_node->last_ttvn) + 1, true);
+ atomic_read(&orig_node->last_ttvn) + 1, true, false);
/* Roaming phase starts: I have new information but the ttvn has not
* been incremented yet. This flag will make me check all the incoming
@@ -1536,7 +909,7 @@ static int check_unicast_ttvn(struct bat_priv *bat_priv,
ethhdr = (struct ethhdr *)(skb->data +
sizeof(struct unicast_packet));
- orig_node = transtable_search(bat_priv, ethhdr->h_dest);
+ orig_node = transtable_search(bat_priv, NULL, ethhdr->h_dest);
if (!orig_node) {
if (!is_my_client(bat_priv, ethhdr->h_dest))
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index fb14e95..7aaee0f 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -23,19 +23,15 @@
#define _NET_BATMAN_ADV_ROUTING_H_
void slide_own_bcast_window(struct hard_iface *hard_iface);
-void receive_bat_packet(const struct ethhdr *ethhdr,
- struct batman_packet *batman_packet,
- const unsigned char *tt_buff,
- struct hard_iface *if_incoming);
-void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
- struct neigh_node *neigh_node);
+void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node,
+ struct neigh_node *neigh_node);
int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if);
-int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if);
+int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if);
int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if);
struct neigh_node *find_router(struct bat_priv *bat_priv,
@@ -43,5 +39,12 @@ struct neigh_node *find_router(struct bat_priv *bat_priv,
const struct hard_iface *recv_if);
void bonding_candidate_del(struct orig_node *orig_node,
struct neigh_node *neigh_node);
+void bonding_candidate_add(struct orig_node *orig_node,
+ struct neigh_node *neigh_node);
+void bonding_save_primary(const struct orig_node *orig_node,
+ struct orig_node *orig_neigh_node,
+ const struct batman_ogm_packet *batman_ogm_packet);
+int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff,
+ unsigned long *last_reset);
#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 58d1447..8a684eb 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -26,33 +26,12 @@
#include "soft-interface.h"
#include "hard-interface.h"
#include "vis.h"
-#include "aggregation.h"
#include "gateway_common.h"
#include "originator.h"
+#include "bat_ogm.h"
static void send_outstanding_bcast_packet(struct work_struct *work);
-/* apply hop penalty for a normal link */
-static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv)
-{
- int hop_penalty = atomic_read(&bat_priv->hop_penalty);
- return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE);
-}
-
-/* when do we schedule our own packet to be sent */
-static unsigned long own_send_time(const struct bat_priv *bat_priv)
-{
- return jiffies + msecs_to_jiffies(
- atomic_read(&bat_priv->orig_interval) -
- JITTER + (random32() % 2*JITTER));
-}
-
-/* when do we schedule a forwarded packet to be sent */
-static unsigned long forward_send_time(void)
-{
- return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
-}
-
/* send out an already prepared packet to the given address via the
* specified batman interface */
int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface,
@@ -99,141 +78,17 @@ send_skb_err:
return NET_XMIT_DROP;
}
-/* Send a packet to a given interface */
-static void send_packet_to_if(struct forw_packet *forw_packet,
- struct hard_iface *hard_iface)
-{
- struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- char *fwd_str;
- uint8_t packet_num;
- int16_t buff_pos;
- struct batman_packet *batman_packet;
- struct sk_buff *skb;
-
- if (hard_iface->if_status != IF_ACTIVE)
- return;
-
- packet_num = 0;
- buff_pos = 0;
- batman_packet = (struct batman_packet *)forw_packet->skb->data;
-
- /* adjust all flags and log packets */
- while (aggregated_packet(buff_pos,
- forw_packet->packet_len,
- batman_packet->tt_num_changes)) {
-
- /* we might have aggregated direct link packets with an
- * ordinary base packet */
- if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
- (forw_packet->if_incoming == hard_iface))
- batman_packet->flags |= DIRECTLINK;
- else
- batman_packet->flags &= ~DIRECTLINK;
-
- fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ?
- "Sending own" :
- "Forwarding"));
- bat_dbg(DBG_BATMAN, bat_priv,
- "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d,"
- " IDF %s, hvn %d) on interface %s [%pM]\n",
- fwd_str, (packet_num > 0 ? "aggregated " : ""),
- batman_packet->orig, ntohl(batman_packet->seqno),
- batman_packet->tq, batman_packet->ttl,
- (batman_packet->flags & DIRECTLINK ?
- "on" : "off"),
- batman_packet->ttvn, hard_iface->net_dev->name,
- hard_iface->net_dev->dev_addr);
-
- buff_pos += sizeof(*batman_packet) +
- tt_len(batman_packet->tt_num_changes);
- packet_num++;
- batman_packet = (struct batman_packet *)
- (forw_packet->skb->data + buff_pos);
- }
-
- /* create clone because function is called more than once */
- skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
- if (skb)
- send_skb_packet(skb, hard_iface, broadcast_addr);
-}
-
-/* send a batman packet */
-static void send_packet(struct forw_packet *forw_packet)
-{
- struct hard_iface *hard_iface;
- struct net_device *soft_iface;
- struct bat_priv *bat_priv;
- struct hard_iface *primary_if = NULL;
- struct batman_packet *batman_packet =
- (struct batman_packet *)(forw_packet->skb->data);
- int directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0);
-
- if (!forw_packet->if_incoming) {
- pr_err("Error - can't forward packet: incoming iface not "
- "specified\n");
- goto out;
- }
-
- soft_iface = forw_packet->if_incoming->soft_iface;
- bat_priv = netdev_priv(soft_iface);
-
- if (forw_packet->if_incoming->if_status != IF_ACTIVE)
- goto out;
-
- primary_if = primary_if_get_selected(bat_priv);
- if (!primary_if)
- goto out;
-
- /* multihomed peer assumed */
- /* non-primary OGMs are only broadcasted on their interface */
- if ((directlink && (batman_packet->ttl == 1)) ||
- (forw_packet->own && (forw_packet->if_incoming != primary_if))) {
-
- /* FIXME: what about aggregated packets ? */
- bat_dbg(DBG_BATMAN, bat_priv,
- "%s packet (originator %pM, seqno %d, TTL %d) "
- "on interface %s [%pM]\n",
- (forw_packet->own ? "Sending own" : "Forwarding"),
- batman_packet->orig, ntohl(batman_packet->seqno),
- batman_packet->ttl,
- forw_packet->if_incoming->net_dev->name,
- forw_packet->if_incoming->net_dev->dev_addr);
-
- /* skb is only used once and than forw_packet is free'd */
- send_skb_packet(forw_packet->skb, forw_packet->if_incoming,
- broadcast_addr);
- forw_packet->skb = NULL;
-
- goto out;
- }
-
- /* broadcast on every interface */
- rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
- if (hard_iface->soft_iface != soft_iface)
- continue;
-
- send_packet_to_if(forw_packet, hard_iface);
- }
- rcu_read_unlock();
-
-out:
- if (primary_if)
- hardif_free_ref(primary_if);
-}
-
static void realloc_packet_buffer(struct hard_iface *hard_iface,
- int new_len)
+ int new_len)
{
unsigned char *new_buff;
- struct batman_packet *batman_packet;
new_buff = kmalloc(new_len, GFP_ATOMIC);
/* keep old buffer if kmalloc should fail */
if (new_buff) {
memcpy(new_buff, hard_iface->packet_buff,
- sizeof(*batman_packet));
+ BATMAN_OGM_LEN);
kfree(hard_iface->packet_buff);
hard_iface->packet_buff = new_buff;
@@ -242,60 +97,48 @@ static void realloc_packet_buffer(struct hard_iface *hard_iface,
}
/* when calling this function (hard_iface == primary_if) has to be true */
-static void prepare_packet_buffer(struct bat_priv *bat_priv,
+static int prepare_packet_buffer(struct bat_priv *bat_priv,
struct hard_iface *hard_iface)
{
int new_len;
- struct batman_packet *batman_packet;
- new_len = BAT_PACKET_LEN +
+ new_len = BATMAN_OGM_LEN +
tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes));
/* if we have too many changes for one packet don't send any
* and wait for the tt table request which will be fragmented */
if (new_len > hard_iface->soft_iface->mtu)
- new_len = BAT_PACKET_LEN;
+ new_len = BATMAN_OGM_LEN;
realloc_packet_buffer(hard_iface, new_len);
- batman_packet = (struct batman_packet *)hard_iface->packet_buff;
atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv));
/* reset the sending counter */
atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX);
- batman_packet->tt_num_changes = tt_changes_fill_buffer(bat_priv,
- hard_iface->packet_buff + BAT_PACKET_LEN,
- hard_iface->packet_len - BAT_PACKET_LEN);
-
+ return tt_changes_fill_buffer(bat_priv,
+ hard_iface->packet_buff + BATMAN_OGM_LEN,
+ hard_iface->packet_len - BATMAN_OGM_LEN);
}
-static void reset_packet_buffer(struct bat_priv *bat_priv,
- struct hard_iface *hard_iface)
+static int reset_packet_buffer(struct bat_priv *bat_priv,
+ struct hard_iface *hard_iface)
{
- struct batman_packet *batman_packet;
-
- realloc_packet_buffer(hard_iface, BAT_PACKET_LEN);
-
- batman_packet = (struct batman_packet *)hard_iface->packet_buff;
- batman_packet->tt_num_changes = 0;
+ realloc_packet_buffer(hard_iface, BATMAN_OGM_LEN);
+ return 0;
}
-void schedule_own_packet(struct hard_iface *hard_iface)
+void schedule_bat_ogm(struct hard_iface *hard_iface)
{
struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct hard_iface *primary_if;
- unsigned long send_time;
- struct batman_packet *batman_packet;
- int vis_server;
+ int tt_num_changes = -1;
if ((hard_iface->if_status == IF_NOT_IN_USE) ||
(hard_iface->if_status == IF_TO_BE_REMOVED))
return;
- vis_server = atomic_read(&bat_priv->vis_mode);
- primary_if = primary_if_get_selected(bat_priv);
-
/**
* the interface gets activated here to avoid race conditions between
* the moment of activating the interface in
@@ -306,124 +149,26 @@ void schedule_own_packet(struct hard_iface *hard_iface)
if (hard_iface->if_status == IF_TO_BE_ACTIVATED)
hard_iface->if_status = IF_ACTIVE;
+ primary_if = primary_if_get_selected(bat_priv);
+
if (hard_iface == primary_if) {
/* if at least one change happened */
if (atomic_read(&bat_priv->tt_local_changes) > 0) {
tt_commit_changes(bat_priv);
- prepare_packet_buffer(bat_priv, hard_iface);
+ tt_num_changes = prepare_packet_buffer(bat_priv,
+ hard_iface);
}
- /* if the changes have been sent enough times */
+ /* if the changes have been sent often enough */
if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))
- reset_packet_buffer(bat_priv, hard_iface);
+ tt_num_changes = reset_packet_buffer(bat_priv,
+ hard_iface);
}
- /**
- * NOTE: packet_buff might just have been re-allocated in
- * prepare_packet_buffer() or in reset_packet_buffer()
- */
- batman_packet = (struct batman_packet *)hard_iface->packet_buff;
-
- /* change sequence number to network order */
- batman_packet->seqno =
- htonl((uint32_t)atomic_read(&hard_iface->seqno));
-
- batman_packet->ttvn = atomic_read(&bat_priv->ttvn);
- batman_packet->tt_crc = htons((uint16_t)atomic_read(&bat_priv->tt_crc));
-
- if (vis_server == VIS_TYPE_SERVER_SYNC)
- batman_packet->flags |= VIS_SERVER;
- else
- batman_packet->flags &= ~VIS_SERVER;
-
- if ((hard_iface == primary_if) &&
- (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
- batman_packet->gw_flags =
- (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
- else
- batman_packet->gw_flags = NO_FLAGS;
-
- atomic_inc(&hard_iface->seqno);
-
- slide_own_bcast_window(hard_iface);
- send_time = own_send_time(bat_priv);
- add_bat_packet_to_list(bat_priv,
- hard_iface->packet_buff,
- hard_iface->packet_len,
- hard_iface, 1, send_time);
-
if (primary_if)
hardif_free_ref(primary_if);
-}
-
-void schedule_forward_packet(struct orig_node *orig_node,
- const struct ethhdr *ethhdr,
- struct batman_packet *batman_packet,
- int directlink,
- struct hard_iface *if_incoming)
-{
- struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
- struct neigh_node *router;
- uint8_t in_tq, in_ttl, tq_avg = 0;
- unsigned long send_time;
- uint8_t tt_num_changes;
-
- if (batman_packet->ttl <= 1) {
- bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n");
- return;
- }
-
- router = orig_node_get_router(orig_node);
-
- in_tq = batman_packet->tq;
- in_ttl = batman_packet->ttl;
- tt_num_changes = batman_packet->tt_num_changes;
-
- batman_packet->ttl--;
- memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
-
- /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast
- * of our best tq value */
- if (router && router->tq_avg != 0) {
-
- /* rebroadcast ogm of best ranking neighbor as is */
- if (!compare_eth(router->addr, ethhdr->h_source)) {
- batman_packet->tq = router->tq_avg;
-
- if (router->last_ttl)
- batman_packet->ttl = router->last_ttl - 1;
- }
-
- tq_avg = router->tq_avg;
- }
-
- if (router)
- neigh_node_free_ref(router);
-
- /* apply hop penalty */
- batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv);
-
- bat_dbg(DBG_BATMAN, bat_priv,
- "Forwarding packet: tq_orig: %i, tq_avg: %i, "
- "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n",
- in_tq, tq_avg, batman_packet->tq, in_ttl - 1,
- batman_packet->ttl);
-
- batman_packet->seqno = htonl(batman_packet->seqno);
- batman_packet->tt_crc = htons(batman_packet->tt_crc);
-
- /* switch of primaries first hop flag when forwarding */
- batman_packet->flags &= ~PRIMARIES_FIRST_HOP;
- if (directlink)
- batman_packet->flags |= DIRECTLINK;
- else
- batman_packet->flags &= ~DIRECTLINK;
- send_time = forward_send_time();
- add_bat_packet_to_list(bat_priv,
- (unsigned char *)batman_packet,
- sizeof(*batman_packet) + tt_len(tt_num_changes),
- if_incoming, 0, send_time);
+ bat_ogm_schedule(hard_iface, tt_num_changes);
}
static void forw_packet_free(struct forw_packet *forw_packet)
@@ -454,7 +199,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
}
/* add a broadcast packet to the queue and setup timers. broadcast packets
- * are sent multiple times to increase probability for beeing received.
+ * are sent multiple times to increase probability for being received.
*
* This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on
* errors.
@@ -557,7 +302,7 @@ out:
atomic_inc(&bat_priv->bcast_queue_left);
}
-void send_outstanding_bat_packet(struct work_struct *work)
+void send_outstanding_bat_ogm_packet(struct work_struct *work)
{
struct delayed_work *delayed_work =
container_of(work, struct delayed_work, work);
@@ -573,7 +318,7 @@ void send_outstanding_bat_packet(struct work_struct *work)
if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING)
goto out;
- send_packet(forw_packet);
+ bat_ogm_emit(forw_packet);
/**
* we have to have at least one packet in the queue
@@ -581,7 +326,7 @@ void send_outstanding_bat_packet(struct work_struct *work)
* shutting down
*/
if (forw_packet->own)
- schedule_own_packet(forw_packet->if_incoming);
+ schedule_bat_ogm(forw_packet->if_incoming);
out:
/* don't count own packet */
@@ -612,7 +357,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
&bat_priv->forw_bcast_list, list) {
/**
- * if purge_outstanding_packets() was called with an argmument
+ * if purge_outstanding_packets() was called with an argument
* we delete only packets belonging to the given interface
*/
if ((hard_iface) &&
@@ -641,7 +386,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
&bat_priv->forw_bat_list, list) {
/**
- * if purge_outstanding_packets() was called with an argmument
+ * if purge_outstanding_packets() was called with an argument
* we delete only packets belonging to the given interface
*/
if ((hard_iface) &&
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1f2d1e8..c8ca3ef 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -24,15 +24,10 @@
int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface,
const uint8_t *dst_addr);
-void schedule_own_packet(struct hard_iface *hard_iface);
-void schedule_forward_packet(struct orig_node *orig_node,
- const struct ethhdr *ethhdr,
- struct batman_packet *batman_packet,
- int directlink,
- struct hard_iface *if_outgoing);
+void schedule_bat_ogm(struct hard_iface *hard_iface);
int add_bcast_packet_to_list(struct bat_priv *bat_priv,
const struct sk_buff *skb, unsigned long delay);
-void send_outstanding_bat_packet(struct work_struct *work);
+void send_outstanding_bat_ogm_packet(struct work_struct *work);
void purge_outstanding_packets(struct bat_priv *bat_priv,
const struct hard_iface *hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 3e2f91f..f9cc957 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -445,30 +445,31 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
{
struct bat_priv *bat_priv = netdev_priv(dev);
struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
- struct batman_packet *batman_packet;
+ struct batman_ogm_packet *batman_ogm_packet;
struct softif_neigh *softif_neigh = NULL;
struct hard_iface *primary_if = NULL;
struct softif_neigh *curr_softif_neigh = NULL;
if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
- batman_packet = (struct batman_packet *)
+ batman_ogm_packet = (struct batman_ogm_packet *)
(skb->data + ETH_HLEN + VLAN_HLEN);
else
- batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN);
+ batman_ogm_packet = (struct batman_ogm_packet *)
+ (skb->data + ETH_HLEN);
- if (batman_packet->version != COMPAT_VERSION)
+ if (batman_ogm_packet->version != COMPAT_VERSION)
goto out;
- if (batman_packet->packet_type != BAT_PACKET)
+ if (batman_ogm_packet->packet_type != BAT_OGM)
goto out;
- if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
+ if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP))
goto out;
- if (is_my_mac(batman_packet->orig))
+ if (is_my_mac(batman_ogm_packet->orig))
goto out;
- softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid);
+ softif_neigh = softif_neigh_get(bat_priv, batman_ogm_packet->orig, vid);
if (!softif_neigh)
goto out;
@@ -532,11 +533,11 @@ static int interface_set_mac_addr(struct net_device *dev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
- /* only modify transtable if it has been initialised before */
+ /* only modify transtable if it has been initialized before */
if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) {
tt_local_remove(bat_priv, dev->dev_addr,
"mac address changed", false);
- tt_local_add(dev, addr->sa_data);
+ tt_local_add(dev, addr->sa_data, NULL_IFINDEX);
}
memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -565,7 +566,7 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
struct orig_node *orig_node = NULL;
int data_len = skb->len, ret;
short vid = -1;
- bool do_bcast = false;
+ bool do_bcast;
if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
goto dropped;
@@ -595,18 +596,19 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
goto dropped;
/* Register the client MAC in the transtable */
- tt_local_add(soft_iface, ethhdr->h_source);
+ tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
- orig_node = transtable_search(bat_priv, ethhdr->h_dest);
- if (is_multicast_ether_addr(ethhdr->h_dest) ||
- (orig_node && orig_node->gw_flags)) {
+ orig_node = transtable_search(bat_priv, ethhdr->h_source,
+ ethhdr->h_dest);
+ do_bcast = is_multicast_ether_addr(ethhdr->h_dest);
+ if (do_bcast || (orig_node && orig_node->gw_flags)) {
ret = gw_is_target(bat_priv, skb, orig_node);
if (ret < 0)
goto dropped;
- if (ret == 0)
- do_bcast = true;
+ if (ret)
+ do_bcast = false;
}
/* ethernet packet should be broadcasted */
@@ -739,6 +741,9 @@ void interface_rx(struct net_device *soft_iface,
soft_iface->last_rx = jiffies;
+ if (is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest))
+ goto dropped;
+
netif_rx(skb);
goto out;
@@ -796,10 +801,8 @@ struct net_device *softif_create(const char *name)
soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup);
- if (!soft_iface) {
- pr_err("Unable to allocate the batman interface: %s\n", name);
+ if (!soft_iface)
goto out;
- }
ret = register_netdevice(soft_iface);
if (ret < 0) {
@@ -812,6 +815,7 @@ struct net_device *softif_create(const char *name)
atomic_set(&bat_priv->aggregated_ogms, 1);
atomic_set(&bat_priv->bonding, 0);
+ atomic_set(&bat_priv->ap_isolation, 0);
atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE);
atomic_set(&bat_priv->gw_mode, GW_MODE_OFF);
atomic_set(&bat_priv->gw_sel_class, 20);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index fb6931d..cc53f78 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -183,7 +183,8 @@ static int tt_local_init(struct bat_priv *bat_priv)
return 1;
}
-void tt_local_add(struct net_device *soft_iface, const uint8_t *addr)
+void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
+ int ifindex)
{
struct bat_priv *bat_priv = netdev_priv(soft_iface);
struct tt_local_entry *tt_local_entry = NULL;
@@ -207,6 +208,8 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr)
memcpy(tt_local_entry->addr, addr, ETH_ALEN);
tt_local_entry->last_seen = jiffies;
tt_local_entry->flags = NO_FLAGS;
+ if (is_wifi_iface(ifindex))
+ tt_local_entry->flags |= TT_CLIENT_WIFI;
atomic_set(&tt_local_entry->refcount, 2);
/* the batman interface mac address should never be purged */
@@ -329,7 +332,7 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_lock();
__hlist_for_each_rcu(node, head)
- buf_size += 21;
+ buf_size += 29;
rcu_read_unlock();
}
@@ -348,8 +351,19 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_lock();
hlist_for_each_entry_rcu(tt_local_entry, node,
head, hash_entry) {
- pos += snprintf(buff + pos, 22, " * %pM\n",
- tt_local_entry->addr);
+ pos += snprintf(buff + pos, 30, " * %pM "
+ "[%c%c%c%c%c]\n",
+ tt_local_entry->addr,
+ (tt_local_entry->flags &
+ TT_CLIENT_ROAM ? 'R' : '.'),
+ (tt_local_entry->flags &
+ TT_CLIENT_NOPURGE ? 'P' : '.'),
+ (tt_local_entry->flags &
+ TT_CLIENT_NEW ? 'N' : '.'),
+ (tt_local_entry->flags &
+ TT_CLIENT_PENDING ? 'X' : '.'),
+ (tt_local_entry->flags &
+ TT_CLIENT_WIFI ? 'W' : '.'));
}
rcu_read_unlock();
}
@@ -369,8 +383,8 @@ static void tt_local_set_pending(struct bat_priv *bat_priv,
tt_local_event(bat_priv, tt_local_entry->addr,
tt_local_entry->flags | flags);
- /* The local client has to be merked as "pending to be removed" but has
- * to be kept in the table in order to send it in an full tables
+ /* The local client has to be marked as "pending to be removed" but has
+ * to be kept in the table in order to send it in a full table
* response issued before the net ttvn increment (consistency check) */
tt_local_entry->flags |= TT_CLIENT_PENDING;
}
@@ -495,7 +509,8 @@ static void tt_changes_list_free(struct bat_priv *bat_priv)
/* caller must hold orig_node refcount */
int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
- const unsigned char *tt_addr, uint8_t ttvn, bool roaming)
+ const unsigned char *tt_addr, uint8_t ttvn, bool roaming,
+ bool wifi)
{
struct tt_global_entry *tt_global_entry;
struct orig_node *orig_node_tmp;
@@ -537,6 +552,9 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
tt_global_entry->roam_at = 0;
}
+ if (wifi)
+ tt_global_entry->flags |= TT_CLIENT_WIFI;
+
bat_dbg(DBG_TT, bat_priv,
"Creating new global tt entry: %pM (via %pM)\n",
tt_global_entry->addr, orig_node->orig);
@@ -582,8 +600,8 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Globally announced TT entries received via the mesh %s\n",
net_dev->name);
- seq_printf(seq, " %-13s %s %-15s %s\n",
- "Client", "(TTVN)", "Originator", "(Curr TTVN)");
+ seq_printf(seq, " %-13s %s %-15s %s %s\n",
+ "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags");
buf_size = 1;
/* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via
@@ -593,7 +611,7 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_lock();
__hlist_for_each_rcu(node, head)
- buf_size += 59;
+ buf_size += 67;
rcu_read_unlock();
}
@@ -612,14 +630,20 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_lock();
hlist_for_each_entry_rcu(tt_global_entry, node,
head, hash_entry) {
- pos += snprintf(buff + pos, 61,
- " * %pM (%3u) via %pM (%3u)\n",
- tt_global_entry->addr,
+ pos += snprintf(buff + pos, 69,
+ " * %pM (%3u) via %pM (%3u) "
+ "[%c%c%c]\n", tt_global_entry->addr,
tt_global_entry->ttvn,
tt_global_entry->orig_node->orig,
(uint8_t) atomic_read(
&tt_global_entry->orig_node->
- last_ttvn));
+ last_ttvn),
+ (tt_global_entry->flags &
+ TT_CLIENT_ROAM ? 'R' : '.'),
+ (tt_global_entry->flags &
+ TT_CLIENT_PENDING ? 'X' : '.'),
+ (tt_global_entry->flags &
+ TT_CLIENT_WIFI ? 'W' : '.'));
}
rcu_read_unlock();
}
@@ -774,30 +798,56 @@ static void tt_global_table_free(struct bat_priv *bat_priv)
bat_priv->tt_global_hash = NULL;
}
+static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry,
+ struct tt_global_entry *tt_global_entry)
+{
+ bool ret = false;
+
+ if (tt_local_entry->flags & TT_CLIENT_WIFI &&
+ tt_global_entry->flags & TT_CLIENT_WIFI)
+ ret = true;
+
+ return ret;
+}
+
struct orig_node *transtable_search(struct bat_priv *bat_priv,
- const uint8_t *addr)
+ const uint8_t *src, const uint8_t *addr)
{
- struct tt_global_entry *tt_global_entry;
+ struct tt_local_entry *tt_local_entry = NULL;
+ struct tt_global_entry *tt_global_entry = NULL;
struct orig_node *orig_node = NULL;
- tt_global_entry = tt_global_hash_find(bat_priv, addr);
+ if (src && atomic_read(&bat_priv->ap_isolation)) {
+ tt_local_entry = tt_local_hash_find(bat_priv, src);
+ if (!tt_local_entry)
+ goto out;
+ }
+ tt_global_entry = tt_global_hash_find(bat_priv, addr);
if (!tt_global_entry)
goto out;
+ /* check whether the clients should not communicate due to AP
+ * isolation */
+ if (tt_local_entry && _is_ap_isolated(tt_local_entry, tt_global_entry))
+ goto out;
+
if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount))
- goto free_tt;
+ goto out;
/* A global client marked as PENDING has already moved from that
* originator */
if (tt_global_entry->flags & TT_CLIENT_PENDING)
- goto free_tt;
+ goto out;
orig_node = tt_global_entry->orig_node;
-free_tt:
- tt_global_entry_free_ref(tt_global_entry);
out:
+ if (tt_global_entry)
+ tt_global_entry_free_ref(tt_global_entry);
+ if (tt_local_entry)
+ tt_local_entry_free_ref(tt_local_entry);
+
return orig_node;
}
@@ -1029,8 +1079,9 @@ out:
return skb;
}
-int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node,
- uint8_t ttvn, uint16_t tt_crc, bool full_table)
+static int send_tt_request(struct bat_priv *bat_priv,
+ struct orig_node *dst_orig_node,
+ uint8_t ttvn, uint16_t tt_crc, bool full_table)
{
struct sk_buff *skb = NULL;
struct tt_query_packet *tt_request;
@@ -1137,12 +1188,12 @@ static bool send_other_tt_response(struct bat_priv *bat_priv,
orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
req_ttvn = tt_request->ttvn;
- /* I have not the requested data */
+ /* I don't have the requested data */
if (orig_ttvn != req_ttvn ||
tt_request->tt_data != req_dst_orig_node->tt_crc)
goto out;
- /* If it has explicitly been requested the full table */
+ /* If the full table has been explicitly requested */
if (tt_request->flags & TT_FULL_TABLE ||
!req_dst_orig_node->tt_buff)
full_table = true;
@@ -1363,7 +1414,9 @@ static void _tt_update_changes(struct bat_priv *bat_priv,
(tt_change + i)->flags & TT_CLIENT_ROAM);
else
if (!tt_global_add(bat_priv, orig_node,
- (tt_change + i)->addr, ttvn, false))
+ (tt_change + i)->addr, ttvn, false,
+ (tt_change + i)->flags &
+ TT_CLIENT_WIFI))
/* In case of problem while storing a
* global_entry, we stop the updating
* procedure without committing the
@@ -1403,9 +1456,10 @@ out:
orig_node_free_ref(orig_node);
}
-void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node,
- uint16_t tt_num_changes, uint8_t ttvn,
- struct tt_change *tt_change)
+static void tt_update_changes(struct bat_priv *bat_priv,
+ struct orig_node *orig_node,
+ uint16_t tt_num_changes, uint8_t ttvn,
+ struct tt_change *tt_change)
{
_tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes,
ttvn);
@@ -1720,3 +1774,90 @@ void tt_commit_changes(struct bat_priv *bat_priv)
atomic_inc(&bat_priv->ttvn);
bat_priv->tt_poss_change = false;
}
+
+bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst)
+{
+ struct tt_local_entry *tt_local_entry = NULL;
+ struct tt_global_entry *tt_global_entry = NULL;
+ bool ret = true;
+
+ if (!atomic_read(&bat_priv->ap_isolation))
+ return false;
+
+ tt_local_entry = tt_local_hash_find(bat_priv, dst);
+ if (!tt_local_entry)
+ goto out;
+
+ tt_global_entry = tt_global_hash_find(bat_priv, src);
+ if (!tt_global_entry)
+ goto out;
+
+ if (_is_ap_isolated(tt_local_entry, tt_global_entry))
+ goto out;
+
+ ret = false;
+
+out:
+ if (tt_global_entry)
+ tt_global_entry_free_ref(tt_global_entry);
+ if (tt_local_entry)
+ tt_local_entry_free_ref(tt_local_entry);
+ return ret;
+}
+
+void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
+ const unsigned char *tt_buff, uint8_t tt_num_changes,
+ uint8_t ttvn, uint16_t tt_crc)
+{
+ uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
+ bool full_table = true;
+
+ /* the ttvn increased by one -> we can apply the attached changes */
+ if (ttvn - orig_ttvn == 1) {
+ /* the OGM could not contain the changes due to their size or
+ * because they have already been sent TT_OGM_APPEND_MAX times.
+ * In this case send a tt request */
+ if (!tt_num_changes) {
+ full_table = false;
+ goto request_table;
+ }
+
+ tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn,
+ (struct tt_change *)tt_buff);
+
+ /* Even if we received the precomputed crc with the OGM, we
+ * prefer to recompute it to spot any possible inconsistency
+ * in the global table */
+ orig_node->tt_crc = tt_global_crc(bat_priv, orig_node);
+
+ /* The ttvn alone is not enough to guarantee consistency
+ * because a single value could represent different states
+ * (due to the wrap around). Thus a node has to check whether
+ * the resulting table (after applying the changes) is still
+ * consistent or not. E.g. a node could disconnect while its
+ * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case
+ * checking the CRC value is mandatory to detect the
+ * inconsistency */
+ if (orig_node->tt_crc != tt_crc)
+ goto request_table;
+
+ /* Roaming phase is over: tables are in sync again. I can
+ * unset the flag */
+ orig_node->tt_poss_change = false;
+ } else {
+ /* if we missed more than one change or our tables are not
+ * in sync anymore -> request fresh tt data */
+ if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) {
+request_table:
+ bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. "
+ "Need to retrieve the correct information "
+ "(ttvn: %u last_ttvn: %u crc: %u last_crc: "
+ "%u num_changes: %u)\n", orig_node->orig, ttvn,
+ orig_ttvn, tt_crc, orig_node->tt_crc,
+ tt_num_changes);
+ send_tt_request(bat_priv, orig_node, ttvn, tt_crc,
+ full_table);
+ return;
+ }
+ }
+}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index d4122cb..30efd49 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -26,15 +26,16 @@ int tt_len(int changes_num);
int tt_changes_fill_buffer(struct bat_priv *bat_priv,
unsigned char *buff, int buff_len);
int tt_init(struct bat_priv *bat_priv);
-void tt_local_add(struct net_device *soft_iface, const uint8_t *addr);
+void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
+ int ifindex);
void tt_local_remove(struct bat_priv *bat_priv,
const uint8_t *addr, const char *message, bool roaming);
int tt_local_seq_print_text(struct seq_file *seq, void *offset);
void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
const unsigned char *tt_buff, int tt_buff_len);
-int tt_global_add(struct bat_priv *bat_priv,
- struct orig_node *orig_node, const unsigned char *addr,
- uint8_t ttvn, bool roaming);
+int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
+ const unsigned char *addr, uint8_t ttvn, bool roaming,
+ bool wifi);
int tt_global_seq_print_text(struct seq_file *seq, void *offset);
void tt_global_del_orig(struct bat_priv *bat_priv,
struct orig_node *orig_node, const char *message);
@@ -42,25 +43,23 @@ void tt_global_del(struct bat_priv *bat_priv,
struct orig_node *orig_node, const unsigned char *addr,
const char *message, bool roaming);
struct orig_node *transtable_search(struct bat_priv *bat_priv,
- const uint8_t *addr);
+ const uint8_t *src, const uint8_t *addr);
void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node,
const unsigned char *tt_buff, uint8_t tt_num_changes);
uint16_t tt_local_crc(struct bat_priv *bat_priv);
uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node);
void tt_free(struct bat_priv *bat_priv);
-int send_tt_request(struct bat_priv *bat_priv,
- struct orig_node *dst_orig_node, uint8_t hvn,
- uint16_t tt_crc, bool full_table);
bool send_tt_response(struct bat_priv *bat_priv,
struct tt_query_packet *tt_request);
-void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node,
- uint16_t tt_num_changes, uint8_t ttvn,
- struct tt_change *tt_change);
bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr);
void handle_tt_response(struct bat_priv *bat_priv,
struct tt_query_packet *tt_response);
void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
struct orig_node *orig_node);
void tt_commit_changes(struct bat_priv *bat_priv);
+bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst);
+void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
+ const unsigned char *tt_buff, uint8_t tt_num_changes,
+ uint8_t ttvn, uint16_t tt_crc);
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 25bd1db..1ae3557 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -57,7 +57,7 @@ struct hard_iface {
* @batman_seqno_reset: time when the batman seqno window was reset
* @gw_flags: flags related to gateway class
* @flags: for now only VIS_SERVER flag
- * @last_real_seqno: last and best known squence number
+ * @last_real_seqno: last and best known sequence number
* @last_ttl: ttl of last received packet
* @last_bcast_seqno: last broadcast sequence number received by this host
*
@@ -146,6 +146,7 @@ struct bat_priv {
atomic_t aggregated_ogms; /* boolean */
atomic_t bonding; /* boolean */
atomic_t fragmentation; /* boolean */
+ atomic_t ap_isolation; /* boolean */
atomic_t vis_mode; /* VIS_TYPE_* */
atomic_t gw_mode; /* GW_MODE_* */
atomic_t gw_sel_class; /* uint */
@@ -156,7 +157,7 @@ struct bat_priv {
atomic_t bcast_seqno;
atomic_t bcast_queue_left;
atomic_t batman_queue_left;
- atomic_t ttvn; /* tranlation table version number */
+ atomic_t ttvn; /* translation table version number */
atomic_t tt_ogm_append_cnt;
atomic_t tt_local_changes; /* changes registered in a OGM interval */
/* The tt_poss_change flag is used to detect an ongoing roaming phase.
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 32b125f..07d1c1d 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -299,8 +299,10 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
goto find_router;
}
- /* check for tt host - increases orig_node refcount */
- orig_node = transtable_search(bat_priv, ethhdr->h_dest);
+ /* check for tt host - increases orig_node refcount.
+ * returns NULL in case of AP isolation */
+ orig_node = transtable_search(bat_priv, ethhdr->h_source,
+ ethhdr->h_dest);
find_router:
/**
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index 62f54b9..8fd5535 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -24,7 +24,7 @@
#include "packet.h"
-#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */
+#define FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */
#define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */
int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 8a1b985..f81a6b6 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -131,7 +131,7 @@ static void vis_data_insert_interface(const uint8_t *interface,
return;
}
- /* its a new address, add it to the list */
+ /* it's a new address, add it to the list */
entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
return;
@@ -465,7 +465,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
/* try to add it */
hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
info, &info->hash_entry);
- if (hash_added < 0) {
+ if (hash_added != 0) {
/* did not work (for some reason) */
kref_put(&info->refcount, free_info);
info = NULL;
@@ -887,10 +887,8 @@ int vis_init(struct bat_priv *bat_priv)
}
bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
- if (!bat_priv->my_vis_info) {
- pr_err("Can't initialize vis packet\n");
+ if (!bat_priv->my_vis_info)
goto err;
- }
bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) +
MAX_VIS_PACKET_SIZE +
@@ -920,7 +918,7 @@ int vis_init(struct bat_priv *bat_priv)
hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
bat_priv->my_vis_info,
&bat_priv->my_vis_info->hash_entry);
- if (hash_added < 0) {
+ if (hash_added != 0) {
pr_err("Can't add own vis packet into hash\n");
/* not in hash, need to remove it manually. */
kref_put(&bat_priv->my_vis_info->refcount, free_info);
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index d4f5dff..bc40864 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -217,7 +217,7 @@ static const struct net_device_ops bnep_netdev_ops = {
.ndo_stop = bnep_net_close,
.ndo_start_xmit = bnep_net_xmit,
.ndo_validate_addr = eth_validate_addr,
- .ndo_set_multicast_list = bnep_net_set_mc_list,
+ .ndo_set_rx_mode = bnep_net_set_mc_list,
.ndo_set_mac_address = bnep_net_set_mac_addr,
.ndo_tx_timeout = bnep_net_timeout,
.ndo_change_mtu = eth_change_mtu,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 32b8f9f..feb77ea 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- netif_carrier_off(dev);
netdev_update_features(dev);
netif_start_queue(dev);
br_stp_enable_bridge(br);
@@ -108,8 +107,6 @@ static int br_dev_stop(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- netif_carrier_off(dev);
-
br_stp_disable_bridge(br);
br_multicast_stop(br);
@@ -304,7 +301,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_start_xmit = br_dev_xmit,
.ndo_get_stats64 = br_get_stats64,
.ndo_set_mac_address = br_set_mac_address,
- .ndo_set_multicast_list = br_dev_set_multicast_list,
+ .ndo_set_rx_mode = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -361,6 +358,8 @@ void br_dev_setup(struct net_device *dev)
memcpy(br->group_addr, br_group_address, ETH_ALEN);
br->stp_enabled = BR_NO_STP;
+ br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
+
br->designated_root = br->bridge_id;
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 68def3b..c8e7861 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -558,19 +558,28 @@ skip:
/* Create new static fdb entry */
static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
- __u16 state)
+ __u16 state, __u16 flags)
{
struct net_bridge *br = source->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr)];
struct net_bridge_fdb_entry *fdb;
fdb = fdb_find(head, addr);
- if (fdb)
- return -EEXIST;
+ if (fdb == NULL) {
+ if (!(flags & NLM_F_CREATE))
+ return -ENOENT;
- fdb = fdb_create(head, source, addr);
- if (!fdb)
- return -ENOMEM;
+ fdb = fdb_create(head, source, addr);
+ if (!fdb)
+ return -ENOMEM;
+ } else {
+ if (flags & NLM_F_EXCL)
+ return -EEXIST;
+
+ if (flags & NLM_F_REPLACE)
+ fdb->updated = fdb->used = jiffies;
+ fdb->is_local = fdb->is_static = 0;
+ }
if (state & NUD_PERMANENT)
fdb->is_local = fdb->is_static = 1;
@@ -626,7 +635,7 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
}
spin_lock_bh(&p->br->hash_lock);
- err = fdb_add_entry(p, addr, ndm->ndm_state);
+ err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags);
spin_unlock_bh(&p->br->hash_lock);
return err;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 3176e2e..c3b77dc 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
#include <linux/netpoll.h>
#include <linux/ethtool.h>
#include <linux/if_arp.h>
@@ -33,20 +34,18 @@
*/
static int port_cost(struct net_device *dev)
{
- if (dev->ethtool_ops && dev->ethtool_ops->get_settings) {
- struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, };
-
- if (!dev_ethtool_get_settings(dev, &ecmd)) {
- switch (ethtool_cmd_speed(&ecmd)) {
- case SPEED_10000:
- return 2;
- case SPEED_1000:
- return 4;
- case SPEED_100:
- return 19;
- case SPEED_10:
- return 100;
- }
+ struct ethtool_cmd ecmd;
+
+ if (!__ethtool_get_settings(dev, &ecmd)) {
+ switch (ethtool_cmd_speed(&ecmd)) {
+ case SPEED_10000:
+ return 2;
+ case SPEED_1000:
+ return 4;
+ case SPEED_100:
+ return 19;
+ case SPEED_10:
+ return 100;
}
}
@@ -231,6 +230,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
int br_add_bridge(struct net *net, const char *name)
{
struct net_device *dev;
+ int res;
dev = alloc_netdev(sizeof(struct net_bridge), name,
br_dev_setup);
@@ -240,7 +240,10 @@ int br_add_bridge(struct net *net, const char *name)
dev_net_set(dev, net);
- return register_netdev(dev);
+ res = register_netdev(dev);
+ if (res)
+ free_netdev(dev);
+ return res;
}
int br_del_bridge(struct net *net, const char *name)
@@ -320,7 +323,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
/* Don't allow bridging non-ethernet like devices */
if ((dev->flags & IFF_LOOPBACK) ||
- dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
+ dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
+ !is_valid_ether_addr(dev->dev_addr))
return -EINVAL;
/* No bridging of bridges */
@@ -348,10 +352,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
- goto err0;
-
- err = br_fdb_insert(br, p, dev->dev_addr);
- if (err)
goto err1;
err = br_sysfs_addif(p);
@@ -392,6 +392,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev_set_mtu(br->dev, br_min_mtu(br));
+ if (br_fdb_insert(br, p, dev->dev_addr))
+ netdev_err(dev, "failed insert local address bridge forwarding table\n");
+
kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
@@ -401,11 +404,9 @@ err4:
err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
- br_fdb_delete_by_port(br, p, 1);
-err1:
kobject_put(&p->kobj);
p = NULL; /* kobject_put frees */
-err0:
+err1:
dev_set_promiscuity(dev, -1);
put_back:
dev_put(dev);
@@ -417,6 +418,7 @@ put_back:
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
+ bool changed_addr;
p = br_port_get_rtnl(dev);
if (!p || p->br != br)
@@ -425,9 +427,12 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
del_nbp(p);
spin_lock_bh(&br->lock);
- br_stp_recalculate_bridge_id(br);
+ changed_addr = br_stp_recalculate_bridge_id(br);
spin_unlock_bh(&br->lock);
+ if (changed_addr)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
netdev_update_features(br->dev);
return 0;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f06ee39..6f9f8c0 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -162,14 +162,37 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
p = br_port_get_rcu(skb->dev);
if (unlikely(is_link_local(dest))) {
- /* Pause frames shouldn't be passed up by driver anyway */
- if (skb->protocol == htons(ETH_P_PAUSE))
+ /*
+ * See IEEE 802.1D Table 7-10 Reserved addresses
+ *
+ * Assignment Value
+ * Bridge Group Address 01-80-C2-00-00-00
+ * (MAC Control) 802.3 01-80-C2-00-00-01
+ * (Link Aggregation) 802.3 01-80-C2-00-00-02
+ * 802.1X PAE address 01-80-C2-00-00-03
+ *
+ * 802.1AB LLDP 01-80-C2-00-00-0E
+ *
+ * Others reserved for future standardization
+ */
+ switch (dest[5]) {
+ case 0x00: /* Bridge Group Address */
+ /* If STP is turned off,
+ then must forward to keep loop detection */
+ if (p->br->stp_enabled == BR_NO_STP)
+ goto forward;
+ break;
+
+ case 0x01: /* IEEE MAC (Pause) */
goto drop;
- /* If STP is turned off, then forward */
- if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
- goto forward;
+ default:
+ /* Allow selective forwarding for most other protocols */
+ if (p->br->group_fwd_mask & (1u << dest[5]))
+ goto forward;
+ }
+ /* Deliver packet to local host only */
if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
NULL, br_handle_local_finish)) {
return RX_HANDLER_CONSUMED; /* consumed by filter */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2d85ca7..995cbe0 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
{
struct sk_buff *skb2;
const struct ipv6hdr *ip6h;
- struct icmp6hdr *icmp6h;
+ u8 icmp6_type;
u8 nexthdr;
unsigned len;
int offset;
@@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
__skb_pull(skb2, offset);
skb_reset_transport_header(skb2);
- icmp6h = icmp6_hdr(skb2);
+ icmp6_type = icmp6_hdr(skb2)->icmp6_type;
- switch (icmp6h->icmp6_type) {
+ switch (icmp6_type) {
case ICMPV6_MGM_QUERY:
case ICMPV6_MGM_REPORT:
case ICMPV6_MGM_REDUCTION:
@@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
err = pskb_trim_rcsum(skb2, len);
if (err)
goto out;
+ err = -EINVAL;
}
+ ip6h = ipv6_hdr(skb2);
+
switch (skb2->ip_summed) {
case CHECKSUM_COMPLETE:
- if (!csum_fold(skb2->csum))
+ if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
+ IPPROTO_ICMPV6, skb2->csum))
break;
/*FALLTHROUGH*/
case CHECKSUM_NONE:
- skb2->csum = 0;
- if (skb_checksum_complete(skb2))
+ skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+ &ip6h->daddr,
+ skb2->len,
+ IPPROTO_ICMPV6, 0));
+ if (__skb_checksum_complete(skb2))
goto out;
}
@@ -1537,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->igmp = 1;
- switch (icmp6h->icmp6_type) {
+ switch (icmp6_type) {
case ICMPV6_MGM_REPORT:
{
struct mld_msg *mld;
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 6545ee9..a76b621 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
struct net_device *dev = ptr;
struct net_bridge_port *p;
struct net_bridge *br;
+ bool changed_addr;
int err;
/* register of bridge completed, add sysfs entries */
@@ -57,8 +58,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
case NETDEV_CHANGEADDR:
spin_lock_bh(&br->lock);
br_fdb_changeaddr(p, dev->dev_addr);
- br_stp_recalculate_bridge_id(br);
+ changed_addr = br_stp_recalculate_bridge_id(br);
spin_unlock_bh(&br->lock);
+
+ if (changed_addr)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
break;
case NETDEV_CHANGE:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 78cc364..a248fe6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -29,6 +29,11 @@
#define BR_VERSION "2.3"
+/* Control of forwarding link local multicast */
+#define BR_GROUPFWD_DEFAULT 0
+/* Don't allow forwarding control protocols like STP and LLDP */
+#define BR_GROUPFWD_RESTRICTED 0x4007u
+
/* Path to usermode spanning tree program */
#define BR_STP_PROG "/sbin/bridge-stp"
@@ -193,6 +198,8 @@ struct net_bridge
unsigned long flags;
#define BR_SET_MAC_ADDR 0x00000001
+ u16 group_fwd_mask;
+
/* STP */
bridge_id designated_root;
bridge_id bridge_id;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 68b893e..c236c0e 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,6 +149,39 @@ static ssize_t store_stp_state(struct device *d,
static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state,
store_stp_state);
+static ssize_t show_group_fwd_mask(struct device *d,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%#x\n", br->group_fwd_mask);
+}
+
+
+static ssize_t store_group_fwd_mask(struct device *d,
+ struct device_attribute *attr, const char *buf,
+ size_t len)
+{
+ struct net_bridge *br = to_bridge(d);
+ char *endp;
+ unsigned long val;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ val = simple_strtoul(buf, &endp, 0);
+ if (endp == buf)
+ return -EINVAL;
+
+ if (val & BR_GROUPFWD_RESTRICTED)
+ return -EINVAL;
+
+ br->group_fwd_mask = val;
+
+ return len;
+}
+static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+ store_group_fwd_mask);
+
static ssize_t show_priority(struct device *d, struct device_attribute *attr,
char *buf)
{
@@ -652,6 +685,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_max_age.attr,
&dev_attr_ageing_time.attr,
&dev_attr_stp_state.attr,
+ &dev_attr_group_fwd_mask.attr,
&dev_attr_priority.attr,
&dev_attr_bridge_id.attr,
&dev_attr_root_id.attr,
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index ba6f73e..a9aff9c 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,7 +4,7 @@
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
- depends on BRIDGE && BRIDGE_NETFILTER
+ depends on BRIDGE && NETFILTER
select NETFILTER_XTABLES
help
ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 1bcaf36a..40d8258 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -87,14 +87,14 @@ static int __init ebtable_broute_init(void)
if (ret < 0)
return ret;
/* see br_input.c */
- rcu_assign_pointer(br_should_route_hook,
+ RCU_INIT_POINTER(br_should_route_hook,
(br_should_route_hook_t *)ebt_broute);
return 0;
}
static void __exit ebtable_broute_fini(void)
{
- rcu_assign_pointer(br_should_route_hook, NULL);
+ RCU_INIT_POINTER(br_should_route_hook, NULL);
synchronize_net();
unregister_pernet_subsys(&broute_net_ops);
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 2b5ca1a..5864cc4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1198,7 +1198,8 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
if (table->check && table->check(newinfo, table->valid_hooks)) {
BUGPRINT("The table doesn't like its own initial data, lol\n");
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto free_chainstack;
}
table->private = newinfo;
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 7c2fa0a..7f9ac07 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
caifdevs = caif_device_list(dev_net(dev));
BUG_ON(!caifdevs);
- caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC);
+ caifd = kzalloc(sizeof(*caifd), GFP_KERNEL);
if (!caifd)
return NULL;
caifd->pcpu_refcnt = alloc_percpu(int);
+ if (!caifd->pcpu_refcnt) {
+ kfree(caifd);
+ return NULL;
+ }
caifd->netdev = dev;
dev_hold(dev);
return caifd;
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 52fe33b..00523ec 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -78,10 +78,8 @@ struct cfcnfg *cfcnfg_create(void)
/* Initiate this layer */
this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
- if (!this) {
- pr_warn("Out of memory\n");
+ if (!this)
return NULL;
- }
this->mux = cfmuxl_create();
if (!this->mux)
goto out_of_mem;
@@ -108,8 +106,6 @@ struct cfcnfg *cfcnfg_create(void)
return this;
out_of_mem:
- pr_warn("Out of memory\n");
-
synchronize_rcu();
kfree(this->mux);
@@ -448,10 +444,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
"- unknown channel type\n");
goto unlock;
}
- if (!servicel) {
- pr_warn("Out of memory\n");
+ if (!servicel)
goto unlock;
- }
layer_set_dn(servicel, cnfg->mux);
cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
layer_set_up(servicel, adapt_layer);
@@ -473,7 +467,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
{
struct cflayer *frml;
struct cflayer *phy_driver = NULL;
- struct cfcnfg_phyinfo *phyinfo;
+ struct cfcnfg_phyinfo *phyinfo = NULL;
int i;
u8 phyid;
@@ -488,25 +482,25 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
goto got_phyid;
}
pr_warn("Too many CAIF Link Layers (max 6)\n");
- goto out;
+ goto out_err;
got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
+ if (!phyinfo)
+ goto out_err;
switch (phy_type) {
case CFPHYTYPE_FRAG:
phy_driver =
cfserl_create(CFPHYTYPE_FRAG, phyid, stx);
- if (!phy_driver) {
- pr_warn("Out of memory\n");
- goto out;
- }
+ if (!phy_driver)
+ goto out_err;
break;
case CFPHYTYPE_CAIF:
phy_driver = NULL;
break;
default:
- goto out;
+ goto out_err;
}
phy_layer->id = phyid;
phyinfo->pref = pref;
@@ -520,11 +514,8 @@ got_phyid:
frml = cffrml_create(phyid, fcs);
- if (!frml) {
- pr_warn("Out of memory\n");
- kfree(phyinfo);
- goto out;
- }
+ if (!frml)
+ goto out_err;
phyinfo->frm_layer = frml;
layer_set_up(frml, cnfg->mux);
@@ -540,7 +531,12 @@ got_phyid:
}
list_add_rcu(&phyinfo->node, &cnfg->phys);
-out:
+ mutex_unlock(&cnfg->lock);
+ return;
+
+out_err:
+ kfree(phy_driver);
+ kfree(phyinfo);
mutex_unlock(&cnfg->lock);
}
EXPORT_SYMBOL(cfcnfg_add_phy_layer);
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index e22671b..5cf5222 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -35,15 +35,12 @@ struct cflayer *cfctrl_create(void)
{
struct dev_info dev_info;
struct cfctrl *this =
- kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
- if (!this) {
- pr_warn("Out of memory\n");
+ kzalloc(sizeof(struct cfctrl), GFP_ATOMIC);
+ if (!this)
return NULL;
- }
caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
memset(&dev_info, 0, sizeof(dev_info));
dev_info.id = 0xff;
- memset(this, 0, sizeof(*this));
cfsrvl_init(&this->serv, 0, &dev_info, false);
atomic_set(&this->req_seq_no, 1);
atomic_set(&this->rsp_seq_no, 1);
@@ -180,10 +177,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
struct cfctrl *cfctrl = container_obj(layer);
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
struct cflayer *dn = cfctrl->serv.layer.dn;
- if (!pkt) {
- pr_warn("Out of memory\n");
+ if (!pkt)
return;
- }
if (!dn) {
pr_debug("not able to send enum request\n");
return;
@@ -224,10 +219,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
}
pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
- if (!pkt) {
- pr_warn("Out of memory\n");
+ if (!pkt)
return -ENOMEM;
- }
cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype);
cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid);
@@ -275,10 +268,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
return -EINVAL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req) {
- pr_warn("Out of memory\n");
+ if (!req)
return -ENOMEM;
- }
req->client_layer = user_layer;
req->cmd = CFCTRL_CMD_LINK_SETUP;
req->param = *param;
@@ -312,10 +303,8 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
struct cflayer *dn = cfctrl->serv.layer.dn;
- if (!pkt) {
- pr_warn("Out of memory\n");
+ if (!pkt)
return -ENOMEM;
- }
if (!dn) {
pr_debug("not able to send link-down request\n");
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 11a2af4..65d6ef3 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -19,13 +19,10 @@ static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
{
- struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
- if (!dbg) {
- pr_warn("Out of memory\n");
+ struct cfsrvl *dbg = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+ if (!dbg)
return NULL;
- }
caif_assert(offsetof(struct cfsrvl, layer) == 0);
- memset(dbg, 0, sizeof(struct cfsrvl));
cfsrvl_init(dbg, channel_id, dev_info, false);
dbg->layer.receive = cfdbgl_receive;
dbg->layer.transmit = cfdbgl_transmit;
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 0382dec..0f5ff27 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -26,13 +26,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
{
- struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
- if (!dgm) {
- pr_warn("Out of memory\n");
+ struct cfsrvl *dgm = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+ if (!dgm)
return NULL;
- }
caif_assert(offsetof(struct cfsrvl, layer) == 0);
- memset(dgm, 0, sizeof(struct cfsrvl));
cfsrvl_init(dgm, channel_id, dev_info, true);
dgm->layer.receive = cfdgml_receive;
dgm->layer.transmit = cfdgml_transmit;
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 04204b2..f399211 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -34,11 +34,9 @@ static u32 cffrml_rcv_error;
static u32 cffrml_rcv_checsum_error;
struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
{
- struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
- if (!this) {
- pr_warn("Out of memory\n");
+ struct cffrml *this = kzalloc(sizeof(struct cffrml), GFP_ATOMIC);
+ if (!this)
return NULL;
- }
this->pcpu_refcnt = alloc_percpu(int);
if (this->pcpu_refcnt == NULL) {
kfree(this);
@@ -47,7 +45,6 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
caif_assert(offsetof(struct cffrml, layer) == 0);
- memset(this, 0, sizeof(struct cflayer));
this->layer.receive = cffrml_receive;
this->layer.transmit = cffrml_transmit;
this->layer.ctrlcmd = cffrml_ctrlcmd;
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index c23979e..b36f24a 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -108,7 +108,7 @@ struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
int idx = phyid % DN_CACHE_SIZE;
spin_lock_bh(&muxl->transmit_lock);
- rcu_assign_pointer(muxl->dn_cache[idx], NULL);
+ RCU_INIT_POINTER(muxl->dn_cache[idx], NULL);
dn = get_from_id(&muxl->frml_list, phyid);
if (dn == NULL)
goto out;
@@ -164,7 +164,7 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
if (up == NULL)
goto out;
- rcu_assign_pointer(muxl->up_cache[idx], NULL);
+ RCU_INIT_POINTER(muxl->up_cache[idx], NULL);
list_del_rcu(&up->node);
out:
spin_unlock_bh(&muxl->receive_lock);
@@ -261,7 +261,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
idx = layer->id % UP_CACHE_SIZE;
spin_lock_bh(&muxl->receive_lock);
- rcu_assign_pointer(muxl->up_cache[idx], NULL);
+ RCU_INIT_POINTER(muxl->up_cache[idx], NULL);
list_del_rcu(&layer->node);
spin_unlock_bh(&muxl->receive_lock);
}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 0deabb4..81660f8 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -46,13 +46,10 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
int mtu_size)
{
int tmp;
- struct cfrfml *this =
- kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
+ struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
- if (!this) {
- pr_warn("Out of memory\n");
+ if (!this)
return NULL;
- }
cfsrvl_init(&this->serv, channel_id, dev_info, false);
this->serv.release = cfrfml_release;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 2715c84..797c8d1 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -33,13 +33,10 @@ static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
struct cflayer *cfserl_create(int type, int instance, bool use_stx)
{
- struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
- if (!this) {
- pr_warn("Out of memory\n");
+ struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
+ if (!this)
return NULL;
- }
caif_assert(offsetof(struct cfserl, layer) == 0);
- memset(this, 0, sizeof(struct cfserl));
this->layer.receive = cfserl_receive;
this->layer.transmit = cfserl_transmit;
this->layer.ctrlcmd = cfserl_ctrlcmd;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 535a1e7..b99f5b2 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -108,10 +108,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
struct caif_payload_info *info;
u8 flow_on = SRVL_FLOW_ON;
pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
- if (!pkt) {
- pr_warn("Out of memory\n");
+ if (!pkt)
return -ENOMEM;
- }
if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
pr_err("Packet is erroneous!\n");
@@ -130,10 +128,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
struct caif_payload_info *info;
u8 flow_off = SRVL_FLOW_OFF;
pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
- if (!pkt) {
- pr_warn("Out of memory\n");
+ if (!pkt)
return -ENOMEM;
- }
if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
pr_err("Packet is erroneous!\n");
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 98e027d..53e49f3 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -26,13 +26,10 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
{
- struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
- if (!util) {
- pr_warn("Out of memory\n");
+ struct cfsrvl *util = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+ if (!util)
return NULL;
- }
caif_assert(offsetof(struct cfsrvl, layer) == 0);
- memset(util, 0, sizeof(struct cfsrvl));
cfsrvl_init(util, channel_id, dev_info, true);
util->layer.receive = cfutill_receive;
util->layer.transmit = cfutill_transmit;
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3ec83fb..910ab06 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -25,13 +25,10 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt);
struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
{
- struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
- if (!vei) {
- pr_warn("Out of memory\n");
+ struct cfsrvl *vei = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+ if (!vei)
return NULL;
- }
caif_assert(offsetof(struct cfsrvl, layer) == 0);
- memset(vei, 0, sizeof(struct cfsrvl));
cfsrvl_init(vei, channel_id, dev_info, true);
vei->layer.receive = cfvei_receive;
vei->layer.transmit = cfvei_transmit;
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index b2f5989..e3f37db 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -21,14 +21,11 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt);
struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
{
- struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
- if (!vid) {
- pr_warn("Out of memory\n");
+ struct cfsrvl *vid = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+ if (!vid)
return NULL;
- }
caif_assert(offsetof(struct cfsrvl, layer) == 0);
- memset(vid, 0, sizeof(struct cfsrvl));
cfsrvl_init(vid, channel_id, dev_info, false);
vid->layer.receive = cfvidl_receive;
vid->layer.transmit = cfvidl_transmit;
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 89395b2..0320069 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -40,5 +40,16 @@ config CAN_BCM
CAN messages are used on the bus (e.g. in automotive environments).
To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM.
+config CAN_GW
+ tristate "CAN Gateway/Router (with netlink configuration)"
+ depends on CAN
+ default N
+ ---help---
+ The CAN Gateway/Router is used to route (and modify) CAN frames.
+ It is based on the PF_CAN core infrastructure for msg filtering and
+ msg sending and can optionally modify routed CAN frames on the fly.
+ CAN frames can be routed between CAN network interfaces (one hop).
+ They can be modified with AND/OR/XOR/SET operations as configured
+ by the netlink configuration interface known e.g. from iptables.
source "drivers/net/can/Kconfig"
diff --git a/net/can/Makefile b/net/can/Makefile
index 2d3894b3..cef49eb 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -10,3 +10,6 @@ can-raw-y := raw.o
obj-$(CONFIG_CAN_BCM) += can-bcm.o
can-bcm-y := bcm.o
+
+obj-$(CONFIG_CAN_GW) += can-gw.o
+can-gw-y := gw.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 8ce926d..d1ff515 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -719,7 +719,7 @@ int can_proto_register(const struct can_proto *cp)
proto);
err = -EBUSY;
} else
- rcu_assign_pointer(proto_tab[proto], cp);
+ RCU_INIT_POINTER(proto_tab[proto], cp);
mutex_unlock(&proto_tab_lock);
@@ -740,7 +740,7 @@ void can_proto_unregister(const struct can_proto *cp)
mutex_lock(&proto_tab_lock);
BUG_ON(proto_tab[proto] != cp);
- rcu_assign_pointer(proto_tab[proto], NULL);
+ RCU_INIT_POINTER(proto_tab[proto], NULL);
mutex_unlock(&proto_tab_lock);
synchronize_rcu();
@@ -857,7 +857,7 @@ static __exit void can_exit(void)
struct net_device *dev;
if (stats_timer)
- del_timer(&can_stattimer);
+ del_timer_sync(&can_stattimer);
can_remove_proc();
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d6c8ae5..c84963d 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
}
}
+static void bcm_tx_start_timer(struct bcm_op *op)
+{
+ if (op->kt_ival1.tv64 && op->count)
+ hrtimer_start(&op->timer,
+ ktime_add(ktime_get(), op->kt_ival1),
+ HRTIMER_MODE_ABS);
+ else if (op->kt_ival2.tv64)
+ hrtimer_start(&op->timer,
+ ktime_add(ktime_get(), op->kt_ival2),
+ HRTIMER_MODE_ABS);
+}
+
static void bcm_tx_timeout_tsklet(unsigned long data)
{
struct bcm_op *op = (struct bcm_op *)data;
@@ -365,26 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
bcm_send_to_user(op, &msg_head, NULL, 0);
}
- }
-
- if (op->kt_ival1.tv64 && (op->count > 0)) {
-
- /* send (next) frame */
bcm_can_tx(op);
- hrtimer_start(&op->timer,
- ktime_add(ktime_get(), op->kt_ival1),
- HRTIMER_MODE_ABS);
- } else {
- if (op->kt_ival2.tv64) {
+ } else if (op->kt_ival2.tv64)
+ bcm_can_tx(op);
- /* send (next) frame */
- bcm_can_tx(op);
- hrtimer_start(&op->timer,
- ktime_add(ktime_get(), op->kt_ival2),
- HRTIMER_MODE_ABS);
- }
- }
+ bcm_tx_start_timer(op);
}
/*
@@ -964,23 +962,20 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
hrtimer_cancel(&op->timer);
}
- if ((op->flags & STARTTIMER) &&
- ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) {
-
+ if (op->flags & STARTTIMER) {
+ hrtimer_cancel(&op->timer);
/* spec: send can_frame when starting timer */
op->flags |= TX_ANNOUNCE;
-
- if (op->kt_ival1.tv64 && (op->count > 0)) {
- /* op->count-- is done in bcm_tx_timeout_handler */
- hrtimer_start(&op->timer, op->kt_ival1,
- HRTIMER_MODE_REL);
- } else
- hrtimer_start(&op->timer, op->kt_ival2,
- HRTIMER_MODE_REL);
}
- if (op->flags & TX_ANNOUNCE)
+ if (op->flags & TX_ANNOUNCE) {
bcm_can_tx(op);
+ if (op->count)
+ op->count--;
+ }
+
+ if (op->flags & STARTTIMER)
+ bcm_tx_start_timer(op);
return msg_head->nframes * CFSIZ + MHSIZ;
}
diff --git a/net/can/gw.c b/net/can/gw.c
new file mode 100644
index 0000000..ac11407
--- /dev/null
+++ b/net/can/gw.c
@@ -0,0 +1,959 @@
+/*
+ * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
+ *
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Send feedback to <socketcan-users@lists.berlios.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <linux/can/gw.h>
+#include <net/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#define CAN_GW_VERSION "20101209"
+static __initdata const char banner[] =
+ KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n";
+
+MODULE_DESCRIPTION("PF_CAN netlink gateway");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
+MODULE_ALIAS("can-gw");
+
+HLIST_HEAD(cgw_list);
+static struct notifier_block notifier;
+
+static struct kmem_cache *cgw_cache __read_mostly;
+
+/* structure that contains the (on-the-fly) CAN frame modifications */
+struct cf_mod {
+ struct {
+ struct can_frame and;
+ struct can_frame or;
+ struct can_frame xor;
+ struct can_frame set;
+ } modframe;
+ struct {
+ u8 and;
+ u8 or;
+ u8 xor;
+ u8 set;
+ } modtype;
+ void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf,
+ struct cf_mod *mod);
+
+ /* CAN frame checksum calculation after CAN frame modifications */
+ struct {
+ struct cgw_csum_xor xor;
+ struct cgw_csum_crc8 crc8;
+ } csum;
+ struct {
+ void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
+ void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
+ } csumfunc;
+};
+
+
+/*
+ * So far we just support CAN -> CAN routing and frame modifications.
+ *
+ * The internal can_can_gw structure contains data and attributes for
+ * a CAN -> CAN gateway job.
+ */
+struct can_can_gw {
+ struct can_filter filter;
+ int src_idx;
+ int dst_idx;
+};
+
+/* list entry for CAN gateways jobs */
+struct cgw_job {
+ struct hlist_node list;
+ struct rcu_head rcu;
+ u32 handled_frames;
+ u32 dropped_frames;
+ struct cf_mod mod;
+ union {
+ /* CAN frame data source */
+ struct net_device *dev;
+ } src;
+ union {
+ /* CAN frame data destination */
+ struct net_device *dev;
+ } dst;
+ union {
+ struct can_can_gw ccgw;
+ /* tbc */
+ };
+ u8 gwtype;
+ u16 flags;
+};
+
+/* modification functions that are invoked in the hot path in can_can_gw_rcv */
+
+#define MODFUNC(func, op) static void func(struct can_frame *cf, \
+ struct cf_mod *mod) { op ; }
+
+MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
+MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc)
+MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
+MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
+MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc)
+MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
+MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
+MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc)
+MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
+MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
+MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc)
+MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)
+
+static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
+{
+ /*
+ * Copy the struct members separately to ensure that no uninitialized
+ * data are copied in the 3 bytes hole of the struct. This is needed
+ * to make easy compares of the data in the struct cf_mod.
+ */
+
+ dst->can_id = src->can_id;
+ dst->can_dlc = src->can_dlc;
+ *(u64 *)dst->data = *(u64 *)src->data;
+}
+
+static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
+{
+ /*
+ * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
+ * relative to received dlc -1 .. -8 :
+ * e.g. for received dlc = 8
+ * -1 => index = 7 (data[7])
+ * -3 => index = 5 (data[5])
+ * -8 => index = 0 (data[0])
+ */
+
+ if (fr > -9 && fr < 8 &&
+ to > -9 && to < 8 &&
+ re > -9 && re < 8)
+ return 0;
+ else
+ return -EINVAL;
+}
+
+static inline int calc_idx(int idx, int rx_dlc)
+{
+ if (idx < 0)
+ return rx_dlc + idx;
+ else
+ return idx;
+}
+
+static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
+{
+ int from = calc_idx(xor->from_idx, cf->can_dlc);
+ int to = calc_idx(xor->to_idx, cf->can_dlc);
+ int res = calc_idx(xor->result_idx, cf->can_dlc);
+ u8 val = xor->init_xor_val;
+ int i;
+
+ if (from < 0 || to < 0 || res < 0)
+ return;
+
+ if (from <= to) {
+ for (i = from; i <= to; i++)
+ val ^= cf->data[i];
+ } else {
+ for (i = from; i >= to; i--)
+ val ^= cf->data[i];
+ }
+
+ cf->data[res] = val;
+}
+
+static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
+{
+ u8 val = xor->init_xor_val;
+ int i;
+
+ for (i = xor->from_idx; i <= xor->to_idx; i++)
+ val ^= cf->data[i];
+
+ cf->data[xor->result_idx] = val;
+}
+
+static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
+{
+ u8 val = xor->init_xor_val;
+ int i;
+
+ for (i = xor->from_idx; i >= xor->to_idx; i--)
+ val ^= cf->data[i];
+
+ cf->data[xor->result_idx] = val;
+}
+
+static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+{
+ int from = calc_idx(crc8->from_idx, cf->can_dlc);
+ int to = calc_idx(crc8->to_idx, cf->can_dlc);
+ int res = calc_idx(crc8->result_idx, cf->can_dlc);
+ u8 crc = crc8->init_crc_val;
+ int i;
+
+ if (from < 0 || to < 0 || res < 0)
+ return;
+
+ if (from <= to) {
+ for (i = crc8->from_idx; i <= crc8->to_idx; i++)
+ crc = crc8->crctab[crc^cf->data[i]];
+ } else {
+ for (i = crc8->from_idx; i >= crc8->to_idx; i--)
+ crc = crc8->crctab[crc^cf->data[i]];
+ }
+
+ switch (crc8->profile) {
+
+ case CGW_CRC8PRF_1U8:
+ crc = crc8->crctab[crc^crc8->profile_data[0]];
+ break;
+
+ case CGW_CRC8PRF_16U8:
+ crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+ break;
+
+ case CGW_CRC8PRF_SFFID_XOR:
+ crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+ (cf->can_id >> 8 & 0xFF)];
+ break;
+
+ }
+
+ cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+}
+
+static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+{
+ u8 crc = crc8->init_crc_val;
+ int i;
+
+ for (i = crc8->from_idx; i <= crc8->to_idx; i++)
+ crc = crc8->crctab[crc^cf->data[i]];
+
+ switch (crc8->profile) {
+
+ case CGW_CRC8PRF_1U8:
+ crc = crc8->crctab[crc^crc8->profile_data[0]];
+ break;
+
+ case CGW_CRC8PRF_16U8:
+ crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+ break;
+
+ case CGW_CRC8PRF_SFFID_XOR:
+ crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+ (cf->can_id >> 8 & 0xFF)];
+ break;
+ }
+
+ cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+}
+
+static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+{
+ u8 crc = crc8->init_crc_val;
+ int i;
+
+ for (i = crc8->from_idx; i >= crc8->to_idx; i--)
+ crc = crc8->crctab[crc^cf->data[i]];
+
+ switch (crc8->profile) {
+
+ case CGW_CRC8PRF_1U8:
+ crc = crc8->crctab[crc^crc8->profile_data[0]];
+ break;
+
+ case CGW_CRC8PRF_16U8:
+ crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
+ break;
+
+ case CGW_CRC8PRF_SFFID_XOR:
+ crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
+ (cf->can_id >> 8 & 0xFF)];
+ break;
+ }
+
+ cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
+}
+
+/* the receive & process & send function */
+static void can_can_gw_rcv(struct sk_buff *skb, void *data)
+{
+ struct cgw_job *gwj = (struct cgw_job *)data;
+ struct can_frame *cf;
+ struct sk_buff *nskb;
+ int modidx = 0;
+
+ /* do not handle already routed frames - see comment below */
+ if (skb_mac_header_was_set(skb))
+ return;
+
+ if (!(gwj->dst.dev->flags & IFF_UP)) {
+ gwj->dropped_frames++;
+ return;
+ }
+
+ /*
+ * clone the given skb, which has not been done in can_rcv()
+ *
+ * When there is at least one modification function activated,
+ * we need to copy the skb as we want to modify skb->data.
+ */
+ if (gwj->mod.modfunc[0])
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ else
+ nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (!nskb) {
+ gwj->dropped_frames++;
+ return;
+ }
+
+ /*
+ * Mark routed frames by setting some mac header length which is
+ * not relevant for the CAN frames located in the skb->data section.
+ *
+ * As dev->header_ops is not set in CAN netdevices no one is ever
+ * accessing the various header offsets in the CAN skbuffs anyway.
+ * E.g. using the packet socket to read CAN frames is still working.
+ */
+ skb_set_mac_header(nskb, 8);
+ nskb->dev = gwj->dst.dev;
+
+ /* pointer to modifiable CAN frame */
+ cf = (struct can_frame *)nskb->data;
+
+ /* perform preprocessed modification functions if there are any */
+ while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
+ (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);
+
+ /* check for checksum updates when the CAN frame has been modified */
+ if (modidx) {
+ if (gwj->mod.csumfunc.crc8)
+ (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+
+ if (gwj->mod.csumfunc.xor)
+ (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
+ }
+
+ /* clear the skb timestamp if not configured the other way */
+ if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
+ nskb->tstamp.tv64 = 0;
+
+ /* send to netdevice */
+ if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO))
+ gwj->dropped_frames++;
+ else
+ gwj->handled_frames++;
+}
+
+static inline int cgw_register_filter(struct cgw_job *gwj)
+{
+ return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
+ gwj->ccgw.filter.can_mask, can_can_gw_rcv,
+ gwj, "gw");
+}
+
+static inline void cgw_unregister_filter(struct cgw_job *gwj)
+{
+ can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id,
+ gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
+}
+
+static int cgw_notifier(struct notifier_block *nb,
+ unsigned long msg, void *data)
+{
+ struct net_device *dev = (struct net_device *)data;
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+
+ if (msg == NETDEV_UNREGISTER) {
+
+ struct cgw_job *gwj = NULL;
+ struct hlist_node *n, *nx;
+
+ ASSERT_RTNL();
+
+ hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
+
+ if (gwj->src.dev == dev || gwj->dst.dev == dev) {
+ hlist_del(&gwj->list);
+ cgw_unregister_filter(gwj);
+ kfree(gwj);
+ }
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj)
+{
+ struct cgw_frame_mod mb;
+ struct rtcanmsg *rtcan;
+ struct nlmsghdr *nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*rtcan), 0);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rtcan = nlmsg_data(nlh);
+ rtcan->can_family = AF_CAN;
+ rtcan->gwtype = gwj->gwtype;
+ rtcan->flags = gwj->flags;
+
+ /* add statistics if available */
+
+ if (gwj->handled_frames) {
+ if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
+ }
+
+ if (gwj->dropped_frames) {
+ if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
+ }
+
+ /* check non default settings of attributes */
+
+ if (gwj->mod.modtype.and) {
+ memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.and;
+ if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
+ }
+
+ if (gwj->mod.modtype.or) {
+ memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.or;
+ if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
+ }
+
+ if (gwj->mod.modtype.xor) {
+ memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.xor;
+ if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
+ }
+
+ if (gwj->mod.modtype.set) {
+ memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+ mb.modtype = gwj->mod.modtype.set;
+ if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
+ }
+
+ if (gwj->mod.csumfunc.crc8) {
+ if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
+ &gwj->mod.csum.crc8) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + \
+ NLA_ALIGN(CGW_CS_CRC8_LEN);
+ }
+
+ if (gwj->mod.csumfunc.xor) {
+ if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN,
+ &gwj->mod.csum.xor) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + \
+ NLA_ALIGN(CGW_CS_XOR_LEN);
+ }
+
+ if (gwj->gwtype == CGW_TYPE_CAN_CAN) {
+
+ if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) {
+ if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter),
+ &gwj->ccgw.filter) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN +
+ NLA_ALIGN(sizeof(struct can_filter));
+ }
+
+ if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
+
+ if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0)
+ goto cancel;
+ else
+ nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
+ }
+
+ return skb->len;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */
+static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct cgw_job *gwj = NULL;
+ struct hlist_node *n;
+ int idx = 0;
+ int s_idx = cb->args[0];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) {
+ if (idx < s_idx)
+ goto cont;
+
+ if (cgw_put_job(skb, gwj) < 0)
+ break;
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+/* check for common and gwtype specific attributes */
+static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
+ u8 gwtype, void *gwtypeattr)
+{
+ struct nlattr *tb[CGW_MAX+1];
+ struct cgw_frame_mod mb;
+ int modidx = 0;
+ int err = 0;
+
+ /* initialize modification & checksum data space */
+ memset(mod, 0, sizeof(*mod));
+
+ err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, NULL);
+ if (err < 0)
+ return err;
+
+ /* check for AND/OR/XOR/SET modifications */
+
+ if (tb[CGW_MOD_AND] &&
+ nla_len(tb[CGW_MOD_AND]) == CGW_MODATTR_LEN) {
+ nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.and, &mb.cf);
+ mod->modtype.and = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_and_id;
+
+ if (mb.modtype & CGW_MOD_DLC)
+ mod->modfunc[modidx++] = mod_and_dlc;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_and_data;
+ }
+
+ if (tb[CGW_MOD_OR] &&
+ nla_len(tb[CGW_MOD_OR]) == CGW_MODATTR_LEN) {
+ nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.or, &mb.cf);
+ mod->modtype.or = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_or_id;
+
+ if (mb.modtype & CGW_MOD_DLC)
+ mod->modfunc[modidx++] = mod_or_dlc;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_or_data;
+ }
+
+ if (tb[CGW_MOD_XOR] &&
+ nla_len(tb[CGW_MOD_XOR]) == CGW_MODATTR_LEN) {
+ nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.xor, &mb.cf);
+ mod->modtype.xor = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_xor_id;
+
+ if (mb.modtype & CGW_MOD_DLC)
+ mod->modfunc[modidx++] = mod_xor_dlc;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_xor_data;
+ }
+
+ if (tb[CGW_MOD_SET] &&
+ nla_len(tb[CGW_MOD_SET]) == CGW_MODATTR_LEN) {
+ nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
+
+ canframecpy(&mod->modframe.set, &mb.cf);
+ mod->modtype.set = mb.modtype;
+
+ if (mb.modtype & CGW_MOD_ID)
+ mod->modfunc[modidx++] = mod_set_id;
+
+ if (mb.modtype & CGW_MOD_DLC)
+ mod->modfunc[modidx++] = mod_set_dlc;
+
+ if (mb.modtype & CGW_MOD_DATA)
+ mod->modfunc[modidx++] = mod_set_data;
+ }
+
+ /* check for checksum operations after CAN frame modifications */
+ if (modidx) {
+
+ if (tb[CGW_CS_CRC8] &&
+ nla_len(tb[CGW_CS_CRC8]) == CGW_CS_CRC8_LEN) {
+
+ struct cgw_csum_crc8 *c = (struct cgw_csum_crc8 *)\
+ nla_data(tb[CGW_CS_CRC8]);
+
+ err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
+ c->result_idx);
+ if (err)
+ return err;
+
+ nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8],
+ CGW_CS_CRC8_LEN);
+
+ /*
+ * select dedicated processing function to reduce
+ * runtime operations in receive hot path.
+ */
+ if (c->from_idx < 0 || c->to_idx < 0 ||
+ c->result_idx < 0)
+ mod->csumfunc.crc8 = cgw_csum_crc8_rel;
+ else if (c->from_idx <= c->to_idx)
+ mod->csumfunc.crc8 = cgw_csum_crc8_pos;
+ else
+ mod->csumfunc.crc8 = cgw_csum_crc8_neg;
+ }
+
+ if (tb[CGW_CS_XOR] &&
+ nla_len(tb[CGW_CS_XOR]) == CGW_CS_XOR_LEN) {
+
+ struct cgw_csum_xor *c = (struct cgw_csum_xor *)\
+ nla_data(tb[CGW_CS_XOR]);
+
+ err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
+ c->result_idx);
+ if (err)
+ return err;
+
+ nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR],
+ CGW_CS_XOR_LEN);
+
+ /*
+ * select dedicated processing function to reduce
+ * runtime operations in receive hot path.
+ */
+ if (c->from_idx < 0 || c->to_idx < 0 ||
+ c->result_idx < 0)
+ mod->csumfunc.xor = cgw_csum_xor_rel;
+ else if (c->from_idx <= c->to_idx)
+ mod->csumfunc.xor = cgw_csum_xor_pos;
+ else
+ mod->csumfunc.xor = cgw_csum_xor_neg;
+ }
+ }
+
+ if (gwtype == CGW_TYPE_CAN_CAN) {
+
+ /* check CGW_TYPE_CAN_CAN specific attributes */
+
+ struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr;
+ memset(ccgw, 0, sizeof(*ccgw));
+
+ /* check for can_filter in attributes */
+ if (tb[CGW_FILTER] &&
+ nla_len(tb[CGW_FILTER]) == sizeof(struct can_filter))
+ nla_memcpy(&ccgw->filter, tb[CGW_FILTER],
+ sizeof(struct can_filter));
+
+ err = -ENODEV;
+
+ /* specifying two interfaces is mandatory */
+ if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF])
+ return err;
+
+ if (nla_len(tb[CGW_SRC_IF]) == sizeof(u32))
+ nla_memcpy(&ccgw->src_idx, tb[CGW_SRC_IF],
+ sizeof(u32));
+
+ if (nla_len(tb[CGW_DST_IF]) == sizeof(u32))
+ nla_memcpy(&ccgw->dst_idx, tb[CGW_DST_IF],
+ sizeof(u32));
+
+ /* both indices set to 0 for flushing all routing entries */
+ if (!ccgw->src_idx && !ccgw->dst_idx)
+ return 0;
+
+ /* only one index set to 0 is an error */
+ if (!ccgw->src_idx || !ccgw->dst_idx)
+ return err;
+ }
+
+ /* add the checks for other gwtypes here */
+
+ return 0;
+}
+
+static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
+ void *arg)
+{
+ struct rtcanmsg *r;
+ struct cgw_job *gwj;
+ int err = 0;
+
+ if (nlmsg_len(nlh) < sizeof(*r))
+ return -EINVAL;
+
+ r = nlmsg_data(nlh);
+ if (r->can_family != AF_CAN)
+ return -EPFNOSUPPORT;
+
+ /* so far we only support CAN -> CAN routings */
+ if (r->gwtype != CGW_TYPE_CAN_CAN)
+ return -EINVAL;
+
+ gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
+ if (!gwj)
+ return -ENOMEM;
+
+ gwj->handled_frames = 0;
+ gwj->dropped_frames = 0;
+ gwj->flags = r->flags;
+ gwj->gwtype = r->gwtype;
+
+ err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw);
+ if (err < 0)
+ goto out;
+
+ err = -ENODEV;
+
+ /* ifindex == 0 is not allowed for job creation */
+ if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx)
+ goto out;
+
+ gwj->src.dev = dev_get_by_index(&init_net, gwj->ccgw.src_idx);
+
+ if (!gwj->src.dev)
+ goto out;
+
+ /* check for CAN netdev not using header_ops - see gw_rcv() */
+ if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops)
+ goto put_src_out;
+
+ gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx);
+
+ if (!gwj->dst.dev)
+ goto put_src_out;
+
+ /* check for CAN netdev not using header_ops - see gw_rcv() */
+ if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops)
+ goto put_src_dst_out;
+
+ ASSERT_RTNL();
+
+ err = cgw_register_filter(gwj);
+ if (!err)
+ hlist_add_head_rcu(&gwj->list, &cgw_list);
+
+put_src_dst_out:
+ dev_put(gwj->dst.dev);
+put_src_out:
+ dev_put(gwj->src.dev);
+out:
+ if (err)
+ kmem_cache_free(cgw_cache, gwj);
+
+ return err;
+}
+
+static void cgw_remove_all_jobs(void)
+{
+ struct cgw_job *gwj = NULL;
+ struct hlist_node *n, *nx;
+
+ ASSERT_RTNL();
+
+ hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
+ hlist_del(&gwj->list);
+ cgw_unregister_filter(gwj);
+ kfree(gwj);
+ }
+}
+
+static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+ struct cgw_job *gwj = NULL;
+ struct hlist_node *n, *nx;
+ struct rtcanmsg *r;
+ struct cf_mod mod;
+ struct can_can_gw ccgw;
+ int err = 0;
+
+ if (nlmsg_len(nlh) < sizeof(*r))
+ return -EINVAL;
+
+ r = nlmsg_data(nlh);
+ if (r->can_family != AF_CAN)
+ return -EPFNOSUPPORT;
+
+ /* so far we only support CAN -> CAN routings */
+ if (r->gwtype != CGW_TYPE_CAN_CAN)
+ return -EINVAL;
+
+ err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw);
+ if (err < 0)
+ return err;
+
+ /* two interface indices both set to 0 => remove all entries */
+ if (!ccgw.src_idx && !ccgw.dst_idx) {
+ cgw_remove_all_jobs();
+ return 0;
+ }
+
+ err = -EINVAL;
+
+ ASSERT_RTNL();
+
+ /* remove only the first matching entry */
+ hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
+
+ if (gwj->flags != r->flags)
+ continue;
+
+ if (memcmp(&gwj->mod, &mod, sizeof(mod)))
+ continue;
+
+ /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */
+ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
+ continue;
+
+ hlist_del(&gwj->list);
+ cgw_unregister_filter(gwj);
+ kfree(gwj);
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
+static __init int cgw_module_init(void)
+{
+ printk(banner);
+
+ cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
+ 0, 0, NULL);
+
+ if (!cgw_cache)
+ return -ENOMEM;
+
+ /* set notifier */
+ notifier.notifier_call = cgw_notifier;
+ register_netdevice_notifier(&notifier);
+
+ if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
+ unregister_netdevice_notifier(&notifier);
+ kmem_cache_destroy(cgw_cache);
+ return -ENOBUFS;
+ }
+
+ /* Only the first call to __rtnl_register can fail */
+ __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
+ __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);
+
+ return 0;
+}
+
+static __exit void cgw_module_exit(void)
+{
+ rtnl_unregister_all(PF_CAN);
+
+ unregister_netdevice_notifier(&notifier);
+
+ rtnl_lock();
+ cgw_remove_all_jobs();
+ rtnl_unlock();
+
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
+
+ kmem_cache_destroy(cgw_cache);
+}
+
+module_init(cgw_module_init);
+module_exit(cgw_module_exit);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 132963a..2883ea0 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -232,6 +232,7 @@ void ceph_destroy_options(struct ceph_options *opt)
ceph_crypto_key_destroy(opt->key);
kfree(opt->key);
}
+ kfree(opt->mon_addr);
kfree(opt);
}
EXPORT_SYMBOL(ceph_destroy_options);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c340e2e..9918e9e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2307,6 +2307,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
m->front_max = front_len;
m->front_is_vmalloc = false;
m->more_to_follow = false;
+ m->ack_stamp = 0;
m->pool = NULL;
/* middle */
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index d5f2d97..1f4cb30 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -7,27 +7,37 @@
#include <linux/ceph/msgpool.h>
-static void *alloc_fn(gfp_t gfp_mask, void *arg)
+static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
{
struct ceph_msgpool *pool = arg;
- void *p;
+ struct ceph_msg *msg;
- p = ceph_msg_new(0, pool->front_len, gfp_mask);
- if (!p)
- pr_err("msgpool %s alloc failed\n", pool->name);
- return p;
+ msg = ceph_msg_new(0, pool->front_len, gfp_mask);
+ if (!msg) {
+ dout("msgpool_alloc %s failed\n", pool->name);
+ } else {
+ dout("msgpool_alloc %s %p\n", pool->name, msg);
+ msg->pool = pool;
+ }
+ return msg;
}
-static void free_fn(void *element, void *arg)
+static void msgpool_free(void *element, void *arg)
{
- ceph_msg_put(element);
+ struct ceph_msgpool *pool = arg;
+ struct ceph_msg *msg = element;
+
+ dout("msgpool_release %s %p\n", pool->name, msg);
+ msg->pool = NULL;
+ ceph_msg_put(msg);
}
int ceph_msgpool_init(struct ceph_msgpool *pool,
int front_len, int size, bool blocking, const char *name)
{
+ dout("msgpool %s init\n", name);
pool->front_len = front_len;
- pool->pool = mempool_create(size, alloc_fn, free_fn, pool);
+ pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
if (!pool->pool)
return -ENOMEM;
pool->name = name;
@@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool,
void ceph_msgpool_destroy(struct ceph_msgpool *pool)
{
+ dout("msgpool %s destroy\n", pool->name);
mempool_destroy(pool->pool);
}
struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
int front_len)
{
+ struct ceph_msg *msg;
+
if (front_len > pool->front_len) {
- pr_err("msgpool_get pool %s need front %d, pool size is %d\n",
+ dout("msgpool_get %s need front %d, pool size is %d\n",
pool->name, front_len, pool->front_len);
WARN_ON(1);
@@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
return ceph_msg_new(0, front_len, GFP_NOFS);
}
- return mempool_alloc(pool->pool, GFP_NOFS);
+ msg = mempool_alloc(pool->pool, GFP_NOFS);
+ dout("msgpool_get %s %p\n", pool->name, msg);
+ return msg;
}
void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
{
+ dout("msgpool_put %s %p\n", pool->name, msg);
+
/* reset msg front_len; user may have changed it */
msg->front.iov_len = pool->front_len;
msg->hdr.front_len = cpu_to_le32(pool->front_len);
kref_init(&msg->kref); /* retake single ref */
+ mempool_free(msg, pool->pool);
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ce310ee..88ad8a2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd);
+ INIT_LIST_HEAD(&req->r_req_lru_item);
req->r_flags = flags;
WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -685,6 +686,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
put_osd(osd);
}
+static void remove_all_osds(struct ceph_osd_client *osdc)
+{
+ dout("__remove_old_osds %p\n", osdc);
+ mutex_lock(&osdc->request_mutex);
+ while (!RB_EMPTY_ROOT(&osdc->osds)) {
+ struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
+ struct ceph_osd, o_node);
+ __remove_osd(osdc, osd);
+ }
+ mutex_unlock(&osdc->request_mutex);
+}
+
static void __move_osd_to_lru(struct ceph_osd_client *osdc,
struct ceph_osd *osd)
{
@@ -701,14 +714,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd)
list_del_init(&osd->o_osd_lru);
}
-static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
+static void remove_old_osds(struct ceph_osd_client *osdc)
{
struct ceph_osd *osd, *nosd;
dout("__remove_old_osds %p\n", osdc);
mutex_lock(&osdc->request_mutex);
list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
- if (!remove_all && time_before(jiffies, osd->lru_ttl))
+ if (time_before(jiffies, osd->lru_ttl))
break;
__remove_osd(osdc, osd);
}
@@ -751,6 +764,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
struct rb_node *parent = NULL;
struct ceph_osd *osd = NULL;
+ dout("__insert_osd %p osd%d\n", new, new->o_osd);
while (*p) {
parent = *p;
osd = rb_entry(parent, struct ceph_osd, o_node);
@@ -803,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc,
{
req->r_tid = ++osdc->last_tid;
req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
- INIT_LIST_HEAD(&req->r_req_lru_item);
-
dout("__register_request %p tid %lld\n", req, req->r_tid);
__insert_request(osdc, req);
ceph_osdc_get_request(req);
osdc->num_requests++;
-
if (osdc->num_requests == 1) {
dout(" first request, scheduling timeout\n");
__schedule_osd_timeout(osdc);
@@ -1144,7 +1155,7 @@ static void handle_osds_timeout(struct work_struct *work)
dout("osds timeout\n");
down_read(&osdc->map_sem);
- remove_old_osds(osdc, 0);
+ remove_old_osds(osdc);
up_read(&osdc->map_sem);
schedule_delayed_work(&osdc->osds_timeout_work,
@@ -1862,8 +1873,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
ceph_osdmap_destroy(osdc->osdmap);
osdc->osdmap = NULL;
}
- remove_old_osds(osdc, 1);
- WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
+ remove_all_osds(osdc);
mempool_destroy(osdc->req_mempool);
ceph_msgpool_destroy(&osdc->msgpool_op);
ceph_msgpool_destroy(&osdc->msgpool_op_reply);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index e97c358..fd863fe7 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
struct ceph_pg_mapping *pg = NULL;
int c;
+ dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
while (*p) {
parent = *p;
pg = rb_entry(parent, struct ceph_pg_mapping, node);
@@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
while (n) {
pg = rb_entry(n, struct ceph_pg_mapping, node);
c = pgid_cmp(pgid, pg->pgid);
- if (c < 0)
+ if (c < 0) {
n = n->rb_left;
- else if (c > 0)
+ } else if (c > 0) {
n = n->rb_right;
- else
+ } else {
+ dout("__lookup_pg_mapping %llx got %p\n",
+ *(u64 *)&pgid, pg);
return pg;
+ }
}
return NULL;
}
+static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
+{
+ struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
+
+ if (pg) {
+ dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
+ rb_erase(&pg->node, root);
+ kfree(pg);
+ return 0;
+ }
+ dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
+ return -ENOENT;
+}
+
/*
* rbtree of pg pool info
*/
@@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
void *start = *p;
int err = -EINVAL;
u16 version;
- struct rb_node *rbp;
ceph_decode_16_safe(p, end, version, bad);
if (version > CEPH_OSDMAP_INC_VERSION) {
@@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_pg_temp */
- rbp = rb_first(&map->pg_temp);
ceph_decode_32_safe(p, end, len, bad);
while (len--) {
struct ceph_pg_mapping *pg;
@@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
ceph_decode_copy(p, &pgid, sizeof(pgid));
pglen = ceph_decode_32(p);
- /* remove any? */
- while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
- node)->pgid, pgid) <= 0) {
- struct ceph_pg_mapping *cur =
- rb_entry(rbp, struct ceph_pg_mapping, node);
-
- rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
- rb_erase(&cur->node, &map->pg_temp);
- kfree(cur);
- }
-
if (pglen) {
/* insert */
ceph_decode_need(p, end, pglen*sizeof(u32), bad);
@@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
pglen);
+ } else {
+ /* remove */
+ __remove_pg_mapping(&map->pg_temp, pgid);
}
}
- while (rbp) {
- struct ceph_pg_mapping *cur =
- rb_entry(rbp, struct ceph_pg_mapping, node);
-
- rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
- rb_erase(&cur->node, &map->pg_temp);
- kfree(cur);
- }
/* ignore the rest */
*p = end;
@@ -1046,10 +1044,25 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
struct ceph_pg_mapping *pg;
struct ceph_pg_pool_info *pool;
int ruleno;
- unsigned poolid, ps, pps;
+ unsigned poolid, ps, pps, t;
int preferred;
+ poolid = le32_to_cpu(pgid.pool);
+ ps = le16_to_cpu(pgid.ps);
+ preferred = (s16)le16_to_cpu(pgid.preferred);
+
+ pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
+ if (!pool)
+ return NULL;
+
/* pg_temp? */
+ if (preferred >= 0)
+ t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
+ pool->lpgp_num_mask);
+ else
+ t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
+ pool->pgp_num_mask);
+ pgid.ps = cpu_to_le16(t);
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
*num = pg->len;
@@ -1057,18 +1070,6 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
}
/* crush */
- poolid = le32_to_cpu(pgid.pool);
- ps = le16_to_cpu(pgid.ps);
- preferred = (s16)le16_to_cpu(pgid.preferred);
-
- /* don't forcefeed bad device ids to crush */
- if (preferred >= osdmap->max_osd ||
- preferred >= osdmap->crush->max_devices)
- preferred = -1;
-
- pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
- if (!pool)
- return NULL;
ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
pool->v.type, pool->v.size);
if (ruleno < 0) {
@@ -1078,6 +1079,11 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
return NULL;
}
+ /* don't forcefeed bad device ids to crush */
+ if (preferred >= osdmap->max_osd ||
+ preferred >= osdmap->crush->max_devices)
+ preferred = -1;
+
if (preferred >= 0)
pps = ceph_stable_mod(ps,
le32_to_cpu(pool->v.lpgp_num),
diff --git a/net/core/Makefile b/net/core/Makefile
index 8a04dd2..0d357b1 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o net_namespace.o
+ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 18ac112..6449bed 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -332,7 +332,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
int err;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
@@ -418,7 +418,7 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
int err;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
@@ -508,7 +508,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
int err;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
@@ -594,7 +594,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
int err = 0;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 17d67b5..70ecb86 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,6 +133,9 @@
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
#include "net-sysfs.h"
@@ -1515,6 +1518,14 @@ static inline bool is_skb_forwardable(struct net_device *dev,
*/
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+ atomic_long_inc(&dev->rx_dropped);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+ }
+
skb_orphan(skb);
nf_reset(skb);
@@ -1947,9 +1958,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#ifdef CONFIG_HIGHMEM
int i;
if (!(dev->features & NETIF_F_HIGHDMA)) {
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ if (PageHighMem(skb_frag_page(frag)))
return 1;
+ }
}
if (PCI_DMA_BUS_IS_PHYS) {
@@ -1958,7 +1971,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
if (!pdev)
return 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ dma_addr_t addr = page_to_phys(skb_frag_page(frag));
if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
return 1;
}
@@ -2519,25 +2533,31 @@ static inline void ____napi_schedule(struct softnet_data *sd,
/*
* __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Returns a non-zero hash number on success
- * and 0 on failure.
+ * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
*/
-__u32 __skb_get_rxhash(struct sk_buff *skb)
+void __skb_get_rxhash(struct sk_buff *skb)
{
int nhoff, hash = 0, poff;
const struct ipv6hdr *ip6;
const struct iphdr *ip;
+ const struct vlan_hdr *vlan;
u8 ip_proto;
- u32 addr1, addr2, ihl;
+ u32 addr1, addr2;
+ u16 proto;
union {
u32 v32;
u16 v16[2];
} ports;
nhoff = skb_network_offset(skb);
+ proto = skb->protocol;
- switch (skb->protocol) {
+again:
+ switch (proto) {
case __constant_htons(ETH_P_IP):
+ip:
if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
goto done;
@@ -2548,9 +2568,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
ip_proto = ip->protocol;
addr1 = (__force u32) ip->saddr;
addr2 = (__force u32) ip->daddr;
- ihl = ip->ihl;
+ nhoff += ip->ihl * 4;
break;
case __constant_htons(ETH_P_IPV6):
+ipv6:
if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
goto done;
@@ -2558,20 +2579,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
ip_proto = ip6->nexthdr;
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
- ihl = (40 >> 2);
+ nhoff += 40;
break;
+ case __constant_htons(ETH_P_8021Q):
+ if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
+ goto done;
+ vlan = (const struct vlan_hdr *) (skb->data + nhoff);
+ proto = vlan->h_vlan_encapsulated_proto;
+ nhoff += sizeof(*vlan);
+ goto again;
+ case __constant_htons(ETH_P_PPP_SES):
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
+ goto done;
+ proto = *((__be16 *) (skb->data + nhoff +
+ sizeof(struct pppoe_hdr)));
+ nhoff += PPPOE_SES_HLEN;
+ switch (proto) {
+ case __constant_htons(PPP_IP):
+ goto ip;
+ case __constant_htons(PPP_IPV6):
+ goto ipv6;
+ default:
+ goto done;
+ }
default:
goto done;
}
+ switch (ip_proto) {
+ case IPPROTO_GRE:
+ if (pskb_may_pull(skb, nhoff + 16)) {
+ u8 *h = skb->data + nhoff;
+ __be16 flags = *(__be16 *)h;
+
+ /*
+ * Only look inside GRE if version zero and no
+ * routing
+ */
+ if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
+ proto = *(__be16 *)(h + 2);
+ nhoff += 4;
+ if (flags & GRE_CSUM)
+ nhoff += 4;
+ if (flags & GRE_KEY)
+ nhoff += 4;
+ if (flags & GRE_SEQ)
+ nhoff += 4;
+ goto again;
+ }
+ }
+ break;
+ case IPPROTO_IPIP:
+ goto again;
+ default:
+ break;
+ }
+
ports.v32 = 0;
poff = proto_ports_offset(ip_proto);
if (poff >= 0) {
- nhoff += ihl * 4 + poff;
+ nhoff += poff;
if (pskb_may_pull(skb, nhoff + 4)) {
ports.v32 = * (__force u32 *) (skb->data + nhoff);
if (ports.v16[1] < ports.v16[0])
swap(ports.v16[0], ports.v16[1]);
+ skb->l4_rxhash = 1;
}
}
@@ -2584,7 +2656,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
hash = 1;
done:
- return hash;
+ skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);
@@ -2598,10 +2670,7 @@ static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow *rflow, u16 next_cpu)
{
- u16 tcpu;
-
- tcpu = rflow->cpu = next_cpu;
- if (tcpu != RPS_NO_CPU) {
+ if (next_cpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow_table *flow_table;
@@ -2629,16 +2698,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
goto out;
old_rflow = rflow;
rflow = &flow_table->flows[flow_id];
- rflow->cpu = next_cpu;
rflow->filter = rc;
if (old_rflow->filter == rflow->filter)
old_rflow->filter = RPS_NO_FILTER;
out:
#endif
rflow->last_qtail =
- per_cpu(softnet_data, tcpu).input_queue_head;
+ per_cpu(softnet_data, next_cpu).input_queue_head;
}
+ rflow->cpu = next_cpu;
return rflow;
}
@@ -2673,13 +2742,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
map = rcu_dereference(rxqueue->rps_map);
if (map) {
if (map->len == 1 &&
- !rcu_dereference_raw(rxqueue->rps_flow_table)) {
+ !rcu_access_pointer(rxqueue->rps_flow_table)) {
tcpu = map->cpus[0];
if (cpu_online(tcpu))
cpu = tcpu;
goto done;
}
- } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+ } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
goto done;
}
@@ -3094,8 +3163,8 @@ void netdev_rx_handler_unregister(struct net_device *dev)
{
ASSERT_RTNL();
- rcu_assign_pointer(dev->rx_handler, NULL);
- rcu_assign_pointer(dev->rx_handler_data, NULL);
+ RCU_INIT_POINTER(dev->rx_handler, NULL);
+ RCU_INIT_POINTER(dev->rx_handler_data, NULL);
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
@@ -3187,10 +3256,9 @@ ncls:
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
- if (vlan_do_receive(&skb)) {
- ret = __netif_receive_skb(skb);
- goto out;
- } else if (unlikely(!skb))
+ if (vlan_do_receive(&skb))
+ goto another_round;
+ else if (unlikely(!skb))
goto out;
}
@@ -3424,7 +3492,7 @@ pull:
skb_shinfo(skb)->frags[0].size -= grow;
if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
- put_page(skb_shinfo(skb)->frags[0].page);
+ skb_frag_unref(skb, 0);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -3488,10 +3556,9 @@ void skb_gro_reset_offset(struct sk_buff *skb)
NAPI_GRO_CB(skb)->frag0_len = 0;
if (skb->mac_header == skb->tail &&
- !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+ !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
NAPI_GRO_CB(skb)->frag0 =
- page_address(skb_shinfo(skb)->frags[0].page) +
- skb_shinfo(skb)->frags[0].page_offset;
+ skb_frag_address(&skb_shinfo(skb)->frags[0]);
NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
}
}
@@ -4489,9 +4556,7 @@ void __dev_set_rx_mode(struct net_device *dev)
if (!netif_device_present(dev))
return;
- if (ops->ndo_set_rx_mode)
- ops->ndo_set_rx_mode(dev);
- else {
+ if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
/* Unicast addresses changes may only happen under the rtnl,
* therefore calling __dev_set_promiscuity here is safe.
*/
@@ -4502,10 +4567,10 @@ void __dev_set_rx_mode(struct net_device *dev)
__dev_set_promiscuity(dev, -1);
dev->uc_promisc = false;
}
-
- if (ops->ndo_set_multicast_list)
- ops->ndo_set_multicast_list(dev);
}
+
+ if (ops->ndo_set_rx_mode)
+ ops->ndo_set_rx_mode(dev);
}
void dev_set_rx_mode(struct net_device *dev)
@@ -4516,30 +4581,6 @@ void dev_set_rx_mode(struct net_device *dev)
}
/**
- * dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
- * @dev: device
- * @cmd: memory area for ethtool_ops::get_settings() result
- *
- * The cmd arg is initialized properly (cleared and
- * ethtool_cmd::cmd field set to ETHTOOL_GSET).
- *
- * Return device's ethtool_ops::get_settings() result value or
- * -EOPNOTSUPP when device doesn't expose
- * ethtool_ops::get_settings() operation.
- */
-int dev_ethtool_get_settings(struct net_device *dev,
- struct ethtool_cmd *cmd)
-{
- if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
- return -EOPNOTSUPP;
-
- memset(cmd, 0, sizeof(struct ethtool_cmd));
- cmd->cmd = ETHTOOL_GSET;
- return dev->ethtool_ops->get_settings(dev, cmd);
-}
-EXPORT_SYMBOL(dev_ethtool_get_settings);
-
-/**
* dev_get_flags - get flags reported to userspace
* @dev: device
*
@@ -4855,7 +4896,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return -EOPNOTSUPP;
case SIOCADDMULTI:
- if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+ if (!ops->ndo_set_rx_mode ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -4863,7 +4904,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCDELMULTI:
- if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+ if (!ops->ndo_set_rx_mode ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -5727,8 +5768,8 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
- WARN_ON(rcu_dereference_raw(dev->ip_ptr));
- WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip_ptr));
+ WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
if (dev->destructor)
@@ -5932,7 +5973,7 @@ void free_netdev(struct net_device *dev)
kfree(dev->_rx);
#endif
- kfree(rcu_dereference_raw(dev->ingress_queue));
+ kfree(rcu_dereference_protected(dev->ingress_queue, 1));
/* Flush device addresses */
dev_addr_flush(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index e2e6693..283d1b8 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global);
* addresses that have no users left. The source device must be
* locked by netif_tx_lock_bh.
*
- * This function is intended to be called from the dev->set_multicast_list
- * or dev->set_rx_mode function of layered software devices.
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of layered software devices.
*/
int dev_mc_sync(struct net_device *to, struct net_device *from)
{
diff --git a/net/core/dst.c b/net/core/dst.c
index 14b33baf..d5e2c4c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst_init_metrics(dst, dst_default_metrics, true);
dst->expires = 0UL;
dst->path = dst;
- dst->_neighbour = NULL;
+ RCU_INIT_POINTER(dst->_neighbour, NULL);
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
smp_rmb();
again:
- neigh = dst->_neighbour;
+ neigh = rcu_dereference_protected(dst->_neighbour, 1);
child = dst->child;
if (neigh) {
- dst->_neighbour = NULL;
+ RCU_INIT_POINTER(dst->_neighbour, NULL);
neigh_release(neigh);
}
@@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
+ struct neighbour *neigh;
+
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
- if (dst->_neighbour && dst->_neighbour->dev == dev) {
- dst->_neighbour->dev = dst->dev;
+ rcu_read_lock();
+ neigh = dst_get_neighbour(dst);
+ if (neigh && neigh->dev == dev) {
+ neigh->dev = dst->dev;
dev_hold(dst->dev);
dev_put(dev);
}
+ rcu_read_unlock();
}
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6cdba5f..f444817 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
-static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
- struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
- int err;
+ ASSERT_RTNL();
- if (!dev->ethtool_ops->get_settings)
+ if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
return -EOPNOTSUPP;
- err = dev->ethtool_ops->get_settings(dev, &cmd);
+ memset(cmd, 0, sizeof(struct ethtool_cmd));
+ cmd->cmd = ETHTOOL_GSET;
+ return dev->ethtool_ops->get_settings(dev, cmd);
+}
+EXPORT_SYMBOL(__ethtool_get_settings);
+
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+ int err;
+ struct ethtool_cmd cmd;
+
+ err = __ethtool_get_settings(dev, &cmd);
if (err < 0)
return err;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e7ab0c0..38be474 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
*/
list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
- r->target == rule->pref) {
- BUG_ON(rtnl_dereference(r->ctarget) != NULL);
+ r->target == rule->pref &&
+ rtnl_dereference(r->ctarget) == NULL) {
rcu_assign_pointer(r->ctarget, rule);
if (--ops->unresolved_rules == 0)
break;
@@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (ops->nr_goto_rules > 0) {
list_for_each_entry(tmp, &ops->rules_list, list) {
if (rtnl_dereference(tmp->ctarget) == rule) {
- rcu_assign_pointer(tmp->ctarget, NULL);
+ RCU_INIT_POINTER(tmp->ctarget, NULL);
ops->unresolved_rules++;
}
}
@@ -545,7 +545,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->flags = rule->flags;
if (rule->action == FR_ACT_GOTO &&
- rcu_dereference_raw(rule->ctarget) == NULL)
+ rcu_access_pointer(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 36f975f..8fcc2d7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk)
filter = rcu_dereference_protected(sk->sk_filter,
sock_owned_by_user(sk));
if (filter) {
- rcu_assign_pointer(sk->sk_filter, NULL);
+ RCU_INIT_POINTER(sk->sk_filter, NULL);
sk_filter_uncharge(sk, filter);
ret = 0;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index bf32c33..555a456 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -30,6 +30,7 @@ struct flow_cache_entry {
struct hlist_node hlist;
struct list_head gc_list;
} u;
+ struct net *net;
u16 family;
u8 dir;
u32 genid;
@@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- const struct flowi *key)
+ const struct flowi *key,
+ size_t keysize)
{
const u32 *k = (const u32 *) key;
+ const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
- return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+ return jhash2(k, length, fcp->hash_rnd)
& (flow_cache_hash_size(fc) - 1);
}
-typedef unsigned long flow_compare_t;
-
/* I hear what you're saying, use memcmp. But memcmp cannot make
- * important assumptions that we can here, such as alignment and
- * constant size.
+ * important assumptions that we can here, such as alignment.
*/
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
+ size_t keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
- const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
-
- BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
k1 = (const flow_compare_t *) key1;
- k1_lim = k1 + n_elem;
+ k1_lim = k1 + keysize;
k2 = (const flow_compare_t *) key2;
@@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
struct flow_cache_entry *fle, *tfle;
struct hlist_node *entry;
struct flow_cache_object *flo;
+ size_t keysize;
unsigned int hash;
local_bh_disable();
@@ -222,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
fle = NULL;
flo = NULL;
+
+ keysize = flow_key_size(family);
+ if (!keysize)
+ goto nocache;
+
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
if (!fcp->hash_table)
@@ -230,11 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fcp->hash_rnd_recalc)
flow_new_hash_rnd(fc, fcp);
- hash = flow_hash_code(fc, fcp, key);
+ hash = flow_hash_code(fc, fcp, key, keysize);
hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
- if (tfle->family == family &&
+ if (tfle->net == net &&
+ tfle->family == family &&
tfle->dir == dir &&
- flow_key_compare(key, &tfle->key) == 0) {
+ flow_key_compare(key, &tfle->key, keysize) == 0) {
fle = tfle;
break;
}
@@ -246,9 +251,10 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
+ fle->net = net;
fle->family = family;
fle->dir = dir;
- memcpy(&fle->key, key, sizeof(*key));
+ memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
fle->object = NULL;
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
index 283c2b99..81e1ed7 100644
--- a/net/core/kmap_skb.h
+++ b/net/core/kmap_skb.h
@@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag)
local_bh_disable();
#endif
- return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+ return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ);
}
static inline void kunmap_skb_frag(void *vaddr)
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 357bd4e..c3519c6 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev)
static bool linkwatch_urgent_event(struct net_device *dev)
{
- return netif_running(dev) && netif_carrier_ok(dev) &&
- qdisc_tx_changing(dev);
+ if (!netif_running(dev))
+ return false;
+
+ if (dev->ifindex != dev->iflink)
+ return true;
+
+ return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8fab9b0..4344964 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh)
skb_queue_purge(&neigh->arp_queue);
}
+static void neigh_probe(struct neighbour *neigh)
+ __releases(neigh->lock)
+{
+ struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+ /* keep skb alive even if arp_queue overflows */
+ if (skb)
+ skb = skb_copy(skb, GFP_ATOMIC);
+ write_unlock(&neigh->lock);
+ neigh->ops->solicit(neigh, skb);
+ atomic_inc(&neigh->probes);
+ kfree_skb(skb);
+}
+
/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(unsigned long arg)
@@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh_hold(neigh);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
- struct sk_buff *skb = skb_peek(&neigh->arp_queue);
- /* keep skb alive even if arp_queue overflows */
- if (skb)
- skb = skb_copy(skb, GFP_ATOMIC);
- write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
- atomic_inc(&neigh->probes);
- kfree_skb(skb);
+ neigh_probe(neigh);
} else {
out:
write_unlock(&neigh->lock);
@@ -942,7 +948,7 @@ out:
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
int rc;
- unsigned long now;
+ bool immediate_probe = false;
write_lock_bh(&neigh->lock);
@@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
- now = jiffies;
-
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+ unsigned long next, now = jiffies;
+
atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
- neigh->updated = jiffies;
- neigh_add_timer(neigh, now + 1);
+ neigh->updated = now;
+ next = now + max(neigh->parms->retrans_time, HZ/2);
+ neigh_add_timer(neigh, next);
+ immediate_probe = true;
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
@@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
rc = 1;
}
out_unlock_bh:
- write_unlock_bh(&neigh->lock);
+ if (immediate_probe)
+ neigh_probe(neigh);
+ else
+ write_unlock(&neigh->lock);
+ local_bh_enable();
return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
@@ -1319,11 +1331,15 @@ static void neigh_proxy_process(unsigned long arg)
if (tdif <= 0) {
struct net_device *dev = skb->dev;
+
__skb_unlink(skb, &tbl->proxy_queue);
- if (tbl->proxy_redo && netif_running(dev))
+ if (tbl->proxy_redo && netif_running(dev)) {
+ rcu_read_lock();
tbl->proxy_redo(skb);
- else
+ rcu_read_unlock();
+ } else {
kfree_skb(skb);
+ }
dev_put(dev);
} else if (!sched_next || tdif < sched_next)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1683e5d..7604a63 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
- if (!dev_ethtool_get_settings(netdev, &cmd))
+ if (!__ethtool_get_settings(netdev, &cmd))
ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
}
rtnl_unlock();
@@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
- if (!dev_ethtool_get_settings(netdev, &cmd))
+ if (!__ethtool_get_settings(netdev, &cmd))
ret = sprintf(buf, "%s\n",
cmd.duplex ? "full" : "half");
}
@@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj)
struct rps_dev_flow_table *flow_table;
- map = rcu_dereference_raw(queue->rps_map);
+ map = rcu_dereference_protected(queue->rps_map, 1);
if (map) {
RCU_INIT_POINTER(queue->rps_map, NULL);
kfree_rcu(map, rcu);
}
- flow_table = rcu_dereference_raw(queue->rps_flow_table);
+ flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
if (flow_table) {
RCU_INIT_POINTER(queue->rps_flow_table, NULL);
call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
@@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
}
if (nonempty)
- rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+ RCU_INIT_POINTER(dev->xps_maps, new_dev_maps);
else {
kfree(new_dev_maps);
- rcu_assign_pointer(dev->xps_maps, NULL);
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
}
if (dev_maps)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index adf84dd..f57d946 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb)
if (skb_shared(skb))
goto out;
- iph = (struct iphdr *)skb->data;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
+ iph = (struct iphdr *)skb->data;
if (iph->ihl < 5 || iph->version != 4)
goto out;
if (!pskb_may_pull(skb, iph->ihl*4))
goto out;
+ iph = (struct iphdr *)skb->data;
if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
goto out;
@@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb)
if (pskb_trim_rcsum(skb, len))
goto out;
+ iph = (struct iphdr *)skb->data;
if (iph->protocol != IPPROTO_UDP)
goto out;
@@ -760,7 +762,7 @@ int __netpoll_setup(struct netpoll *np)
}
/* last thing to do is link it to the net device structure */
- rcu_assign_pointer(ndev->npinfo, npinfo);
+ RCU_INIT_POINTER(ndev->npinfo, npinfo);
return 0;
@@ -901,7 +903,7 @@ void __netpoll_cleanup(struct netpoll *np)
if (ops->ndo_netpoll_cleanup)
ops->ndo_netpoll_cleanup(np->dev);
- rcu_assign_pointer(np->dev->npinfo, NULL);
+ RCU_INIT_POINTER(np->dev->npinfo, NULL);
/* avoid racing with NAPI reading npinfo */
synchronize_rcu_bh();
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e35a6fb..796044a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2602,8 +2602,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
if (!pkt_dev->page)
break;
}
- skb_shinfo(skb)->frags[i].page = pkt_dev->page;
- get_page(pkt_dev->page);
+ skb_frag_set_page(skb, i, pkt_dev->page);
skb_shinfo(skb)->frags[i].page_offset = 0;
/*last fragment, fill rest of data*/
if (i == (frags - 1))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 99d9e95..39f8dd6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1604,7 +1604,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
- dev->real_num_tx_queues = real_num_queues;
if (tb[IFLA_MTU])
dev->mtu = nla_get_u32(tb[IFLA_MTU]);
diff --git a/net/core/scm.c b/net/core/scm.c
index 4c1ef02..ff52ad0 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
if (err)
goto error;
- if (pid_vnr(p->pid) != p->creds.pid) {
+ if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
struct pid *pid;
err = -ESRCH;
pid = find_get_pid(p->creds.pid);
@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
p->pid = pid;
}
- if ((p->cred->euid != p->creds.uid) ||
- (p->cred->egid != p->creds.gid)) {
+ if (!p->cred ||
+ (p->cred->euid != p->creds.uid) ||
+ (p->cred->egid != p->creds.gid)) {
struct cred *cred;
err = -ENOMEM;
cred = prepare_creds();
@@ -192,8 +193,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
goto error;
cred->uid = cred->euid = p->creds.uid;
- cred->gid = cred->egid = p->creds.uid;
- put_cred(p->cred);
+ cred->gid = cred->egid = p->creds.gid;
+ if (p->cred)
+ put_cred(p->cred);
p->cred = cred;
}
break;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
new file mode 100644
index 0000000..45329d7
--- /dev/null
+++ b/net/core/secure_seq.c
@@ -0,0 +1,184 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/cryptohash.h>
+#include <linux/module.h>
+#include <linux/cache.h>
+#include <linux/random.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+
+#include <net/secure_seq.h>
+
+static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+
+static int __init net_secret_init(void)
+{
+ get_random_bytes(net_secret, sizeof(net_secret));
+ return 0;
+}
+late_initcall(net_secret_init);
+
+static u32 seq_scale(u32 seq)
+{
+ /*
+ * As close as possible to RFC 793, which
+ * suggests using a 250 kHz clock.
+ * Further reading shows this assumes 2 Mb/s networks.
+ * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
+ * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but
+ * we also need to limit the resolution so that the u32 seq
+ * overlaps less than one time per MSL (2 minutes).
+ * Choosing a clock of 64 ns period is OK. (period of 274 s)
+ */
+ return seq + (ktime_to_ns(ktime_get_real()) >> 6);
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+ __be16 sport, __be16 dport)
+{
+ u32 secret[MD5_MESSAGE_BYTES / 4];
+ u32 hash[MD5_DIGEST_WORDS];
+ u32 i;
+
+ memcpy(hash, saddr, 16);
+ for (i = 0; i < 4; i++)
+ secret[i] = net_secret[i] + daddr[i];
+ secret[4] = net_secret[4] +
+ (((__force u16)sport << 16) + (__force u16)dport);
+ for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
+ secret[i] = net_secret[i];
+
+ md5_transform(hash, secret);
+
+ return seq_scale(hash[0]);
+}
+EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+
+u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+ __be16 dport)
+{
+ u32 secret[MD5_MESSAGE_BYTES / 4];
+ u32 hash[MD5_DIGEST_WORDS];
+ u32 i;
+
+ memcpy(hash, saddr, 16);
+ for (i = 0; i < 4; i++)
+ secret[i] = net_secret[i] + (__force u32) daddr[i];
+ secret[4] = net_secret[4] + (__force u32)dport;
+ for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
+ secret[i] = net_secret[i];
+
+ md5_transform(hash, secret);
+
+ return hash[0];
+}
+#endif
+
+#ifdef CONFIG_INET
+__u32 secure_ip_id(__be32 daddr)
+{
+ u32 hash[MD5_DIGEST_WORDS];
+
+ hash[0] = (__force __u32) daddr;
+ hash[1] = net_secret[13];
+ hash[2] = net_secret[14];
+ hash[3] = net_secret[15];
+
+ md5_transform(hash, net_secret);
+
+ return hash[0];
+}
+
+__u32 secure_ipv6_id(const __be32 daddr[4])
+{
+ __u32 hash[4];
+
+ memcpy(hash, daddr, 16);
+ md5_transform(hash, net_secret);
+
+ return hash[0];
+}
+
+__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
+{
+ u32 hash[MD5_DIGEST_WORDS];
+
+ hash[0] = (__force u32)saddr;
+ hash[1] = (__force u32)daddr;
+ hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
+ hash[3] = net_secret[15];
+
+ md5_transform(hash, net_secret);
+
+ return seq_scale(hash[0]);
+}
+
+u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+{
+ u32 hash[MD5_DIGEST_WORDS];
+
+ hash[0] = (__force u32)saddr;
+ hash[1] = (__force u32)daddr;
+ hash[2] = (__force u32)dport ^ net_secret[14];
+ hash[3] = net_secret[15];
+
+ md5_transform(hash, net_secret);
+
+ return hash[0];
+}
+EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
+#endif
+
+#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
+u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport)
+{
+ u32 hash[MD5_DIGEST_WORDS];
+ u64 seq;
+
+ hash[0] = (__force u32)saddr;
+ hash[1] = (__force u32)daddr;
+ hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
+ hash[3] = net_secret[15];
+
+ md5_transform(hash, net_secret);
+
+ seq = hash[0] | (((u64)hash[1]) << 32);
+ seq += ktime_to_ns(ktime_get_real());
+ seq &= (1ull << 48) - 1;
+
+ return seq;
+}
+EXPORT_SYMBOL(secure_dccp_sequence_number);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+ __be16 sport, __be16 dport)
+{
+ u32 secret[MD5_MESSAGE_BYTES / 4];
+ u32 hash[MD5_DIGEST_WORDS];
+ u64 seq;
+ u32 i;
+
+ memcpy(hash, saddr, 16);
+ for (i = 0; i < 4; i++)
+ secret[i] = net_secret[i] + daddr[i];
+ secret[4] = net_secret[4] +
+ (((__force u16)sport << 16) + (__force u16)dport);
+ for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
+ secret[i] = net_secret[i];
+
+ md5_transform(hash, secret);
+
+ seq = hash[0] | (((u64)hash[1]) << 32);
+ seq += ktime_to_ns(ktime_get_real());
+ seq &= (1ull << 48) - 1;
+
+ return seq;
+}
+EXPORT_SYMBOL(secure_dccpv6_sequence_number);
+#endif
+#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2beda82..5b2c5f1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -326,7 +326,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
}
/*
@@ -529,6 +529,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mac_header = old->mac_header;
skb_dst_copy(new, old);
new->rxhash = old->rxhash;
+ new->ooo_okay = old->ooo_okay;
+ new->l4_rxhash = old->l4_rxhash;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
@@ -611,8 +613,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
-/* skb frags copy userspace buffers to kernel */
-static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+/* skb_copy_ubufs - copy userspace skb frags buffers to kernel
+ * @skb: the skb to modify
+ * @gfp_mask: allocation priority
+ *
+ * This must be called on SKBTX_DEV_ZEROCOPY skb.
+ * It will copy all frags into kernel and drop the reference
+ * to userspace pages.
+ *
+ * If this function is called from an interrupt gfp_mask() must be
+ * %GFP_ATOMIC.
+ *
+ * Returns 0 on success or a negative error code on failure
+ * to allocate kernel memory to copy to.
+ */
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
int i;
int num_frags = skb_shinfo(skb)->nr_frags;
@@ -652,6 +667,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
skb_shinfo(skb)->frags[i - 1].page = head;
head = (struct page *)head->private;
}
+
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
return 0;
}
@@ -677,7 +694,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
if (skb_copy_ubufs(skb, gfp_mask))
return NULL;
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
}
n = skb + 1;
@@ -803,11 +819,10 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
n = NULL;
goto out;
}
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
- get_page(skb_shinfo(n)->frags[i].page);
+ skb_frag_ref(skb, i);
}
skb_shinfo(n)->nr_frags = i;
}
@@ -896,10 +911,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
if (skb_copy_ubufs(skb, gfp_mask))
goto nofrags;
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
@@ -1179,7 +1193,7 @@ drop_pages:
skb_shinfo(skb)->nr_frags = i;
for (; i < nfrags; i++)
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -1348,7 +1362,7 @@ pull_pages:
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1369,8 +1383,21 @@ pull_pages:
}
EXPORT_SYMBOL(__pskb_pull_tail);
-/* Copy some data bits from skb to kernel buffer. */
-
+/**
+ * skb_copy_bits - copy bits from skb to kernel buffer
+ * @skb: source skb
+ * @offset: offset in source
+ * @to: destination buffer
+ * @len: number of bytes to copy
+ *
+ * Copy the specified number of bytes from the source skb to the
+ * destination buffer.
+ *
+ * CAUTION ! :
+ * If its prototype is ever changed,
+ * check arch/{*}/net/{*}.S files,
+ * since it is called from BPF assembly code.
+ */
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
int start = skb_headlen(skb);
@@ -1594,7 +1621,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
- if (__splice_segment(f->page, f->page_offset, f->size,
+ if (__splice_segment(skb_frag_page(f),
+ f->page_offset, f->size,
offset, len, skb, spd, 0, sk, pipe))
return 1;
}
@@ -2139,7 +2167,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
- get_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_ref(skb, i);
skb_shinfo(skb1)->frags[0].page_offset += len - pos;
skb_shinfo(skb1)->frags[0].size -= len - pos;
skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2214,7 +2242,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
* commit all, so that we don't have to undo partial changes
*/
if (!to ||
- !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) {
+ !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+ fragfrom->page_offset)) {
merge = -1;
} else {
merge = to - 1;
@@ -2261,7 +2290,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
to++;
} else {
- get_page(fragfrom->page);
+ __skb_frag_ref(fragfrom);
fragto->page = fragfrom->page;
fragto->page_offset = fragfrom->page_offset;
fragto->size = todo;
@@ -2283,7 +2312,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
fragto->size += fragfrom->size;
- put_page(fragfrom->page);
+ __skb_frag_unref(fragfrom);
}
/* Reposition in the original skb */
@@ -2528,8 +2557,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
left = PAGE_SIZE - frag->page_offset;
copy = (length > left)? left : length;
- ret = getfrag(from, (page_address(frag->page) +
- frag->page_offset + frag->size),
+ ret = getfrag(from, skb_frag_address(frag) + frag->size,
offset, copy, 0, skb);
if (ret < 0)
return -EFAULT;
@@ -2681,7 +2709,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
while (pos < offset + len && i < nfrags) {
*frag = skb_shinfo(skb)->frags[i];
- get_page(frag->page);
+ __skb_frag_ref(frag);
size = frag->size;
if (pos < offset) {
@@ -2904,7 +2932,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
if (copy > len)
copy = len;
- sg_set_page(&sg[elt], frag->page, copy,
+ sg_set_page(&sg[elt], skb_frag_page(frag), copy,
frag->page_offset+offset-start);
elt++;
if (!(len -= copy))
diff --git a/net/core/sock.c b/net/core/sock.c
index bc745d0..83c462d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
- rcu_assign_pointer(sk->sk_dst_cache, NULL);
+ RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
}
@@ -738,10 +738,7 @@ set_rcvbuf:
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
case SO_RXQ_OVFL:
- if (valbool)
- sock_set_flag(sk, SOCK_RXQ_OVFL);
- else
- sock_reset_flag(sk, SOCK_RXQ_OVFL);
+ sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
break;
default:
ret = -ENOPROTOOPT;
@@ -1158,7 +1155,7 @@ static void __sk_free(struct sock *sk)
atomic_read(&sk->sk_wmem_alloc) == 0);
if (filter) {
sk_filter_uncharge(sk, filter);
- rcu_assign_pointer(sk->sk_filter, NULL);
+ RCU_INIT_POINTER(sk->sk_filter, NULL);
}
sock_disable_timestamp(sk, SOCK_TIMESTAMP);
@@ -1533,7 +1530,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
skb_shinfo(skb)->nr_frags = npages;
for (i = 0; i < npages; i++) {
struct page *page;
- skb_frag_t *frag;
page = alloc_pages(sk->sk_allocation, 0);
if (!page) {
@@ -1543,12 +1539,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
}
- frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
- frag->page_offset = 0;
- frag->size = (data_len >= PAGE_SIZE ?
- PAGE_SIZE :
- data_len);
+ __skb_fill_page_desc(skb, i,
+ page, 0,
+ (data_len >= PAGE_SIZE ?
+ PAGE_SIZE :
+ data_len));
data_len -= PAGE_SIZE;
}
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 25d717e..34e9664 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -78,7 +78,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
copy = end - offset;
if (copy > 0) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- struct page *page = frag->page;
+ struct page *page = skb_frag_page(frag);
if (copy > len)
copy = len;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3cb56af..9bfbc1d 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1255,7 +1255,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
spin_lock(&dcb_lock);
list_for_each_entry(itr, &dcb_app_list, list) {
- if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) {
+ if (itr->ifindex == netdev->ifindex) {
err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app),
&itr->app);
if (err) {
@@ -1412,7 +1412,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev)
goto dcb_unlock;
list_for_each_entry(itr, &dcb_app_list, list) {
- if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) {
+ if (itr->ifindex == netdev->ifindex) {
struct nlattr *app_nest = nla_nest_start(skb,
DCB_ATTR_APP);
if (!app_nest)
@@ -2050,7 +2050,7 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
list_for_each_entry(itr, &dcb_app_list, list) {
if (itr->app.selector == app->selector &&
itr->app.protocol == app->protocol &&
- (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+ itr->ifindex == dev->ifindex) {
prio = itr->app.priority;
break;
}
@@ -2073,15 +2073,17 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
struct dcb_app_type *itr;
struct dcb_app_type event;
- memcpy(&event.name, dev->name, sizeof(event.name));
+ event.ifindex = dev->ifindex;
memcpy(&event.app, new, sizeof(event.app));
+ if (dev->dcbnl_ops->getdcbx)
+ event.dcbx = dev->dcbnl_ops->getdcbx(dev);
spin_lock(&dcb_lock);
/* Search for existing match and replace */
list_for_each_entry(itr, &dcb_app_list, list) {
if (itr->app.selector == new->selector &&
itr->app.protocol == new->protocol &&
- (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+ itr->ifindex == dev->ifindex) {
if (new->priority)
itr->app.priority = new->priority;
else {
@@ -2101,7 +2103,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
}
memcpy(&entry->app, new, sizeof(*new));
- strncpy(entry->name, dev->name, IFNAMSIZ);
+ entry->ifindex = dev->ifindex;
list_add(&entry->list, &dcb_app_list);
}
out:
@@ -2127,7 +2129,7 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
list_for_each_entry(itr, &dcb_app_list, list) {
if (itr->app.selector == app->selector &&
itr->app.protocol == app->protocol &&
- (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+ itr->ifindex == dev->ifindex) {
prio |= 1 << itr->app.priority;
}
}
@@ -2150,8 +2152,10 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
struct dcb_app_type event;
int err = 0;
- memcpy(&event.name, dev->name, sizeof(event.name));
+ event.ifindex = dev->ifindex;
memcpy(&event.app, new, sizeof(event.app));
+ if (dev->dcbnl_ops->getdcbx)
+ event.dcbx = dev->dcbnl_ops->getdcbx(dev);
spin_lock(&dcb_lock);
/* Search for existing match and abort if found */
@@ -2159,7 +2163,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
if (itr->app.selector == new->selector &&
itr->app.protocol == new->protocol &&
itr->app.priority == new->priority &&
- (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+ itr->ifindex == dev->ifindex) {
err = -EEXIST;
goto out;
}
@@ -2173,7 +2177,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
}
memcpy(&entry->app, new, sizeof(*new));
- strncpy(entry->name, dev->name, IFNAMSIZ);
+ entry->ifindex = dev->ifindex;
list_add(&entry->list, &dcb_app_list);
out:
spin_unlock(&dcb_lock);
@@ -2194,8 +2198,10 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
struct dcb_app_type event;
int err = -ENOENT;
- memcpy(&event.name, dev->name, sizeof(event.name));
+ event.ifindex = dev->ifindex;
memcpy(&event.app, del, sizeof(event.app));
+ if (dev->dcbnl_ops->getdcbx)
+ event.dcbx = dev->dcbnl_ops->getdcbx(dev);
spin_lock(&dcb_lock);
/* Search for existing match and remove it. */
@@ -2203,7 +2209,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
if (itr->app.selector == del->selector &&
itr->app.protocol == del->protocol &&
itr->app.priority == del->priority &&
- (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+ itr->ifindex == dev->ifindex) {
list_del(&itr->list);
kfree(itr);
err = 0;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 0462040..67164bb 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
{
- struct dccp_sock *dp = dccp_sk(sk);
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
/*
@@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
val = max_ratio;
}
- if (val > DCCPF_ACK_RATIO_MAX)
- val = DCCPF_ACK_RATIO_MAX;
+ dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
+ min_t(u32, val, DCCPF_ACK_RATIO_MAX));
+}
- if (val == dp->dccps_l_ack_ratio)
- return;
+static void ccid2_check_l_ack_ratio(struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- ccid2_pr_debug("changing local ack ratio to %u\n", val);
- dp->dccps_l_ack_ratio = val;
+ /*
+ * After a loss, idle period, application limited period, or RTO we
+ * need to check that the ack ratio is still less than the congestion
+ * window. Otherwise, we will send an entire congestion window of
+ * packets and got no response because we haven't sent ack ratio
+ * packets yet.
+ * If the ack ratio does need to be reduced, we reduce it to half of
+ * the congestion window (or 1 if that's zero) instead of to the
+ * congestion window. This prevents problems if one ack is lost.
+ */
+ if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
+ ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
+}
+
+static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
+{
+ dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
+ clamp_val(val, DCCPF_SEQ_WMIN,
+ DCCPF_SEQ_WMAX));
}
static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -187,6 +205,8 @@ static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
}
hc->tx_cwnd_used = 0;
hc->tx_cwnd_stamp = now;
+
+ ccid2_check_l_ack_ratio(sk);
}
/* This borrows the code of tcp_cwnd_restart() */
@@ -205,6 +225,8 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
hc->tx_cwnd_stamp = now;
hc->tx_cwnd_used = 0;
+
+ ccid2_check_l_ack_ratio(sk);
}
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
@@ -405,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
unsigned int *maxincr)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- if (hc->tx_cwnd < hc->tx_ssthresh) {
- if (*maxincr > 0 && ++hc->tx_packets_acked == 2) {
+ struct dccp_sock *dp = dccp_sk(sk);
+ int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
+
+ if (hc->tx_cwnd < dp->dccps_l_seq_win &&
+ r_seq_used < dp->dccps_r_seq_win) {
+ if (hc->tx_cwnd < hc->tx_ssthresh) {
+ if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
+ hc->tx_cwnd += 1;
+ *maxincr -= 1;
+ hc->tx_packets_acked = 0;
+ }
+ } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
hc->tx_cwnd += 1;
- *maxincr -= 1;
hc->tx_packets_acked = 0;
}
- } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
- hc->tx_cwnd += 1;
- hc->tx_packets_acked = 0;
}
+
+ /*
+ * Adjust the local sequence window and the ack ratio to allow about
+ * 5 times the number of packets in the network (RFC 4340 7.5.2)
+ */
+ if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
+ ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
+ else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
+ ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
+
+ if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
+ ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
+ else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
+ ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
+
/*
* FIXME: RTT is sampled several times per acknowledgment (for each
* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
@@ -441,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
- /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
- if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd)
- ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
+ ccid2_check_l_ack_ratio(sk);
}
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
@@ -494,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (hc->tx_rpdupack >= NUMDUPACK) {
hc->tx_rpdupack = -1; /* XXX lame */
hc->tx_rpseq = 0;
-
+#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
+ /*
+ * FIXME: Ack Congestion Control is broken; in
+ * the current state instabilities occurred with
+ * Ack Ratios greater than 1; causing hang-ups
+ * and long RTO timeouts. This needs to be fixed
+ * before opening up dynamic changes. -- gerrit
+ */
ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
+#endif
}
}
}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index f585d33..18c9754 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -43,6 +43,12 @@ struct ccid2_seq {
#define CCID2_SEQBUF_LEN 1024
#define CCID2_SEQBUF_MAX 128
+/*
+ * Multiple of congestion window to keep the sequence window at
+ * (RFC 4340 7.5.2)
+ */
+#define CCID2_WIN_CHANGE_FACTOR 5
+
/**
* struct ccid2_hc_tx_sock - CCID2 TX half connection
* @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5fdb072..583490a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
}
+extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 568def9..23cea0e 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -12,6 +12,7 @@
* -----------
* o Feature negotiation is coordinated with connection setup (as in TCP), wild
* changes of parameters of an established connection are not supported.
+ * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN.
* o All currently known SP features have 1-byte quantities. If in the future
* extensions of RFCs 4340..42 define features with item lengths larger than
* one byte, a feature-specific extension of the code will be required.
@@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
}
+/**
+ * dccp_feat_activate - Activate feature value on socket
+ * @sk: fully connected DCCP socket (after handshake is complete)
+ * @feat_num: feature to activate, one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is meant
+ * @fval: the value (SP or NN) to activate, or NULL to use the default value
+ * For general use this function is preferable over __dccp_feat_activate().
+ */
+static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
+ dccp_feat_val const *fval)
+{
+ return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
+}
+
/* Test for "Req'd" feature (RFC 4340, 6.4) */
static inline int dccp_feat_must_be_understood(u8 feat_num)
{
@@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
return -1;
if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
return -1;
- /*
- * Enter CHANGING after transmitting the Change option (6.6.2).
- */
- if (pos->state == FEAT_INITIALISING)
- pos->state = FEAT_CHANGING;
+
+ if (skb->sk->sk_state == DCCP_OPEN &&
+ (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) {
+ /*
+ * Confirms don't get retransmitted (6.6.3) once the
+ * connection is in state OPEN
+ */
+ dccp_feat_list_pop(pos);
+ } else {
+ /*
+ * Enter CHANGING after transmitting the Change
+ * option (6.6.2).
+ */
+ if (pos->state == FEAT_INITIALISING)
+ pos->state = FEAT_CHANGING;
+ }
}
return 0;
}
@@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
0, list, len);
}
+/**
+ * dccp_feat_nn_get - Query current/pending value of NN feature
+ * @sk: DCCP socket of an established connection
+ * @feat: NN feature number from %dccp_feature_numbers
+ * For a known NN feature, returns value currently being negotiated, or
+ * current (confirmed) value if no negotiation is going on.
+ */
+u64 dccp_feat_nn_get(struct sock *sk, u8 feat)
+{
+ if (dccp_feat_type(feat) == FEAT_NN) {
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_feat_entry *entry;
+
+ entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1);
+ if (entry != NULL)
+ return entry->val.nn;
+
+ switch (feat) {
+ case DCCPF_ACK_RATIO:
+ return dp->dccps_l_ack_ratio;
+ case DCCPF_SEQUENCE_WINDOW:
+ return dp->dccps_l_seq_win;
+ }
+ }
+ DCCP_BUG("attempt to look up unsupported feature %u", feat);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dccp_feat_nn_get);
+
+/**
+ * dccp_feat_signal_nn_change - Update NN values for an established connection
+ * @sk: DCCP socket of an established connection
+ * @feat: NN feature number from %dccp_feature_numbers
+ * @nn_val: the new value to use
+ * This function is used to communicate NN updates out-of-band.
+ */
+int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
+{
+ struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+ dccp_feat_val fval = { .nn = nn_val };
+ struct dccp_feat_entry *entry;
+
+ if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+ return 0;
+
+ if (dccp_feat_type(feat) != FEAT_NN ||
+ !dccp_feat_is_valid_nn_val(feat, nn_val))
+ return -EINVAL;
+
+ if (nn_val == dccp_feat_nn_get(sk, feat))
+ return 0; /* already set or negotiation under way */
+
+ entry = dccp_feat_list_lookup(fn, feat, 1);
+ if (entry != NULL) {
+ dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n",
+ (unsigned long long)entry->val.nn,
+ (unsigned long long)nn_val);
+ dccp_feat_list_pop(entry);
+ }
+
+ inet_csk_schedule_ack(sk);
+ return dccp_feat_push_change(fn, feat, 1, 0, &fval);
+}
+EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
/*
* Tracking features whose value depend on the choice of CCID
@@ -1187,6 +1277,100 @@ confirmation_failed:
}
/**
+ * dccp_feat_handle_nn_established - Fast-path reception of NN options
+ * @sk: socket of an established DCCP connection
+ * @mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
+ * @feat: NN number, one of %dccp_feature_numbers
+ * @val: NN value
+ * @len: length of @val in bytes
+ * This function combines the functionality of change_recv/confirm_recv, with
+ * the following differences (reset codes are the same):
+ * - cleanup after receiving the Confirm;
+ * - values are directly activated after successful parsing;
+ * - deliberately restricted to NN features.
+ * The restriction to NN features is essential since SP features can have non-
+ * predictable outcomes (depending on the remote configuration), and are inter-
+ * dependent (CCIDs for instance cause further dependencies).
+ */
+static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
+ u8 feat, u8 *val, u8 len)
+{
+ struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+ const bool local = (opt == DCCPO_CONFIRM_R);
+ struct dccp_feat_entry *entry;
+ u8 type = dccp_feat_type(feat);
+ dccp_feat_val fval;
+
+ dccp_feat_print_opt(opt, feat, val, len, mandatory);
+
+ /* Ignore non-mandatory unknown and non-NN features */
+ if (type == FEAT_UNKNOWN) {
+ if (local && !mandatory)
+ return 0;
+ goto fast_path_unknown;
+ } else if (type != FEAT_NN) {
+ return 0;
+ }
+
+ /*
+ * We don't accept empty Confirms, since in fast-path feature
+ * negotiation the values are enabled immediately after sending
+ * the Change option.
+ * Empty Changes on the other hand are invalid (RFC 4340, 6.1).
+ */
+ if (len == 0 || len > sizeof(fval.nn))
+ goto fast_path_unknown;
+
+ if (opt == DCCPO_CHANGE_L) {
+ fval.nn = dccp_decode_value_var(val, len);
+ if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
+ goto fast_path_unknown;
+
+ if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
+ dccp_feat_activate(sk, feat, local, &fval))
+ return DCCP_RESET_CODE_TOO_BUSY;
+
+ /* set the `Ack Pending' flag to piggyback a Confirm */
+ inet_csk_schedule_ack(sk);
+
+ } else if (opt == DCCPO_CONFIRM_R) {
+ entry = dccp_feat_list_lookup(fn, feat, local);
+ if (entry == NULL || entry->state != FEAT_CHANGING)
+ return 0;
+
+ fval.nn = dccp_decode_value_var(val, len);
+ /*
+ * Just ignore a value that doesn't match our current value.
+ * If the option changes twice within two RTTs, then at least
+ * one CONFIRM will be received for the old value after a
+ * new CHANGE was sent.
+ */
+ if (fval.nn != entry->val.nn)
+ return 0;
+
+ /* Only activate after receiving the Confirm option (6.6.1). */
+ dccp_feat_activate(sk, feat, local, &fval);
+
+ /* It has been confirmed - so remove the entry */
+ dccp_feat_list_pop(entry);
+
+ } else {
+ DCCP_WARN("Received illegal option %u\n", opt);
+ goto fast_path_failed;
+ }
+ return 0;
+
+fast_path_unknown:
+ if (!mandatory)
+ return dccp_push_empty_confirm(fn, feat, local);
+
+fast_path_failed:
+ return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+ : DCCP_RESET_CODE_OPTION_ERROR;
+}
+
+/**
* dccp_feat_parse_options - Process Feature-Negotiation Options
* @sk: for general use and used by the client during connection setup
* @dreq: used by the server during connection setup
@@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
val, len, server);
}
+ break;
+ /*
+ * Support for exchanging NN options on an established connection.
+ */
+ case DCCP_OPEN:
+ case DCCP_PARTOPEN:
+ return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
+ val, len);
}
return 0; /* ignore FN options in all other states */
}
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index e56a4e5..90b957d 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
+extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
extern int dccp_insert_option_mandatory(struct sk_buff *skb);
extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8c36adf..332639b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -26,6 +26,7 @@
#include <net/timewait_sock.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
+#include <net/secure_seq.h>
#include "ackvec.h"
#include "ccid.h"
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8dc4348..b74f761 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
#include <net/transp_v6.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/secure_seq.h>
#include "dccp.h"
#include "ipv6.h"
@@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
}
-static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
- __be16 sport, __be16 dport )
-{
- return secure_tcpv6_sequence_number(saddr, daddr, sport, dport);
-}
-
-static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
+static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
{
return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 152975d..e742f90 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dp->dccps_rate_last = jiffies;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
- dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
dccp_init_xmit_timers(sk);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ba4face..2ab16e1 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -388,7 +388,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
}
ifa->ifa_next = dn_db->ifa_list;
- rcu_assign_pointer(dn_db->ifa_list, ifa);
+ RCU_INIT_POINTER(dn_db->ifa_list, ifa);
dn_ifaddr_notify(RTM_NEWADDR, ifa);
blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -1093,7 +1093,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
- rcu_assign_pointer(dev->dn_ptr, dn_db);
+ RCU_INIT_POINTER(dev->dn_ptr, dn_db);
dn_db->dev = dev;
init_timer(&dn_db->timer);
@@ -1101,7 +1101,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
if (!dn_db->neigh_parms) {
- rcu_assign_pointer(dev->dn_ptr, NULL);
+ RCU_INIT_POINTER(dev->dn_ptr, NULL);
kfree(dn_db);
return NULL;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0a47b6c..56cf9b8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -301,7 +301,6 @@ static const struct net_device_ops dsa_netdev_ops = {
.ndo_start_xmit = dsa_xmit,
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
- .ndo_set_multicast_list = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_do_ioctl = dsa_slave_ioctl,
};
@@ -314,7 +313,6 @@ static const struct net_device_ops edsa_netdev_ops = {
.ndo_start_xmit = edsa_xmit,
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
- .ndo_set_multicast_list = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_do_ioctl = dsa_slave_ioctl,
};
@@ -327,7 +325,6 @@ static const struct net_device_ops trailer_netdev_ops = {
.ndo_start_xmit = trailer_xmit,
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
- .ndo_set_multicast_list = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
.ndo_do_ioctl = dsa_slave_ioctl,
};
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 27997d3..a246836 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev)
dev->addr_len = ETH_ALEN;
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
- dev->priv_flags = IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_TX_SKB_SHARING;
memset(dev->broadcast, 0xFF, ETH_ALEN);
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
new file mode 100644
index 0000000..19d6aef
--- /dev/null
+++ b/net/ieee802154/6lowpan.c
@@ -0,0 +1,891 @@
+/*
+ * Copyright 2011, Siemens AG
+ * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
+ */
+
+/*
+ * Based on patches from Jon Smirl <jonsmirl@gmail.com>
+ * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* Jon's code is based on 6lowpan implementation for Contiki which is:
+ * Copyright (c) 2008, Swedish Institute of Computer Science.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Institute nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define DEBUG
+
+#include <linux/bitops.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <net/af_ieee802154.h>
+#include <net/ieee802154.h>
+#include <net/ieee802154_netdev.h>
+#include <net/ipv6.h>
+
+#include "6lowpan.h"
+
+/* TTL uncompression values */
+static const u8 lowpan_ttl_values[] = {0, 1, 64, 255};
+
+static LIST_HEAD(lowpan_devices);
+
+/*
+ * Uncompression of linklocal:
+ * 0 -> 16 bytes from packet
+ * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
+ * 2 -> 2 bytes from prefix - zeroes + 2 from packet
+ * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
+ *
+ * NOTE: => the uncompress function does change 0xf to 0x10
+ * NOTE: 0x00 => no-autoconfig => unspecified
+ */
+static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
+
+/*
+ * Uncompression of ctx-based:
+ * 0 -> 0 bits from packet [unspecified / reserved]
+ * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
+ * 2 -> 8 bytes from prefix - zeroes + 2 from packet
+ * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
+ */
+static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
+
+/*
+ * Uncompression of ctx-base
+ * 0 -> 0 bits from packet
+ * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
+ * 2 -> 2 bytes from prefix - zeroes + 3 from packet
+ * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
+ */
+static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
+
+/* Link local prefix */
+static const u8 lowpan_llprefix[] = {0xfe, 0x80};
+
+/* private device info */
+struct lowpan_dev_info {
+ struct net_device *real_dev; /* real WPAN device ptr */
+ struct mutex dev_list_mtx; /* mutex for list ops */
+};
+
+struct lowpan_dev_record {
+ struct net_device *ldev;
+ struct list_head list;
+};
+
+static inline struct
+lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
+{
+ return netdev_priv(dev);
+}
+
+static inline void lowpan_address_flip(u8 *src, u8 *dest)
+{
+ int i;
+ for (i = 0; i < IEEE802154_ADDR_LEN; i++)
+ (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i];
+}
+
+/* list of all 6lowpan devices, uses for package delivering */
+/* print data in line */
+static inline void lowpan_raw_dump_inline(const char *caller, char *msg,
+ unsigned char *buf, int len)
+{
+#ifdef DEBUG
+ if (msg)
+ pr_debug("(%s) %s: ", caller, msg);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE,
+ 16, 1, buf, len, false);
+#endif /* DEBUG */
+}
+
+/*
+ * print data in a table format:
+ *
+ * addr: xx xx xx xx xx xx
+ * addr: xx xx xx xx xx xx
+ * ...
+ */
+static inline void lowpan_raw_dump_table(const char *caller, char *msg,
+ unsigned char *buf, int len)
+{
+#ifdef DEBUG
+ if (msg)
+ pr_debug("(%s) %s:\n", caller, msg);
+ print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET,
+ 16, 1, buf, len, false);
+#endif /* DEBUG */
+}
+
+static u8
+lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr,
+ const unsigned char *lladdr)
+{
+ u8 val = 0;
+
+ if (is_addr_mac_addr_based(ipaddr, lladdr))
+ val = 3; /* 0-bits */
+ else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
+ /* compress IID to 16 bits xxxx::XXXX */
+ memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2);
+ *hc06_ptr += 2;
+ val = 2; /* 16-bits */
+ } else {
+ /* do not compress IID => xxxx::IID */
+ memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8);
+ *hc06_ptr += 8;
+ val = 1; /* 64-bits */
+ }
+
+ return rol8(val, shift);
+}
+
+static void
+lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr)
+{
+ memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ALEN);
+ /* second bit-flip (Universe/Local) is done according RFC2464 */
+ ipaddr->s6_addr[8] ^= 0x02;
+}
+
+/*
+ * Uncompress addresses based on a prefix and a postfix with zeroes in
+ * between. If the postfix is zero in length it will use the link address
+ * to configure the IP address (autoconf style).
+ * pref_post_count takes a byte where the first nibble specify prefix count
+ * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
+ */
+static int
+lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr,
+ u8 const *prefix, u8 pref_post_count, unsigned char *lladdr)
+{
+ u8 prefcount = pref_post_count >> 4;
+ u8 postcount = pref_post_count & 0x0f;
+
+ /* full nibble 15 => 16 */
+ prefcount = (prefcount == 15 ? 16 : prefcount);
+ postcount = (postcount == 15 ? 16 : postcount);
+
+ if (lladdr)
+ lowpan_raw_dump_inline(__func__, "linklocal address",
+ lladdr, IEEE802154_ALEN);
+ if (prefcount > 0)
+ memcpy(ipaddr, prefix, prefcount);
+
+ if (prefcount + postcount < 16)
+ memset(&ipaddr->s6_addr[prefcount], 0,
+ 16 - (prefcount + postcount));
+
+ if (postcount > 0) {
+ memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount);
+ skb_pull(skb, postcount);
+ } else if (prefcount > 0) {
+ if (lladdr == NULL)
+ return -EINVAL;
+
+ /* no IID based configuration if no prefix and no data */
+ lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr);
+ }
+
+ pr_debug("(%s): uncompressing %d + %d => ", __func__, prefcount,
+ postcount);
+ lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16);
+
+ return 0;
+}
+
+static u8 lowpan_fetch_skb_u8(struct sk_buff *skb)
+{
+ u8 ret;
+
+ ret = skb->data[0];
+ skb_pull(skb, 1);
+
+ return ret;
+}
+
+static int lowpan_header_create(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned short type, const void *_daddr,
+ const void *_saddr, unsigned len)
+{
+ u8 tmp, iphc0, iphc1, *hc06_ptr;
+ struct ipv6hdr *hdr;
+ const u8 *saddr = _saddr;
+ const u8 *daddr = _daddr;
+ u8 *head;
+ struct ieee802154_addr sa, da;
+
+ if (type != ETH_P_IPV6)
+ return 0;
+ /* TODO:
+ * if this package isn't ipv6 one, where should it be routed?
+ */
+ head = kzalloc(100, GFP_KERNEL);
+ if (head == NULL)
+ return -ENOMEM;
+
+ hdr = ipv6_hdr(skb);
+ hc06_ptr = head + 2;
+
+ pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n"
+ "\tnexthdr = 0x%02x\n\thop_lim = %d\n", __func__,
+ hdr->version, ntohs(hdr->payload_len), hdr->nexthdr,
+ hdr->hop_limit);
+
+ lowpan_raw_dump_table(__func__, "raw skb network header dump",
+ skb_network_header(skb), sizeof(struct ipv6hdr));
+
+ if (!saddr)
+ saddr = dev->dev_addr;
+
+ lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
+
+ /*
+ * As we copy some bit-length fields, in the IPHC encoding bytes,
+ * we sometimes use |=
+ * If the field is 0, and the current bit value in memory is 1,
+ * this does not work. We therefore reset the IPHC encoding here
+ */
+ iphc0 = LOWPAN_DISPATCH_IPHC;
+ iphc1 = 0;
+
+ /* TODO: context lookup */
+
+ lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
+
+ /*
+ * Traffic class, flow label
+ * If flow label is 0, compress it. If traffic class is 0, compress it
+ * We have to process both in the same time as the offset of traffic
+ * class depends on the presence of version and flow label
+ */
+
+ /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */
+ tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4);
+ tmp = ((tmp & 0x03) << 6) | (tmp >> 2);
+
+ if (((hdr->flow_lbl[0] & 0x0F) == 0) &&
+ (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) {
+ /* flow label can be compressed */
+ iphc0 |= LOWPAN_IPHC_FL_C;
+ if ((hdr->priority == 0) &&
+ ((hdr->flow_lbl[0] & 0xF0) == 0)) {
+ /* compress (elide) all */
+ iphc0 |= LOWPAN_IPHC_TC_C;
+ } else {
+ /* compress only the flow label */
+ *hc06_ptr = tmp;
+ hc06_ptr += 1;
+ }
+ } else {
+ /* Flow label cannot be compressed */
+ if ((hdr->priority == 0) &&
+ ((hdr->flow_lbl[0] & 0xF0) == 0)) {
+ /* compress only traffic class */
+ iphc0 |= LOWPAN_IPHC_TC_C;
+ *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F);
+ memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2);
+ hc06_ptr += 3;
+ } else {
+ /* compress nothing */
+ memcpy(hc06_ptr, &hdr, 4);
+ /* replace the top byte with new ECN | DSCP format */
+ *hc06_ptr = tmp;
+ hc06_ptr += 4;
+ }
+ }
+
+ /* NOTE: payload length is always compressed */
+
+ /* Next Header is compress if UDP */
+ if (hdr->nexthdr == UIP_PROTO_UDP)
+ iphc0 |= LOWPAN_IPHC_NH_C;
+
+/* TODO: next header compression */
+
+ if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
+ *hc06_ptr = hdr->nexthdr;
+ hc06_ptr += 1;
+ }
+
+ /*
+ * Hop limit
+ * if 1: compress, encoding is 01
+ * if 64: compress, encoding is 10
+ * if 255: compress, encoding is 11
+ * else do not compress
+ */
+ switch (hdr->hop_limit) {
+ case 1:
+ iphc0 |= LOWPAN_IPHC_TTL_1;
+ break;
+ case 64:
+ iphc0 |= LOWPAN_IPHC_TTL_64;
+ break;
+ case 255:
+ iphc0 |= LOWPAN_IPHC_TTL_255;
+ break;
+ default:
+ *hc06_ptr = hdr->hop_limit;
+ break;
+ }
+
+ /* source address compression */
+ if (is_addr_unspecified(&hdr->saddr)) {
+ pr_debug("(%s): source address is unspecified, setting SAC\n",
+ __func__);
+ iphc1 |= LOWPAN_IPHC_SAC;
+ /* TODO: context lookup */
+ } else if (is_addr_link_local(&hdr->saddr)) {
+ pr_debug("(%s): source address is link-local\n", __func__);
+ iphc1 |= lowpan_compress_addr_64(&hc06_ptr,
+ LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr);
+ } else {
+ pr_debug("(%s): send the full source address\n", __func__);
+ memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16);
+ hc06_ptr += 16;
+ }
+
+ /* destination address compression */
+ if (is_addr_mcast(&hdr->daddr)) {
+ pr_debug("(%s): destination address is multicast", __func__);
+ iphc1 |= LOWPAN_IPHC_M;
+ if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) {
+ pr_debug("compressed to 1 octet\n");
+ iphc1 |= LOWPAN_IPHC_DAM_11;
+ /* use last byte */
+ *hc06_ptr = hdr->daddr.s6_addr[15];
+ hc06_ptr += 1;
+ } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) {
+ pr_debug("compressed to 4 octets\n");
+ iphc1 |= LOWPAN_IPHC_DAM_10;
+ /* second byte + the last three */
+ *hc06_ptr = hdr->daddr.s6_addr[1];
+ memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3);
+ hc06_ptr += 4;
+ } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) {
+ pr_debug("compressed to 6 octets\n");
+ iphc1 |= LOWPAN_IPHC_DAM_01;
+ /* second byte + the last five */
+ *hc06_ptr = hdr->daddr.s6_addr[1];
+ memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5);
+ hc06_ptr += 6;
+ } else {
+ pr_debug("using full address\n");
+ iphc1 |= LOWPAN_IPHC_DAM_00;
+ memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16);
+ hc06_ptr += 16;
+ }
+ } else {
+ pr_debug("(%s): destination address is unicast: ", __func__);
+ /* TODO: context lookup */
+ if (is_addr_link_local(&hdr->daddr)) {
+ pr_debug("destination address is link-local\n");
+ iphc1 |= lowpan_compress_addr_64(&hc06_ptr,
+ LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr);
+ } else {
+ pr_debug("using full address\n");
+ memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16);
+ hc06_ptr += 16;
+ }
+ }
+
+ /* TODO: UDP header compression */
+ /* TODO: Next Header compression */
+
+ head[0] = iphc0;
+ head[1] = iphc1;
+
+ skb_pull(skb, sizeof(struct ipv6hdr));
+ memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head);
+
+ kfree(head);
+
+ lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
+ skb->len);
+
+ /*
+ * NOTE1: I'm still unsure about the fact that compression and WPAN
+ * header are created here and not later in the xmit. So wait for
+ * an opinion of net maintainers.
+ */
+ /*
+ * NOTE2: to be absolutely correct, we must derive PANid information
+ * from MAC subif of the 'dev' and 'real_dev' network devices, but
+ * this isn't implemented in mainline yet, so currently we assign 0xff
+ */
+ {
+ /* prepare wpan address data */
+ sa.addr_type = IEEE802154_ADDR_LONG;
+ sa.pan_id = 0xff;
+
+ da.addr_type = IEEE802154_ADDR_LONG;
+ da.pan_id = 0xff;
+
+ memcpy(&(da.hwaddr), daddr, 8);
+ memcpy(&(sa.hwaddr), saddr, 8);
+
+ mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
+ return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
+ type, (void *)&da, (void *)&sa, skb->len);
+ }
+}
+
+static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
+{
+ struct sk_buff *new;
+ struct lowpan_dev_record *entry;
+ int stat = NET_RX_SUCCESS;
+
+ new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb),
+ GFP_ATOMIC);
+ kfree_skb(skb);
+
+ if (!new)
+ return -ENOMEM;
+
+ skb_push(new, sizeof(struct ipv6hdr));
+ skb_reset_network_header(new);
+ skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr));
+
+ new->protocol = htons(ETH_P_IPV6);
+ new->pkt_type = PACKET_HOST;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(entry, &lowpan_devices, list)
+ if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) {
+ skb = skb_copy(new, GFP_ATOMIC);
+ if (!skb) {
+ stat = -ENOMEM;
+ break;
+ }
+
+ skb->dev = entry->ldev;
+ stat = netif_rx(skb);
+ }
+ rcu_read_unlock();
+
+ kfree_skb(new);
+
+ return stat;
+}
+
+static int
+lowpan_process_data(struct sk_buff *skb)
+{
+ struct ipv6hdr hdr;
+ u8 tmp, iphc0, iphc1, num_context = 0;
+ u8 *_saddr, *_daddr;
+ int err;
+
+ lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
+ skb->len);
+ /* at least two bytes will be used for the encoding */
+ if (skb->len < 2)
+ goto drop;
+ iphc0 = lowpan_fetch_skb_u8(skb);
+ iphc1 = lowpan_fetch_skb_u8(skb);
+
+ _saddr = mac_cb(skb)->sa.hwaddr;
+ _daddr = mac_cb(skb)->da.hwaddr;
+
+ pr_debug("(%s): iphc0 = %02x, iphc1 = %02x\n", __func__, iphc0, iphc1);
+
+ /* another if the CID flag is set */
+ if (iphc1 & LOWPAN_IPHC_CID) {
+ pr_debug("(%s): CID flag is set, increase header with one\n",
+ __func__);
+ if (!skb->len)
+ goto drop;
+ num_context = lowpan_fetch_skb_u8(skb);
+ }
+
+ hdr.version = 6;
+
+ /* Traffic Class and Flow Label */
+ switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) {
+ /*
+ * Traffic Class and FLow Label carried in-line
+ * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
+ */
+ case 0: /* 00b */
+ if (!skb->len)
+ goto drop;
+ tmp = lowpan_fetch_skb_u8(skb);
+ memcpy(&hdr.flow_lbl, &skb->data[0], 3);
+ skb_pull(skb, 3);
+ hdr.priority = ((tmp >> 2) & 0x0f);
+ hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) |
+ (hdr.flow_lbl[0] & 0x0f);
+ break;
+ /*
+ * Traffic class carried in-line
+ * ECN + DSCP (1 byte), Flow Label is elided
+ */
+ case 1: /* 10b */
+ if (!skb->len)
+ goto drop;
+ tmp = lowpan_fetch_skb_u8(skb);
+ hdr.priority = ((tmp >> 2) & 0x0f);
+ hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
+ hdr.flow_lbl[1] = 0;
+ hdr.flow_lbl[2] = 0;
+ break;
+ /*
+ * Flow Label carried in-line
+ * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
+ */
+ case 2: /* 01b */
+ if (!skb->len)
+ goto drop;
+ tmp = lowpan_fetch_skb_u8(skb);
+ hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30);
+ memcpy(&hdr.flow_lbl[1], &skb->data[0], 2);
+ skb_pull(skb, 2);
+ break;
+ /* Traffic Class and Flow Label are elided */
+ case 3: /* 11b */
+ hdr.priority = 0;
+ hdr.flow_lbl[0] = 0;
+ hdr.flow_lbl[1] = 0;
+ hdr.flow_lbl[2] = 0;
+ break;
+ default:
+ break;
+ }
+
+ /* Next Header */
+ if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
+ /* Next header is carried inline */
+ if (!skb->len)
+ goto drop;
+ hdr.nexthdr = lowpan_fetch_skb_u8(skb);
+ pr_debug("(%s): NH flag is set, next header is carried "
+ "inline: %02x\n", __func__, hdr.nexthdr);
+ }
+
+ /* Hop Limit */
+ if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I)
+ hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03];
+ else {
+ if (!skb->len)
+ goto drop;
+ hdr.hop_limit = lowpan_fetch_skb_u8(skb);
+ }
+
+ /* Extract SAM to the tmp variable */
+ tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
+
+ /* Source address uncompression */
+ pr_debug("(%s): source address stateless compression\n", __func__);
+ err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix,
+ lowpan_unc_llconf[tmp], skb->data);
+ if (err)
+ goto drop;
+
+ /* Extract DAM to the tmp variable */
+ tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03;
+
+ /* check for Multicast Compression */
+ if (iphc1 & LOWPAN_IPHC_M) {
+ if (iphc1 & LOWPAN_IPHC_DAC) {
+ pr_debug("(%s): destination address context-based "
+ "multicast compression\n", __func__);
+ /* TODO: implement this */
+ } else {
+ u8 prefix[] = {0xff, 0x02};
+
+ pr_debug("(%s): destination address non-context-based"
+ " multicast compression\n", __func__);
+ if (0 < tmp && tmp < 3) {
+ if (!skb->len)
+ goto drop;
+ else
+ prefix[1] = lowpan_fetch_skb_u8(skb);
+ }
+
+ err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
+ lowpan_unc_mxconf[tmp], NULL);
+ if (err)
+ goto drop;
+ }
+ } else {
+ pr_debug("(%s): destination address stateless compression\n",
+ __func__);
+ err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix,
+ lowpan_unc_llconf[tmp], skb->data);
+ if (err)
+ goto drop;
+ }
+
+ /* TODO: UDP header parse */
+
+ /* Not fragmented package */
+ hdr.payload_len = htons(skb->len);
+
+ pr_debug("(%s): skb headroom size = %d, data length = %d\n", __func__,
+ skb_headroom(skb), skb->len);
+
+ pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t"
+ "nexthdr = 0x%02x\n\thop_lim = %d\n", __func__, hdr.version,
+ ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit);
+
+ lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
+ sizeof(hdr));
+ return lowpan_skb_deliver(skb, &hdr);
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int lowpan_set_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *sa = p;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ /* TODO: validate addr */
+ memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+
+ return 0;
+}
+
+static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ int err = 0;
+
+ pr_debug("(%s): package xmit\n", __func__);
+
+ skb->dev = lowpan_dev_info(dev)->real_dev;
+ if (skb->dev == NULL) {
+ pr_debug("(%s) ERROR: no real wpan device found\n", __func__);
+ dev_kfree_skb(skb);
+ } else
+ err = dev_queue_xmit(skb);
+
+ return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK);
+}
+
+static void lowpan_dev_free(struct net_device *dev)
+{
+ dev_put(lowpan_dev_info(dev)->real_dev);
+ free_netdev(dev);
+}
+
+static struct header_ops lowpan_header_ops = {
+ .create = lowpan_header_create,
+};
+
+static const struct net_device_ops lowpan_netdev_ops = {
+ .ndo_start_xmit = lowpan_xmit,
+ .ndo_set_mac_address = lowpan_set_address,
+};
+
+static void lowpan_setup(struct net_device *dev)
+{
+ pr_debug("(%s)\n", __func__);
+
+ dev->addr_len = IEEE802154_ADDR_LEN;
+ memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
+ dev->type = ARPHRD_IEEE802154;
+ dev->features = NETIF_F_NO_CSUM;
+ /* Frame Control + Sequence Number + Address fields + Security Header */
+ dev->hard_header_len = 2 + 1 + 20 + 14;
+ dev->needed_tailroom = 2; /* FCS */
+ dev->mtu = 1281;
+ dev->tx_queue_len = 0;
+ dev->flags = IFF_NOARP | IFF_BROADCAST;
+ dev->watchdog_timeo = 0;
+
+ dev->netdev_ops = &lowpan_netdev_ops;
+ dev->header_ops = &lowpan_header_ops;
+ dev->destructor = lowpan_dev_free;
+}
+
+static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ pr_debug("(%s)\n", __func__);
+
+ if (tb[IFLA_ADDRESS]) {
+ if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ if (!netif_running(dev))
+ goto drop;
+
+ if (dev->type != ARPHRD_IEEE802154)
+ goto drop;
+
+ /* check that it's our buffer */
+ if ((skb->data[0] & 0xe0) == 0x60)
+ lowpan_process_data(skb);
+
+ return NET_RX_SUCCESS;
+
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+static int lowpan_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct net_device *real_dev;
+ struct lowpan_dev_record *entry;
+
+ pr_debug("(%s)\n", __func__);
+
+ if (!tb[IFLA_LINK])
+ return -EINVAL;
+ /* find and hold real wpan device */
+ real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+ if (!real_dev)
+ return -ENODEV;
+
+ lowpan_dev_info(dev)->real_dev = real_dev;
+ mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
+
+ entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
+ if (!entry) {
+ dev_put(real_dev);
+ lowpan_dev_info(dev)->real_dev = NULL;
+ return -ENOMEM;
+ }
+
+ entry->ldev = dev;
+
+ mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
+ INIT_LIST_HEAD(&entry->list);
+ list_add_tail(&entry->list, &lowpan_devices);
+ mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
+
+ register_netdevice(dev);
+
+ return 0;
+}
+
+static void lowpan_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev);
+ struct net_device *real_dev = lowpan_dev->real_dev;
+ struct lowpan_dev_record *entry;
+ struct lowpan_dev_record *tmp;
+
+ ASSERT_RTNL();
+
+ mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
+ list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
+ if (entry->ldev == dev) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+ mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
+
+ mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx);
+
+ unregister_netdevice_queue(dev, head);
+
+ dev_put(real_dev);
+}
+
+static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
+ .kind = "lowpan",
+ .priv_size = sizeof(struct lowpan_dev_info),
+ .setup = lowpan_setup,
+ .newlink = lowpan_newlink,
+ .dellink = lowpan_dellink,
+ .validate = lowpan_validate,
+};
+
+static inline int __init lowpan_netlink_init(void)
+{
+ return rtnl_link_register(&lowpan_link_ops);
+}
+
+static inline void __init lowpan_netlink_fini(void)
+{
+ rtnl_link_unregister(&lowpan_link_ops);
+}
+
+static struct packet_type lowpan_packet_type = {
+ .type = __constant_htons(ETH_P_IEEE802154),
+ .func = lowpan_rcv,
+};
+
+static int __init lowpan_init_module(void)
+{
+ int err = 0;
+
+ pr_debug("(%s)\n", __func__);
+
+ err = lowpan_netlink_init();
+ if (err < 0)
+ goto out;
+
+ dev_add_pack(&lowpan_packet_type);
+out:
+ return err;
+}
+
+static void __exit lowpan_cleanup_module(void)
+{
+ pr_debug("(%s)\n", __func__);
+
+ lowpan_netlink_fini();
+
+ dev_remove_pack(&lowpan_packet_type);
+}
+
+module_init(lowpan_init_module);
+module_exit(lowpan_cleanup_module);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("lowpan");
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
new file mode 100644
index 0000000..5d8cf80
--- /dev/null
+++ b/net/ieee802154/6lowpan.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2011, Siemens AG
+ * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
+ */
+
+/*
+ * Based on patches from Jon Smirl <jonsmirl@gmail.com>
+ * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* Jon's code is based on 6lowpan implementation for Contiki which is:
+ * Copyright (c) 2008, Swedish Institute of Computer Science.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Institute nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __6LOWPAN_H__
+#define __6LOWPAN_H__
+
+/* need to know address length to manipulate with it */
+#define IEEE802154_ALEN 8
+
+#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */
+#define UIP_IPH_LEN 40 /* ipv6 fixed header size */
+#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */
+#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */
+
+/*
+ * ipv6 address based on mac
+ * second bit-flip (Universe/Local) is done according RFC2464
+ */
+#define is_addr_mac_addr_based(a, m) \
+ ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \
+ (((a)->s6_addr[9]) == (m)[1]) && \
+ (((a)->s6_addr[10]) == (m)[2]) && \
+ (((a)->s6_addr[11]) == (m)[3]) && \
+ (((a)->s6_addr[12]) == (m)[4]) && \
+ (((a)->s6_addr[13]) == (m)[5]) && \
+ (((a)->s6_addr[14]) == (m)[6]) && \
+ (((a)->s6_addr[15]) == (m)[7]))
+
+/* ipv6 address is unspecified */
+#define is_addr_unspecified(a) \
+ ((((a)->s6_addr32[0]) == 0) && \
+ (((a)->s6_addr32[1]) == 0) && \
+ (((a)->s6_addr32[2]) == 0) && \
+ (((a)->s6_addr32[3]) == 0))
+
+/* compare ipv6 addresses prefixes */
+#define ipaddr_prefixcmp(addr1, addr2, length) \
+ (memcmp(addr1, addr2, length >> 3) == 0)
+
+/* local link, i.e. FE80::/10 */
+#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE)
+
+/*
+ * check whether we can compress the IID to 16 bits,
+ * it's possible for unicast adresses with first 49 bits are zero only.
+ */
+#define lowpan_is_iid_16_bit_compressable(a) \
+ ((((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr16[5]) == 0) && \
+ (((a)->s6_addr16[6]) == 0) && \
+ ((((a)->s6_addr[14]) & 0x80) == 0))
+
+/* multicast address */
+#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
+
+/* check whether the 112-bit gid of the multicast address is mappable to: */
+
+/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */
+#define lowpan_is_mcast_addr_compressable(a) \
+ ((((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr16[5]) == 0) && \
+ (((a)->s6_addr16[6]) == 0) && \
+ (((a)->s6_addr[14]) == 0) && \
+ ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2)))
+
+/* 48 bits, FFXX::00XX:XXXX:XXXX */
+#define lowpan_is_mcast_addr_compressable48(a) \
+ ((((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr[10]) == 0))
+
+/* 32 bits, FFXX::00XX:XXXX */
+#define lowpan_is_mcast_addr_compressable32(a) \
+ ((((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr16[5]) == 0) && \
+ (((a)->s6_addr[12]) == 0))
+
+/* 8 bits, FF02::00XX */
+#define lowpan_is_mcast_addr_compressable8(a) \
+ ((((a)->s6_addr[1]) == 2) && \
+ (((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr16[5]) == 0) && \
+ (((a)->s6_addr16[6]) == 0) && \
+ (((a)->s6_addr[14]) == 0))
+
+#define lowpan_is_addr_broadcast(a) \
+ ((((a)[0]) == 0xFF) && \
+ (((a)[1]) == 0xFF) && \
+ (((a)[2]) == 0xFF) && \
+ (((a)[3]) == 0xFF) && \
+ (((a)[4]) == 0xFF) && \
+ (((a)[5]) == 0xFF) && \
+ (((a)[6]) == 0xFF) && \
+ (((a)[7]) == 0xFF))
+
+#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */
+#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */
+#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */
+#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */
+#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */
+
+/*
+ * Values of fields within the IPHC encoding first byte
+ * (C stands for compressed and I for inline)
+ */
+#define LOWPAN_IPHC_TF 0x18
+
+#define LOWPAN_IPHC_FL_C 0x10
+#define LOWPAN_IPHC_TC_C 0x08
+#define LOWPAN_IPHC_NH_C 0x04
+#define LOWPAN_IPHC_TTL_1 0x01
+#define LOWPAN_IPHC_TTL_64 0x02
+#define LOWPAN_IPHC_TTL_255 0x03
+#define LOWPAN_IPHC_TTL_I 0x00
+
+
+/* Values of fields within the IPHC encoding second byte */
+#define LOWPAN_IPHC_CID 0x80
+
+#define LOWPAN_IPHC_SAC 0x40
+#define LOWPAN_IPHC_SAM_00 0x00
+#define LOWPAN_IPHC_SAM_01 0x10
+#define LOWPAN_IPHC_SAM_10 0x20
+#define LOWPAN_IPHC_SAM 0x30
+
+#define LOWPAN_IPHC_SAM_BIT 4
+
+#define LOWPAN_IPHC_M 0x08
+#define LOWPAN_IPHC_DAC 0x04
+#define LOWPAN_IPHC_DAM_00 0x00
+#define LOWPAN_IPHC_DAM_01 0x01
+#define LOWPAN_IPHC_DAM_10 0x02
+#define LOWPAN_IPHC_DAM_11 0x03
+
+#define LOWPAN_IPHC_DAM_BIT 0
+/*
+ * LOWPAN_UDP encoding (works together with IPHC)
+ */
+#define LOWPAN_NHC_UDP_MASK 0xF8
+#define LOWPAN_NHC_UDP_ID 0xF0
+#define LOWPAN_NHC_UDP_CHECKSUMC 0x04
+#define LOWPAN_NHC_UDP_CHECKSUMI 0x00
+
+/* values for port compression, _with checksum_ ie bit 5 set to 0 */
+#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */
+#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline,
+ dest = 0xF0 + 8 bit inline */
+#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline,
+ dest = 16 bit inline */
+#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
+
+#endif /* __6LOWPAN_H__ */
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 1c1de97..7dee650 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -10,3 +10,9 @@ config IEEE802154
Say Y here to compile LR-WPAN support into the kernel or say M to
compile it as modules.
+
+config IEEE802154_6LOWPAN
+ tristate "6lowpan support over IEEE 802.15.4"
+ depends on IEEE802154 && IPV6
+ ---help---
+ IPv6 compression over IEEE 802.15.4.
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 5761185..d7716d6 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,3 +1,5 @@
-obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
-ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
-af_802154-y := af_ieee802154.o raw.o dgram.o
+obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
+obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o
+
+ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
+af_802154-y := af_ieee802154.o raw.o dgram.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b745d4..dd2b947 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
if (addr->sin_family != AF_INET) {
+ /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
+ * only if s_addr is INADDR_ANY.
+ */
err = -EAFNOSUPPORT;
- goto out;
+ if (addr->sin_family != AF_UNSPEC ||
+ addr->sin_addr.s_addr != htonl(INADDR_ANY))
+ goto out;
}
chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bc19bd0..c6b5092 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
ip_mc_up(in_dev);
/* we can receive as soon as ip_ptr is set -- do this last */
- rcu_assign_pointer(dev->ip_ptr, in_dev);
+ RCU_INIT_POINTER(dev->ip_ptr, in_dev);
out:
return in_dev;
out_kfree:
@@ -291,7 +291,7 @@ static void inetdev_destroy(struct in_device *in_dev)
inet_free_ifa(ifa);
}
- rcu_assign_pointer(dev->ip_ptr, NULL);
+ RCU_INIT_POINTER(dev->ip_ptr, NULL);
devinet_sysctl_unregister(in_dev);
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -1175,7 +1175,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_REGISTER:
printk(KERN_DEBUG "inetdev_event: bug\n");
- rcu_assign_pointer(dev->ip_ptr, NULL);
+ RCU_INIT_POINTER(dev->ip_ptr, NULL);
break;
case NETDEV_UP:
if (!inetdev_valid_mtu(dev->mtu))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 33e2c35..80106d8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
};
/* Release a nexthop info record */
+static void free_fib_info_rcu(struct rcu_head *head)
+{
+ struct fib_info *fi = container_of(head, struct fib_info, rcu);
+
+ if (fi->fib_metrics != (u32 *) dst_default_metrics)
+ kfree(fi->fib_metrics);
+ kfree(fi);
+}
void free_fib_info(struct fib_info *fi)
{
@@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi)
} endfor_nexthops(fi);
fib_info_cnt--;
release_net(fi->fib_net);
- kfree_rcu(fi, rcu);
+ call_rcu(&fi->rcu, free_fib_info_rcu);
}
void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index de9e297..89d6f71 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -204,7 +204,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
-/* Same as rcu_assign_pointer
+/* Same as RCU_INIT_POINTER
* but that macro() assumes that value is a pointer.
*/
static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
@@ -528,7 +528,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
if (n)
node_set_parent(n, tn);
- rcu_assign_pointer(tn->child[i], n);
+ RCU_INIT_POINTER(tn->child[i], n);
}
#define MAX_WORK 10
@@ -1014,7 +1014,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
tp = node_parent((struct rt_trie_node *) tn);
if (!tp)
- rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
+ RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
tnode_free_flush();
if (!tp)
@@ -1026,7 +1026,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
if (IS_TNODE(tn))
tn = (struct tnode *)resize(t, (struct tnode *)tn);
- rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
+ RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
tnode_free_flush();
}
@@ -1163,7 +1163,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
put_child(t, (struct tnode *)tp, cindex,
(struct rt_trie_node *)tn);
} else {
- rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
+ RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
tp = tn;
}
}
@@ -1621,7 +1621,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
put_child(t, (struct tnode *)tp, cindex, NULL);
trie_rebalance(t, tp);
} else
- rcu_assign_pointer(t->trie, NULL);
+ RCU_INIT_POINTER(t->trie, NULL);
free_leaf(l);
}
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index dbfc21d..8cb1ebb 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -34,7 +34,7 @@ int gre_add_protocol(const struct gre_protocol *proto, u8 version)
if (gre_proto[version])
goto err_out_unlock;
- rcu_assign_pointer(gre_proto[version], proto);
+ RCU_INIT_POINTER(gre_proto[version], proto);
spin_unlock(&gre_proto_lock);
return 0;
@@ -54,7 +54,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
if (rcu_dereference_protected(gre_proto[version],
lockdep_is_held(&gre_proto_lock)) != proto)
goto err_out_unlock;
- rcu_assign_pointer(gre_proto[version], NULL);
+ RCU_INIT_POINTER(gre_proto[version], NULL);
spin_unlock(&gre_proto_lock);
synchronize_rcu();
return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f1d27f6..c7472ef 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
break;
for (i=0; i<nsrcs; i++) {
/* skip inactive filters */
- if (pmc->sfcount[MCAST_INCLUDE] ||
+ if (psf->sf_count[MCAST_INCLUDE] ||
pmc->sfcount[MCAST_EXCLUDE] !=
psf->sf_count[MCAST_EXCLUDE])
continue;
@@ -1009,7 +1009,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
/* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG.
We will get multicast token leakage, when IFF_MULTICAST
- is changed. This check should be done in dev->set_multicast_list
+ is changed. This check should be done in ndo_set_rx_mode
routine. Something sort of:
if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; }
--ANK
@@ -1242,7 +1242,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
im->next_rcu = in_dev->mc_list;
in_dev->mc_count++;
- rcu_assign_pointer(in_dev->mc_list, im);
+ RCU_INIT_POINTER(in_dev->mc_list, im);
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im->multiaddr);
@@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfcount[sfmode]--;
for (j=0; j<i; j++)
- (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+ (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
#ifdef CONFIG_IP_MULTICAST
struct ip_sf_list *psf;
@@ -1813,7 +1813,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
iml->next_rcu = inet->mc_list;
iml->sflist = NULL;
iml->sfmode = MCAST_EXCLUDE;
- rcu_assign_pointer(inet->mc_list, iml);
+ RCU_INIT_POINTER(inet->mc_list, iml);
ip_mc_inc_group(in_dev, addr);
err = 0;
done:
@@ -1835,7 +1835,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
}
err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
iml->sfmode, psf->sl_count, psf->sl_addr, 0);
- rcu_assign_pointer(iml->sflist, NULL);
+ RCU_INIT_POINTER(iml->sflist, NULL);
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psf, rcu);
@@ -2000,7 +2000,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
- rcu_assign_pointer(pmc->sflist, newpsl);
+ RCU_INIT_POINTER(pmc->sflist, newpsl);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2103,7 +2103,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
} else
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
- rcu_assign_pointer(pmc->sflist, newpsl);
+ RCU_INIT_POINTER(pmc->sflist, newpsl);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3c0369a..984ec65 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -21,6 +21,7 @@
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
+#include <net/secure_seq.h>
#include <net/ip.h>
/*
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index ef7ae60..8e6be5a 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -433,7 +433,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
if (!lro_mgr->get_frag_header ||
lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
(void *)&tcph, &flags, priv)) {
- mac_hdr = page_address(frags->page) + frags->page_offset;
+ mac_hdr = skb_frag_address(frags);
goto out1;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e382138..86f13c67 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
#include <linux/net.h>
#include <net/ip.h>
#include <net/inetpeer.h>
+#include <net/secure_seq.h>
/*
* Theory of operations.
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ccaaa85..ae3bb14 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(newskb));
+ skb_dst_force(newskb);
netif_rx_ni(newskb);
return 0;
}
@@ -204,9 +205,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
skb = skb2;
}
+ rcu_read_lock();
neigh = dst_get_neighbour(dst);
- if (neigh)
- return neigh_output(neigh, skb);
+ if (neigh) {
+ int res = neigh_output(neigh, skb);
+
+ rcu_read_unlock();
+ return res;
+ }
+ rcu_read_unlock();
if (net_ratelimit())
printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
@@ -982,13 +989,13 @@ alloc_new_skb:
if (page && (left = PAGE_SIZE - off) > 0) {
if (copy >= left)
copy = left;
- if (page != frag->page) {
+ if (page != skb_frag_page(frag)) {
if (i == MAX_SKB_FRAGS) {
err = -EMSGSIZE;
goto error;
}
- get_page(page);
skb_fill_page_desc(skb, i, page, off, 0);
+ skb_frag_ref(skb, i);
frag = &skb_shinfo(skb)->frags[i];
}
} else if (i < MAX_SKB_FRAGS) {
@@ -1008,7 +1015,8 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+ if (getfrag(from, skb_frag_address(frag)+frag->size,
+ offset, copy, skb->len, skb) < 0) {
err = -EFAULT;
goto error;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ab0c9ef..8905e92 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen, unsigned flags)
{
struct inet_sock *inet = inet_sk(sk);
int val;
@@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_control = optval;
msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_flags = flags;
if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
struct in_pktinfo info;
@@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
return compat_mc_getsockopt(sk, level, optname, optval, optlen,
ip_getsockopt);
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname, optval, optlen,
+ MSG_CMSG_COMPAT);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 378b20b..065effd 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
+ RCU_INIT_POINTER(*tp, t->next);
break;
}
}
@@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
+ RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
+ RCU_INIT_POINTER(*tp, t);
}
static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -301,7 +301,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
struct ipip_net *ipn = net_generic(net, ipip_net_id);
if (dev == ipn->fb_tunnel_dev)
- rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
+ RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
else
ipip_tunnel_unlink(ipn, netdev_priv(dev));
dev_put(dev);
@@ -791,7 +791,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
return -ENOMEM;
dev_hold(dev);
- rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
+ RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel);
return 0;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 58e8791..6164e98 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1176,7 +1176,7 @@ static void mrtsock_destruct(struct sock *sk)
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
- rcu_assign_pointer(mrt->mroute_sk, NULL);
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
mroute_clean_tables(mrt);
}
}
@@ -1203,7 +1203,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
return -ENOENT;
if (optname != MRT_INIT) {
- if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
!capable(CAP_NET_ADMIN))
return -EACCES;
}
@@ -1224,13 +1224,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
ret = ip_ra_control(sk, 1, mrtsock_destruct);
if (ret == 0) {
- rcu_assign_pointer(mrt->mroute_sk, sk);
+ RCU_INIT_POINTER(mrt->mroute_sk, sk);
IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
case MRT_DONE:
- if (sk != rcu_dereference_raw(mrt->mroute_sk))
+ if (sk != rcu_access_pointer(mrt->mroute_sk))
return -EACCES;
return ip_ra_control(sk, 0, NULL);
case MRT_ADD_VIF:
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 2e97e3e..929b27b 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
- __u8 flags = 0;
+ __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
unsigned int hh_len;
- if (!skb->sk && addr_type != RTN_LOCAL) {
- if (addr_type == RTN_UNSPEC)
- addr_type = inet_addr_type(net, saddr);
- if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
- flags |= FLOWI_FLAG_ANYSRC;
- else
- saddr = 0;
- }
+ if (addr_type == RTN_UNSPEC)
+ addr_type = inet_addr_type(net, saddr);
+ if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
+ flags |= FLOWI_FLAG_ANYSRC;
+ else
+ saddr = 0;
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
fl4.flowi4_mark = skb->mark;
- fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags;
+ fl4.flowi4_flags = flags;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return -1;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5c9b9d9..e59aabd 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
return skb;
nlmsg_failure:
+ kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip_queue: error creating packet message\n");
return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
struct nf_queue_entry *entry;
- if (vmsg->value > NF_MAX_VERDICT)
+ if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
return -EINVAL;
entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
break;
case IPQM_VERDICT:
- if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
- status = -EINVAL;
- else
- status = ipq_set_verdict(&pmsg->msg.verdict,
- len - sizeof(*pmsg));
- break;
+ status = ipq_set_verdict(&pmsg->msg.verdict,
+ len - sizeof(*pmsg));
+ break;
default:
status = -EINVAL;
}
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 703f366f..7b22382 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -70,14 +70,14 @@ static unsigned int help(struct sk_buff *skb,
static void __exit nf_nat_amanda_fini(void)
{
- rcu_assign_pointer(nf_nat_amanda_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_amanda_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_amanda_init(void)
{
BUG_ON(nf_nat_amanda_hook != NULL);
- rcu_assign_pointer(nf_nat_amanda_hook, help);
+ RCU_INIT_POINTER(nf_nat_amanda_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 3346de5..447bc5c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -514,7 +514,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
ret = -EBUSY;
goto out;
}
- rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
+ RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto);
out:
spin_unlock_bh(&nf_nat_lock);
return ret;
@@ -525,7 +525,7 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
{
spin_lock_bh(&nf_nat_lock);
- rcu_assign_pointer(nf_nat_protos[proto->protonum],
+ RCU_INIT_POINTER(nf_nat_protos[proto->protonum],
&nf_nat_unknown_protocol);
spin_unlock_bh(&nf_nat_lock);
synchronize_rcu();
@@ -736,10 +736,10 @@ static int __init nf_nat_init(void)
/* Sew in builtin protocols. */
spin_lock_bh(&nf_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
+ RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol);
+ RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
+ RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
+ RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
spin_unlock_bh(&nf_nat_lock);
/* Initialize fake conntrack so that NAT will skip it */
@@ -748,12 +748,12 @@ static int __init nf_nat_init(void)
l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
BUG_ON(nf_nat_seq_adjust_hook != NULL);
- rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+ RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
- rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+ RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
nfnetlink_parse_nat_setup);
BUG_ON(nf_ct_nat_offset != NULL);
- rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset);
+ RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
return 0;
cleanup_extend:
@@ -766,9 +766,9 @@ static void __exit nf_nat_cleanup(void)
unregister_pernet_subsys(&nf_nat_net_ops);
nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
- rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
- rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
- rcu_assign_pointer(nf_ct_nat_offset, NULL);
+ RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
+ RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
+ RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
synchronize_net();
}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index dc73abb..e462a95 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -113,14 +113,14 @@ out:
static void __exit nf_nat_ftp_fini(void)
{
- rcu_assign_pointer(nf_nat_ftp_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_ftp_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_ftp_init(void)
{
BUG_ON(nf_nat_ftp_hook != NULL);
- rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
+ RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 790f316..b9a1136 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -581,30 +581,30 @@ static int __init init(void)
BUG_ON(nat_callforwarding_hook != NULL);
BUG_ON(nat_q931_hook != NULL);
- rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
- rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
- rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
- rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
- rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
- rcu_assign_pointer(nat_t120_hook, nat_t120);
- rcu_assign_pointer(nat_h245_hook, nat_h245);
- rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
- rcu_assign_pointer(nat_q931_hook, nat_q931);
+ RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
+ RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
+ RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
+ RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ RCU_INIT_POINTER(nat_t120_hook, nat_t120);
+ RCU_INIT_POINTER(nat_h245_hook, nat_h245);
+ RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
+ RCU_INIT_POINTER(nat_q931_hook, nat_q931);
return 0;
}
/****************************************************************************/
static void __exit fini(void)
{
- rcu_assign_pointer(set_h245_addr_hook, NULL);
- rcu_assign_pointer(set_h225_addr_hook, NULL);
- rcu_assign_pointer(set_sig_addr_hook, NULL);
- rcu_assign_pointer(set_ras_addr_hook, NULL);
- rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
- rcu_assign_pointer(nat_t120_hook, NULL);
- rcu_assign_pointer(nat_h245_hook, NULL);
- rcu_assign_pointer(nat_callforwarding_hook, NULL);
- rcu_assign_pointer(nat_q931_hook, NULL);
+ RCU_INIT_POINTER(set_h245_addr_hook, NULL);
+ RCU_INIT_POINTER(set_h225_addr_hook, NULL);
+ RCU_INIT_POINTER(set_sig_addr_hook, NULL);
+ RCU_INIT_POINTER(set_ras_addr_hook, NULL);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
+ RCU_INIT_POINTER(nat_t120_hook, NULL);
+ RCU_INIT_POINTER(nat_h245_hook, NULL);
+ RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
+ RCU_INIT_POINTER(nat_q931_hook, NULL);
synchronize_rcu();
}
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 535e1a8..979ae16 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -75,14 +75,14 @@ static unsigned int help(struct sk_buff *skb,
static void __exit nf_nat_irc_fini(void)
{
- rcu_assign_pointer(nf_nat_irc_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_irc_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_irc_init(void)
{
BUG_ON(nf_nat_irc_hook != NULL);
- rcu_assign_pointer(nf_nat_irc_hook, help);
+ RCU_INIT_POINTER(nf_nat_irc_hook, help);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 4c06003..3e8284b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -282,25 +282,25 @@ static int __init nf_nat_helper_pptp_init(void)
nf_nat_need_gre();
BUG_ON(nf_nat_pptp_hook_outbound != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
BUG_ON(nf_nat_pptp_hook_inbound != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
return 0;
}
static void __exit nf_nat_helper_pptp_fini(void)
{
- rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
synchronize_rcu();
}
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 3e61faf..f52d41e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -12,6 +12,7 @@
#include <linux/ip.h>
#include <linux/netfilter.h>
+#include <net/secure_seq.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_rule.h>
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e40cf78..78844d9 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -528,13 +528,13 @@ err1:
static void __exit nf_nat_sip_fini(void)
{
- rcu_assign_pointer(nf_nat_sip_hook, NULL);
- rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL);
- rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
synchronize_rcu();
}
@@ -547,13 +547,13 @@ static int __init nf_nat_sip_init(void)
BUG_ON(nf_nat_sdp_port_hook != NULL);
BUG_ON(nf_nat_sdp_session_hook != NULL);
BUG_ON(nf_nat_sdp_media_hook != NULL);
- rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
- rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
- rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
- rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
- rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
- rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
- rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
+ RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip);
+ RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
+ RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect);
+ RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
+ RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port);
+ RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session);
+ RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 076b7c8..d1cb412 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1310,7 +1310,7 @@ static int __init nf_nat_snmp_basic_init(void)
int ret = 0;
BUG_ON(nf_nat_snmp_hook != NULL);
- rcu_assign_pointer(nf_nat_snmp_hook, help);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
ret = nf_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
@@ -1322,7 +1322,7 @@ static int __init nf_nat_snmp_basic_init(void)
static void __exit nf_nat_snmp_basic_fini(void)
{
- rcu_assign_pointer(nf_nat_snmp_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index a6e606e..9290048 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -284,7 +284,7 @@ static int __init nf_nat_standalone_init(void)
#ifdef CONFIG_XFRM
BUG_ON(ip_nat_decode_session != NULL);
- rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
+ RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session);
#endif
ret = nf_nat_rule_init();
if (ret < 0) {
@@ -302,7 +302,7 @@ static int __init nf_nat_standalone_init(void)
nf_nat_rule_cleanup();
cleanup_decode_session:
#ifdef CONFIG_XFRM
- rcu_assign_pointer(ip_nat_decode_session, NULL);
+ RCU_INIT_POINTER(ip_nat_decode_session, NULL);
synchronize_net();
#endif
return ret;
@@ -313,7 +313,7 @@ static void __exit nf_nat_standalone_fini(void)
nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
nf_nat_rule_cleanup();
#ifdef CONFIG_XFRM
- rcu_assign_pointer(ip_nat_decode_session, NULL);
+ RCU_INIT_POINTER(ip_nat_decode_session, NULL);
synchronize_net();
#endif
/* Conntrack caches are unregistered in nf_conntrack_cleanup */
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 7274a43..a2901bf 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -36,14 +36,14 @@ static unsigned int help(struct sk_buff *skb,
static void __exit nf_nat_tftp_fini(void)
{
- rcu_assign_pointer(nf_nat_tftp_hook, NULL);
+ RCU_INIT_POINTER(nf_nat_tftp_hook, NULL);
synchronize_rcu();
}
static int __init nf_nat_tftp_init(void)
{
BUG_ON(nf_nat_tftp_hook != NULL);
- rcu_assign_pointer(nf_nat_tftp_hook, help);
+ RCU_INIT_POINTER(nf_nat_tftp_hook, help);
return 0;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b14ec7d..4bfad5d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
+ SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
+ SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1457acb..61714bd 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
- FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
+ inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+ daddr, saddr, 0, 0);
if (!inet->hdrincl) {
err = raw_probe_proto_opt(&fl4, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1730689..26c77e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,6 +109,7 @@
#include <linux/sysctl.h>
#endif
#include <net/atmclip.h>
+#include <net/secure_seq.h>
#define RT_FL_TOS(oldflp4) \
((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
@@ -119,7 +120,6 @@
static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
-static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -323,7 +323,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
+ if (!rcu_access_pointer(rt_hash_table[st->bucket].chain))
continue;
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -349,7 +349,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
do {
if (--st->bucket < 0)
return NULL;
- } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
+ } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain));
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
}
@@ -721,7 +721,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1,
{
return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
- (rt1->rt_iif ^ rt2->rt_iif)) == 0);
+ (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0);
}
static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
@@ -730,8 +730,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
(rt1->rt_mark ^ rt2->rt_mark) |
(rt1->rt_key_tos ^ rt2->rt_key_tos) |
- (rt1->rt_oif ^ rt2->rt_oif) |
- (rt1->rt_iif ^ rt2->rt_iif)) == 0;
+ (rt1->rt_route_iif ^ rt2->rt_route_iif) |
+ (rt1->rt_oif ^ rt2->rt_oif)) == 0;
}
static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -760,7 +760,7 @@ static void rt_do_flush(struct net *net, int process_context)
if (process_context && need_resched())
cond_resched();
- rth = rcu_dereference_raw(rt_hash_table[i].chain);
+ rth = rcu_access_pointer(rt_hash_table[i].chain);
if (!rth)
continue;
@@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
{
struct rtable *rt = (struct rtable *) dst;
__be32 orig_gw = rt->rt_gateway;
- struct neighbour *n;
+ struct neighbour *n, *old_n;
dst_confirm(&rt->dst);
- neigh_release(dst_get_neighbour(&rt->dst));
- dst_set_neighbour(&rt->dst, NULL);
-
rt->rt_gateway = peer->redirect_learned.a4;
- rt_bind_neighbour(rt);
- n = dst_get_neighbour(&rt->dst);
+
+ n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ old_n = xchg(&rt->dst._neighbour, n);
+ if (old_n)
+ neigh_release(old_n);
if (!n || !(n->nud_state & NUD_VALID)) {
if (n)
neigh_event_send(n, NULL);
@@ -2317,8 +2319,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth = rcu_dereference(rth->dst.rt_next)) {
if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
- (rth->rt_iif ^ iif) |
- rth->rt_oif |
+ (rth->rt_route_iif ^ iif) |
(rth->rt_key_tos ^ tos)) == 0 &&
rth->rt_mark == skb->mark &&
net_eq(dev_net(rth->dst.dev), net) &&
@@ -3119,13 +3120,6 @@ static ctl_table ipv4_route_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "gc_interval",
- .data = &ip_rt_gc_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
.procname = "redirect_load",
.data = &ip_rt_redirect_load,
.maxlen = sizeof(int),
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 92bb943..3bc5c8f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
int mss;
struct rtable *rt;
__u8 rcv_wscale;
- bool ecn_ok;
+ bool ecn_ok = false;
if (!sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46febca..4c0da24 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(tcp_ioctl);
static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
tp->pushed_seq = tp->write_seq;
}
@@ -540,7 +540,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
skb->csum = 0;
tcb->seq = tcb->end_seq = tp->write_seq;
- tcb->flags = TCPHDR_ACK;
+ tcb->tcp_flags = TCPHDR_ACK;
tcb->sacked = 0;
skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
@@ -830,7 +830,7 @@ new_segment:
skb_shinfo(skb)->gso_segs = 0;
if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
copied += copy;
poffset += copy;
@@ -1074,7 +1074,7 @@ new_segment:
}
if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
@@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
}
- if (tp->ecn_flags&TCP_ECN_OK)
+ if (tp->ecn_flags & TCP_ECN_OK)
info->tcpi_options |= TCPI_OPT_ECN;
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+ info->tcpi_options |= TCPI_OPT_ECN_SEEN;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
@@ -2857,26 +2859,25 @@ EXPORT_SYMBOL(tcp_gro_complete);
#ifdef CONFIG_TCP_MD5SIG
static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
{
int cpu;
+
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
- if (p) {
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- kfree(p);
- }
+ struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
+
+ if (p->md5_desc.tfm)
+ crypto_free_hash(p->md5_desc.tfm);
}
free_percpu(pool);
}
void tcp_free_md5sig_pool(void)
{
- struct tcp_md5sig_pool * __percpu *pool = NULL;
+ struct tcp_md5sig_pool __percpu *pool = NULL;
spin_lock_bh(&tcp_md5sig_pool_lock);
if (--tcp_md5sig_users == 0) {
@@ -2889,30 +2890,24 @@ void tcp_free_md5sig_pool(void)
}
EXPORT_SYMBOL(tcp_free_md5sig_pool);
-static struct tcp_md5sig_pool * __percpu *
+static struct tcp_md5sig_pool __percpu *
__tcp_alloc_md5sig_pool(struct sock *sk)
{
int cpu;
- struct tcp_md5sig_pool * __percpu *pool;
+ struct tcp_md5sig_pool __percpu *pool;
- pool = alloc_percpu(struct tcp_md5sig_pool *);
+ pool = alloc_percpu(struct tcp_md5sig_pool);
if (!pool)
return NULL;
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p;
struct crypto_hash *hash;
- p = kzalloc(sizeof(*p), sk->sk_allocation);
- if (!p)
- goto out_free;
- *per_cpu_ptr(pool, cpu) = p;
-
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (!hash || IS_ERR(hash))
goto out_free;
- p->md5_desc.tfm = hash;
+ per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
}
return pool;
out_free:
@@ -2920,9 +2915,9 @@ out_free:
return NULL;
}
-struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
{
- struct tcp_md5sig_pool * __percpu *pool;
+ struct tcp_md5sig_pool __percpu *pool;
int alloc = 0;
retry:
@@ -2941,7 +2936,7 @@ retry:
if (alloc) {
/* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool * __percpu *p;
+ struct tcp_md5sig_pool __percpu *p;
p = __tcp_alloc_md5sig_pool(sk);
spin_lock_bh(&tcp_md5sig_pool_lock);
@@ -2974,7 +2969,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool * __percpu *p;
+ struct tcp_md5sig_pool __percpu *p;
local_bh_disable();
@@ -2985,7 +2980,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
spin_unlock(&tcp_md5sig_pool_lock);
if (p)
- return *this_cpu_ptr(p);
+ return this_cpu_ptr(p);
local_bh_enable();
return NULL;
@@ -3035,7 +3030,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
for (i = 0; i < shi->nr_frags; ++i) {
const struct skb_frag_struct *f = &shi->frags[i];
- sg_set_page(&sg, f->page, f->size, f->page_offset);
+ struct page *page = skb_frag_page(f);
+ sg_set_page(&sg, page, f->size, f->page_offset);
if (crypto_hash_update(desc, &sg, f->size))
return 1;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea0d218..81cae64 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -217,16 +217,25 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
-static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
{
- if (tp->ecn_flags & TCP_ECN_OK) {
- if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ if (!(tp->ecn_flags & TCP_ECN_OK))
+ return;
+
+ switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
+ case INET_ECN_NOT_ECT:
/* Funny extension: if ECT is not set on a segment,
- * it is surely retransmit. It is not in ECN RFC,
- * but Linux follows this rule. */
- else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
+ * and we already seen ECT on a previous segment,
+ * it is probably a retransmit.
+ */
+ if (tp->ecn_flags & TCP_ECN_SEEN)
tcp_enter_quickack_mode((struct sock *)tp);
+ break;
+ case INET_ECN_CE:
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ /* fallinto */
+ default:
+ tp->ecn_flags |= TCP_ECN_SEEN;
}
}
@@ -1124,7 +1133,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
return 0;
/* ...Then it's D-SACK, and must reside below snd_una completely */
- if (!after(end_seq, tp->snd_una))
+ if (after(end_seq, tp->snd_una))
return 0;
if (!before(start_seq, tp->undo_marker))
@@ -1389,9 +1398,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
BUG_ON(!pcount);
- /* Tweak before seqno plays */
- if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
- !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
+ if (skb == tp->lost_skb_hint)
tp->lost_cnt_hint += pcount;
TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1440,7 +1447,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
}
- TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags;
+ TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
if (skb == tcp_highest_sack(sk))
tcp_advance_highest_sack(sk, skb);
@@ -2830,9 +2837,13 @@ static int tcp_try_undo_loss(struct sock *sk)
static inline void tcp_complete_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- /* Do not moderate cwnd if it's already undone in cwr or recovery */
- if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
- tp->snd_cwnd = tp->snd_ssthresh;
+
+ /* Do not moderate cwnd if it's already undone in cwr or recovery. */
+ if (tp->undo_marker) {
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+ tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ else /* PRR */
+ tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_time_stamp;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2950,6 +2961,38 @@ void tcp_simple_retransmit(struct sock *sk)
}
EXPORT_SYMBOL(tcp_simple_retransmit);
+/* This function implements the PRR algorithm, specifcally the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ * 1) If the packets in flight is larger than ssthresh, PRR spreads the
+ * cwnd reductions across a full RTT.
+ * 2) If packets in flight is lower than ssthresh (such as due to excess
+ * losses and/or application stalls), do not perform any further cwnd
+ * reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+ int fast_rexmit, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int sndcnt = 0;
+ int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+ if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+ u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+ tp->prior_cwnd - 1;
+ sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+ } else {
+ sndcnt = min_t(int, delta,
+ max_t(int, tp->prr_delivered - tp->prr_out,
+ newly_acked_sacked) + 1);
+ }
+
+ sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+ tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2961,7 +3004,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+ int newly_acked_sacked, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -3111,13 +3155,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
+ tp->prior_cwnd = tp->snd_cwnd;
+ tp->prr_delivered = 0;
+ tp->prr_out = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
fast_rexmit = 1;
}
if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_down(sk, flag);
+ tp->prr_delivered += newly_acked_sacked;
+ tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
tcp_xmit_retransmit_queue(sk);
}
@@ -3298,7 +3346,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
* connection startup slow start one packet too
* quickly. This is severely frowned upon behavior.
*/
- if (!(scb->flags & TCPHDR_SYN)) {
+ if (!(scb->tcp_flags & TCPHDR_SYN)) {
flag |= FLAG_DATA_ACKED;
} else {
flag |= FLAG_SYN_ACKED;
@@ -3632,6 +3680,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 prior_in_flight;
u32 prior_fackets;
int prior_packets;
+ int prior_sacked = tp->sacked_out;
+ int newly_acked_sacked = 0;
int frto_cwnd = 0;
/* If the ack is older than previous acks
@@ -3703,6 +3753,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+ newly_acked_sacked = (prior_packets - prior_sacked) -
+ (tp->packets_out - tp->sacked_out);
+
if (tp->frto_counter)
frto_cwnd = tcp_process_frto(sk, flag);
/* Guarantee sacktag reordering detection against wrap-arounds */
@@ -3715,7 +3768,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
- flag);
+ newly_acked_sacked, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, prior_in_flight);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 955b8e6..48da7cc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,6 +72,7 @@
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
+#include <net/secure_seq.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
@@ -807,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
kfree(inet_rsk(req)->opt);
}
-static void syn_flood_warning(const struct sk_buff *skb)
+/*
+ * Return 1 if a syncookie should be sent
+ */
+int tcp_syn_flood_action(struct sock *sk,
+ const struct sk_buff *skb,
+ const char *proto)
{
- const char *msg;
+ const char *msg = "Dropping request";
+ int want_cookie = 0;
+ struct listen_sock *lopt;
+
+
#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
+ if (sysctl_tcp_syncookies) {
msg = "Sending cookies";
- else
+ want_cookie = 1;
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+ } else
#endif
- msg = "Dropping request";
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
- ntohs(tcp_hdr(skb)->dest), msg);
+ lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+ if (!lopt->synflood_warned) {
+ lopt->synflood_warned = 1;
+ pr_info("%s: Possible SYN flooding on port %d. %s. "
+ " Check SNMP counters.\n",
+ proto, ntohs(tcp_hdr(skb)->dest), msg);
+ }
+ return want_cookie;
}
+EXPORT_SYMBOL(tcp_syn_flood_action);
/*
* Save and compile IPv4 options into the request_sock if needed.
@@ -908,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
}
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
- if (tcp_alloc_md5sig_pool(sk) == NULL) {
+
+ md5sig = tp->md5sig_info;
+ if (md5sig->entries4 == 0 &&
+ tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
return -ENOMEM;
}
- md5sig = tp->md5sig_info;
if (md5sig->alloced4 == md5sig->entries4) {
keys = kmalloc((sizeof(*keys) *
(md5sig->entries4 + 1)), GFP_ATOMIC);
if (!keys) {
kfree(newkey);
- tcp_free_md5sig_pool();
+ if (md5sig->entries4 == 0)
+ tcp_free_md5sig_pool();
return -ENOMEM;
}
@@ -963,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
kfree(tp->md5sig_info->keys4);
tp->md5sig_info->keys4 = NULL;
tp->md5sig_info->alloced4 = 0;
+ tcp_free_md5sig_pool();
} else if (tp->md5sig_info->entries4 != i) {
/* Need to do some manipulation */
memmove(&tp->md5sig_info->keys4[i],
@@ -970,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
(tp->md5sig_info->entries4 - i) *
sizeof(struct tcp4_md5sig_key));
}
- tcp_free_md5sig_pool();
return 0;
}
}
@@ -1234,11 +1256,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
-#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
-#else
-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
-#endif
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1249,14 +1267,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* evidently real one.
*/
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
- if (net_ratelimit())
- syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies) {
- want_cookie = 1;
- } else
-#endif
- goto drop;
+ want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
+ if (!want_cookie)
+ goto drop;
}
/* Accept backlog is full. If we have already queued enough
@@ -1302,9 +1315,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
while (l-- > 0)
*c++ ^= *hash_location++;
-#ifdef CONFIG_SYN_COOKIES
want_cookie = 0; /* not our kind of cookie */
-#endif
tmp_ext.cookie_out_never = 0; /* false */
tmp_ext.cookie_plus = tmp_opt.cookie_plus;
} else if (!tp->rx_opt.cookie_in_always) {
@@ -1577,7 +1588,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
@@ -1594,7 +1605,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb->rxhash);
+ sock_rps_save_rxhash(nsk, skb);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
@@ -1602,7 +1613,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
} else
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
@@ -1669,7 +1680,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = iph->tos;
+ TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0..dde6b57 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -297,9 +297,9 @@ static u16 tcp_select_window(struct sock *sk)
/* Packet ECN state for a SYN-ACK */
static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
{
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
}
/* Packet ECN state for a SYN. */
@@ -309,7 +309,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
tp->ecn_flags = 0;
if (sysctl_tcp_ecn == 1) {
- TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
}
}
@@ -356,7 +356,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- TCP_SKB_CB(skb)->flags = flags;
+ TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
skb_shinfo(skb)->gso_segs = 1;
@@ -826,7 +826,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
- if (unlikely(tcb->flags & TCPHDR_SYN))
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
else
tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -850,9 +850,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
- tcb->flags);
+ tcb->tcp_flags);
- if (unlikely(tcb->flags & TCPHDR_SYN)) {
+ if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
/* RFC1323: The window in SYN & SYN/ACK segments
* is never scaled.
*/
@@ -875,7 +875,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
}
tcp_options_write((__be32 *)(th + 1), tp, &opts);
- if (likely((tcb->flags & TCPHDR_SYN) == 0))
+ if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
TCP_ECN_send(sk, skb, tcp_header_size);
#ifdef CONFIG_TCP_MD5SIG
@@ -889,7 +889,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
icsk->icsk_af_ops->send_check(sk, skb);
- if (likely(tcb->flags & TCPHDR_ACK))
+ if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
if (skb->len != tcp_header_size)
@@ -1032,9 +1032,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
/* PSH and FIN should only be set in the second packet. */
- flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
- TCP_SKB_CB(buff)->flags = flags;
+ flags = TCP_SKB_CB(skb)->tcp_flags;
+ TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
+ TCP_SKB_CB(buff)->tcp_flags = flags;
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ skb_frag_unref(skb, i);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1340,7 +1340,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
u32 in_flight, cwnd;
/* Don't be strict about the congestion window for the final FIN. */
- if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
+ tcp_skb_pcount(skb) == 1)
return 1;
in_flight = tcp_packets_in_flight(tp);
@@ -1409,7 +1410,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
* Nagle can be ignored during F-RTO too (see RFC4138).
*/
if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
- (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
return 1;
if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1497,9 +1498,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
/* PSH and FIN should only be set in the second packet. */
- flags = TCP_SKB_CB(skb)->flags;
- TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
- TCP_SKB_CB(buff)->flags = flags;
+ flags = TCP_SKB_CB(skb)->tcp_flags;
+ TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
+ TCP_SKB_CB(buff)->tcp_flags = flags;
/* This packet was never sent out yet, so no SACK bits. */
TCP_SKB_CB(buff)->sacked = 0;
@@ -1530,7 +1531,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
u32 send_win, cong_win, limit, in_flight;
int win_divisor;
- if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto send_now;
if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1657,7 +1658,7 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
- TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
+ TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
TCP_SKB_CB(nskb)->sacked = 0;
nskb->csum = 0;
nskb->ip_summed = skb->ip_summed;
@@ -1677,11 +1678,11 @@ static int tcp_mtu_probe(struct sock *sk)
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
* Throw it away. */
- TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
+ TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
} else {
- TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
+ TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
if (!skb_shinfo(skb)->nr_frags) {
skb_pull(skb, copy);
@@ -1796,11 +1797,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tcp_event_new_data_sent(sk, skb);
tcp_minshall_update(tp, mss_now, skb);
- sent_pkts++;
+ sent_pkts += tcp_skb_pcount(skb);
if (push_one)
break;
}
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+ tp->prr_out += sent_pkts;
if (likely(sent_pkts)) {
tcp_cwnd_validate(sk);
@@ -1985,7 +1988,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
/* Merge over control information. This moves PSH/FIN etc. over */
- TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
+ TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
/* All done, get rid of second SKB and account for it so
* packet counting does not break.
@@ -2033,7 +2036,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (!sysctl_tcp_retrans_collapse)
return;
- if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2125,12 +2128,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* since it is cheap to do so and saves bytes on the network.
*/
if (skb->len > 0 &&
- (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
if (!pskb_trim(skb, 0)) {
/* Reuse, even though it does some unnecessary work */
tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
- TCP_SKB_CB(skb)->flags);
+ TCP_SKB_CB(skb)->tcp_flags);
skb->ip_summed = CHECKSUM_NONE;
}
}
@@ -2294,6 +2297,9 @@ begin_fwd:
return;
NET_INC_STATS_BH(sock_net(sk), mib_idx);
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+ tp->prr_out += tcp_skb_pcount(skb);
+
if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
@@ -2317,7 +2323,7 @@ void tcp_send_fin(struct sock *sk)
mss_now = tcp_current_mss(sk);
if (tcp_send_head(sk) != NULL) {
- TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
} else {
@@ -2379,11 +2385,11 @@ int tcp_send_synack(struct sock *sk)
struct sk_buff *skb;
skb = tcp_write_queue_head(sk);
- if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
+ if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
return -EFAULT;
}
- if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
+ if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
if (skb_cloned(skb)) {
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
@@ -2397,7 +2403,7 @@ int tcp_send_synack(struct sock *sk)
skb = nskb;
}
- TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
TCP_ECN_send_synack(tcp_sk(sk), skb);
}
TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2794,13 +2800,13 @@ int tcp_write_wakeup(struct sock *sk)
if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
skb->len > mss) {
seg_size = min(seg_size, mss);
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
if (tcp_fragment(sk, skb, seg_size, mss))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b5a193..ebaa96b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1267,7 +1267,7 @@ int udp_disconnect(struct sock *sk, int flags)
sk->sk_state = TCP_CLOSE;
inet->inet_daddr = 0;
inet->inet_dport = 0;
- sock_rps_save_rxhash(sk, 0);
+ sock_rps_reset_rxhash(sk);
sk->sk_bound_dev_if = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
@@ -1355,7 +1355,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
int rc;
if (inet_sk(sk)->inet_daddr)
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
rc = ip_queue_rcv_skb(sk, skb);
if (rc < 0) {
@@ -1461,10 +1461,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_dereference_raw(sk->sk_filter)) {
- if (udp_lib_checksum_complete(skb))
- goto drop;
- }
+ if (rcu_access_pointer(sk->sk_filter) &&
+ udp_lib_checksum_complete(skb))
+ goto drop;
if (sk_rcvqueues_full(sk, skb))
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a55500c..e39239e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -374,8 +374,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
"%s(): cannot allocate memory for statistics; dev=%s.\n",
__func__, dev->name));
neigh_parms_release(&nd_tbl, ndev->nd_parms);
- ndev->dead = 1;
- in6_dev_finish_destroy(ndev);
+ dev_put(dev);
+ kfree(ndev);
return NULL;
}
@@ -428,7 +428,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
ndev->tstamp = jiffies;
addrconf_sysctl_register(ndev);
/* protected by rtnl_lock */
- rcu_assign_pointer(dev->ip6_ptr, ndev);
+ RCU_INIT_POINTER(dev->ip6_ptr, ndev);
/* Join all-node multicast group */
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
@@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
* layer address of our nexhop router
*/
- if (dst_get_neighbour(&rt->dst) == NULL)
+ if (dst_get_neighbour_raw(&rt->dst) == NULL)
ifa->flags &= ~IFA_F_OPTIMISTIC;
ifa->idev = idev;
@@ -824,12 +824,13 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
+ unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
unsigned long regen_advance;
int tmp_plen;
int ret = 0;
int max_addresses;
u32 addr_flags;
+ unsigned long now = jiffies;
write_lock(&idev->lock);
if (ift) {
@@ -874,7 +875,7 @@ retry:
goto out;
}
memcpy(&addr.s6_addr[8], idev->rndid, 8);
- age = (jiffies - ifp->tstamp) / HZ;
+ age = (now - ifp->tstamp) / HZ;
tmp_valid_lft = min_t(__u32,
ifp->valid_lft,
idev->cnf.temp_valid_lft + age);
@@ -884,7 +885,6 @@ retry:
idev->cnf.max_desync_factor);
tmp_plen = ifp->prefix_len;
max_addresses = idev->cnf.max_addresses;
- tmp_cstamp = ifp->cstamp;
tmp_tstamp = ifp->tstamp;
spin_unlock_bh(&ifp->lock);
@@ -929,7 +929,7 @@ retry:
ift->ifpub = ifp;
ift->valid_lft = tmp_valid_lft;
ift->prefered_lft = tmp_prefered_lft;
- ift->cstamp = tmp_cstamp;
+ ift->cstamp = now;
ift->tstamp = tmp_tstamp;
spin_unlock_bh(&ift->lock);
@@ -1999,25 +1999,50 @@ ok:
#ifdef CONFIG_IPV6_PRIVACY
read_lock_bh(&in6_dev->lock);
/* update all temporary addresses in the list */
- list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
- /*
- * When adjusting the lifetimes of an existing
- * temporary address, only lower the lifetimes.
- * Implementations must not increase the
- * lifetimes of an existing temporary address
- * when processing a Prefix Information Option.
- */
+ list_for_each_entry(ift, &in6_dev->tempaddr_list,
+ tmp_list) {
+ int age, max_valid, max_prefered;
+
if (ifp != ift->ifpub)
continue;
+ /*
+ * RFC 4941 section 3.3:
+ * If a received option will extend the lifetime
+ * of a public address, the lifetimes of
+ * temporary addresses should be extended,
+ * subject to the overall constraint that no
+ * temporary addresses should ever remain
+ * "valid" or "preferred" for a time longer than
+ * (TEMP_VALID_LIFETIME) or
+ * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR),
+ * respectively.
+ */
+ age = (now - ift->cstamp) / HZ;
+ max_valid = in6_dev->cnf.temp_valid_lft - age;
+ if (max_valid < 0)
+ max_valid = 0;
+
+ max_prefered = in6_dev->cnf.temp_prefered_lft -
+ in6_dev->cnf.max_desync_factor -
+ age;
+ if (max_prefered < 0)
+ max_prefered = 0;
+
+ if (valid_lft > max_valid)
+ valid_lft = max_valid;
+
+ if (prefered_lft > max_prefered)
+ prefered_lft = max_prefered;
+
spin_lock(&ift->lock);
flags = ift->flags;
- if (ift->valid_lft > valid_lft &&
- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
- if (ift->prefered_lft > prefered_lft &&
- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
+ ift->valid_lft = valid_lft;
+ ift->prefered_lft = prefered_lft;
+ ift->tstamp = now;
+ if (prefered_lft > 0)
+ ift->flags &= ~IFA_F_DEPRECATED;
+
spin_unlock(&ift->lock);
if (!(flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ift);
@@ -2025,9 +2050,11 @@ ok:
if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
/*
- * When a new public address is created as described in [ADDRCONF],
- * also create a new temporary address. Also create a temporary
- * address if it's enabled but no temporary address currently exists.
+ * When a new public address is created as
+ * described in [ADDRCONF], also create a new
+ * temporary address. Also create a temporary
+ * address if it's enabled but no temporary
+ * address currently exists.
*/
read_unlock_bh(&in6_dev->lock);
ipv6_create_tempaddr(ifp, NULL);
@@ -2706,7 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
idev->dead = 1;
/* protected by rtnl_lock */
- rcu_assign_pointer(dev->ip6_ptr, NULL);
+ RCU_INIT_POINTER(dev->ip6_ptr, NULL);
/* Step 1.5: remove snmp6 entry */
snmp6_unregister_dev(idev);
@@ -2969,12 +2996,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
ipv6_ifa_notify(RTM_NEWADDR, ifp);
- /* If added prefix is link local and forwarding is off,
- start sending router solicitations.
+ /* If added prefix is link local and we are prepared to process
+ router advertisements, start sending router solicitations.
*/
- if ((ifp->idev->cnf.forwarding == 0 ||
- ifp->idev->cnf.forwarding == 2) &&
+ if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) ||
+ ifp->idev->cnf.accept_ra == 2) &&
ifp->idev->cnf.rtr_solicits > 0 &&
(dev->flags&IFF_LOOPBACK) == 0 &&
(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 1656033..b46e9f8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -33,6 +33,11 @@
#include <linux/errqueue.h>
#include <asm/uaccess.h>
+static inline int ipv6_mapped_addr_any(const struct in6_addr *a)
+{
+ return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0));
+}
+
int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
@@ -102,10 +107,12 @@ ipv4_connected:
ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
- if (ipv6_addr_any(&np->saddr))
+ if (ipv6_addr_any(&np->saddr) ||
+ ipv6_mapped_addr_any(&np->saddr))
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- if (ipv6_addr_any(&np->rcv_saddr)) {
+ if (ipv6_addr_any(&np->rcv_saddr) ||
+ ipv6_mapped_addr_any(&np->rcv_saddr)) {
ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
&np->rcv_saddr);
if (sk->sk_prot->rehash)
@@ -592,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
return 0;
}
-int datagram_send_ctl(struct net *net,
+int datagram_send_ctl(struct net *net, struct sock *sk,
struct msghdr *msg, struct flowi6 *fl6,
struct ipv6_txoptions *opt,
int *hlimit, int *tclass, int *dontfrag)
@@ -651,7 +658,8 @@ int datagram_send_ctl(struct net *net,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
- if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
+ if (!inet_sk(sk)->transparent &&
+ !ipv6_chk_addr(net, &src_info->ipi6_addr,
strict ? dev : NULL, 0))
err = -EINVAL;
else
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 79a485e..1318de4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -273,12 +273,12 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
__u16 dstbuf;
#endif
- struct dst_entry *dst;
+ struct dst_entry *dst = skb_dst(skb);
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
@@ -289,9 +289,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
dstbuf = opt->dst1;
#endif
- dst = dst_clone(skb_dst(skb));
if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
- dst_release(dst);
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -304,7 +302,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
IP6_INC_STATS_BH(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
- dst_release(dst);
return -1;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1190041..2b59154 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -490,7 +490,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
goto out_dst_release;
}
- idev = in6_dev_get(skb->dev);
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
err = ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
@@ -500,19 +501,16 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (err) {
ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
- goto out_put;
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ len + sizeof(struct icmp6hdr));
}
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr));
-
-out_put:
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ rcu_read_unlock();
out_dst_release:
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
}
-
EXPORT_SYMBOL(icmpv6_send);
static void icmpv6_echo_reply(struct sk_buff *skb)
@@ -569,7 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
- idev = in6_dev_get(skb->dev);
+ idev = __in6_dev_get(skb->dev);
msg.skb = skb;
msg.offset = 0;
@@ -583,13 +581,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (err) {
ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
- goto out_put;
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ skb->len + sizeof(struct icmp6hdr));
}
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
-
-out_put:
- if (likely(idev != NULL))
- in6_dev_put(idev);
dst_release(dst);
out:
icmpv6_xmit_unlock(sk);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 8a58e8c..2916200 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -211,6 +211,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
struct flowi6 fl6;
struct dst_entry *dst;
struct in6_addr *final_p, final;
+ int res;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = sk->sk_protocol;
@@ -241,12 +242,14 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
__inet6_csk_dst_store(sk, dst, NULL, NULL);
}
- skb_dst_set(skb, dst_clone(dst));
+ rcu_read_lock();
+ skb_dst_set_noref(skb, dst);
/* Restore final destination back after routing done */
ipv6_addr_copy(&fl6.daddr, &np->daddr);
- return ip6_xmit(sk, skb, &fl6, np->opt);
+ res = ip6_xmit(sk, skb, &fl6, np->opt);
+ rcu_read_unlock();
+ return res;
}
-
EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b531972..73f1a00 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -20,6 +20,7 @@
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
+#include <net/secure_seq.h>
#include <net/ip.h>
int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 54a4678..320d91d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
RT6_TRACE("aging clone %p\n", rt);
return -1;
} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
- (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) {
+ (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
RT6_TRACE("purging route %p via non-router but gateway\n",
rt);
return -1;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f3caf1b..5430394 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
}
static struct ip6_flowlabel *
-fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
- int optlen, int *err_p)
+fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
+ char __user *optval, int optlen, int *err_p)
{
struct ip6_flowlabel *fl = NULL;
int olen;
@@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
msg.msg_control = (void*)(fl->opt+1);
memset(&flowi6, 0, sizeof(flowi6));
- err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk,
+ err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
&junk, &junk);
if (err)
goto done;
@@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
- fl = fl_create(net, &freq, optval, optlen, &err);
+ fl = fl_create(net, sk, &freq, optval, optlen, &err);
if (fl == NULL)
return err;
sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 32e5339..835c04b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb)
skb->len);
}
+ rcu_read_lock();
neigh = dst_get_neighbour(dst);
- if (neigh)
- return neigh_output(neigh, skb);
+ if (neigh) {
+ int res = neigh_output(neigh, skb);
+ rcu_read_unlock();
+ return res;
+ }
+ rcu_read_unlock();
IP6_INC_STATS_BH(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
@@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
+ rcu_read_lock();
n = dst_get_neighbour(*dst);
if (n && !(n->nud_state & NUD_VALID)) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
int redirect;
+ rcu_read_unlock();
ifp = ipv6_get_ifaddr(net, &fl6->saddr,
(*dst)->dev, 1);
@@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
if ((err = (*dst)->error))
goto out_err_release;
}
+ } else {
+ rcu_read_unlock();
}
#endif
@@ -1471,13 +1480,13 @@ alloc_new_skb:
if (page && (left = PAGE_SIZE - off) > 0) {
if (copy >= left)
copy = left;
- if (page != frag->page) {
+ if (page != skb_frag_page(frag)) {
if (i == MAX_SKB_FRAGS) {
err = -EMSGSIZE;
goto error;
}
- get_page(page);
skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+ skb_frag_ref(skb, i);
frag = &skb_shinfo(skb)->frags[i];
}
} else if(i < MAX_SKB_FRAGS) {
@@ -1497,7 +1506,8 @@ alloc_new_skb:
err = -EMSGSIZE;
goto error;
}
- if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+ if (getfrag(from, skb_frag_address(frag)+frag->size,
+ offset, copy, skb->len, skb) < 0) {
err = -EFAULT;
goto error;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0bc9888..bdc15c9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -218,8 +218,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
- rcu_assign_pointer(t->next , rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
+ RCU_INIT_POINTER(t->next , rtnl_dereference(*tp));
+ RCU_INIT_POINTER(*tp, t);
}
/**
@@ -237,7 +237,7 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
+ RCU_INIT_POINTER(*tp, t->next);
break;
}
}
@@ -350,7 +350,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev)
- rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
else
ip6_tnl_unlink(ip6n, t);
ip6_tnl_dst_reset(t);
@@ -889,7 +889,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
struct net_device_stats *stats = &t->dev->stats;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
- struct dst_entry *dst;
+ struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
int mtu;
unsigned int max_headroom = sizeof(struct ipv6hdr);
@@ -897,19 +897,20 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
int err = -1;
int pkt_len;
- if ((dst = ip6_tnl_dst_check(t)) != NULL)
- dst_hold(dst);
- else {
- dst = ip6_route_output(net, NULL, fl6);
+ if (!fl6->flowi6_mark)
+ dst = ip6_tnl_dst_check(t);
+ if (!dst) {
+ ndst = ip6_route_output(net, NULL, fl6);
- if (dst->error)
+ if (ndst->error)
goto tx_err_link_failure;
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
goto tx_err_link_failure;
}
+ dst = ndst;
}
tdev = dst->dev;
@@ -955,8 +956,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb = new_skb;
}
skb_dst_drop(skb);
- skb_dst_set(skb, dst_clone(dst));
-
+ if (fl6->flowi6_mark) {
+ skb_dst_set(skb, dst);
+ ndst = NULL;
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
skb->transport_header = skb->network_header;
proto = fl6->flowi6_proto;
@@ -987,13 +992,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
stats->tx_errors++;
stats->tx_aborted_errors++;
}
- ip6_tnl_dst_store(t, dst);
+ if (ndst)
+ ip6_tnl_dst_store(t, ndst);
return 0;
tx_err_link_failure:
stats->tx_carrier_errors++;
dst_link_failure(skb);
tx_err_dst_release:
- dst_release(dst);
+ dst_release(ndst);
return err;
}
@@ -1020,9 +1026,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
dsfield = ipv4_get_dsfield(iph);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
& IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
if (err != 0) {
@@ -1069,10 +1077,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPV6;
dsfield = ipv6_get_dsfield(ipv6h);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
- if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
if (err != 0) {
@@ -1439,7 +1449,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
t->parms.proto = IPPROTO_IPV6;
dev_hold(dev);
- rcu_assign_pointer(ip6n->tnls_wc[0], t);
+ RCU_INIT_POINTER(ip6n->tnls_wc[0], t);
return 0;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 705c828..def0538 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
int err;
err = ip6mr_fib_lookup(net, &fl6, &mrt);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(skb);
return err;
+ }
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
@@ -2052,8 +2054,10 @@ int ip6_mr_input(struct sk_buff *skb)
int err;
err = ip6mr_fib_lookup(net, &fl6, &mrt);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(skb);
return err;
+ }
read_lock(&mrt_lock);
cache = ip6mr_cache_find(mrt,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9cb191e..2fbda5f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -475,7 +475,7 @@ sticky_done:
msg.msg_controllen = optlen;
msg.msg_control = (void*)(opt+1);
- retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk,
+ retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
&junk);
if (retv)
goto done;
@@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
}
static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ char __user *optval, int __user *optlen, unsigned flags)
{
struct ipv6_pinfo *np = inet6_sk(sk);
int len;
@@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
msg.msg_control = optval;
msg.msg_controllen = len;
- msg.msg_flags = 0;
+ msg.msg_flags = flags;
lock_sock(sk);
skb = np->pktoptions;
@@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
if(level != SOL_IPV6)
return -ENOPROTOOPT;
- err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
return compat_mc_getsockopt(sk, level, optname, optval, optlen,
ipv6_getsockopt);
- err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+ MSG_CMSG_COMPAT);
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3e6ebcd..ee7839f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
break;
for (i=0; i<nsrcs; i++) {
/* skip inactive filters */
- if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+ if (psf->sf_count[MCAST_INCLUDE] ||
pmc->mca_sfcount[MCAST_EXCLUDE] !=
psf->sf_count[MCAST_EXCLUDE])
continue;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9da6e02..1f52dd2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -533,7 +533,8 @@ void ndisc_send_skb(struct sk_buff *skb,
skb_dst_set(skb, dst);
- idev = in6_dev_get(dst->dev);
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
@@ -543,8 +544,7 @@ void ndisc_send_skb(struct sk_buff *skb,
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(ndisc_send_skb);
@@ -1039,7 +1039,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
if (skb->len < sizeof(*rs_msg))
return;
- idev = in6_dev_get(skb->dev);
+ idev = __in6_dev_get(skb->dev);
if (!idev) {
if (net_ratelimit())
ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
@@ -1080,7 +1080,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
neigh_release(neigh);
}
out:
- in6_dev_put(idev);
+ return;
}
static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
@@ -1179,7 +1179,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
* set the RA_RECV flag in the interface
*/
- in6_dev = in6_dev_get(skb->dev);
+ in6_dev = __in6_dev_get(skb->dev);
if (in6_dev == NULL) {
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: can't find inet6 device for %s.\n",
@@ -1188,7 +1188,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
if (!ndisc_parse_options(opt, optlen, &ndopts)) {
- in6_dev_put(in6_dev);
ND_PRINTK2(KERN_WARNING
"ICMP6 RA: invalid ND options\n");
return;
@@ -1255,7 +1254,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK0(KERN_ERR
"ICMPv6 RA: %s() failed to add default route.\n",
__func__);
- in6_dev_put(in6_dev);
return;
}
@@ -1265,7 +1263,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
"ICMPv6 RA: %s() got default router without neighbour.\n",
__func__);
dst_release(&rt->dst);
- in6_dev_put(in6_dev);
return;
}
neigh->flags |= NTF_ROUTER;
@@ -1422,7 +1419,6 @@ out:
dst_release(&rt->dst);
else if (neigh)
neigh_release(neigh);
- in6_dev_put(in6_dev);
}
static void ndisc_redirect_rcv(struct sk_buff *skb)
@@ -1481,13 +1477,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- in6_dev = in6_dev_get(skb->dev);
+ in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev)
return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
- in6_dev_put(in6_dev);
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
return;
- }
/* RFC2461 8.1:
* The IP source address of the Redirect MUST be the same as the current
@@ -1497,7 +1491,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: invalid ND options\n");
- in6_dev_put(in6_dev);
return;
}
if (ndopts.nd_opts_tgt_lladdr) {
@@ -1506,7 +1499,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!lladdr) {
ND_PRINTK2(KERN_WARNING
"ICMPv6 Redirect: invalid link-layer address length\n");
- in6_dev_put(in6_dev);
return;
}
}
@@ -1518,7 +1510,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
on_link);
neigh_release(neigh);
}
- in6_dev_put(in6_dev);
}
void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
@@ -1651,7 +1642,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
csum_partial(icmph, len, 0));
skb_dst_set(buff, dst);
- idev = in6_dev_get(dst->dev);
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
dst_output);
@@ -1660,8 +1652,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
- if (likely(idev != NULL))
- in6_dev_put(idev);
+ rcu_read_unlock();
return;
release:
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 2493948..e63c397 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
return skb;
nlmsg_failure:
+ kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip6_queue: error creating packet message\n");
return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
struct nf_queue_entry *entry;
- if (vmsg->value > NF_MAX_VERDICT)
+ if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
return -EINVAL;
entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
break;
case IPQM_VERDICT:
- if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
- status = -EINVAL;
- else
- status = ipq_set_verdict(&pmsg->msg.verdict,
- len - sizeof(*pmsg));
- break;
+ status = ipq_set_verdict(&pmsg->msg.verdict,
+ len - sizeof(*pmsg));
+ break;
default:
status = -EINVAL;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 6a79f308..3486f62 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -130,14 +130,14 @@ static mh_filter_t __rcu *mh_filter __read_mostly;
int rawv6_mh_filter_register(mh_filter_t filter)
{
- rcu_assign_pointer(mh_filter, filter);
+ RCU_INIT_POINTER(mh_filter, filter);
return 0;
}
EXPORT_SYMBOL(rawv6_mh_filter_register);
int rawv6_mh_filter_unregister(mh_filter_t filter)
{
- rcu_assign_pointer(mh_filter, NULL);
+ RCU_INIT_POINTER(mh_filter, NULL);
synchronize_rcu();
return 0;
}
@@ -372,9 +372,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
read_unlock(&raw_v6_hashinfo.lock);
}
-static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
+static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
+ if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
@@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
- err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
- &tclass, &dontfrag);
+ err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e8987da..fb545ed 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
struct inet_peer *peer;
u32 *p = NULL;
+ if (!(rt->dst.flags & DST_HOST))
+ return NULL;
+
if (!rt->rt6i_peer)
rt6_bind_peer(rt, 1);
@@ -241,7 +244,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
{
struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
- memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
+ if (rt != NULL)
+ memset(&rt->rt6i_table, 0,
+ sizeof(*rt) - sizeof(struct dst_entry));
return rt;
}
@@ -252,6 +257,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
struct inet6_dev *idev = rt->rt6i_idev;
struct inet_peer *peer = rt->rt6i_peer;
+ if (!(rt->dst.flags & DST_HOST))
+ dst_destroy_metrics_generic(dst);
+
if (idev != NULL) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
@@ -364,7 +372,7 @@ out:
#ifdef CONFIG_IPV6_ROUTER_PREF
static void rt6_probe(struct rt6_info *rt)
{
- struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
+ struct neighbour *neigh;
/*
* Okay, this does not seem to be appropriate
* for now, however, we need to check if it
@@ -373,8 +381,10 @@ static void rt6_probe(struct rt6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
+ rcu_read_lock();
+ neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
- return;
+ goto out;
read_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -387,8 +397,11 @@ static void rt6_probe(struct rt6_info *rt)
target = (struct in6_addr *)&neigh->primary_key;
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
- } else
+ } else {
read_unlock_bh(&neigh->lock);
+ }
+out:
+ rcu_read_unlock();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
@@ -412,8 +425,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
static inline int rt6_check_neigh(struct rt6_info *rt)
{
- struct neighbour *neigh = dst_get_neighbour(&rt->dst);
+ struct neighbour *neigh;
int m;
+
+ rcu_read_lock();
+ neigh = dst_get_neighbour(&rt->dst);
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
@@ -430,6 +446,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
read_unlock_bh(&neigh->lock);
} else
m = 0;
+ rcu_read_unlock();
return m;
}
@@ -714,9 +731,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
ipv6_addr_copy(&rt->rt6i_gateway, daddr);
}
- rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
- rt->dst.flags |= DST_HOST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
@@ -766,10 +781,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
struct rt6_info *rt = ip6_rt_copy(ort, daddr);
if (rt) {
- rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
- rt->dst.flags |= DST_HOST;
- dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
+ dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
}
return rt;
}
@@ -803,7 +816,7 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -1069,12 +1082,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
neigh = NULL;
}
- rt->rt6i_idev = idev;
+ rt->dst.flags |= DST_HOST;
+ rt->dst.output = ip6_output;
dst_set_neighbour(&rt->dst, neigh);
atomic_set(&rt->dst.__refcnt, 1);
- ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
- rt->dst.output = ip6_output;
+
+ ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+ rt->rt6i_dst.plen = 128;
+ rt->rt6i_idev = idev;
spin_lock_bh(&icmp6_dst_lock);
rt->dst.next = icmp6_dst_gc_list;
@@ -1252,6 +1268,14 @@ int ip6_route_add(struct fib6_config *cfg)
if (rt->rt6i_dst.plen == 128)
rt->dst.flags |= DST_HOST;
+ if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
+ u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+ if (!metrics) {
+ err = -ENOMEM;
+ goto out;
+ }
+ dst_init_metrics(&rt->dst, metrics, 0);
+ }
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1587,7 +1611,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
dst_confirm(&rt->dst);
/* Duplicate redirect: silently ignore. */
- if (neigh == dst_get_neighbour(&rt->dst))
+ if (neigh == dst_get_neighbour_raw(&rt->dst))
goto out;
nrt = ip6_rt_copy(rt, dest);
@@ -1598,9 +1622,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
- nrt->rt6i_dst.plen = 128;
- nrt->dst.flags |= DST_HOST;
-
ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
@@ -1682,7 +1703,7 @@ again:
1. It is connected route. Action: COW
2. It is gatewayed route or NONEXTHOP route. Action: clone it.
*/
- if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, daddr, saddr);
else
nrt = rt6_alloc_clone(rt, daddr);
@@ -1745,9 +1766,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
if (rt) {
rt->dst.input = ort->dst.input;
rt->dst.output = ort->dst.output;
+ rt->dst.flags |= DST_HOST;
ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
- rt->rt6i_dst.plen = ort->rt6i_dst.plen;
+ rt->rt6i_dst.plen = 128;
dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
rt->rt6i_idev = ort->rt6i_idev;
@@ -2326,6 +2348,7 @@ static int rt6_fill_node(struct net *net,
struct nlmsghdr *nlh;
long expires;
u32 table;
+ struct neighbour *n;
if (prefix) { /* user wants prefix routes only */
if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2414,8 +2437,11 @@ static int rt6_fill_node(struct net *net,
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
- if (dst_get_neighbour(&rt->dst))
- NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key);
+ rcu_read_lock();
+ n = dst_get_neighbour(&rt->dst);
+ if (n)
+ NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
+ rcu_read_unlock();
if (rt->dst.dev)
NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
@@ -2608,12 +2634,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
#else
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
+ rcu_read_lock();
n = dst_get_neighbour(&rt->dst);
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
seq_puts(m, "00000000000000000000000000000000");
}
+ rcu_read_unlock();
seq_printf(m, " %08x %08x %08x %08x %8s\n",
rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
rt->dst.__use, rt->rt6i_flags,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 07bf108..a7a1860 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
+ RCU_INIT_POINTER(*tp, t->next);
break;
}
}
@@ -192,8 +192,8 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
{
struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
+ RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
+ RCU_INIT_POINTER(*tp, t);
}
static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -391,7 +391,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
p->addr = a->addr;
p->flags = a->flags;
t->prl_count++;
- rcu_assign_pointer(t->prl, p);
+ RCU_INIT_POINTER(t->prl, p);
out:
return err;
}
@@ -474,7 +474,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
struct sit_net *sitn = net_generic(net, sit_net_id);
if (dev == sitn->fb_tunnel_dev) {
- rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
+ RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
} else {
ipip6_tunnel_unlink(sitn, netdev_priv(dev));
ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
@@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (skb->protocol != htons(ETH_P_IPV6))
goto tx_error;
+ if (tos == 1)
+ tos = ipv6_get_dsfield(iph6);
+
/* ISATAP (RFC4214) - must come before 6to4 */
if (dev->priv_flags & IFF_ISATAP) {
struct neighbour *neigh = NULL;
@@ -1173,7 +1176,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
dev_hold(dev);
- rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
+ RCU_INIT_POINTER(sitn->tunnels_wc[0], tunnel);
return 0;
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 89d5bf8..ac83896 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -165,7 +165,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct dst_entry *dst;
__u8 rcv_wscale;
- bool ecn_ok;
+ bool ecn_ok = false;
if (!sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 78aa534..5357902 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,6 +61,7 @@
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
+#include <net/secure_seq.h>
#include <asm/uaccess.h>
@@ -530,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
return tcp_v6_send_synack(sk, req, rvp);
}
-static inline void syn_flood_warning(struct sk_buff *skb)
-{
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
- printk(KERN_INFO
- "TCPv6: Possible SYN flooding on port %d. "
- "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
- else
-#endif
- printk(KERN_INFO
- "TCPv6: Possible SYN flooding on port %d. "
- "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
-}
-
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
kfree_skb(inet6_rsk(req)->pktopts);
@@ -604,7 +591,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
}
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
- if (tcp_alloc_md5sig_pool(sk) == NULL) {
+ if (tp->md5sig_info->entries6 == 0 &&
+ tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
return -ENOMEM;
}
@@ -613,8 +601,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
(tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
if (!keys) {
- tcp_free_md5sig_pool();
kfree(newkey);
+ if (tp->md5sig_info->entries6 == 0)
+ tcp_free_md5sig_pool();
return -ENOMEM;
}
@@ -660,6 +649,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
kfree(tp->md5sig_info->keys6);
tp->md5sig_info->keys6 = NULL;
tp->md5sig_info->alloced6 = 0;
+ tcp_free_md5sig_pool();
} else {
/* shrink the database */
if (tp->md5sig_info->entries6 != i)
@@ -668,7 +658,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
(tp->md5sig_info->entries6 - i)
* sizeof (tp->md5sig_info->keys6[0]));
}
- tcp_free_md5sig_pool();
return 0;
}
}
@@ -1178,11 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
-#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
-#else
-#define want_cookie 0
-#endif
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -1191,14 +1176,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop;
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
- if (net_ratelimit())
- syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
- want_cookie = 1;
- else
-#endif
- goto drop;
+ want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+ if (!want_cookie)
+ goto drop;
}
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
@@ -1248,9 +1228,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
while (l-- > 0)
*c++ ^= *hash_location++;
-#ifdef CONFIG_SYN_COOKIES
want_cookie = 0; /* not our kind of cookie */
-#endif
tmp_ext.cookie_out_never = 0; /* false */
tmp_ext.cookie_plus = tmp_opt.cookie_plus;
} else if (!tp->rx_opt.cookie_in_always) {
@@ -1407,6 +1385,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
@@ -1471,6 +1451,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
First: no IPv4 options.
*/
newinet->inet_opt = NULL;
+ newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
/* Clone RX bits */
@@ -1627,7 +1608,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
opt_skb = skb_clone(skb, GFP_ATOMIC);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
if (opt_skb)
@@ -1649,7 +1630,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* the new socket..
*/
if(nsk != sk) {
- sock_rps_save_rxhash(nsk, skb->rxhash);
+ sock_rps_save_rxhash(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1657,7 +1638,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
} else
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
@@ -1741,7 +1722,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->when = 0;
- TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
+ TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 29213b5..f4ca0a5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -509,7 +509,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
int is_udplite = IS_UDPLITE(sk);
if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
- sock_rps_save_rxhash(sk, skb->rxhash);
+ sock_rps_save_rxhash(sk, skb);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto drop;
@@ -533,7 +533,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (rcu_dereference_raw(sk->sk_filter)) {
+ if (rcu_access_pointer(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
@@ -1090,8 +1090,8 @@ do_udp_sendmsg:
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
- err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
- &tclass, &dontfrag);
+ err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+ &hlimit, &tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index e8d5f44..d14152e 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -50,7 +50,7 @@ static const struct net_device_ops irlan_eth_netdev_ops = {
.ndo_open = irlan_eth_open,
.ndo_stop = irlan_eth_close,
.ndo_start_xmit = irlan_eth_xmit,
- .ndo_set_multicast_list = irlan_eth_set_multicast_list,
+ .ndo_set_rx_mode = irlan_eth_set_multicast_list,
.ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
};
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index d0b70da..2615ffc 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -40,9 +40,9 @@ extern int sysctl_slot_timeout;
extern int sysctl_fast_poll_increase;
extern char sysctl_devname[];
extern int sysctl_max_baud_rate;
-extern int sysctl_min_tx_turn_time;
-extern int sysctl_max_tx_data_size;
-extern int sysctl_max_tx_window;
+extern unsigned int sysctl_min_tx_turn_time;
+extern unsigned int sysctl_max_tx_data_size;
+extern unsigned int sysctl_max_tx_window;
extern int sysctl_max_noreply_time;
extern int sysctl_warn_noreply_time;
extern int sysctl_lap_keepalive_time;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 1b51bcf..4369f7f 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12;
* Default is 10us which means using the unmodified value given by the
* peer except if it's 0 (0 is likely a bug in the other stack).
*/
-unsigned sysctl_min_tx_turn_time = 10;
+unsigned int sysctl_min_tx_turn_time = 10;
/*
* Maximum data size to be used in transmission in payload of LAP frame.
* There is a bit of confusion in the IrDA spec :
@@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10;
* bytes frames or all negotiated frame sizes, but you can use the sysctl
* to play with this value anyway.
* Jean II */
-unsigned sysctl_max_tx_data_size = 2042;
+unsigned int sysctl_max_tx_data_size = 2042;
/*
* Maximum transmit window, i.e. number of LAP frames between turn-around.
* This allow to override what the peer told us. Some peers are buggy and
* don't always support what they tell us.
* Jean II */
-unsigned sysctl_max_tx_window = 7;
+unsigned int sysctl_max_tx_window = 7;
static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get);
static int irlap_param_link_disconnect(void *instance, irda_param_t *parm,
diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig
index 16ce9cd..497fbe7 100644
--- a/net/iucv/Kconfig
+++ b/net/iucv/Kconfig
@@ -1,15 +1,17 @@
config IUCV
- tristate "IUCV support (S390 - z/VM only)"
depends on S390
+ def_tristate y if S390
+ prompt "IUCV support (S390 - z/VM only)"
help
Select this option if you want to use inter-user communication
under VM or VIF. If you run on z/VM, say "Y" to enable a fast
communication link between VM guests.
config AFIUCV
- tristate "AF_IUCV support (S390 - z/VM only)"
- depends on IUCV
+ depends on S390
+ def_tristate m if QETH_L3 || IUCV
+ prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)"
help
- Select this option if you want to use inter-user communication under
- VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast
- communication link between VM guests.
+ Select this option if you want to use AF_IUCV socket applications
+ based on z/VM inter-user communication vehicle or based on
+ HiperSockets.
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index e2013e4..c39f3a4 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -27,10 +27,9 @@
#include <asm/cpcmd.h>
#include <linux/kmod.h>
-#include <net/iucv/iucv.h>
#include <net/iucv/af_iucv.h>
-#define VERSION "1.1"
+#define VERSION "1.2"
static char iucv_userid[80];
@@ -42,6 +41,8 @@ static struct proto iucv_proto = {
.obj_size = sizeof(struct iucv_sock),
};
+static struct iucv_interface *pr_iucv;
+
/* special AF_IUCV IPRM messages */
static const u8 iprm_shutdown[8] =
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
@@ -90,6 +91,12 @@ do { \
static void iucv_sock_kill(struct sock *sk);
static void iucv_sock_close(struct sock *sk);
+static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
+static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
+ struct sk_buff *skb, u8 flags);
+static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
+
/* Call Back functions */
static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
@@ -165,7 +172,7 @@ static int afiucv_pm_freeze(struct device *dev)
case IUCV_CLOSING:
case IUCV_CONNECTED:
if (iucv->path) {
- err = iucv_path_sever(iucv->path, NULL);
+ err = pr_iucv->path_sever(iucv->path, NULL);
iucv_path_free(iucv->path);
iucv->path = NULL;
}
@@ -229,7 +236,7 @@ static const struct dev_pm_ops afiucv_pm_ops = {
static struct device_driver af_iucv_driver = {
.owner = THIS_MODULE,
.name = "afiucv",
- .bus = &iucv_bus,
+ .bus = NULL,
.pm = &afiucv_pm_ops,
};
@@ -294,7 +301,11 @@ static inline int iucv_below_msglim(struct sock *sk)
if (sk->sk_state != IUCV_CONNECTED)
return 1;
- return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+ if (iucv->transport == AF_IUCV_TRANS_IUCV)
+ return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+ else
+ return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) &&
+ (atomic_read(&iucv->pendings) <= 0));
}
/**
@@ -312,6 +323,79 @@ static void iucv_sock_wake_msglim(struct sock *sk)
rcu_read_unlock();
}
+/**
+ * afiucv_hs_send() - send a message through HiperSockets transport
+ */
+static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
+ struct sk_buff *skb, u8 flags)
+{
+ struct net *net = sock_net(sock);
+ struct iucv_sock *iucv = iucv_sk(sock);
+ struct af_iucv_trans_hdr *phs_hdr;
+ struct sk_buff *nskb;
+ int err, confirm_recv = 0;
+
+ memset(skb->head, 0, ETH_HLEN);
+ phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb,
+ sizeof(struct af_iucv_trans_hdr));
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr));
+
+ phs_hdr->magic = ETH_P_AF_IUCV;
+ phs_hdr->version = 1;
+ phs_hdr->flags = flags;
+ if (flags == AF_IUCV_FLAG_SYN)
+ phs_hdr->window = iucv->msglimit;
+ else if ((flags == AF_IUCV_FLAG_WIN) || !flags) {
+ confirm_recv = atomic_read(&iucv->msg_recv);
+ phs_hdr->window = confirm_recv;
+ if (confirm_recv)
+ phs_hdr->flags = phs_hdr->flags | AF_IUCV_FLAG_WIN;
+ }
+ memcpy(phs_hdr->destUserID, iucv->dst_user_id, 8);
+ memcpy(phs_hdr->destAppName, iucv->dst_name, 8);
+ memcpy(phs_hdr->srcUserID, iucv->src_user_id, 8);
+ memcpy(phs_hdr->srcAppName, iucv->src_name, 8);
+ ASCEBC(phs_hdr->destUserID, sizeof(phs_hdr->destUserID));
+ ASCEBC(phs_hdr->destAppName, sizeof(phs_hdr->destAppName));
+ ASCEBC(phs_hdr->srcUserID, sizeof(phs_hdr->srcUserID));
+ ASCEBC(phs_hdr->srcAppName, sizeof(phs_hdr->srcAppName));
+ if (imsg)
+ memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
+
+ rcu_read_lock();
+ skb->dev = dev_get_by_index_rcu(net, sock->sk_bound_dev_if);
+ rcu_read_unlock();
+ if (!skb->dev)
+ return -ENODEV;
+ if (!(skb->dev->flags & IFF_UP))
+ return -ENETDOWN;
+ if (skb->len > skb->dev->mtu) {
+ if (sock->sk_type == SOCK_SEQPACKET)
+ return -EMSGSIZE;
+ else
+ skb_trim(skb, skb->dev->mtu);
+ }
+ skb->protocol = ETH_P_AF_IUCV;
+ skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+ skb_queue_tail(&iucv->send_skb_q, nskb);
+ err = dev_queue_xmit(skb);
+ if (err) {
+ skb_unlink(nskb, &iucv->send_skb_q);
+ kfree_skb(nskb);
+ } else {
+ atomic_sub(confirm_recv, &iucv->msg_recv);
+ WARN_ON(atomic_read(&iucv->msg_recv) < 0);
+ }
+ return err;
+}
+
/* Timers */
static void iucv_sock_timeout(unsigned long arg)
{
@@ -380,6 +464,8 @@ static void iucv_sock_close(struct sock *sk)
unsigned char user_data[16];
struct iucv_sock *iucv = iucv_sk(sk);
unsigned long timeo;
+ int err, blen;
+ struct sk_buff *skb;
iucv_sock_clear_timer(sk);
lock_sock(sk);
@@ -390,6 +476,20 @@ static void iucv_sock_close(struct sock *sk)
break;
case IUCV_CONNECTED:
+ if (iucv->transport == AF_IUCV_TRANS_HIPER) {
+ /* send fin */
+ blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+ skb = sock_alloc_send_skb(sk, blen, 1, &err);
+ if (skb) {
+ skb_reserve(skb,
+ sizeof(struct af_iucv_trans_hdr) +
+ ETH_HLEN);
+ err = afiucv_hs_send(NULL, sk, skb,
+ AF_IUCV_FLAG_FIN);
+ }
+ sk->sk_state = IUCV_DISCONN;
+ sk->sk_state_change(sk);
+ }
case IUCV_DISCONN:
sk->sk_state = IUCV_CLOSING;
sk->sk_state_change(sk);
@@ -412,7 +512,7 @@ static void iucv_sock_close(struct sock *sk)
low_nmcpy(user_data, iucv->src_name);
high_nmcpy(user_data, iucv->dst_name);
ASCEBC(user_data, sizeof(user_data));
- iucv_path_sever(iucv->path, user_data);
+ pr_iucv->path_sever(iucv->path, user_data);
iucv_path_free(iucv->path);
iucv->path = NULL;
}
@@ -444,23 +544,33 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent)
static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
{
struct sock *sk;
+ struct iucv_sock *iucv;
sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
if (!sk)
return NULL;
+ iucv = iucv_sk(sk);
sock_init_data(sock, sk);
- INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
- spin_lock_init(&iucv_sk(sk)->accept_q_lock);
- skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
- INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list);
- spin_lock_init(&iucv_sk(sk)->message_q.lock);
- skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
- iucv_sk(sk)->send_tag = 0;
- iucv_sk(sk)->flags = 0;
- iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT;
- iucv_sk(sk)->path = NULL;
- memset(&iucv_sk(sk)->src_user_id , 0, 32);
+ INIT_LIST_HEAD(&iucv->accept_q);
+ spin_lock_init(&iucv->accept_q_lock);
+ skb_queue_head_init(&iucv->send_skb_q);
+ INIT_LIST_HEAD(&iucv->message_q.list);
+ spin_lock_init(&iucv->message_q.lock);
+ skb_queue_head_init(&iucv->backlog_skb_q);
+ iucv->send_tag = 0;
+ atomic_set(&iucv->pendings, 0);
+ iucv->flags = 0;
+ iucv->msglimit = 0;
+ atomic_set(&iucv->msg_sent, 0);
+ atomic_set(&iucv->msg_recv, 0);
+ iucv->path = NULL;
+ iucv->sk_txnotify = afiucv_hs_callback_txnotify;
+ memset(&iucv->src_user_id , 0, 32);
+ if (pr_iucv)
+ iucv->transport = AF_IUCV_TRANS_IUCV;
+ else
+ iucv->transport = AF_IUCV_TRANS_HIPER;
sk->sk_destruct = iucv_sock_destruct;
sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -591,7 +701,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
struct iucv_sock *iucv;
- int err;
+ int err = 0;
+ struct net_device *dev;
+ char uid[9];
/* Verify the input sockaddr */
if (!addr || addr->sa_family != AF_IUCV)
@@ -610,19 +722,46 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
err = -EADDRINUSE;
goto done_unlock;
}
- if (iucv->path) {
- err = 0;
+ if (iucv->path)
goto done_unlock;
- }
/* Bind the socket */
- memcpy(iucv->src_name, sa->siucv_name, 8);
- /* Copy the user id */
- memcpy(iucv->src_user_id, iucv_userid, 8);
- sk->sk_state = IUCV_BOUND;
- err = 0;
+ if (pr_iucv)
+ if (!memcmp(sa->siucv_user_id, iucv_userid, 8))
+ goto vm_bind; /* VM IUCV transport */
+ /* try hiper transport */
+ memcpy(uid, sa->siucv_user_id, sizeof(uid));
+ ASCEBC(uid, 8);
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
+ if (!memcmp(dev->perm_addr, uid, 8)) {
+ memcpy(iucv->src_name, sa->siucv_name, 8);
+ memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
+ sock->sk->sk_bound_dev_if = dev->ifindex;
+ sk->sk_state = IUCV_BOUND;
+ iucv->transport = AF_IUCV_TRANS_HIPER;
+ if (!iucv->msglimit)
+ iucv->msglimit = IUCV_HIPER_MSGLIM_DEFAULT;
+ rcu_read_unlock();
+ goto done_unlock;
+ }
+ }
+ rcu_read_unlock();
+vm_bind:
+ if (pr_iucv) {
+ /* use local userid for backward compat */
+ memcpy(iucv->src_name, sa->siucv_name, 8);
+ memcpy(iucv->src_user_id, iucv_userid, 8);
+ sk->sk_state = IUCV_BOUND;
+ iucv->transport = AF_IUCV_TRANS_IUCV;
+ if (!iucv->msglimit)
+ iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+ goto done_unlock;
+ }
+ /* found no dev to bind */
+ err = -ENODEV;
done_unlock:
/* Release the socket list lock */
write_unlock_bh(&iucv_sk_list.lock);
@@ -658,45 +797,44 @@ static int iucv_sock_autobind(struct sock *sk)
memcpy(&iucv->src_name, name, 8);
+ if (!iucv->msglimit)
+ iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+
return err;
}
-/* Connect an unconnected socket */
-static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
- int alen, int flags)
+static int afiucv_hs_connect(struct socket *sock)
{
- struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
- struct iucv_sock *iucv;
- unsigned char user_data[16];
- int err;
-
- if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
- return -EINVAL;
-
- if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
- return -EBADFD;
-
- if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ struct sk_buff *skb;
+ int blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+ int err = 0;
- if (sk->sk_state == IUCV_OPEN) {
- err = iucv_sock_autobind(sk);
- if (unlikely(err))
- return err;
+ /* send syn */
+ skb = sock_alloc_send_skb(sk, blen, 1, &err);
+ if (!skb) {
+ err = -ENOMEM;
+ goto done;
}
+ skb->dev = NULL;
+ skb_reserve(skb, blen);
+ err = afiucv_hs_send(NULL, sk, skb, AF_IUCV_FLAG_SYN);
+done:
+ return err;
+}
- lock_sock(sk);
-
- /* Set the destination information */
- memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8);
- memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8);
+static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr)
+{
+ struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
+ struct sock *sk = sock->sk;
+ struct iucv_sock *iucv = iucv_sk(sk);
+ unsigned char user_data[16];
+ int err;
high_nmcpy(user_data, sa->siucv_name);
- low_nmcpy(user_data, iucv_sk(sk)->src_name);
+ low_nmcpy(user_data, iucv->src_name);
ASCEBC(user_data, sizeof(user_data));
- iucv = iucv_sk(sk);
/* Create path. */
iucv->path = iucv_path_alloc(iucv->msglimit,
IUCV_IPRMDATA, GFP_KERNEL);
@@ -704,8 +842,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
err = -ENOMEM;
goto done;
}
- err = iucv_path_connect(iucv->path, &af_iucv_handler,
- sa->siucv_user_id, NULL, user_data, sk);
+ err = pr_iucv->path_connect(iucv->path, &af_iucv_handler,
+ sa->siucv_user_id, NULL, user_data,
+ sk);
if (err) {
iucv_path_free(iucv->path);
iucv->path = NULL;
@@ -724,21 +863,62 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
err = -ECONNREFUSED;
break;
}
- goto done;
}
+done:
+ return err;
+}
- if (sk->sk_state != IUCV_CONNECTED) {
+/* Connect an unconnected socket */
+static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
+ int alen, int flags)
+{
+ struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
+ struct sock *sk = sock->sk;
+ struct iucv_sock *iucv = iucv_sk(sk);
+ int err;
+
+ if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
+ return -EINVAL;
+
+ if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
+ return -EBADFD;
+
+ if (sk->sk_state == IUCV_OPEN &&
+ iucv->transport == AF_IUCV_TRANS_HIPER)
+ return -EBADFD; /* explicit bind required */
+
+ if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
+ return -EINVAL;
+
+ if (sk->sk_state == IUCV_OPEN) {
+ err = iucv_sock_autobind(sk);
+ if (unlikely(err))
+ return err;
+ }
+
+ lock_sock(sk);
+
+ /* Set the destination information */
+ memcpy(iucv->dst_user_id, sa->siucv_user_id, 8);
+ memcpy(iucv->dst_name, sa->siucv_name, 8);
+
+ if (iucv->transport == AF_IUCV_TRANS_HIPER)
+ err = afiucv_hs_connect(sock);
+ else
+ err = afiucv_path_connect(sock, addr);
+ if (err)
+ goto done;
+
+ if (sk->sk_state != IUCV_CONNECTED)
err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED,
IUCV_DISCONN),
sock_sndtimeo(sk, flags & O_NONBLOCK));
- }
- if (sk->sk_state == IUCV_DISCONN) {
+ if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_CLOSED)
err = -ECONNREFUSED;
- }
- if (err) {
- iucv_path_sever(iucv->path, NULL);
+ if (err && iucv->transport == AF_IUCV_TRANS_IUCV) {
+ pr_iucv->path_sever(iucv->path, NULL);
iucv_path_free(iucv->path);
iucv->path = NULL;
}
@@ -833,20 +1013,21 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
{
struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
struct sock *sk = sock->sk;
+ struct iucv_sock *iucv = iucv_sk(sk);
addr->sa_family = AF_IUCV;
*len = sizeof(struct sockaddr_iucv);
if (peer) {
- memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8);
- memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8);
+ memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
+ memcpy(siucv->siucv_name, iucv->dst_name, 8);
} else {
- memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8);
- memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8);
+ memcpy(siucv->siucv_user_id, iucv->src_user_id, 8);
+ memcpy(siucv->siucv_name, iucv->src_name, 8);
}
memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port));
memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
- memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
+ memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
return 0;
}
@@ -871,7 +1052,7 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg,
memcpy(prmdata, (void *) skb->data, skb->len);
prmdata[7] = 0xff - (u8) skb->len;
- return iucv_message_send(path, msg, IUCV_IPRMDATA, 0,
+ return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0,
(void *) prmdata, 8);
}
@@ -960,9 +1141,16 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
* this is fine for SOCK_SEQPACKET (unless we want to support
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
- skb = sock_alloc_send_skb(sk, len, noblock, &err);
+ if (iucv->transport == AF_IUCV_TRANS_HIPER)
+ skb = sock_alloc_send_skb(sk,
+ len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN,
+ noblock, &err);
+ else
+ skb = sock_alloc_send_skb(sk, len, noblock, &err);
if (!skb)
goto out;
+ if (iucv->transport == AF_IUCV_TRANS_HIPER)
+ skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
err = -EFAULT;
goto fail;
@@ -983,6 +1171,15 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
/* increment and save iucv message tag for msg_completion cbk */
txmsg.tag = iucv->send_tag++;
memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
+ if (iucv->transport == AF_IUCV_TRANS_HIPER) {
+ atomic_inc(&iucv->msg_sent);
+ err = afiucv_hs_send(&txmsg, sk, skb, 0);
+ if (err) {
+ atomic_dec(&iucv->msg_sent);
+ goto fail;
+ }
+ goto release;
+ }
skb_queue_tail(&iucv->send_skb_q, skb);
if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
@@ -999,13 +1196,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
/* this error should never happen since the
* IUCV_IPRMDATA path flag is set... sever path */
if (err == 0x15) {
- iucv_path_sever(iucv->path, NULL);
+ pr_iucv->path_sever(iucv->path, NULL);
skb_unlink(skb, &iucv->send_skb_q);
err = -EPIPE;
goto fail;
}
} else
- err = iucv_message_send(iucv->path, &txmsg, 0, 0,
+ err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
(void *) skb->data, skb->len);
if (err) {
if (err == 3) {
@@ -1023,6 +1220,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
goto fail;
}
+release:
release_sock(sk);
return len;
@@ -1095,8 +1293,9 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
skb->len = 0;
}
} else {
- rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA,
- skb->data, len, NULL);
+ rc = pr_iucv->message_receive(path, msg,
+ msg->flags & IUCV_IPRMDATA,
+ skb->data, len, NULL);
if (rc) {
kfree_skb(skb);
return;
@@ -1110,7 +1309,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
kfree_skb(skb);
skb = NULL;
if (rc) {
- iucv_path_sever(path, NULL);
+ pr_iucv->path_sever(path, NULL);
return;
}
skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
@@ -1154,7 +1353,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
unsigned int copied, rlen;
- struct sk_buff *skb, *rskb, *cskb;
+ struct sk_buff *skb, *rskb, *cskb, *sskb;
+ int blen;
int err = 0;
if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
@@ -1179,7 +1379,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = min_t(unsigned int, rlen, len);
cskb = skb;
- if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
+ if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) {
if (!(flags & MSG_PEEK))
skb_queue_head(&sk->sk_receive_queue, skb);
return -EFAULT;
@@ -1217,6 +1417,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
}
kfree_skb(skb);
+ atomic_inc(&iucv->msg_recv);
/* Queue backlog skbs */
spin_lock_bh(&iucv->message_q.lock);
@@ -1233,6 +1434,24 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (skb_queue_empty(&iucv->backlog_skb_q)) {
if (!list_empty(&iucv->message_q.list))
iucv_process_message_q(sk);
+ if (atomic_read(&iucv->msg_recv) >=
+ iucv->msglimit / 2) {
+ /* send WIN to peer */
+ blen = sizeof(struct af_iucv_trans_hdr) +
+ ETH_HLEN;
+ sskb = sock_alloc_send_skb(sk, blen, 1, &err);
+ if (sskb) {
+ skb_reserve(sskb,
+ sizeof(struct af_iucv_trans_hdr)
+ + ETH_HLEN);
+ err = afiucv_hs_send(NULL, sk, sskb,
+ AF_IUCV_FLAG_WIN);
+ }
+ if (err) {
+ sk->sk_state = IUCV_DISCONN;
+ sk->sk_state_change(sk);
+ }
+ }
}
spin_unlock_bh(&iucv->message_q.lock);
}
@@ -1327,8 +1546,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
txmsg.class = 0;
txmsg.tag = 0;
- err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
- (void *) iprm_shutdown, 8);
+ err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA,
+ 0, (void *) iprm_shutdown, 8);
if (err) {
switch (err) {
case 1:
@@ -1345,7 +1564,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
}
if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) {
- err = iucv_path_quiesce(iucv_sk(sk)->path, NULL);
+ err = pr_iucv->path_quiesce(iucv->path, NULL);
if (err)
err = -ENOTCONN;
@@ -1372,7 +1591,7 @@ static int iucv_sock_release(struct socket *sock)
/* Unregister with IUCV base support */
if (iucv_sk(sk)->path) {
- iucv_path_sever(iucv_sk(sk)->path, NULL);
+ pr_iucv->path_sever(iucv_sk(sk)->path, NULL);
iucv_path_free(iucv_sk(sk)->path);
iucv_sk(sk)->path = NULL;
}
@@ -1514,14 +1733,14 @@ static int iucv_callback_connreq(struct iucv_path *path,
high_nmcpy(user_data, iucv->dst_name);
ASCEBC(user_data, sizeof(user_data));
if (sk->sk_state != IUCV_LISTEN) {
- err = iucv_path_sever(path, user_data);
+ err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
goto fail;
}
/* Check for backlog size */
if (sk_acceptq_is_full(sk)) {
- err = iucv_path_sever(path, user_data);
+ err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
goto fail;
}
@@ -1529,7 +1748,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
/* Create the new socket */
nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
if (!nsk) {
- err = iucv_path_sever(path, user_data);
+ err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
goto fail;
}
@@ -1553,9 +1772,9 @@ static int iucv_callback_connreq(struct iucv_path *path,
/* set message limit for path based on msglimit of accepting socket */
niucv->msglimit = iucv->msglimit;
path->msglim = iucv->msglimit;
- err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
+ err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk);
if (err) {
- err = iucv_path_sever(path, user_data);
+ err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
iucv_sock_kill(nsk);
goto fail;
@@ -1589,7 +1808,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
int len;
if (sk->sk_shutdown & RCV_SHUTDOWN) {
- iucv_message_reject(path, msg);
+ pr_iucv->message_reject(path, msg);
return;
}
@@ -1692,6 +1911,389 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16])
bh_unlock_sock(sk);
}
+/***************** HiperSockets transport callbacks ********************/
+static void afiucv_swap_src_dest(struct sk_buff *skb)
+{
+ struct af_iucv_trans_hdr *trans_hdr =
+ (struct af_iucv_trans_hdr *)skb->data;
+ char tmpID[8];
+ char tmpName[8];
+
+ ASCEBC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
+ ASCEBC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
+ ASCEBC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID));
+ ASCEBC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
+ memcpy(tmpID, trans_hdr->srcUserID, 8);
+ memcpy(tmpName, trans_hdr->srcAppName, 8);
+ memcpy(trans_hdr->srcUserID, trans_hdr->destUserID, 8);
+ memcpy(trans_hdr->srcAppName, trans_hdr->destAppName, 8);
+ memcpy(trans_hdr->destUserID, tmpID, 8);
+ memcpy(trans_hdr->destAppName, tmpName, 8);
+ skb_push(skb, ETH_HLEN);
+ memset(skb->data, 0, ETH_HLEN);
+}
+
+/**
+ * afiucv_hs_callback_syn - react on received SYN
+ **/
+static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
+{
+ struct sock *nsk;
+ struct iucv_sock *iucv, *niucv;
+ struct af_iucv_trans_hdr *trans_hdr;
+ int err;
+
+ iucv = iucv_sk(sk);
+ trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
+ if (!iucv) {
+ /* no sock - connection refused */
+ afiucv_swap_src_dest(skb);
+ trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
+ err = dev_queue_xmit(skb);
+ goto out;
+ }
+
+ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
+ bh_lock_sock(sk);
+ if ((sk->sk_state != IUCV_LISTEN) ||
+ sk_acceptq_is_full(sk) ||
+ !nsk) {
+ /* error on server socket - connection refused */
+ if (nsk)
+ sk_free(nsk);
+ afiucv_swap_src_dest(skb);
+ trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
+ err = dev_queue_xmit(skb);
+ bh_unlock_sock(sk);
+ goto out;
+ }
+
+ niucv = iucv_sk(nsk);
+ iucv_sock_init(nsk, sk);
+ niucv->transport = AF_IUCV_TRANS_HIPER;
+ niucv->msglimit = iucv->msglimit;
+ if (!trans_hdr->window)
+ niucv->msglimit_peer = IUCV_HIPER_MSGLIM_DEFAULT;
+ else
+ niucv->msglimit_peer = trans_hdr->window;
+ memcpy(niucv->dst_name, trans_hdr->srcAppName, 8);
+ memcpy(niucv->dst_user_id, trans_hdr->srcUserID, 8);
+ memcpy(niucv->src_name, iucv->src_name, 8);
+ memcpy(niucv->src_user_id, iucv->src_user_id, 8);
+ nsk->sk_bound_dev_if = sk->sk_bound_dev_if;
+ afiucv_swap_src_dest(skb);
+ trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK;
+ trans_hdr->window = niucv->msglimit;
+ /* if receiver acks the xmit connection is established */
+ err = dev_queue_xmit(skb);
+ if (!err) {
+ iucv_accept_enqueue(sk, nsk);
+ nsk->sk_state = IUCV_CONNECTED;
+ sk->sk_data_ready(sk, 1);
+ } else
+ iucv_sock_kill(nsk);
+ bh_unlock_sock(sk);
+
+out:
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_synack() - react on received SYN-ACK
+ **/
+static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
+{
+ struct iucv_sock *iucv = iucv_sk(sk);
+ struct af_iucv_trans_hdr *trans_hdr =
+ (struct af_iucv_trans_hdr *)skb->data;
+
+ if (!iucv)
+ goto out;
+ if (sk->sk_state != IUCV_BOUND)
+ goto out;
+ bh_lock_sock(sk);
+ iucv->msglimit_peer = trans_hdr->window;
+ sk->sk_state = IUCV_CONNECTED;
+ sk->sk_state_change(sk);
+ bh_unlock_sock(sk);
+out:
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_synfin() - react on received SYN_FIN
+ **/
+static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
+{
+ struct iucv_sock *iucv = iucv_sk(sk);
+
+ if (!iucv)
+ goto out;
+ if (sk->sk_state != IUCV_BOUND)
+ goto out;
+ bh_lock_sock(sk);
+ sk->sk_state = IUCV_DISCONN;
+ sk->sk_state_change(sk);
+ bh_unlock_sock(sk);
+out:
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_fin() - react on received FIN
+ **/
+static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
+{
+ struct iucv_sock *iucv = iucv_sk(sk);
+
+ /* other end of connection closed */
+ if (iucv) {
+ bh_lock_sock(sk);
+ if (!list_empty(&iucv->accept_q))
+ sk->sk_state = IUCV_SEVERED;
+ else
+ sk->sk_state = IUCV_DISCONN;
+ sk->sk_state_change(sk);
+ bh_unlock_sock(sk);
+ }
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_win() - react on received WIN
+ **/
+static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
+{
+ struct iucv_sock *iucv = iucv_sk(sk);
+ struct af_iucv_trans_hdr *trans_hdr =
+ (struct af_iucv_trans_hdr *)skb->data;
+
+ if (!iucv)
+ return NET_RX_SUCCESS;
+
+ if (sk->sk_state != IUCV_CONNECTED)
+ return NET_RX_SUCCESS;
+
+ atomic_sub(trans_hdr->window, &iucv->msg_sent);
+ iucv_sock_wake_msglim(sk);
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_callback_rx() - react on received data
+ **/
+static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
+{
+ struct iucv_sock *iucv = iucv_sk(sk);
+
+ if (!iucv) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+ if (sk->sk_state != IUCV_CONNECTED) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+ /* write stuff from iucv_msg to skb cb */
+ if (skb->len <= sizeof(struct af_iucv_trans_hdr)) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+ skb_pull(skb, sizeof(struct af_iucv_trans_hdr));
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+ spin_lock(&iucv->message_q.lock);
+ if (skb_queue_empty(&iucv->backlog_skb_q)) {
+ if (sock_queue_rcv_skb(sk, skb)) {
+ /* handle rcv queue full */
+ skb_queue_tail(&iucv->backlog_skb_q, skb);
+ }
+ } else
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
+ spin_unlock(&iucv->message_q.lock);
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * afiucv_hs_rcv() - base function for arriving data through HiperSockets
+ * transport
+ * called from netif RX softirq
+ **/
+static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct hlist_node *node;
+ struct sock *sk;
+ struct iucv_sock *iucv;
+ struct af_iucv_trans_hdr *trans_hdr;
+ char nullstring[8];
+ int err = 0;
+
+ skb_pull(skb, ETH_HLEN);
+ trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
+ EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
+ EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
+ EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
+ EBCASC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID));
+ memset(nullstring, 0, sizeof(nullstring));
+ iucv = NULL;
+ sk = NULL;
+ read_lock(&iucv_sk_list.lock);
+ sk_for_each(sk, node, &iucv_sk_list.head) {
+ if (trans_hdr->flags == AF_IUCV_FLAG_SYN) {
+ if ((!memcmp(&iucv_sk(sk)->src_name,
+ trans_hdr->destAppName, 8)) &&
+ (!memcmp(&iucv_sk(sk)->src_user_id,
+ trans_hdr->destUserID, 8)) &&
+ (!memcmp(&iucv_sk(sk)->dst_name, nullstring, 8)) &&
+ (!memcmp(&iucv_sk(sk)->dst_user_id,
+ nullstring, 8))) {
+ iucv = iucv_sk(sk);
+ break;
+ }
+ } else {
+ if ((!memcmp(&iucv_sk(sk)->src_name,
+ trans_hdr->destAppName, 8)) &&
+ (!memcmp(&iucv_sk(sk)->src_user_id,
+ trans_hdr->destUserID, 8)) &&
+ (!memcmp(&iucv_sk(sk)->dst_name,
+ trans_hdr->srcAppName, 8)) &&
+ (!memcmp(&iucv_sk(sk)->dst_user_id,
+ trans_hdr->srcUserID, 8))) {
+ iucv = iucv_sk(sk);
+ break;
+ }
+ }
+ }
+ read_unlock(&iucv_sk_list.lock);
+ if (!iucv)
+ sk = NULL;
+
+ /* no sock
+ how should we send with no sock
+ 1) send without sock no send rc checking?
+ 2) introduce default sock to handle this cases
+
+ SYN -> send SYN|ACK in good case, send SYN|FIN in bad case
+ data -> send FIN
+ SYN|ACK, SYN|FIN, FIN -> no action? */
+
+ switch (trans_hdr->flags) {
+ case AF_IUCV_FLAG_SYN:
+ /* connect request */
+ err = afiucv_hs_callback_syn(sk, skb);
+ break;
+ case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK):
+ /* connect request confirmed */
+ err = afiucv_hs_callback_synack(sk, skb);
+ break;
+ case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN):
+ /* connect request refused */
+ err = afiucv_hs_callback_synfin(sk, skb);
+ break;
+ case (AF_IUCV_FLAG_FIN):
+ /* close request */
+ err = afiucv_hs_callback_fin(sk, skb);
+ break;
+ case (AF_IUCV_FLAG_WIN):
+ err = afiucv_hs_callback_win(sk, skb);
+ if (skb->len > sizeof(struct af_iucv_trans_hdr))
+ err = afiucv_hs_callback_rx(sk, skb);
+ else
+ kfree(skb);
+ break;
+ case 0:
+ /* plain data frame */
+ err = afiucv_hs_callback_rx(sk, skb);
+ break;
+ default:
+ ;
+ }
+
+ return err;
+}
+
+/**
+ * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets
+ * transport
+ **/
+static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
+ enum iucv_tx_notify n)
+{
+ struct sock *isk = skb->sk;
+ struct sock *sk = NULL;
+ struct iucv_sock *iucv = NULL;
+ struct sk_buff_head *list;
+ struct sk_buff *list_skb;
+ struct sk_buff *this = NULL;
+ unsigned long flags;
+ struct hlist_node *node;
+
+ read_lock(&iucv_sk_list.lock);
+ sk_for_each(sk, node, &iucv_sk_list.head)
+ if (sk == isk) {
+ iucv = iucv_sk(sk);
+ break;
+ }
+ read_unlock(&iucv_sk_list.lock);
+
+ if (!iucv)
+ return;
+
+ bh_lock_sock(sk);
+ list = &iucv->send_skb_q;
+ list_skb = list->next;
+ if (skb_queue_empty(list))
+ goto out_unlock;
+
+ spin_lock_irqsave(&list->lock, flags);
+ while (list_skb != (struct sk_buff *)list) {
+ if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
+ this = list_skb;
+ switch (n) {
+ case TX_NOTIFY_OK:
+ __skb_unlink(this, list);
+ iucv_sock_wake_msglim(sk);
+ kfree_skb(this);
+ break;
+ case TX_NOTIFY_PENDING:
+ atomic_inc(&iucv->pendings);
+ break;
+ case TX_NOTIFY_DELAYED_OK:
+ __skb_unlink(this, list);
+ atomic_dec(&iucv->pendings);
+ if (atomic_read(&iucv->pendings) <= 0)
+ iucv_sock_wake_msglim(sk);
+ kfree_skb(this);
+ break;
+ case TX_NOTIFY_UNREACHABLE:
+ case TX_NOTIFY_DELAYED_UNREACHABLE:
+ case TX_NOTIFY_TPQFULL: /* not yet used */
+ case TX_NOTIFY_GENERALERROR:
+ case TX_NOTIFY_DELAYED_GENERALERROR:
+ __skb_unlink(this, list);
+ kfree_skb(this);
+ if (!list_empty(&iucv->accept_q))
+ sk->sk_state = IUCV_SEVERED;
+ else
+ sk->sk_state = IUCV_DISCONN;
+ sk->sk_state_change(sk);
+ break;
+ }
+ break;
+ }
+ list_skb = list_skb->next;
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+
+out_unlock:
+ bh_unlock_sock(sk);
+}
static const struct proto_ops iucv_sock_ops = {
.family = PF_IUCV,
.owner = THIS_MODULE,
@@ -1718,71 +2320,104 @@ static const struct net_proto_family iucv_sock_family_ops = {
.create = iucv_sock_create,
};
-static int __init afiucv_init(void)
+static struct packet_type iucv_packet_type = {
+ .type = cpu_to_be16(ETH_P_AF_IUCV),
+ .func = afiucv_hs_rcv,
+};
+
+static int afiucv_iucv_init(void)
{
int err;
- if (!MACHINE_IS_VM) {
- pr_err("The af_iucv module cannot be loaded"
- " without z/VM\n");
- err = -EPROTONOSUPPORT;
- goto out;
- }
- cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
- if (unlikely(err)) {
- WARN_ON(err);
- err = -EPROTONOSUPPORT;
- goto out;
- }
-
- err = iucv_register(&af_iucv_handler, 0);
+ err = pr_iucv->iucv_register(&af_iucv_handler, 0);
if (err)
goto out;
- err = proto_register(&iucv_proto, 0);
- if (err)
- goto out_iucv;
- err = sock_register(&iucv_sock_family_ops);
- if (err)
- goto out_proto;
/* establish dummy device */
+ af_iucv_driver.bus = pr_iucv->bus;
err = driver_register(&af_iucv_driver);
if (err)
- goto out_sock;
+ goto out_iucv;
af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
if (!af_iucv_dev) {
err = -ENOMEM;
goto out_driver;
}
dev_set_name(af_iucv_dev, "af_iucv");
- af_iucv_dev->bus = &iucv_bus;
- af_iucv_dev->parent = iucv_root;
+ af_iucv_dev->bus = pr_iucv->bus;
+ af_iucv_dev->parent = pr_iucv->root;
af_iucv_dev->release = (void (*)(struct device *))kfree;
af_iucv_dev->driver = &af_iucv_driver;
err = device_register(af_iucv_dev);
if (err)
goto out_driver;
-
return 0;
out_driver:
driver_unregister(&af_iucv_driver);
+out_iucv:
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+out:
+ return err;
+}
+
+static int __init afiucv_init(void)
+{
+ int err;
+
+ if (MACHINE_IS_VM) {
+ cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
+ if (unlikely(err)) {
+ WARN_ON(err);
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
+ if (!pr_iucv) {
+ printk(KERN_WARNING "iucv_if lookup failed\n");
+ memset(&iucv_userid, 0, sizeof(iucv_userid));
+ }
+ } else {
+ memset(&iucv_userid, 0, sizeof(iucv_userid));
+ pr_iucv = NULL;
+ }
+
+ err = proto_register(&iucv_proto, 0);
+ if (err)
+ goto out;
+ err = sock_register(&iucv_sock_family_ops);
+ if (err)
+ goto out_proto;
+
+ if (pr_iucv) {
+ err = afiucv_iucv_init();
+ if (err)
+ goto out_sock;
+ }
+ dev_add_pack(&iucv_packet_type);
+ return 0;
+
out_sock:
sock_unregister(PF_IUCV);
out_proto:
proto_unregister(&iucv_proto);
-out_iucv:
- iucv_unregister(&af_iucv_handler, 0);
out:
+ if (pr_iucv)
+ symbol_put(iucv_if);
return err;
}
static void __exit afiucv_exit(void)
{
- device_unregister(af_iucv_dev);
- driver_unregister(&af_iucv_driver);
+ if (pr_iucv) {
+ device_unregister(af_iucv_dev);
+ driver_unregister(&af_iucv_driver);
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+ symbol_put(iucv_if);
+ }
+ dev_remove_pack(&iucv_packet_type);
sock_unregister(PF_IUCV);
proto_unregister(&iucv_proto);
- iucv_unregister(&af_iucv_handler, 0);
}
module_init(afiucv_init);
@@ -1793,3 +2428,4 @@ MODULE_DESCRIPTION("IUCV Sockets ver " VERSION);
MODULE_VERSION(VERSION);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_IUCV);
+
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 075a380..403be43 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1974,6 +1974,27 @@ out:
return rc;
}
+struct iucv_interface iucv_if = {
+ .message_receive = iucv_message_receive,
+ .__message_receive = __iucv_message_receive,
+ .message_reply = iucv_message_reply,
+ .message_reject = iucv_message_reject,
+ .message_send = iucv_message_send,
+ .__message_send = __iucv_message_send,
+ .message_send2way = iucv_message_send2way,
+ .message_purge = iucv_message_purge,
+ .path_accept = iucv_path_accept,
+ .path_connect = iucv_path_connect,
+ .path_quiesce = iucv_path_quiesce,
+ .path_resume = iucv_path_resume,
+ .path_sever = iucv_path_sever,
+ .iucv_register = iucv_register,
+ .iucv_unregister = iucv_unregister,
+ .bus = NULL,
+ .root = NULL,
+};
+EXPORT_SYMBOL(iucv_if);
+
/**
* iucv_init
*
@@ -2038,6 +2059,8 @@ static int __init iucv_init(void)
rc = bus_register(&iucv_bus);
if (rc)
goto out_reboot;
+ iucv_if.root = iucv_root;
+ iucv_if.bus = &iucv_bus;
return 0;
out_reboot:
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 956b7e4..8d0324b 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -139,7 +139,8 @@ out:
return lapb;
}
-int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks)
+int lapb_register(struct net_device *dev,
+ const struct lapb_register_struct *callbacks)
{
struct lapb_cb *lapb;
int rc = LAPB_BADTOKEN;
@@ -158,7 +159,7 @@ int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks
goto out;
lapb->dev = dev;
- lapb->callbacks = *callbacks;
+ lapb->callbacks = callbacks;
__lapb_insert_cb(lapb);
@@ -380,32 +381,32 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
void lapb_connect_confirmation(struct lapb_cb *lapb, int reason)
{
- if (lapb->callbacks.connect_confirmation)
- lapb->callbacks.connect_confirmation(lapb->dev, reason);
+ if (lapb->callbacks->connect_confirmation)
+ lapb->callbacks->connect_confirmation(lapb->dev, reason);
}
void lapb_connect_indication(struct lapb_cb *lapb, int reason)
{
- if (lapb->callbacks.connect_indication)
- lapb->callbacks.connect_indication(lapb->dev, reason);
+ if (lapb->callbacks->connect_indication)
+ lapb->callbacks->connect_indication(lapb->dev, reason);
}
void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason)
{
- if (lapb->callbacks.disconnect_confirmation)
- lapb->callbacks.disconnect_confirmation(lapb->dev, reason);
+ if (lapb->callbacks->disconnect_confirmation)
+ lapb->callbacks->disconnect_confirmation(lapb->dev, reason);
}
void lapb_disconnect_indication(struct lapb_cb *lapb, int reason)
{
- if (lapb->callbacks.disconnect_indication)
- lapb->callbacks.disconnect_indication(lapb->dev, reason);
+ if (lapb->callbacks->disconnect_indication)
+ lapb->callbacks->disconnect_indication(lapb->dev, reason);
}
int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb)
{
- if (lapb->callbacks.data_indication)
- return lapb->callbacks.data_indication(lapb->dev, skb);
+ if (lapb->callbacks->data_indication)
+ return lapb->callbacks->data_indication(lapb->dev, skb);
kfree_skb(skb);
return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */
@@ -415,8 +416,8 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb)
{
int used = 0;
- if (lapb->callbacks.data_transmit) {
- lapb->callbacks.data_transmit(lapb->dev, skb);
+ if (lapb->callbacks->data_transmit) {
+ lapb->callbacks->data_transmit(lapb->dev, skb);
used = 1;
}
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 0cde8df..97f3358 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -69,7 +69,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
if (!tid_rx)
return;
- rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL);
+ RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL);
#ifdef CONFIG_MAC80211_HT_DEBUG
printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
@@ -325,7 +325,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
status = WLAN_STATUS_SUCCESS;
/* activate it for RX */
- rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx);
+ RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx);
if (timeout)
mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout));
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1309bb9..55ee5a3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -63,7 +63,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
if (type == NL80211_IFTYPE_AP_VLAN &&
params && params->use_4addr == 0)
- rcu_assign_pointer(sdata->u.vlan.sta, NULL);
+ RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
else if (type == NL80211_IFTYPE_STATION &&
params && params->use_4addr >= 0)
sdata->u.mgd.use_4addr = params->use_4addr;
@@ -557,7 +557,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.dtim_period = new->dtim_period;
- rcu_assign_pointer(sdata->u.ap.beacon, new);
+ RCU_INIT_POINTER(sdata->u.ap.beacon, new);
synchronize_rcu();
@@ -612,7 +612,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
if (!old)
return -ENOENT;
- rcu_assign_pointer(sdata->u.ap.beacon, NULL);
+ RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
synchronize_rcu();
kfree(old);
@@ -904,7 +904,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
return -EBUSY;
}
- rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
+ RCU_INIT_POINTER(vlansdata->u.vlan.sta, sta);
}
sta->sdata = vlansdata;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 2da3040..ede9a8b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -84,7 +84,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
drv_reset_tsf(local, sdata);
skb = ifibss->skb;
- rcu_assign_pointer(ifibss->presp, NULL);
+ RCU_INIT_POINTER(ifibss->presp, NULL);
synchronize_rcu();
skb->data = skb->head;
skb->len = 0;
@@ -184,7 +184,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
*pos++ = 0; /* U-APSD no in use */
}
- rcu_assign_pointer(ifibss->presp, skb);
+ RCU_INIT_POINTER(ifibss->presp, skb);
sdata->vif.bss_conf.beacon_int = beacon_int;
sdata->vif.bss_conf.basic_rates = basic_rates;
@@ -995,7 +995,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
kfree(sdata->u.ibss.ie);
skb = rcu_dereference_protected(sdata->u.ibss.presp,
lockdep_is_held(&sdata->u.ibss.mtx));
- rcu_assign_pointer(sdata->u.ibss.presp, NULL);
+ RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
sdata->vif.bss_conf.ibss_joined = false;
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_IBSS);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ef741e8..30d7355 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -456,7 +456,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
BSS_CHANGED_BEACON_ENABLED);
/* remove beacon */
- rcu_assign_pointer(sdata->u.ap.beacon, NULL);
+ RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
synchronize_rcu();
kfree(old_beacon);
@@ -643,7 +643,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_stop = ieee80211_stop,
.ndo_uninit = ieee80211_teardown_sdata,
.ndo_start_xmit = ieee80211_subif_start_xmit,
- .ndo_set_multicast_list = ieee80211_set_multicast_list,
+ .ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_netdev_select_queue,
@@ -687,7 +687,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_stop = ieee80211_stop,
.ndo_uninit = ieee80211_teardown_sdata,
.ndo_start_xmit = ieee80211_monitor_start_xmit,
- .ndo_set_multicast_list = ieee80211_set_multicast_list,
+ .ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
.ndo_select_queue = ieee80211_monitor_select_queue,
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 332b5ff1..7f54c50 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1168,6 +1168,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
void mesh_pathtbl_unregister(void)
{
/* no need for locking during exit path */
- mesh_table_free(rcu_dereference_raw(mesh_paths), true);
- mesh_table_free(rcu_dereference_raw(mpp_paths), true);
+ mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true);
+ mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true);
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 076593b..58b1c2b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -73,7 +73,7 @@ static int sta_info_hash_del(struct ieee80211_local *local,
if (!s)
return -ENOENT;
if (s == sta) {
- rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)],
+ RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)],
s->hnext);
return 0;
}
@@ -83,7 +83,7 @@ static int sta_info_hash_del(struct ieee80211_local *local,
s = rcu_dereference_protected(s->hnext,
lockdep_is_held(&local->sta_lock));
if (rcu_access_pointer(s->hnext)) {
- rcu_assign_pointer(s->hnext, sta->hnext);
+ RCU_INIT_POINTER(s->hnext, sta->hnext);
return 0;
}
@@ -232,7 +232,7 @@ static void sta_info_hash_add(struct ieee80211_local *local,
struct sta_info *sta)
{
sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)];
- rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
+ RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
}
static void sta_unblock(struct work_struct *wk)
@@ -906,7 +906,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
local->sta_generation++;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
- rcu_assign_pointer(sdata->u.vlan.sta, NULL);
+ RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
if (sta->uploaded) {
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 899b71c..3346829 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -37,7 +37,7 @@ int nf_register_afinfo(const struct nf_afinfo *afinfo)
err = mutex_lock_interruptible(&afinfo_mutex);
if (err < 0)
return err;
- rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo);
+ RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
mutex_unlock(&afinfo_mutex);
return 0;
}
@@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(nf_register_afinfo);
void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
{
mutex_lock(&afinfo_mutex);
- rcu_assign_pointer(nf_afinfo[afinfo->family], NULL);
+ RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
mutex_unlock(&afinfo_mutex);
synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index be43fd8..5290ac3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3679,7 +3679,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
- ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+ rwlock_init(&ipvs->rs_lock);
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
@@ -3771,6 +3771,7 @@ err_sock:
void ip_vs_control_cleanup(void)
{
EnterFunction(2);
+ unregister_netdevice_notifier(&ip_vs_dst_notifier);
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f7af8b8..5acfaf5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -779,7 +779,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
if (exp->helper) {
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
- rcu_assign_pointer(help->helper, exp->helper);
+ RCU_INIT_POINTER(help->helper, exp->helper);
}
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -1317,7 +1317,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
void nf_conntrack_cleanup(struct net *net)
{
if (net_eq(net, &init_net))
- rcu_assign_pointer(ip_ct_attach, NULL);
+ RCU_INIT_POINTER(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
@@ -1327,7 +1327,7 @@ void nf_conntrack_cleanup(struct net *net)
nf_conntrack_cleanup_net(net);
if (net_eq(net, &init_net)) {
- rcu_assign_pointer(nf_ct_destroy, NULL);
+ RCU_INIT_POINTER(nf_ct_destroy, NULL);
nf_conntrack_cleanup_init_net();
}
}
@@ -1576,11 +1576,11 @@ int nf_conntrack_init(struct net *net)
if (net_eq(net, &init_net)) {
/* For use by REJECT target */
- rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
- rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
+ RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
+ RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
/* Howto get NAT offsets */
- rcu_assign_pointer(nf_ct_nat_offset, NULL);
+ RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
}
return 0;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 63a1b91..3add994 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -94,7 +94,7 @@ int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
ret = -EBUSY;
goto out_unlock;
}
- rcu_assign_pointer(nf_conntrack_event_cb, new);
+ RCU_INIT_POINTER(nf_conntrack_event_cb, new);
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
@@ -112,7 +112,7 @@ void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
notify = rcu_dereference_protected(nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
- rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+ RCU_INIT_POINTER(nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
@@ -129,7 +129,7 @@ int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
ret = -EBUSY;
goto out_unlock;
}
- rcu_assign_pointer(nf_expect_event_cb, new);
+ RCU_INIT_POINTER(nf_expect_event_cb, new);
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
@@ -147,7 +147,7 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
notify = rcu_dereference_protected(nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
- rcu_assign_pointer(nf_expect_event_cb, NULL);
+ RCU_INIT_POINTER(nf_expect_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 05ecdc2..4605c94 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -169,7 +169,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type)
before updating alloc_size */
type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
+ type->len;
- rcu_assign_pointer(nf_ct_ext_types[type->id], type);
+ RCU_INIT_POINTER(nf_ct_ext_types[type->id], type);
update_alloc_size(type);
out:
mutex_unlock(&nf_ct_ext_type_mutex);
@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(nf_ct_extend_register);
void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
{
mutex_lock(&nf_ct_ext_type_mutex);
- rcu_assign_pointer(nf_ct_ext_types[type->id], NULL);
+ RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
update_alloc_size(type);
mutex_unlock(&nf_ct_ext_type_mutex);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 1bdfea3..93c4bdb 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -131,7 +131,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
if (helper == NULL) {
if (help)
- rcu_assign_pointer(help->helper, NULL);
+ RCU_INIT_POINTER(help->helper, NULL);
goto out;
}
@@ -145,7 +145,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
memset(&help->help, 0, sizeof(help->help));
}
- rcu_assign_pointer(help->helper, helper);
+ RCU_INIT_POINTER(help->helper, helper);
out:
return ret;
}
@@ -162,7 +162,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
lockdep_is_held(&nf_conntrack_lock)
) == me) {
nf_conntrack_event(IPCT_HELPER, ct);
- rcu_assign_pointer(help->helper, NULL);
+ RCU_INIT_POINTER(help->helper, NULL);
}
return 0;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7dec88a..e58aa9b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1125,7 +1125,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
if (help && help->helper) {
/* we had a helper before ... */
nf_ct_remove_expectations(ct);
- rcu_assign_pointer(help->helper, NULL);
+ RCU_INIT_POINTER(help->helper, NULL);
}
return 0;
@@ -1163,7 +1163,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
return -EOPNOTSUPP;
}
- rcu_assign_pointer(help->helper, helper);
+ RCU_INIT_POINTER(help->helper, helper);
return 0;
}
@@ -1386,7 +1386,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
}
/* not in hash table yet so not strictly necessary */
- rcu_assign_pointer(help->helper, helper);
+ RCU_INIT_POINTER(help->helper, helper);
}
} else {
/* try an implicit helper assignation */
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 2fd4565..31d56b2 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
break;
case PPTP_WAN_ERROR_NOTIFY:
+ case PPTP_SET_LINK_INFO:
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* I don't have to explain these ;) */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 37bf943..8235b86 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb,
if (opsize < 2) /* "silly options" */
return;
if (opsize > length)
- break; /* don't parse partial options */
+ return; /* don't parse partial options */
if (opcode == TCPOPT_SACK_PERM
&& opsize == TCPOLEN_SACK_PERM)
@@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
BUG_ON(ptr == NULL);
/* Fast path for timestamp-only option */
- if (length == TCPOLEN_TSTAMP_ALIGNED*4
+ if (length == TCPOLEN_TSTAMP_ALIGNED
&& *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
@@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
if (opsize < 2) /* "silly options" */
return;
if (opsize > length)
- break; /* don't parse partial options */
+ return; /* don't parse partial options */
if (opcode == TCPOPT_SACK
&& opsize >= (TCPOLEN_SACK_BASE
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 20714ed..ce0c406 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -55,7 +55,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
llog = rcu_dereference_protected(nf_loggers[pf],
lockdep_is_held(&nf_log_mutex));
if (llog == NULL)
- rcu_assign_pointer(nf_loggers[pf], logger);
+ RCU_INIT_POINTER(nf_loggers[pf], logger);
}
mutex_unlock(&nf_log_mutex);
@@ -74,7 +74,7 @@ void nf_log_unregister(struct nf_logger *logger)
c_logger = rcu_dereference_protected(nf_loggers[i],
lockdep_is_held(&nf_log_mutex));
if (c_logger == logger)
- rcu_assign_pointer(nf_loggers[i], NULL);
+ RCU_INIT_POINTER(nf_loggers[i], NULL);
list_del(&logger->list[i]);
}
mutex_unlock(&nf_log_mutex);
@@ -92,7 +92,7 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
mutex_unlock(&nf_log_mutex);
return -ENOENT;
}
- rcu_assign_pointer(nf_loggers[pf], logger);
+ RCU_INIT_POINTER(nf_loggers[pf], logger);
mutex_unlock(&nf_log_mutex);
return 0;
}
@@ -103,7 +103,7 @@ void nf_log_unbind_pf(u_int8_t pf)
if (pf >= ARRAY_SIZE(nf_loggers))
return;
mutex_lock(&nf_log_mutex);
- rcu_assign_pointer(nf_loggers[pf], NULL);
+ RCU_INIT_POINTER(nf_loggers[pf], NULL);
mutex_unlock(&nf_log_mutex);
}
EXPORT_SYMBOL(nf_log_unbind_pf);
@@ -250,7 +250,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
mutex_unlock(&nf_log_mutex);
return -ENOENT;
}
- rcu_assign_pointer(nf_loggers[tindex], logger);
+ RCU_INIT_POINTER(nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
mutex_lock(&nf_log_mutex);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5b466cd..99ffd28 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -40,7 +40,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
else if (old)
ret = -EBUSY;
else {
- rcu_assign_pointer(queue_handler[pf], qh);
+ RCU_INIT_POINTER(queue_handler[pf], qh);
ret = 0;
}
mutex_unlock(&queue_handler_mutex);
@@ -65,7 +65,7 @@ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
return -EINVAL;
}
- rcu_assign_pointer(queue_handler[pf], NULL);
+ RCU_INIT_POINTER(queue_handler[pf], NULL);
mutex_unlock(&queue_handler_mutex);
synchronize_rcu();
@@ -84,7 +84,7 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
queue_handler[pf],
lockdep_is_held(&queue_handler_mutex)
) == qh)
- rcu_assign_pointer(queue_handler[pf], NULL);
+ RCU_INIT_POINTER(queue_handler[pf], NULL);
}
mutex_unlock(&queue_handler_mutex);
@@ -312,6 +312,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
break;
case NF_STOLEN:
+ break;
default:
kfree_skb(skb);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 1905976..c879c1a 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
nfnl_unlock();
return -EBUSY;
}
- rcu_assign_pointer(subsys_table[n->subsys_id], n);
+ RCU_INIT_POINTER(subsys_table[n->subsys_id], n);
nfnl_unlock();
return 0;
@@ -210,7 +210,7 @@ static int __net_init nfnetlink_net_init(struct net *net)
if (!nfnl)
return -ENOMEM;
net->nfnl_stash = nfnl;
- rcu_assign_pointer(net->nfnl, nfnl);
+ RCU_INIT_POINTER(net->nfnl, nfnl);
return 0;
}
@@ -219,7 +219,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
struct net *net;
list_for_each_entry(net, net_exit_list, exit_list)
- rcu_assign_pointer(net->nfnl, NULL);
+ RCU_INIT_POINTER(net->nfnl, NULL);
synchronize_net();
list_for_each_entry(net, net_exit_list, exit_list)
netlink_kernel_release(net->nfnl_stash);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 00bd475..a80b0cb 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[])
return NULL;
vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
- verdict = ntohl(vhdr->verdict);
- if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)
+ verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
+ if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
return NULL;
return vhdr;
}
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 76a0831..ed0db15 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
{
struct xt_rateest_match_info *info = par->matchinfo;
struct xt_rateest *est1, *est2;
- int ret = false;
+ int ret = -EINVAL;
if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
XT_RATEEST_MATCH_REL)) != 1)
@@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
if (!est1)
goto err1;
+ est2 = NULL;
if (info->flags & XT_RATEEST_MATCH_REL) {
est2 = xt_rateest_lookup(info->name2);
if (!est2)
goto err2;
- } else
- est2 = NULL;
-
+ }
info->est1 = est1;
info->est2 = est2;
@@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
err2:
xt_rateest_put(est1);
err1:
- return -EINVAL;
+ return ret;
}
static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index ea750e9..d2732fc 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,8 +1,6 @@
#
# Makefile for the NetLabel subsystem.
#
-# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
-#
# base objects
obj-y := netlabel_user.o netlabel_kapi.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index c051913..96b749d 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -6,7 +6,7 @@
* system manages static and dynamic label mappings for network protocols such
* as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 2b9644e..fdbc1d2 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -6,7 +6,7 @@
* system manages static and dynamic label mappings for network protocols such
* as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index dd53a36..6bf8783 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -5,7 +5,7 @@
* NetLabel system manages static and dynamic label mappings for network
* protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index af7f335..d24d774 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -5,7 +5,7 @@
* NetLabel system manages static and dynamic label mappings for network
* protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 2aa975e5..3f905e5 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -6,7 +6,7 @@
* system manages static and dynamic label mappings for network protocols such
* as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
@@ -282,7 +282,7 @@ int __init netlbl_domhsh_init(u32 size)
INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
spin_lock(&netlbl_domhsh_lock);
- rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
+ RCU_INIT_POINTER(netlbl_domhsh, hsh_tbl);
spin_unlock(&netlbl_domhsh_lock);
return 0;
@@ -330,7 +330,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
&rcu_dereference(netlbl_domhsh)->tbl[bkt]);
} else {
INIT_LIST_HEAD(&entry->list);
- rcu_assign_pointer(netlbl_domhsh_def, entry);
+ RCU_INIT_POINTER(netlbl_domhsh_def, entry);
}
if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
@@ -451,7 +451,7 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
if (entry != rcu_dereference(netlbl_domhsh_def))
list_del_rcu(&entry->list);
else
- rcu_assign_pointer(netlbl_domhsh_def, NULL);
+ RCU_INIT_POINTER(netlbl_domhsh_def, NULL);
} else
ret_val = -ENOENT;
spin_unlock(&netlbl_domhsh_lock);
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 0261dda..bfcc0f7 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -6,7 +6,7 @@
* system manages static and dynamic label mappings for network protocols such
* as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index b528dd9..9c24de10 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -5,7 +5,7 @@
* system manages static and dynamic label mappings for network protocols such
* as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
@@ -341,11 +341,11 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL)
- return -ENOMEM;
+ goto out_entry;
if (domain != NULL) {
entry->domain = kstrdup(domain, GFP_ATOMIC);
if (entry->domain == NULL)
- goto cfg_cipsov4_map_add_failure;
+ goto out_domain;
}
if (addr == NULL && mask == NULL) {
@@ -354,13 +354,13 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
} else if (addr != NULL && mask != NULL) {
addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
if (addrmap == NULL)
- goto cfg_cipsov4_map_add_failure;
+ goto out_addrmap;
INIT_LIST_HEAD(&addrmap->list4);
INIT_LIST_HEAD(&addrmap->list6);
addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC);
if (addrinfo == NULL)
- goto cfg_cipsov4_map_add_failure;
+ goto out_addrinfo;
addrinfo->type_def.cipsov4 = doi_def;
addrinfo->type = NETLBL_NLTYPE_CIPSOV4;
addrinfo->list.addr = addr->s_addr & mask->s_addr;
@@ -374,7 +374,7 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
entry->type = NETLBL_NLTYPE_ADDRSELECT;
} else {
ret_val = -EINVAL;
- goto cfg_cipsov4_map_add_failure;
+ goto out_addrmap;
}
ret_val = netlbl_domhsh_add(entry, audit_info);
@@ -384,11 +384,15 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
return 0;
cfg_cipsov4_map_add_failure:
- cipso_v4_doi_putdef(doi_def);
+ kfree(addrinfo);
+out_addrinfo:
+ kfree(addrmap);
+out_addrmap:
kfree(entry->domain);
+out_domain:
kfree(entry);
- kfree(addrmap);
- kfree(addrinfo);
+out_entry:
+ cipso_v4_doi_putdef(doi_def);
return ret_val;
}
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index dff8a08..bfa5558 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -5,7 +5,7 @@
* NetLabel system manages static and dynamic label mappings for network
* protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index 8db37f4..5a9f31c 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -5,7 +5,7 @@
* NetLabel system manages static and dynamic label mappings for network
* protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index f1ecf84..e251c2c 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -5,7 +5,7 @@
* NetLabel system. The NetLabel system manages static and dynamic label
* mappings for network protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
@@ -354,7 +354,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex)
INIT_LIST_HEAD(&iface->list);
if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL)
goto add_iface_failure;
- rcu_assign_pointer(netlbl_unlhsh_def, iface);
+ RCU_INIT_POINTER(netlbl_unlhsh_def, iface);
}
spin_unlock(&netlbl_unlhsh_lock);
@@ -621,7 +621,7 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
if (iface->ifindex > 0)
list_del_rcu(&iface->list);
else
- rcu_assign_pointer(netlbl_unlhsh_def, NULL);
+ RCU_INIT_POINTER(netlbl_unlhsh_def, NULL);
spin_unlock(&netlbl_unlhsh_lock);
call_rcu(&iface->rcu, netlbl_unlhsh_free_iface);
@@ -1449,7 +1449,7 @@ int __init netlbl_unlabel_init(u32 size)
rcu_read_lock();
spin_lock(&netlbl_unlhsh_lock);
- rcu_assign_pointer(netlbl_unlhsh, hsh_tbl);
+ RCU_INIT_POINTER(netlbl_unlhsh, hsh_tbl);
spin_unlock(&netlbl_unlhsh_lock);
rcu_read_unlock();
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index 0bc8dc3..700af49 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -5,7 +5,7 @@
* NetLabel system. The NetLabel system manages static and dynamic label
* mappings for network protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index a3fd75a..9fae63f 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -5,7 +5,7 @@
* NetLabel system manages static and dynamic label mappings for network
* protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index f4fc4c9..8196978 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -5,7 +5,7 @@
* NetLabel system manages static and dynamic label mappings for network
* protocols such as CIPSO and RIPSO.
*
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
*
*/
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0a4db02..1201b6d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
- if (NULL == siocb->scm) {
+ if (NULL == siocb->scm)
siocb->scm = &scm;
- memset(&scm, 0, sizeof(scm));
- }
+
err = scm_send(sock, msg, siocb->scm);
if (err < 0)
return err;
@@ -1578,7 +1577,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
if (!new)
return -ENOMEM;
- old = rcu_dereference_raw(tbl->listeners);
+ old = rcu_dereference_protected(tbl->listeners, 1);
memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
rcu_assign_pointer(tbl->listeners, new);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c698cec..7b5f032 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -40,6 +40,10 @@
* byte arrays at the end of sockaddr_ll
* and packet_mreq.
* Johann Baudy : Added TX RING.
+ * Chetan Loke : Implemented TPACKET_V3 block abstraction
+ * layer.
+ * Copyright (C) 2011, <lokec@ccs.neu.edu>
+ *
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -161,9 +165,56 @@ struct packet_mreq_max {
unsigned char mr_address[MAX_ADDR_LEN];
};
-static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
+static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
int closing, int tx_ring);
+
+#define V3_ALIGNMENT (8)
+
+#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
+
+#define BLK_PLUS_PRIV(sz_of_priv) \
+ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
+
+/* kbdq - kernel block descriptor queue */
+struct tpacket_kbdq_core {
+ struct pgv *pkbdq;
+ unsigned int feature_req_word;
+ unsigned int hdrlen;
+ unsigned char reset_pending_on_curr_blk;
+ unsigned char delete_blk_timer;
+ unsigned short kactive_blk_num;
+ unsigned short blk_sizeof_priv;
+
+ /* last_kactive_blk_num:
+ * trick to see if user-space has caught up
+ * in order to avoid refreshing timer when every single pkt arrives.
+ */
+ unsigned short last_kactive_blk_num;
+
+ char *pkblk_start;
+ char *pkblk_end;
+ int kblk_size;
+ unsigned int knum_blocks;
+ uint64_t knxt_seq_num;
+ char *prev;
+ char *nxt_offset;
+ struct sk_buff *skb;
+
+ atomic_t blk_fill_in_prog;
+
+ /* Default is set to 8ms */
+#define DEFAULT_PRB_RETIRE_TOV (8)
+
+ unsigned short retire_blk_tov;
+ unsigned short version;
+ unsigned long tov_in_jiffies;
+
+ /* timer to retire an outstanding block */
+ struct timer_list retire_blk_timer;
+};
+
+#define PGV_FROM_VMALLOC 1
struct pgv {
char *buffer;
};
@@ -179,12 +230,44 @@ struct packet_ring_buffer {
unsigned int pg_vec_pages;
unsigned int pg_vec_len;
+ struct tpacket_kbdq_core prb_bdqc;
atomic_t pending;
};
+#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
+#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
+#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
+#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
+#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
+#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
+#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
+
struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
+static void *packet_previous_frame(struct packet_sock *po,
+ struct packet_ring_buffer *rb,
+ int status);
+static void packet_increment_head(struct packet_ring_buffer *buff);
+static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
+ struct tpacket_block_desc *);
+static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
+ struct packet_sock *);
+static void prb_retire_current_block(struct tpacket_kbdq_core *,
+ struct packet_sock *, unsigned int status);
+static int prb_queue_frozen(struct tpacket_kbdq_core *);
+static void prb_open_block(struct tpacket_kbdq_core *,
+ struct tpacket_block_desc *);
+static void prb_retire_rx_blk_timer_expired(unsigned long);
+static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
+static void prb_init_blk_timer(struct packet_sock *,
+ struct tpacket_kbdq_core *,
+ void (*func) (unsigned long));
+static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
+static void prb_clear_rxhash(struct tpacket_kbdq_core *,
+ struct tpacket3_hdr *);
+static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
+ struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
struct packet_fanout;
@@ -193,6 +276,7 @@ struct packet_sock {
struct sock sk;
struct packet_fanout *fanout;
struct tpacket_stats stats;
+ union tpacket_stats_u stats_u;
struct packet_ring_buffer rx_ring;
struct packet_ring_buffer tx_ring;
int copy_thresh;
@@ -242,6 +326,15 @@ struct packet_skb_cb {
#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
+#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
+#define GET_PBLOCK_DESC(x, bid) \
+ ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
+#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
+ ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
+#define GET_NEXT_PRB_BLK_NUM(x) \
+ (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
+ ((x)->kactive_blk_num+1) : 0)
+
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
return (struct packet_sock *)sk;
@@ -325,8 +418,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
h.h2->tp_status = status;
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
+ case TPACKET_V3:
default:
- pr_err("TPACKET version not supported\n");
+ WARN(1, "TPACKET version not supported.\n");
BUG();
}
@@ -351,8 +445,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
return h.h2->tp_status;
+ case TPACKET_V3:
default:
- pr_err("TPACKET version not supported\n");
+ WARN(1, "TPACKET version not supported.\n");
BUG();
return 0;
}
@@ -389,6 +484,670 @@ static inline void *packet_current_frame(struct packet_sock *po,
return packet_lookup_frame(po, rb, rb->head, status);
}
+static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
+{
+ del_timer_sync(&pkc->retire_blk_timer);
+}
+
+static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
+ int tx_ring,
+ struct sk_buff_head *rb_queue)
+{
+ struct tpacket_kbdq_core *pkc;
+
+ pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
+
+ spin_lock(&rb_queue->lock);
+ pkc->delete_blk_timer = 1;
+ spin_unlock(&rb_queue->lock);
+
+ prb_del_retire_blk_timer(pkc);
+}
+
+static void prb_init_blk_timer(struct packet_sock *po,
+ struct tpacket_kbdq_core *pkc,
+ void (*func) (unsigned long))
+{
+ init_timer(&pkc->retire_blk_timer);
+ pkc->retire_blk_timer.data = (long)po;
+ pkc->retire_blk_timer.function = func;
+ pkc->retire_blk_timer.expires = jiffies;
+}
+
+static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
+{
+ struct tpacket_kbdq_core *pkc;
+
+ if (tx_ring)
+ BUG();
+
+ pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
+ prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
+}
+
+static int prb_calc_retire_blk_tmo(struct packet_sock *po,
+ int blk_size_in_bytes)
+{
+ struct net_device *dev;
+ unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
+ struct ethtool_cmd ecmd;
+ int err;
+
+ rtnl_lock();
+ dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
+ if (unlikely(!dev)) {
+ rtnl_unlock();
+ return DEFAULT_PRB_RETIRE_TOV;
+ }
+ err = __ethtool_get_settings(dev, &ecmd);
+ rtnl_unlock();
+ if (!err) {
+ switch (ecmd.speed) {
+ case SPEED_10000:
+ msec = 1;
+ div = 10000/1000;
+ break;
+ case SPEED_1000:
+ msec = 1;
+ div = 1000/1000;
+ break;
+ /*
+ * If the link speed is so slow you don't really
+ * need to worry about perf anyways
+ */
+ case SPEED_100:
+ case SPEED_10:
+ default:
+ return DEFAULT_PRB_RETIRE_TOV;
+ }
+ }
+
+ mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
+
+ if (div)
+ mbits /= div;
+
+ tmo = mbits * msec;
+
+ if (div)
+ return tmo+1;
+ return tmo;
+}
+
+static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
+ union tpacket_req_u *req_u)
+{
+ p1->feature_req_word = req_u->req3.tp_feature_req_word;
+}
+
+static void init_prb_bdqc(struct packet_sock *po,
+ struct packet_ring_buffer *rb,
+ struct pgv *pg_vec,
+ union tpacket_req_u *req_u, int tx_ring)
+{
+ struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
+ struct tpacket_block_desc *pbd;
+
+ memset(p1, 0x0, sizeof(*p1));
+
+ p1->knxt_seq_num = 1;
+ p1->pkbdq = pg_vec;
+ pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
+ p1->pkblk_start = (char *)pg_vec[0].buffer;
+ p1->kblk_size = req_u->req3.tp_block_size;
+ p1->knum_blocks = req_u->req3.tp_block_nr;
+ p1->hdrlen = po->tp_hdrlen;
+ p1->version = po->tp_version;
+ p1->last_kactive_blk_num = 0;
+ po->stats_u.stats3.tp_freeze_q_cnt = 0;
+ if (req_u->req3.tp_retire_blk_tov)
+ p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
+ else
+ p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
+ req_u->req3.tp_block_size);
+ p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
+ p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
+
+ prb_init_ft_ops(p1, req_u);
+ prb_setup_retire_blk_timer(po, tx_ring);
+ prb_open_block(p1, pbd);
+}
+
+/* Do NOT update the last_blk_num first.
+ * Assumes sk_buff_head lock is held.
+ */
+static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
+{
+ mod_timer(&pkc->retire_blk_timer,
+ jiffies + pkc->tov_in_jiffies);
+ pkc->last_kactive_blk_num = pkc->kactive_blk_num;
+}
+
+/*
+ * Timer logic:
+ * 1) We refresh the timer only when we open a block.
+ * By doing this we don't waste cycles refreshing the timer
+ * on packet-by-packet basis.
+ *
+ * With a 1MB block-size, on a 1Gbps line, it will take
+ * i) ~8 ms to fill a block + ii) memcpy etc.
+ * In this cut we are not accounting for the memcpy time.
+ *
+ * So, if the user sets the 'tmo' to 10ms then the timer
+ * will never fire while the block is still getting filled
+ * (which is what we want). However, the user could choose
+ * to close a block early and that's fine.
+ *
+ * But when the timer does fire, we check whether or not to refresh it.
+ * Since the tmo granularity is in msecs, it is not too expensive
+ * to refresh the timer, lets say every '8' msecs.
+ * Either the user can set the 'tmo' or we can derive it based on
+ * a) line-speed and b) block-size.
+ * prb_calc_retire_blk_tmo() calculates the tmo.
+ *
+ */
+static void prb_retire_rx_blk_timer_expired(unsigned long data)
+{
+ struct packet_sock *po = (struct packet_sock *)data;
+ struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
+ unsigned int frozen;
+ struct tpacket_block_desc *pbd;
+
+ spin_lock(&po->sk.sk_receive_queue.lock);
+
+ frozen = prb_queue_frozen(pkc);
+ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+ if (unlikely(pkc->delete_blk_timer))
+ goto out;
+
+ /* We only need to plug the race when the block is partially filled.
+ * tpacket_rcv:
+ * lock(); increment BLOCK_NUM_PKTS; unlock()
+ * copy_bits() is in progress ...
+ * timer fires on other cpu:
+ * we can't retire the current block because copy_bits
+ * is in progress.
+ *
+ */
+ if (BLOCK_NUM_PKTS(pbd)) {
+ while (atomic_read(&pkc->blk_fill_in_prog)) {
+ /* Waiting for skb_copy_bits to finish... */
+ cpu_relax();
+ }
+ }
+
+ if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
+ if (!frozen) {
+ prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
+ if (!prb_dispatch_next_block(pkc, po))
+ goto refresh_timer;
+ else
+ goto out;
+ } else {
+ /* Case 1. Queue was frozen because user-space was
+ * lagging behind.
+ */
+ if (prb_curr_blk_in_use(pkc, pbd)) {
+ /*
+ * Ok, user-space is still behind.
+ * So just refresh the timer.
+ */
+ goto refresh_timer;
+ } else {
+ /* Case 2. queue was frozen,user-space caught up,
+ * now the link went idle && the timer fired.
+ * We don't have a block to close.So we open this
+ * block and restart the timer.
+ * opening a block thaws the queue,restarts timer
+ * Thawing/timer-refresh is a side effect.
+ */
+ prb_open_block(pkc, pbd);
+ goto out;
+ }
+ }
+ }
+
+refresh_timer:
+ _prb_refresh_rx_retire_blk_timer(pkc);
+
+out:
+ spin_unlock(&po->sk.sk_receive_queue.lock);
+}
+
+static inline void prb_flush_block(struct tpacket_kbdq_core *pkc1,
+ struct tpacket_block_desc *pbd1, __u32 status)
+{
+ /* Flush everything minus the block header */
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+ u8 *start, *end;
+
+ start = (u8 *)pbd1;
+
+ /* Skip the block header(we know header WILL fit in 4K) */
+ start += PAGE_SIZE;
+
+ end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
+ for (; start < end; start += PAGE_SIZE)
+ flush_dcache_page(pgv_to_page(start));
+
+ smp_wmb();
+#endif
+
+ /* Now update the block status. */
+
+ BLOCK_STATUS(pbd1) = status;
+
+ /* Flush the block header */
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+ start = (u8 *)pbd1;
+ flush_dcache_page(pgv_to_page(start));
+
+ smp_wmb();
+#endif
+}
+
+/*
+ * Side effect:
+ *
+ * 1) flush the block
+ * 2) Increment active_blk_num
+ *
+ * Note:We DONT refresh the timer on purpose.
+ * Because almost always the next block will be opened.
+ */
+static void prb_close_block(struct tpacket_kbdq_core *pkc1,
+ struct tpacket_block_desc *pbd1,
+ struct packet_sock *po, unsigned int stat)
+{
+ __u32 status = TP_STATUS_USER | stat;
+
+ struct tpacket3_hdr *last_pkt;
+ struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
+
+ if (po->stats.tp_drops)
+ status |= TP_STATUS_LOSING;
+
+ last_pkt = (struct tpacket3_hdr *)pkc1->prev;
+ last_pkt->tp_next_offset = 0;
+
+ /* Get the ts of the last pkt */
+ if (BLOCK_NUM_PKTS(pbd1)) {
+ h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
+ h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
+ } else {
+ /* Ok, we tmo'd - so get the current time */
+ struct timespec ts;
+ getnstimeofday(&ts);
+ h1->ts_last_pkt.ts_sec = ts.tv_sec;
+ h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
+ }
+
+ smp_wmb();
+
+ /* Flush the block */
+ prb_flush_block(pkc1, pbd1, status);
+
+ pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
+}
+
+static inline void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
+{
+ pkc->reset_pending_on_curr_blk = 0;
+}
+
+/*
+ * Side effect of opening a block:
+ *
+ * 1) prb_queue is thawed.
+ * 2) retire_blk_timer is refreshed.
+ *
+ */
+static void prb_open_block(struct tpacket_kbdq_core *pkc1,
+ struct tpacket_block_desc *pbd1)
+{
+ struct timespec ts;
+ struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
+
+ smp_rmb();
+
+ if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
+
+ /* We could have just memset this but we will lose the
+ * flexibility of making the priv area sticky
+ */
+ BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
+ BLOCK_NUM_PKTS(pbd1) = 0;
+ BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+ getnstimeofday(&ts);
+ h1->ts_first_pkt.ts_sec = ts.tv_sec;
+ h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
+ pkc1->pkblk_start = (char *)pbd1;
+ pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
+ BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
+ BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+ BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
+ pbd1->version = pkc1->version;
+ pkc1->prev = pkc1->nxt_offset;
+ pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
+ prb_thaw_queue(pkc1);
+ _prb_refresh_rx_retire_blk_timer(pkc1);
+
+ smp_wmb();
+
+ return;
+ }
+
+ WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
+ pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
+ dump_stack();
+ BUG();
+}
+
+/*
+ * Queue freeze logic:
+ * 1) Assume tp_block_nr = 8 blocks.
+ * 2) At time 't0', user opens Rx ring.
+ * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
+ * 4) user-space is either sleeping or processing block '0'.
+ * 5) tpacket_rcv is currently filling block '7', since there is no space left,
+ * it will close block-7,loop around and try to fill block '0'.
+ * call-flow:
+ * __packet_lookup_frame_in_block
+ * prb_retire_current_block()
+ * prb_dispatch_next_block()
+ * |->(BLOCK_STATUS == USER) evaluates to true
+ * 5.1) Since block-0 is currently in-use, we just freeze the queue.
+ * 6) Now there are two cases:
+ * 6.1) Link goes idle right after the queue is frozen.
+ * But remember, the last open_block() refreshed the timer.
+ * When this timer expires,it will refresh itself so that we can
+ * re-open block-0 in near future.
+ * 6.2) Link is busy and keeps on receiving packets. This is a simple
+ * case and __packet_lookup_frame_in_block will check if block-0
+ * is free and can now be re-used.
+ */
+static inline void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
+ struct packet_sock *po)
+{
+ pkc->reset_pending_on_curr_blk = 1;
+ po->stats_u.stats3.tp_freeze_q_cnt++;
+}
+
+#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
+
+/*
+ * If the next block is free then we will dispatch it
+ * and return a good offset.
+ * Else, we will freeze the queue.
+ * So, caller must check the return value.
+ */
+static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
+ struct packet_sock *po)
+{
+ struct tpacket_block_desc *pbd;
+
+ smp_rmb();
+
+ /* 1. Get current block num */
+ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+ /* 2. If this block is currently in_use then freeze the queue */
+ if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
+ prb_freeze_queue(pkc, po);
+ return NULL;
+ }
+
+ /*
+ * 3.
+ * open this block and return the offset where the first packet
+ * needs to get stored.
+ */
+ prb_open_block(pkc, pbd);
+ return (void *)pkc->nxt_offset;
+}
+
+static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
+ struct packet_sock *po, unsigned int status)
+{
+ struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+ /* retire/close the current block */
+ if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
+ /*
+ * Plug the case where copy_bits() is in progress on
+ * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
+ * have space to copy the pkt in the current block and
+ * called prb_retire_current_block()
+ *
+ * We don't need to worry about the TMO case because
+ * the timer-handler already handled this case.
+ */
+ if (!(status & TP_STATUS_BLK_TMO)) {
+ while (atomic_read(&pkc->blk_fill_in_prog)) {
+ /* Waiting for skb_copy_bits to finish... */
+ cpu_relax();
+ }
+ }
+ prb_close_block(pkc, pbd, po, status);
+ return;
+ }
+
+ WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
+ dump_stack();
+ BUG();
+}
+
+static inline int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
+ struct tpacket_block_desc *pbd)
+{
+ return TP_STATUS_USER & BLOCK_STATUS(pbd);
+}
+
+static inline int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
+{
+ return pkc->reset_pending_on_curr_blk;
+}
+
+static inline void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
+{
+ struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
+ atomic_dec(&pkc->blk_fill_in_prog);
+}
+
+static inline void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
+ struct tpacket3_hdr *ppd)
+{
+ ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
+}
+
+static inline void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
+ struct tpacket3_hdr *ppd)
+{
+ ppd->hv1.tp_rxhash = 0;
+}
+
+static inline void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
+ struct tpacket3_hdr *ppd)
+{
+ if (vlan_tx_tag_present(pkc->skb)) {
+ ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
+ ppd->tp_status = TP_STATUS_VLAN_VALID;
+ } else {
+ ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
+ }
+}
+
+static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
+ struct tpacket3_hdr *ppd)
+{
+ prb_fill_vlan_info(pkc, ppd);
+
+ if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
+ prb_fill_rxhash(pkc, ppd);
+ else
+ prb_clear_rxhash(pkc, ppd);
+}
+
+static inline void prb_fill_curr_block(char *curr,
+ struct tpacket_kbdq_core *pkc,
+ struct tpacket_block_desc *pbd,
+ unsigned int len)
+{
+ struct tpacket3_hdr *ppd;
+
+ ppd = (struct tpacket3_hdr *)curr;
+ ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
+ pkc->prev = curr;
+ pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
+ BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
+ BLOCK_NUM_PKTS(pbd) += 1;
+ atomic_inc(&pkc->blk_fill_in_prog);
+ prb_run_all_ft_ops(pkc, ppd);
+}
+
+/* Assumes caller has the sk->rx_queue.lock */
+static void *__packet_lookup_frame_in_block(struct packet_sock *po,
+ struct sk_buff *skb,
+ int status,
+ unsigned int len
+ )
+{
+ struct tpacket_kbdq_core *pkc;
+ struct tpacket_block_desc *pbd;
+ char *curr, *end;
+
+ pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring));
+ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+
+ /* Queue is frozen when user space is lagging behind */
+ if (prb_queue_frozen(pkc)) {
+ /*
+ * Check if that last block which caused the queue to freeze,
+ * is still in_use by user-space.
+ */
+ if (prb_curr_blk_in_use(pkc, pbd)) {
+ /* Can't record this packet */
+ return NULL;
+ } else {
+ /*
+ * Ok, the block was released by user-space.
+ * Now let's open that block.
+ * opening a block also thaws the queue.
+ * Thawing is a side effect.
+ */
+ prb_open_block(pkc, pbd);
+ }
+ }
+
+ smp_mb();
+ curr = pkc->nxt_offset;
+ pkc->skb = skb;
+ end = (char *) ((char *)pbd + pkc->kblk_size);
+
+ /* first try the current block */
+ if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
+ prb_fill_curr_block(curr, pkc, pbd, len);
+ return (void *)curr;
+ }
+
+ /* Ok, close the current block */
+ prb_retire_current_block(pkc, po, 0);
+
+ /* Now, try to dispatch the next block */
+ curr = (char *)prb_dispatch_next_block(pkc, po);
+ if (curr) {
+ pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
+ prb_fill_curr_block(curr, pkc, pbd, len);
+ return (void *)curr;
+ }
+
+ /*
+ * No free blocks are available.user_space hasn't caught up yet.
+ * Queue was just frozen and now this packet will get dropped.
+ */
+ return NULL;
+}
+
+static inline void *packet_current_rx_frame(struct packet_sock *po,
+ struct sk_buff *skb,
+ int status, unsigned int len)
+{
+ char *curr = NULL;
+ switch (po->tp_version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ curr = packet_lookup_frame(po, &po->rx_ring,
+ po->rx_ring.head, status);
+ return curr;
+ case TPACKET_V3:
+ return __packet_lookup_frame_in_block(po, skb, status, len);
+ default:
+ WARN(1, "TPACKET version not supported\n");
+ BUG();
+ return 0;
+ }
+}
+
+static inline void *prb_lookup_block(struct packet_sock *po,
+ struct packet_ring_buffer *rb,
+ unsigned int previous,
+ int status)
+{
+ struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
+ struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
+
+ if (status != BLOCK_STATUS(pbd))
+ return NULL;
+ return pbd;
+}
+
+static inline int prb_previous_blk_num(struct packet_ring_buffer *rb)
+{
+ unsigned int prev;
+ if (rb->prb_bdqc.kactive_blk_num)
+ prev = rb->prb_bdqc.kactive_blk_num-1;
+ else
+ prev = rb->prb_bdqc.knum_blocks-1;
+ return prev;
+}
+
+/* Assumes caller has held the rx_queue.lock */
+static inline void *__prb_previous_block(struct packet_sock *po,
+ struct packet_ring_buffer *rb,
+ int status)
+{
+ unsigned int previous = prb_previous_blk_num(rb);
+ return prb_lookup_block(po, rb, previous, status);
+}
+
+static inline void *packet_previous_rx_frame(struct packet_sock *po,
+ struct packet_ring_buffer *rb,
+ int status)
+{
+ if (po->tp_version <= TPACKET_V2)
+ return packet_previous_frame(po, rb, status);
+
+ return __prb_previous_block(po, rb, status);
+}
+
+static inline void packet_increment_rx_head(struct packet_sock *po,
+ struct packet_ring_buffer *rb)
+{
+ switch (po->tp_version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ return packet_increment_head(rb);
+ case TPACKET_V3:
+ default:
+ WARN(1, "TPACKET version not supported.\n");
+ BUG();
+ return;
+ }
+}
+
static inline void *packet_previous_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
int status)
@@ -961,7 +1720,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
drop_n_acct:
- po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
+ spin_lock(&sk->sk_receive_queue.lock);
+ po->stats.tp_drops++;
+ atomic_inc(&sk->sk_drops);
+ spin_unlock(&sk->sk_receive_queue.lock);
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -982,12 +1744,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
union {
struct tpacket_hdr *h1;
struct tpacket2_hdr *h2;
+ struct tpacket3_hdr *h3;
void *raw;
} h;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
- unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
+ unsigned long status = TP_STATUS_USER;
unsigned short macoff, netoff, hdrlen;
struct sk_buff *copy_skb = NULL;
struct timeval tv;
@@ -1033,37 +1796,46 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
po->tp_reserve;
macoff = netoff - maclen;
}
-
- if (macoff + snaplen > po->rx_ring.frame_size) {
- if (po->copy_thresh &&
- atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
- (unsigned)sk->sk_rcvbuf) {
- if (skb_shared(skb)) {
- copy_skb = skb_clone(skb, GFP_ATOMIC);
- } else {
- copy_skb = skb_get(skb);
- skb_head = skb->data;
+ if (po->tp_version <= TPACKET_V2) {
+ if (macoff + snaplen > po->rx_ring.frame_size) {
+ if (po->copy_thresh &&
+ atomic_read(&sk->sk_rmem_alloc) + skb->truesize
+ < (unsigned)sk->sk_rcvbuf) {
+ if (skb_shared(skb)) {
+ copy_skb = skb_clone(skb, GFP_ATOMIC);
+ } else {
+ copy_skb = skb_get(skb);
+ skb_head = skb->data;
+ }
+ if (copy_skb)
+ skb_set_owner_r(copy_skb, sk);
}
- if (copy_skb)
- skb_set_owner_r(copy_skb, sk);
+ snaplen = po->rx_ring.frame_size - macoff;
+ if ((int)snaplen < 0)
+ snaplen = 0;
}
- snaplen = po->rx_ring.frame_size - macoff;
- if ((int)snaplen < 0)
- snaplen = 0;
}
-
spin_lock(&sk->sk_receive_queue.lock);
- h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
+ h.raw = packet_current_rx_frame(po, skb,
+ TP_STATUS_KERNEL, (macoff+snaplen));
if (!h.raw)
goto ring_is_full;
- packet_increment_head(&po->rx_ring);
+ if (po->tp_version <= TPACKET_V2) {
+ packet_increment_rx_head(po, &po->rx_ring);
+ /*
+ * LOSING will be reported till you read the stats,
+ * because it's COR - Clear On Read.
+ * Anyways, moving it for V1/V2 only as V3 doesn't need this
+ * at packet level.
+ */
+ if (po->stats.tp_drops)
+ status |= TP_STATUS_LOSING;
+ }
po->stats.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
}
- if (!po->stats.tp_drops)
- status &= ~TP_STATUS_LOSING;
spin_unlock(&sk->sk_receive_queue.lock);
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
@@ -1114,6 +1886,29 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.h2->tp_padding = 0;
hdrlen = sizeof(*h.h2);
break;
+ case TPACKET_V3:
+ /* tp_nxt_offset,vlan are already populated above.
+ * So DONT clear those fields here
+ */
+ h.h3->tp_status |= status;
+ h.h3->tp_len = skb->len;
+ h.h3->tp_snaplen = snaplen;
+ h.h3->tp_mac = macoff;
+ h.h3->tp_net = netoff;
+ if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+ && shhwtstamps->syststamp.tv64)
+ ts = ktime_to_timespec(shhwtstamps->syststamp);
+ else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+ && shhwtstamps->hwtstamp.tv64)
+ ts = ktime_to_timespec(shhwtstamps->hwtstamp);
+ else if (skb->tstamp.tv64)
+ ts = ktime_to_timespec(skb->tstamp);
+ else
+ getnstimeofday(&ts);
+ h.h3->tp_sec = ts.tv_sec;
+ h.h3->tp_nsec = ts.tv_nsec;
+ hdrlen = sizeof(*h.h3);
+ break;
default:
BUG();
}
@@ -1134,13 +1929,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
{
u8 *start, *end;
- end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
- for (start = h.raw; start < end; start += PAGE_SIZE)
- flush_dcache_page(pgv_to_page(start));
+ if (po->tp_version <= TPACKET_V2) {
+ end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
+ + macoff + snaplen);
+ for (start = h.raw; start < end; start += PAGE_SIZE)
+ flush_dcache_page(pgv_to_page(start));
+ }
smp_wmb();
}
#endif
- __packet_set_status(po, h.raw, status);
+ if (po->tp_version <= TPACKET_V2)
+ __packet_set_status(po, h.raw, status);
+ else
+ prb_clear_blk_fill_status(&po->rx_ring);
sk->sk_data_ready(sk, 0);
@@ -1167,8 +1968,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
struct packet_sock *po = pkt_sk(skb->sk);
void *ph;
- BUG_ON(skb == NULL);
-
if (likely(po->tx_ring.pg_vec)) {
ph = skb_shinfo(skb)->destructor_arg;
BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
@@ -1631,7 +2430,7 @@ static int packet_release(struct socket *sock)
struct sock *sk = sock->sk;
struct packet_sock *po;
struct net *net;
- struct tpacket_req req;
+ union tpacket_req_u req_u;
if (!sk)
return 0;
@@ -1654,13 +2453,13 @@ static int packet_release(struct socket *sock)
packet_flush_mclist(sk);
- memset(&req, 0, sizeof(req));
+ memset(&req_u, 0, sizeof(req_u));
if (po->rx_ring.pg_vec)
- packet_set_ring(sk, &req, 1, 0);
+ packet_set_ring(sk, &req_u, 1, 0);
if (po->tx_ring.pg_vec)
- packet_set_ring(sk, &req, 1, 1);
+ packet_set_ring(sk, &req_u, 1, 1);
fanout_release(sk);
@@ -2280,15 +3079,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
case PACKET_RX_RING:
case PACKET_TX_RING:
{
- struct tpacket_req req;
+ union tpacket_req_u req_u;
+ int len;
- if (optlen < sizeof(req))
+ switch (po->tp_version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ len = sizeof(req_u.req);
+ break;
+ case TPACKET_V3:
+ default:
+ len = sizeof(req_u.req3);
+ break;
+ }
+ if (optlen < len)
return -EINVAL;
if (pkt_sk(sk)->has_vnet_hdr)
return -EINVAL;
- if (copy_from_user(&req, optval, sizeof(req)))
+ if (copy_from_user(&req_u.req, optval, len))
return -EFAULT;
- return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
+ return packet_set_ring(sk, &req_u, 0,
+ optname == PACKET_TX_RING);
}
case PACKET_COPY_THRESH:
{
@@ -2315,6 +3126,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
switch (val) {
case TPACKET_V1:
case TPACKET_V2:
+ case TPACKET_V3:
po->tp_version = val;
return 0;
default:
@@ -2424,6 +3236,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
struct packet_sock *po = pkt_sk(sk);
void *data;
struct tpacket_stats st;
+ union tpacket_stats_u st_u;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -2436,15 +3249,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case PACKET_STATISTICS:
- if (len > sizeof(struct tpacket_stats))
- len = sizeof(struct tpacket_stats);
+ if (po->tp_version == TPACKET_V3) {
+ len = sizeof(struct tpacket_stats_v3);
+ } else {
+ if (len > sizeof(struct tpacket_stats))
+ len = sizeof(struct tpacket_stats);
+ }
spin_lock_bh(&sk->sk_receive_queue.lock);
- st = po->stats;
+ if (po->tp_version == TPACKET_V3) {
+ memcpy(&st_u.stats3, &po->stats,
+ sizeof(struct tpacket_stats));
+ st_u.stats3.tp_freeze_q_cnt =
+ po->stats_u.stats3.tp_freeze_q_cnt;
+ st_u.stats3.tp_packets += po->stats.tp_drops;
+ data = &st_u.stats3;
+ } else {
+ st = po->stats;
+ st.tp_packets += st.tp_drops;
+ data = &st;
+ }
memset(&po->stats, 0, sizeof(st));
spin_unlock_bh(&sk->sk_receive_queue.lock);
- st.tp_packets += st.tp_drops;
-
- data = &st;
break;
case PACKET_AUXDATA:
if (len > sizeof(int))
@@ -2485,6 +3310,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
case TPACKET_V2:
val = sizeof(struct tpacket2_hdr);
break;
+ case TPACKET_V3:
+ val = sizeof(struct tpacket3_hdr);
+ break;
default:
return -EINVAL;
}
@@ -2641,7 +3469,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->rx_ring.pg_vec) {
- if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
+ if (!packet_previous_rx_frame(po, &po->rx_ring,
+ TP_STATUS_KERNEL))
mask |= POLLIN | POLLRDNORM;
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -2760,7 +3589,7 @@ out_free_pgvec:
goto out;
}
-static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
+static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
int closing, int tx_ring)
{
struct pgv *pg_vec = NULL;
@@ -2769,7 +3598,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
struct packet_ring_buffer *rb;
struct sk_buff_head *rb_queue;
__be16 num;
- int err;
+ int err = -EINVAL;
+ /* Added to avoid minimal code churn */
+ struct tpacket_req *req = &req_u->req;
+
+ /* Opening a Tx-ring is NOT supported in TPACKET_V3 */
+ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
+ WARN(1, "Tx-ring is not supported.\n");
+ goto out;
+ }
rb = tx_ring ? &po->tx_ring : &po->rx_ring;
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
@@ -2795,6 +3632,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
case TPACKET_V2:
po->tp_hdrlen = TPACKET2_HDRLEN;
break;
+ case TPACKET_V3:
+ po->tp_hdrlen = TPACKET3_HDRLEN;
+ break;
}
err = -EINVAL;
@@ -2820,6 +3660,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
pg_vec = alloc_pg_vec(req, order);
if (unlikely(!pg_vec))
goto out;
+ switch (po->tp_version) {
+ case TPACKET_V3:
+ /* Transmit path is not supported. We checked
+ * it above but just being paranoid
+ */
+ if (!tx_ring)
+ init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
+ break;
+ default:
+ break;
+ }
}
/* Done */
else {
@@ -2872,7 +3723,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
-
+ if (closing && (po->tp_version > TPACKET_V2)) {
+ /* Because we don't support block-based V3 on tx-ring */
+ if (!tx_ring)
+ prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
+ }
release_sock(sk);
if (pg_vec)
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index c6fffd9..bf10ea8 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -480,7 +480,7 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
if (proto_tab[protocol])
err = -EBUSY;
else
- rcu_assign_pointer(proto_tab[protocol], pp);
+ RCU_INIT_POINTER(proto_tab[protocol], pp);
mutex_unlock(&proto_tab_lock);
return err;
@@ -491,7 +491,7 @@ void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
{
mutex_lock(&proto_tab_lock);
BUG_ON(proto_tab[protocol] != pp);
- rcu_assign_pointer(proto_tab[protocol], NULL);
+ RCU_INIT_POINTER(proto_tab[protocol], NULL);
mutex_unlock(&proto_tab_lock);
synchronize_rcu();
proto_unregister(pp->prot);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index d2df8f3..c582761 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -276,7 +276,7 @@ static void phonet_route_autodel(struct net_device *dev)
mutex_lock(&pnn->routes.lock);
for (i = 0; i < 64; i++)
if (dev == pnn->routes.table[i]) {
- rcu_assign_pointer(pnn->routes.table[i], NULL);
+ RCU_INIT_POINTER(pnn->routes.table[i], NULL);
set_bit(i, deleted);
}
mutex_unlock(&pnn->routes.lock);
@@ -390,7 +390,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr)
daddr = daddr >> 2;
mutex_lock(&routes->lock);
if (routes->table[daddr] == NULL) {
- rcu_assign_pointer(routes->table[daddr], dev);
+ RCU_INIT_POINTER(routes->table[daddr], dev);
dev_hold(dev);
err = 0;
}
@@ -406,7 +406,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
daddr = daddr >> 2;
mutex_lock(&routes->lock);
if (dev == routes->table[daddr])
- rcu_assign_pointer(routes->table[daddr], NULL);
+ RCU_INIT_POINTER(routes->table[daddr], NULL);
else
dev = NULL;
mutex_unlock(&routes->lock);
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index ab07711..676d18d 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -679,7 +679,7 @@ int pn_sock_bind_res(struct sock *sk, u8 res)
mutex_lock(&resource_mutex);
if (pnres.sk[res] == NULL) {
sock_hold(sk);
- rcu_assign_pointer(pnres.sk[res], sk);
+ RCU_INIT_POINTER(pnres.sk[res], sk);
ret = 0;
}
mutex_unlock(&resource_mutex);
@@ -695,7 +695,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res)
mutex_lock(&resource_mutex);
if (pnres.sk[res] == sk) {
- rcu_assign_pointer(pnres.sk[res], NULL);
+ RCU_INIT_POINTER(pnres.sk[res], NULL);
ret = 0;
}
mutex_unlock(&resource_mutex);
@@ -714,7 +714,7 @@ void pn_sock_unbind_all_res(struct sock *sk)
mutex_lock(&resource_mutex);
for (res = 0; res < 256; res++) {
if (pnres.sk[res] == sk) {
- rcu_assign_pointer(pnres.sk[res], NULL);
+ RCU_INIT_POINTER(pnres.sk[res], NULL);
match++;
}
}
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index ec753b3..4cf6dc7 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -9,6 +9,7 @@ config RDS
config RDS_RDMA
tristate "RDS over Infiniband and iWARP"
+ select LLIST
depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
---help---
Allow RDS to use Infiniband and iWARP as a transport.
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 819c35a..e8fdb17 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -33,10 +33,10 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rculist.h>
+#include <linux/llist.h>
#include "rds.h"
#include "ib.h"
-#include "xlist.h"
static DEFINE_PER_CPU(unsigned long, clean_list_grace);
#define CLEAN_LIST_BUSY_BIT 0
@@ -49,7 +49,7 @@ struct rds_ib_mr {
struct rds_ib_mr_pool *pool;
struct ib_fmr *fmr;
- struct xlist_head xlist;
+ struct llist_node llnode;
/* unmap_list is for freeing */
struct list_head unmap_list;
@@ -71,9 +71,9 @@ struct rds_ib_mr_pool {
atomic_t item_count; /* total # of MRs */
atomic_t dirty_count; /* # dirty of MRs */
- struct xlist_head drop_list; /* MRs that have reached their max_maps limit */
- struct xlist_head free_list; /* unused MRs */
- struct xlist_head clean_list; /* global unused & unamapped MRs */
+ struct llist_head drop_list; /* MRs that have reached their max_maps limit */
+ struct llist_head free_list; /* unused MRs */
+ struct llist_head clean_list; /* global unused & unamapped MRs */
wait_queue_head_t flush_wait;
atomic_t free_pinned; /* memory pinned by free MRs */
@@ -220,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
if (!pool)
return ERR_PTR(-ENOMEM);
- INIT_XLIST_HEAD(&pool->free_list);
- INIT_XLIST_HEAD(&pool->drop_list);
- INIT_XLIST_HEAD(&pool->clean_list);
+ init_llist_head(&pool->free_list);
+ init_llist_head(&pool->drop_list);
+ init_llist_head(&pool->clean_list);
mutex_init(&pool->flush_lock);
init_waitqueue_head(&pool->flush_wait);
INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
@@ -260,26 +260,18 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
kfree(pool);
}
-static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
- struct rds_ib_mr **ibmr_ret)
-{
- struct xlist_head *ibmr_xl;
- ibmr_xl = xlist_del_head_fast(xl);
- *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
-}
-
static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
{
struct rds_ib_mr *ibmr = NULL;
- struct xlist_head *ret;
+ struct llist_node *ret;
unsigned long *flag;
preempt_disable();
flag = &__get_cpu_var(clean_list_grace);
set_bit(CLEAN_LIST_BUSY_BIT, flag);
- ret = xlist_del_head(&pool->clean_list);
+ ret = llist_del_first(&pool->clean_list);
if (ret)
- ibmr = list_entry(ret, struct rds_ib_mr, xlist);
+ ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
clear_bit(CLEAN_LIST_BUSY_BIT, flag);
preempt_enable();
@@ -529,46 +521,44 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
}
/*
- * given an xlist of mrs, put them all into the list_head for more processing
+ * given an llist of mrs, put them all into the list_head for more processing
*/
-static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
+static void llist_append_to_list(struct llist_head *llist, struct list_head *list)
{
struct rds_ib_mr *ibmr;
- struct xlist_head splice;
- struct xlist_head *cur;
- struct xlist_head *next;
-
- splice.next = NULL;
- xlist_splice(xlist, &splice);
- cur = splice.next;
- while (cur) {
- next = cur->next;
- ibmr = list_entry(cur, struct rds_ib_mr, xlist);
+ struct llist_node *node;
+ struct llist_node *next;
+
+ node = llist_del_all(llist);
+ while (node) {
+ next = node->next;
+ ibmr = llist_entry(node, struct rds_ib_mr, llnode);
list_add_tail(&ibmr->unmap_list, list);
- cur = next;
+ node = next;
}
}
/*
- * this takes a list head of mrs and turns it into an xlist of clusters.
- * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
- * reuse.
+ * this takes a list head of mrs and turns it into linked llist nodes
+ * of clusters. Each cluster has linked llist nodes of
+ * MR_CLUSTER_SIZE mrs that are ready for reuse.
*/
-static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
- struct list_head *list, struct xlist_head *xlist,
- struct xlist_head **tail_ret)
+static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
+ struct list_head *list,
+ struct llist_node **nodes_head,
+ struct llist_node **nodes_tail)
{
struct rds_ib_mr *ibmr;
- struct xlist_head *cur_mr = xlist;
- struct xlist_head *tail_mr = NULL;
+ struct llist_node *cur = NULL;
+ struct llist_node **next = nodes_head;
list_for_each_entry(ibmr, list, unmap_list) {
- tail_mr = &ibmr->xlist;
- tail_mr->next = NULL;
- cur_mr->next = tail_mr;
- cur_mr = tail_mr;
+ cur = &ibmr->llnode;
+ *next = cur;
+ next = &cur->next;
}
- *tail_ret = tail_mr;
+ *next = NULL;
+ *nodes_tail = cur;
}
/*
@@ -581,8 +571,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
int free_all, struct rds_ib_mr **ibmr_ret)
{
struct rds_ib_mr *ibmr, *next;
- struct xlist_head clean_xlist;
- struct xlist_head *clean_tail;
+ struct llist_node *clean_nodes;
+ struct llist_node *clean_tail;
LIST_HEAD(unmap_list);
LIST_HEAD(fmr_list);
unsigned long unpinned = 0;
@@ -603,7 +593,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
prepare_to_wait(&pool->flush_wait, &wait,
TASK_UNINTERRUPTIBLE);
- if (xlist_empty(&pool->clean_list))
+ if (llist_empty(&pool->clean_list))
schedule();
ibmr = rds_ib_reuse_fmr(pool);
@@ -628,10 +618,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
/* Get the list of all MRs to be dropped. Ordering matters -
* we want to put drop_list ahead of free_list.
*/
- xlist_append_to_list(&pool->drop_list, &unmap_list);
- xlist_append_to_list(&pool->free_list, &unmap_list);
+ llist_append_to_list(&pool->drop_list, &unmap_list);
+ llist_append_to_list(&pool->free_list, &unmap_list);
if (free_all)
- xlist_append_to_list(&pool->clean_list, &unmap_list);
+ llist_append_to_list(&pool->clean_list, &unmap_list);
free_goal = rds_ib_flush_goal(pool, free_all);
@@ -663,22 +653,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
if (!list_empty(&unmap_list)) {
/* we have to make sure that none of the things we're about
* to put on the clean list would race with other cpus trying
- * to pull items off. The xlist would explode if we managed to
+ * to pull items off. The llist would explode if we managed to
* remove something from the clean list and then add it back again
- * while another CPU was spinning on that same item in xlist_del_head.
+ * while another CPU was spinning on that same item in llist_del_first.
*
- * This is pretty unlikely, but just in case wait for an xlist grace period
+ * This is pretty unlikely, but just in case wait for an llist grace period
* here before adding anything back into the clean list.
*/
wait_clean_list_grace();
- list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
+ list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
if (ibmr_ret)
- refill_local(pool, &clean_xlist, ibmr_ret);
+ *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
- /* refill_local may have emptied our list */
- if (!xlist_empty(&clean_xlist))
- xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
+ /* more than one entry in llist nodes */
+ if (clean_nodes->next)
+ llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
}
@@ -711,9 +701,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
/* Return it to the pool's free list */
if (ibmr->remap_count >= pool->fmr_attr.max_maps)
- xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
+ llist_add(&ibmr->llnode, &pool->drop_list);
else
- xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
+ llist_add(&ibmr->llnode, &pool->free_list);
atomic_add(ibmr->sg_len, &pool->free_pinned);
atomic_inc(&pool->dirty_count);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 8b77edb..4e1de17 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
struct list_head *unmap_list,
- struct list_head *kill_list);
+ struct list_head *kill_list,
+ int *unpinned);
static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
@@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
LIST_HEAD(unmap_list);
LIST_HEAD(kill_list);
unsigned long flags;
- unsigned int nfreed = 0, ncleaned = 0, free_goal;
+ unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal;
int ret = 0;
rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
* will be destroyed by the unmap function.
*/
if (!list_empty(&unmap_list)) {
- ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list);
+ ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list,
+ &kill_list, &unpinned);
/* If we've been asked to destroy all MRs, move those
* that were simply cleaned to the kill list */
if (free_all)
@@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
spin_unlock_irqrestore(&pool->list_lock, flags);
}
+ atomic_sub(unpinned, &pool->free_pinned);
atomic_sub(ncleaned, &pool->dirty_count);
atomic_sub(nfreed, &pool->item_count);
@@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
struct list_head *unmap_list,
- struct list_head *kill_list)
+ struct list_head *kill_list,
+ int *unpinned)
{
struct rds_iw_mapping *mapping, *next;
unsigned int ncleaned = 0;
@@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
spin_lock_irqsave(&pool->list_lock, flags);
list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
+ *unpinned += mapping->m_sg.len;
list_move(&mapping->m_list, &laundered);
ncleaned++;
}
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
deleted file mode 100644
index e6b5190..0000000
--- a/net/rds/xlist.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _LINUX_XLIST_H
-#define _LINUX_XLIST_H
-
-#include <linux/stddef.h>
-#include <linux/poison.h>
-#include <linux/prefetch.h>
-#include <asm/system.h>
-
-struct xlist_head {
- struct xlist_head *next;
-};
-
-static inline void INIT_XLIST_HEAD(struct xlist_head *list)
-{
- list->next = NULL;
-}
-
-static inline int xlist_empty(struct xlist_head *head)
-{
- return head->next == NULL;
-}
-
-static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
- struct xlist_head *head)
-{
- struct xlist_head *cur;
- struct xlist_head *check;
-
- while (1) {
- cur = head->next;
- tail->next = cur;
- check = cmpxchg(&head->next, cur, new);
- if (check == cur)
- break;
- }
-}
-
-static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
-{
- struct xlist_head *cur;
- struct xlist_head *check;
- struct xlist_head *next;
-
- while (1) {
- cur = head->next;
- if (!cur)
- goto out;
-
- next = cur->next;
- check = cmpxchg(&head->next, cur, next);
- if (check == cur)
- goto out;
- }
-out:
- return cur;
-}
-
-static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
-{
- struct xlist_head *cur;
-
- cur = head->next;
- if (!cur)
- return NULL;
-
- head->next = cur->next;
- return cur;
-}
-
-static inline void xlist_splice(struct xlist_head *list,
- struct xlist_head *head)
-{
- struct xlist_head *cur;
-
- WARN_ON(head->next);
- cur = xchg(&list->next, NULL);
- head->next = cur;
-}
-
-#endif
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 102fc21..e051398 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
- dev_queue_xmit(skb2);
- err = 0;
+ err = dev_queue_xmit(skb2);
out:
if (err) {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index be4505e..b014279 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
struct rsvp_filter *f, **fp;
struct rsvp_session *s, **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS-1];
+ struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
struct tcf_exts e;
unsigned int h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
if (err < 0)
return err;
- err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
if (err < 0)
return err;
@@ -449,8 +449,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
if (f->handle != handle && handle)
goto errout2;
- if (tb[TCA_RSVP_CLASSID-1]) {
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
@@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
err = -EINVAL;
if (handle)
goto errout2;
- if (tb[TCA_RSVP_DST-1] == NULL)
+ if (tb[TCA_RSVP_DST] == NULL)
goto errout2;
err = -ENOBUFS;
@@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
goto errout2;
h2 = 16;
- if (tb[TCA_RSVP_SRC-1]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+ if (tb[TCA_RSVP_SRC]) {
+ memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
h2 = hash_src(f->src);
}
- if (tb[TCA_RSVP_PINFO-1]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+ if (tb[TCA_RSVP_PINFO]) {
+ pinfo = nla_data(tb[TCA_RSVP_PINFO]);
f->spi = pinfo->spi;
f->tunnelhdr = pinfo->tunnelhdr;
}
- if (tb[TCA_RSVP_CLASSID-1])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID])
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- dst = nla_data(tb[TCA_RSVP_DST-1]);
+ dst = nla_data(tb[TCA_RSVP_DST]);
h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
err = -ENOMEM;
@@ -642,8 +642,7 @@ nla_put_failure:
return -1;
}
-static struct tcf_proto_ops RSVP_OPS = {
- .next = NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
.classify = rsvp_classify,
.init = rsvp_init,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2a318f2..b5d56a2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -112,7 +112,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch)
for (prio = 0; prio < q->bands; prio++) {
struct Qdisc *qdisc = q->queues[prio];
- struct sk_buff *skb = qdisc->dequeue(qdisc);
+ struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
if (skb) {
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0a833d0..e83c272 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -287,6 +287,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
u32 r, slot, salt, sfbhash;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ if (unlikely(sch->q.qlen >= q->limit)) {
+ sch->qstats.overlimits++;
+ q->stats.queuedrop++;
+ goto drop;
+ }
+
if (q->rehash_interval > 0) {
unsigned long limit = q->rehash_time + q->rehash_interval;
@@ -332,12 +338,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
slot ^= 1;
sfb_skb_cb(skb)->hashes[slot] = 0;
- if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) {
+ if (unlikely(minqlen >= q->max)) {
sch->qstats.overlimits++;
- if (minqlen >= q->max)
- q->stats.bucketdrop++;
- else
- q->stats.queuedrop++;
+ q->stats.bucketdrop++;
goto drop;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4536ee6..4f5510e 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
- return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS;
+ if (qlen != slot->qlen)
+ return NET_XMIT_CN;
+
+ /* As we dropped a packet, better let upper stack know this */
+ qdisc_tree_decrease_qlen(sch, 1);
+ return NET_XMIT_SUCCESS;
}
static struct sk_buff *
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dc16b90..152b5b3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -282,6 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
asoc->peer.asconf_capable = 1;
asoc->asconf_addr_del_pending = NULL;
asoc->src_out_of_asoc_ok = 0;
+ asoc->new_transport = NULL;
/* Create an input queue. */
sctp_inq_init(&asoc->base.inqueue);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a6d27bf..14c2b06 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -917,6 +917,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
* current cwnd).
*/
if (!list_empty(&q->retransmit)) {
+ if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED)
+ goto sctp_flush_out;
if (transport == asoc->peer.retran_path)
goto retran;
@@ -989,6 +991,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
((new_transport->state == SCTP_INACTIVE) ||
(new_transport->state == SCTP_UNCONFIRMED)))
new_transport = asoc->peer.active_path;
+ if (new_transport->state == SCTP_UNCONFIRMED)
+ continue;
/* Change packets if necessary. */
if (new_transport != transport) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 81db4e3..0121e0a 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3015,6 +3015,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
/* Start the heartbeat timer. */
if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer)))
sctp_transport_hold(peer);
+ asoc->new_transport = peer;
break;
case SCTP_PARAM_DEL_IP:
/* ADDIP 4.3 D7) If a request is received to delete the
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 167c880..76388b0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_PURGE_ASCONF_QUEUE:
sctp_asconf_queue_teardown(asoc);
break;
+
+ case SCTP_CMD_SET_ASOC:
+ asoc = cmd->obj.asoc;
+ break;
+
default:
pr_warn("Impossible command: %u, %p\n",
cmd->verb, cmd->obj.ptr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 49b847b..891f5db 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
+ /* Restore association pointer to provide SCTP command interpeter
+ * with a valid context in case it needs to manipulate
+ * the queues */
+ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC,
+ SCTP_ASOC((struct sctp_association *)asoc));
+
return retval;
nomem:
@@ -3612,6 +3618,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
*/
asconf_ack->dest = chunk->source;
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack));
+ if (asoc->new_transport) {
+ sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport,
+ commands);
+ ((struct sctp_association *)asoc)->new_transport = NULL;
+ }
return SCTP_DISPOSITION_CONSUME;
}
diff --git a/net/socket.c b/net/socket.c
index b1cbbcd..2877647 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
+struct used_address {
+ struct sockaddr_storage name;
+ unsigned int name_len;
+};
+
static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
- struct msghdr *msg_sys, unsigned flags, int nosec)
+ struct msghdr *msg_sys, unsigned flags,
+ struct used_address *used_address)
{
struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *)msg;
@@ -1953,8 +1959,30 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
if (sock->file->f_flags & O_NONBLOCK)
msg_sys->msg_flags |= MSG_DONTWAIT;
- err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
- total_len);
+ /*
+ * If this is sendmmsg() and current destination address is same as
+ * previously succeeded address, omit asking LSM's decision.
+ * used_address->name_len is initialized to UINT_MAX so that the first
+ * destination address never matches.
+ */
+ if (used_address && msg_sys->msg_name &&
+ used_address->name_len == msg_sys->msg_namelen &&
+ !memcmp(&used_address->name, msg_sys->msg_name,
+ used_address->name_len)) {
+ err = sock_sendmsg_nosec(sock, msg_sys, total_len);
+ goto out_freectl;
+ }
+ err = sock_sendmsg(sock, msg_sys, total_len);
+ /*
+ * If this is sendmmsg() and sending to current destination address was
+ * successful, remember it.
+ */
+ if (used_address && err >= 0) {
+ used_address->name_len = msg_sys->msg_namelen;
+ if (msg_sys->msg_name)
+ memcpy(&used_address->name, msg_sys->msg_name,
+ used_address->name_len);
+ }
out_freectl:
if (ctl_buf != ctl)
@@ -1979,7 +2007,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
if (!sock)
goto out;
- err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
+ err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
fput_light(sock->file, fput_needed);
out:
@@ -1998,6 +2026,10 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
struct mmsghdr __user *entry;
struct compat_mmsghdr __user *compat_entry;
struct msghdr msg_sys;
+ struct used_address used_address;
+
+ if (vlen > UIO_MAXIOV)
+ vlen = UIO_MAXIOV;
datagrams = 0;
@@ -2005,27 +2037,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err)
- goto out_put;
-
+ used_address.name_len = UINT_MAX;
entry = mmsg;
compat_entry = (struct compat_mmsghdr __user *)mmsg;
+ err = 0;
while (datagrams < vlen) {
- /*
- * No need to ask LSM for more than the first datagram.
- */
if (MSG_CMSG_COMPAT & flags) {
err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
- &msg_sys, flags, datagrams);
+ &msg_sys, flags, &used_address);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
++compat_entry;
} else {
err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
- &msg_sys, flags, datagrams);
+ &msg_sys, flags, &used_address);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
@@ -2037,29 +2064,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
++datagrams;
}
-out_put:
fput_light(sock->file, fput_needed);
- if (err == 0)
- return datagrams;
-
- if (datagrams != 0) {
- /*
- * We may send less entries than requested (vlen) if the
- * sock is non blocking...
- */
- if (err != -EAGAIN) {
- /*
- * ... or if sendmsg returns an error after we
- * send some datagrams, where we record the
- * error to return on the next call or if the
- * app asks about it using getsockopt(SO_ERROR).
- */
- sock->sk->sk_err = -err;
- }
-
+ /* We only return an error if no datagrams were able to be sent */
+ if (datagrams != 0)
return datagrams;
- }
return err;
}
@@ -2463,7 +2472,7 @@ int sock_register(const struct net_proto_family *ops)
lockdep_is_held(&net_family_lock)))
err = -EEXIST;
else {
- rcu_assign_pointer(net_families[ops->family], ops);
+ RCU_INIT_POINTER(net_families[ops->family], ops);
err = 0;
}
spin_unlock(&net_family_lock);
@@ -2491,7 +2500,7 @@ void sock_unregister(int family)
BUG_ON(family < 0 || family >= NPROTO);
spin_lock(&net_family_lock);
- rcu_assign_pointer(net_families[family], NULL);
+ RCU_INIT_POINTER(net_families[family], NULL);
spin_unlock(&net_family_lock);
synchronize_rcu();
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 364eb45..d413275 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -122,7 +122,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
return;
gss_get_ctx(ctx);
- rcu_assign_pointer(gss_cred->gc_ctx, ctx);
+ RCU_INIT_POINTER(gss_cred->gc_ctx, ctx);
set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
smp_mb__before_clear_bit();
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
@@ -970,7 +970,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = gss_cred->gc_ctx;
- rcu_assign_pointer(gss_cred->gc_ctx, NULL);
+ RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
if (ctx)
gss_put_ctx(ctx);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9b6a4d1..f4385e4 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
+ * @xprt: pointer to the target transport
*
* This prevents mixing the payload of separate requests, and prevents
* transport connects from colliding with writes. No congestion control
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 759b318..28908f5 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -39,6 +39,7 @@
#include "link.h"
#include "port.h"
#include "bcast.h"
+#include "name_distr.h"
#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
@@ -298,14 +299,9 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
msg_set_bcgap_to(msg, n_ptr->bclink.gap_to);
msg_set_bcast_tag(msg, tipc_own_tag);
- if (tipc_bearer_send(&bcbearer->bearer, buf, NULL)) {
- bcl->stats.sent_nacks++;
- buf_discard(buf);
- } else {
- tipc_bearer_schedule(bcl->b_ptr, bcl);
- bcl->proto_msg_queue = buf;
- bcl->stats.bearer_congs++;
- }
+ tipc_bearer_send(&bcbearer->bearer, buf, NULL);
+ bcl->stats.sent_nacks++;
+ buf_discard(buf);
/*
* Ensure we doesn't send another NACK msg to the node
@@ -426,20 +422,28 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
void tipc_bclink_recv_pkt(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
- struct tipc_node *node = tipc_node_find(msg_prevnode(msg));
+ struct tipc_node *node;
u32 next_in;
u32 seqno;
struct sk_buff *deferred;
- if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported ||
- (msg_mc_netid(msg) != tipc_net_id))) {
- buf_discard(buf);
- return;
- }
+ /* Screen out unwanted broadcast messages */
+
+ if (msg_mc_netid(msg) != tipc_net_id)
+ goto exit;
+
+ node = tipc_node_find(msg_prevnode(msg));
+ if (unlikely(!node))
+ goto exit;
+
+ tipc_node_lock(node);
+ if (unlikely(!node->bclink.supported))
+ goto unlock;
if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
+ if (msg_type(msg) != STATE_MSG)
+ goto unlock;
if (msg_destnode(msg) == tipc_own_addr) {
- tipc_node_lock(node);
tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
tipc_node_unlock(node);
spin_lock_bh(&bc_lock);
@@ -449,18 +453,18 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
msg_bcgap_to(msg));
spin_unlock_bh(&bc_lock);
} else {
+ tipc_node_unlock(node);
tipc_bclink_peek_nack(msg_destnode(msg),
msg_bcast_tag(msg),
msg_bcgap_after(msg),
msg_bcgap_to(msg));
}
- buf_discard(buf);
- return;
+ goto exit;
}
- tipc_node_lock(node);
+ /* Handle in-sequence broadcast message */
+
receive:
- deferred = node->bclink.deferred_head;
next_in = mod(node->bclink.last_in + 1);
seqno = msg_seqno(msg);
@@ -474,7 +478,10 @@ receive:
}
if (likely(msg_isdata(msg))) {
tipc_node_unlock(node);
- tipc_port_recv_mcast(buf, NULL);
+ if (likely(msg_mcast(msg)))
+ tipc_port_recv_mcast(buf, NULL);
+ else
+ buf_discard(buf);
} else if (msg_user(msg) == MSG_BUNDLER) {
bcl->stats.recv_bundles++;
bcl->stats.recv_bundled += msg_msgcnt(msg);
@@ -487,18 +494,22 @@ receive:
bcl->stats.recv_fragmented++;
tipc_node_unlock(node);
tipc_net_route_msg(buf);
+ } else if (msg_user(msg) == NAME_DISTRIBUTOR) {
+ tipc_node_unlock(node);
+ tipc_named_recv(buf);
} else {
tipc_node_unlock(node);
- tipc_net_route_msg(buf);
+ buf_discard(buf);
}
+ buf = NULL;
+ tipc_node_lock(node);
+ deferred = node->bclink.deferred_head;
if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) {
- tipc_node_lock(node);
buf = deferred;
msg = buf_msg(buf);
node->bclink.deferred_head = deferred->next;
goto receive;
}
- return;
} else if (less(next_in, seqno)) {
u32 gap_after = node->bclink.gap_after;
u32 gap_to = node->bclink.gap_to;
@@ -513,6 +524,7 @@ receive:
else if (less(gap_after, seqno) && less(seqno, gap_to))
node->bclink.gap_to = seqno;
}
+ buf = NULL;
if (bclink_ack_allowed(node->bclink.nack_sync)) {
if (gap_to != gap_after)
bclink_send_nack(node);
@@ -520,9 +532,11 @@ receive:
}
} else {
bcl->stats.duplicates++;
- buf_discard(buf);
}
+unlock:
tipc_node_unlock(node);
+exit:
+ buf_discard(buf);
}
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
@@ -535,10 +549,11 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
/**
* tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer
*
- * Send through as many bearers as necessary to reach all nodes
- * that support TIPC multicasting.
+ * Send packet over as many bearers as necessary to reach all nodes
+ * that have joined the broadcast link.
*
- * Returns 0 if packet sent successfully, non-zero if not
+ * Returns 0 (packet sent successfully) under all circumstances,
+ * since the broadcast link's pseudo-bearer never blocks
*/
static int tipc_bcbearer_send(struct sk_buff *buf,
@@ -547,7 +562,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
{
int bp_index;
- /* Prepare buffer for broadcasting (if first time trying to send it) */
+ /*
+ * Prepare broadcast link message for reliable transmission,
+ * if first time trying to send it;
+ * preparation is skipped for broadcast link protocol messages
+ * since they are sent in an unreliable manner and don't need it
+ */
if (likely(!msg_non_seq(buf_msg(buf)))) {
struct tipc_msg *msg;
@@ -596,18 +616,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
}
if (bcbearer->remains_new.count == 0)
- return 0;
+ break; /* all targets reached */
bcbearer->remains = bcbearer->remains_new;
}
- /*
- * Unable to reach all targets (indicate success, since currently
- * there isn't code in place to properly block & unblock the
- * pseudo-bearer used by the broadcast link)
- */
-
- return TIPC_OK;
+ return 0;
}
/**
@@ -667,27 +681,6 @@ void tipc_bcbearer_sort(void)
spin_unlock_bh(&bc_lock);
}
-/**
- * tipc_bcbearer_push - resolve bearer congestion
- *
- * Forces bclink to push out any unsent packets, until all packets are gone
- * or congestion reoccurs.
- * No locks set when function called
- */
-
-void tipc_bcbearer_push(void)
-{
- struct tipc_bearer *b_ptr;
-
- spin_lock_bh(&bc_lock);
- b_ptr = &bcbearer->bearer;
- if (b_ptr->blocked) {
- b_ptr->blocked = 0;
- tipc_bearer_lock_push(b_ptr);
- }
- spin_unlock_bh(&bc_lock);
-}
-
int tipc_bclink_stats(char *buf, const u32 buf_size)
{
@@ -764,7 +757,7 @@ int tipc_bclink_init(void)
bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
if (!bcbearer || !bclink) {
- warn("Multicast link creation failed, no memory\n");
+ warn("Broadcast link creation failed, no memory\n");
kfree(bcbearer);
bcbearer = NULL;
kfree(bclink);
@@ -775,7 +768,7 @@ int tipc_bclink_init(void)
INIT_LIST_HEAD(&bcbearer->bearer.cong_links);
bcbearer->bearer.media = &bcbearer->media;
bcbearer->media.send_msg = tipc_bcbearer_send;
- sprintf(bcbearer->media.name, "tipc-multicast");
+ sprintf(bcbearer->media.name, "tipc-broadcast");
bcl = &bclink->link;
INIT_LIST_HEAD(&bcl->waiting_ports);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 500c97f..06740da 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,6 +101,5 @@ int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
int tipc_bclink_reset_stats(void);
int tipc_bclink_set_queue_limits(u32 limit);
void tipc_bcbearer_sort(void);
-void tipc_bcbearer_push(void);
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 85eba9c..e2202de 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -385,13 +385,9 @@ static int bearer_push(struct tipc_bearer *b_ptr)
void tipc_bearer_lock_push(struct tipc_bearer *b_ptr)
{
- int res;
-
spin_lock_bh(&b_ptr->lock);
- res = bearer_push(b_ptr);
+ bearer_push(b_ptr);
spin_unlock_bh(&b_ptr->lock);
- if (res)
- tipc_bcbearer_push();
}
@@ -608,6 +604,7 @@ int tipc_block_bearer(const char *name)
info("Blocking bearer <%s>\n", name);
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
+ list_splice_init(&b_ptr->cong_links, &b_ptr->links);
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
struct tipc_node *n_ptr = l_ptr->owner;
@@ -635,6 +632,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
b_ptr->media->disable_bearer(b_ptr);
+ list_splice_init(&b_ptr->cong_links, &b_ptr->links);
list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
tipc_link_delete(l_ptr);
}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 5ad70ef..d696f9e 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -39,8 +39,8 @@
#include "bcast.h"
-#define MAX_BEARERS 8
-#define MAX_MEDIA 4
+#define MAX_BEARERS 2
+#define MAX_MEDIA 2
/*
* Identifiers of supported TIPC media types
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 443159a..80da6eb 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -65,7 +65,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd,
const void *req_tlv_area, int req_tlv_space,
int headroom);
-void tipc_cfg_link_event(u32 addr, char *name, int up);
int tipc_cfg_init(void);
void tipc_cfg_stop(void);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 0987933..f2fb96e 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -159,12 +159,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
}
tipc_node_lock(n_ptr);
- /* Don't talk to neighbor during cleanup after last session */
- if (n_ptr->cleanup_required) {
- tipc_node_unlock(n_ptr);
- return;
- }
-
link = n_ptr->links[b_ptr->identity];
/* Create a link endpoint for this bearer, if necessary */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index b69092e..e728d4c 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -2,7 +2,7 @@
* net/tipc/eth_media.c: Ethernet bearer support for TIPC
*
* Copyright (c) 2001-2007, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
+ * Copyright (c) 2005-2008, 2011, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,7 @@
#include "core.h"
#include "bearer.h"
-#define MAX_ETH_BEARERS 2
+#define MAX_ETH_BEARERS MAX_BEARERS
#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI
#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL
#define ETH_LINK_WINDOW TIPC_DEF_LINK_WIN
@@ -144,31 +144,27 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
/* Find device with specified name */
+ read_lock(&dev_base_lock);
for_each_netdev(&init_net, pdev) {
if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
dev = pdev;
+ dev_hold(dev);
break;
}
}
+ read_unlock(&dev_base_lock);
if (!dev)
return -ENODEV;
- /* Find Ethernet bearer for device (or create one) */
-
- while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev))
- eb_ptr++;
- if (eb_ptr == stop)
- return -EDQUOT;
- if (!eb_ptr->dev) {
- eb_ptr->dev = dev;
- eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
- eb_ptr->tipc_packet_type.dev = dev;
- eb_ptr->tipc_packet_type.func = recv_msg;
- eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
- INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
- dev_hold(dev);
- dev_add_pack(&eb_ptr->tipc_packet_type);
- }
+ /* Create Ethernet bearer for device */
+
+ eb_ptr->dev = dev;
+ eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
+ eb_ptr->tipc_packet_type.dev = dev;
+ eb_ptr->tipc_packet_type.func = recv_msg;
+ eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
+ INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
+ dev_add_pack(&eb_ptr->tipc_packet_type);
/* Associate TIPC bearer with Ethernet bearer */
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f89570c..ae98a72 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -332,15 +332,16 @@ struct link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->addr = peer;
if_name = strchr(b_ptr->name, ':') + 1;
- sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
+ sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
tipc_node(tipc_own_addr),
if_name,
tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
- /* note: peer i/f is appended to link name by reset/activate */
+ /* note: peer i/f name is updated by reset/activate message */
memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
l_ptr->owner = n_ptr;
l_ptr->checkpoint = 1;
+ l_ptr->peer_session = INVALID_SESSION;
l_ptr->b_ptr = b_ptr;
link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
l_ptr->state = RESET_UNKNOWN;
@@ -536,9 +537,6 @@ void tipc_link_stop(struct link *l_ptr)
l_ptr->proto_msg_queue = NULL;
}
-/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
-#define link_send_event(fcn, l_ptr, up) do { } while (0)
-
void tipc_link_reset(struct link *l_ptr)
{
struct sk_buff *buf;
@@ -596,10 +594,6 @@ void tipc_link_reset(struct link *l_ptr)
l_ptr->fsm_msg_cnt = 0;
l_ptr->stale_count = 0;
link_reset_statistics(l_ptr);
-
- link_send_event(tipc_cfg_link_event, l_ptr, 0);
- if (!in_own_cluster(l_ptr->addr))
- link_send_event(tipc_disc_link_event, l_ptr, 0);
}
@@ -608,9 +602,6 @@ static void link_activate(struct link *l_ptr)
l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
tipc_node_link_up(l_ptr->owner, l_ptr);
tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
- link_send_event(tipc_cfg_link_event, l_ptr, 1);
- if (!in_own_cluster(l_ptr->addr))
- link_send_event(tipc_disc_link_event, l_ptr, 1);
}
/**
@@ -985,6 +976,51 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
}
/*
+ * tipc_link_send_names - send name table entries to new neighbor
+ *
+ * Send routine for bulk delivery of name table messages when contact
+ * with a new neighbor occurs. No link congestion checking is performed
+ * because name table messages *must* be delivered. The messages must be
+ * small enough not to require fragmentation.
+ * Called without any locks held.
+ */
+
+void tipc_link_send_names(struct list_head *message_list, u32 dest)
+{
+ struct tipc_node *n_ptr;
+ struct link *l_ptr;
+ struct sk_buff *buf;
+ struct sk_buff *temp_buf;
+
+ if (list_empty(message_list))
+ return;
+
+ read_lock_bh(&tipc_net_lock);
+ n_ptr = tipc_node_find(dest);
+ if (n_ptr) {
+ tipc_node_lock(n_ptr);
+ l_ptr = n_ptr->active_links[0];
+ if (l_ptr) {
+ /* convert circular list to linear list */
+ ((struct sk_buff *)message_list->prev)->next = NULL;
+ link_add_chain_to_outqueue(l_ptr,
+ (struct sk_buff *)message_list->next, 0);
+ tipc_link_push_queue(l_ptr);
+ INIT_LIST_HEAD(message_list);
+ }
+ tipc_node_unlock(n_ptr);
+ }
+ read_unlock_bh(&tipc_net_lock);
+
+ /* discard the messages if they couldn't be sent */
+
+ list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
+ list_del((struct list_head *)buf);
+ buf_discard(buf);
+ }
+}
+
+/*
* link_send_buf_fast: Entry for data messages where the
* destination link is known and the header is complete,
* inclusive total message length. Very time critical.
@@ -1031,9 +1067,6 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
u32 selector = msg_origport(buf_msg(buf)) & 1;
u32 dummy;
- if (destnode == tipc_own_addr)
- return tipc_port_recv_msg(buf);
-
read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(destnode);
if (likely(n_ptr)) {
@@ -1658,19 +1691,12 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
continue;
}
+ /* Discard unicast link messages destined for another node */
+
if (unlikely(!msg_short(msg) &&
(msg_destnode(msg) != tipc_own_addr)))
goto cont;
- /* Discard non-routeable messages destined for another node */
-
- if (unlikely(!msg_isdata(msg) &&
- (msg_destnode(msg) != tipc_own_addr))) {
- if ((msg_user(msg) != CONN_MANAGER) &&
- (msg_user(msg) != MSG_FRAGMENTER))
- goto cont;
- }
-
/* Locate neighboring node that sent message */
n_ptr = tipc_node_find(msg_prevnode(msg));
@@ -1678,17 +1704,24 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
goto cont;
tipc_node_lock(n_ptr);
- /* Don't talk to neighbor during cleanup after last session */
+ /* Locate unicast link endpoint that should handle message */
- if (n_ptr->cleanup_required) {
+ l_ptr = n_ptr->links[b_ptr->identity];
+ if (unlikely(!l_ptr)) {
tipc_node_unlock(n_ptr);
goto cont;
}
- /* Locate unicast link endpoint that should handle message */
+ /* Verify that communication with node is currently allowed */
- l_ptr = n_ptr->links[b_ptr->identity];
- if (unlikely(!l_ptr)) {
+ if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
+ msg_user(msg) == LINK_PROTOCOL &&
+ (msg_type(msg) == RESET_MSG ||
+ msg_type(msg) == ACTIVATE_MSG) &&
+ !msg_redundant_link(msg))
+ n_ptr->block_setup &= ~WAIT_PEER_DOWN;
+
+ if (n_ptr->block_setup) {
tipc_node_unlock(n_ptr);
goto cont;
}
@@ -1923,6 +1956,12 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
if (link_blocked(l_ptr))
return;
+
+ /* Abort non-RESET send if communication with node is prohibited */
+
+ if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG))
+ return;
+
msg_set_type(msg, msg_typ);
msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in));
@@ -2051,9 +2090,19 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
case RESET_MSG:
if (!link_working_unknown(l_ptr) &&
(l_ptr->peer_session != INVALID_SESSION)) {
- if (msg_session(msg) == l_ptr->peer_session)
- break; /* duplicate: ignore */
+ if (less_eq(msg_session(msg), l_ptr->peer_session))
+ break; /* duplicate or old reset: ignore */
+ }
+
+ if (!msg_redundant_link(msg) && (link_working_working(l_ptr) ||
+ link_working_unknown(l_ptr))) {
+ /*
+ * peer has lost contact -- don't allow peer's links
+ * to reactivate before we recognize loss & clean up
+ */
+ l_ptr->owner->block_setup = WAIT_NODE_DOWN;
}
+
/* fall thru' */
case ACTIVATE_MSG:
/* Update link settings according other endpoint's values */
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 74fbeca..e56cb53 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -223,6 +223,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s
struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space);
void tipc_link_reset(struct link *l_ptr);
int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
+void tipc_link_send_names(struct list_head *message_list, u32 dest);
int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
int tipc_link_send_sections_fast(struct tipc_port *sender,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index cd356e5..b7ca1bd 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -173,18 +173,40 @@ void tipc_named_withdraw(struct publication *publ)
* tipc_named_node_up - tell specified node about all publications by this node
*/
-void tipc_named_node_up(unsigned long node)
+void tipc_named_node_up(unsigned long nodearg)
{
+ struct tipc_node *n_ptr;
+ struct link *l_ptr;
struct publication *publ;
struct distr_item *item = NULL;
struct sk_buff *buf = NULL;
+ struct list_head message_list;
+ u32 node = (u32)nodearg;
u32 left = 0;
u32 rest;
- u32 max_item_buf;
+ u32 max_item_buf = 0;
+
+ /* compute maximum amount of publication data to send per message */
+
+ read_lock_bh(&tipc_net_lock);
+ n_ptr = tipc_node_find(node);
+ if (n_ptr) {
+ tipc_node_lock(n_ptr);
+ l_ptr = n_ptr->active_links[0];
+ if (l_ptr)
+ max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) /
+ ITEM_SIZE) * ITEM_SIZE;
+ tipc_node_unlock(n_ptr);
+ }
+ read_unlock_bh(&tipc_net_lock);
+ if (!max_item_buf)
+ return;
+
+ /* create list of publication messages, then send them as a unit */
+
+ INIT_LIST_HEAD(&message_list);
read_lock_bh(&tipc_nametbl_lock);
- max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE;
- max_item_buf *= ITEM_SIZE;
rest = publ_cnt * ITEM_SIZE;
list_for_each_entry(publ, &publ_root, local_list) {
@@ -202,13 +224,14 @@ void tipc_named_node_up(unsigned long node)
item++;
left -= ITEM_SIZE;
if (!left) {
- msg_set_link_selector(buf_msg(buf), node);
- tipc_link_send(buf, node, node);
+ list_add_tail((struct list_head *)buf, &message_list);
buf = NULL;
}
}
exit:
read_unlock_bh(&tipc_nametbl_lock);
+
+ tipc_link_send_names(&message_list, (u32)node);
}
/**
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 68b3dd6..fafef6c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -141,17 +141,6 @@ void tipc_net_route_msg(struct sk_buff *buf)
return;
msg = buf_msg(buf);
- msg_incr_reroute_cnt(msg);
- if (msg_reroute_cnt(msg) > 6) {
- if (msg_errcode(msg)) {
- buf_discard(buf);
- } else {
- tipc_reject_msg(buf, msg_destport(msg) ?
- TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME);
- }
- return;
- }
-
/* Handle message for this node */
dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
if (tipc_in_scope(dnode, tipc_own_addr)) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2d106ef..27b4bb0 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -112,6 +112,7 @@ struct tipc_node *tipc_node_create(u32 addr)
break;
}
list_add_tail(&n_ptr->list, &temp_node->list);
+ n_ptr->block_setup = WAIT_PEER_DOWN;
tipc_num_nodes++;
@@ -312,7 +313,7 @@ static void node_established_contact(struct tipc_node *n_ptr)
}
}
-static void node_cleanup_finished(unsigned long node_addr)
+static void node_name_purge_complete(unsigned long node_addr)
{
struct tipc_node *n_ptr;
@@ -320,7 +321,7 @@ static void node_cleanup_finished(unsigned long node_addr)
n_ptr = tipc_node_find(node_addr);
if (n_ptr) {
tipc_node_lock(n_ptr);
- n_ptr->cleanup_required = 0;
+ n_ptr->block_setup &= ~WAIT_NAMES_GONE;
tipc_node_unlock(n_ptr);
}
read_unlock_bh(&tipc_net_lock);
@@ -331,28 +332,32 @@ static void node_lost_contact(struct tipc_node *n_ptr)
char addr_string[16];
u32 i;
- /* Clean up broadcast reception remains */
- n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0;
- while (n_ptr->bclink.deferred_head) {
- struct sk_buff *buf = n_ptr->bclink.deferred_head;
- n_ptr->bclink.deferred_head = buf->next;
- buf_discard(buf);
- }
- if (n_ptr->bclink.defragm) {
- buf_discard(n_ptr->bclink.defragm);
- n_ptr->bclink.defragm = NULL;
- }
+ info("Lost contact with %s\n",
+ tipc_addr_string_fill(addr_string, n_ptr->addr));
+
+ /* Flush broadcast link info associated with lost node */
if (n_ptr->bclink.supported) {
+ n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0;
+ while (n_ptr->bclink.deferred_head) {
+ struct sk_buff *buf = n_ptr->bclink.deferred_head;
+ n_ptr->bclink.deferred_head = buf->next;
+ buf_discard(buf);
+ }
+
+ if (n_ptr->bclink.defragm) {
+ buf_discard(n_ptr->bclink.defragm);
+ n_ptr->bclink.defragm = NULL;
+ }
+
+ tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
tipc_bclink_acknowledge(n_ptr,
mod(n_ptr->bclink.acked + 10000));
- tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
if (n_ptr->addr < tipc_own_addr)
tipc_own_tag--;
- }
- info("Lost contact with %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+ n_ptr->bclink.supported = 0;
+ }
/* Abort link changeover */
for (i = 0; i < MAX_BEARERS; i++) {
@@ -367,10 +372,10 @@ static void node_lost_contact(struct tipc_node *n_ptr)
/* Notify subscribers */
tipc_nodesub_notify(n_ptr);
- /* Prevent re-contact with node until all cleanup is done */
+ /* Prevent re-contact with node until cleanup is done */
- n_ptr->cleanup_required = 1;
- tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
+ n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
+ tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
}
struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 5c61afc..4f15cb4 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -42,6 +42,12 @@
#include "net.h"
#include "bearer.h"
+/* Flags used to block (re)establishment of contact with a neighboring node */
+
+#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */
+#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */
+#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */
+
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
@@ -52,7 +58,7 @@
* @active_links: pointers to active links to node
* @links: pointers to all links to node
* @working_links: number of working links to node (both active and standby)
- * @cleanup_required: non-zero if cleaning up after a prior loss of contact
+ * @block_setup: bit mask of conditions preventing link establishment to node
* @link_cnt: number of links to node
* @permit_changeover: non-zero if node has redundant links to this system
* @bclink: broadcast-related info
@@ -77,7 +83,7 @@ struct tipc_node {
struct link *links[MAX_BEARERS];
int link_cnt;
int working_links;
- int cleanup_required;
+ int block_setup;
int permit_changeover;
struct {
int supported;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index adb2eff..9440a3d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -49,7 +49,7 @@ struct tipc_sock {
struct sock sk;
struct tipc_port *p;
struct tipc_portid peer_name;
- long conn_timeout;
+ unsigned int conn_timeout;
};
#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -231,7 +231,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk);
sk->sk_backlog_rcv = backlog_rcv;
tipc_sk(sk)->p = tp_ptr;
- tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
+ tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
spin_unlock_bh(tp_ptr->lock);
@@ -525,6 +525,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
struct tipc_port *tport = tipc_sk_port(sk);
struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
int needs_conn;
+ long timeout_val;
int res = -EINVAL;
if (unlikely(!dest))
@@ -564,6 +565,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
reject_rx_queue(sk);
}
+ timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+
do {
if (dest->addrtype == TIPC_ADDR_NAME) {
res = dest_name_check(dest, m);
@@ -600,16 +603,14 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
sock->state = SS_CONNECTING;
break;
}
- if (m->msg_flags & MSG_DONTWAIT) {
- res = -EWOULDBLOCK;
+ if (timeout_val <= 0L) {
+ res = timeout_val ? timeout_val : -EWOULDBLOCK;
break;
}
release_sock(sk);
- res = wait_event_interruptible(*sk_sleep(sk),
- !tport->congested);
+ timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
+ !tport->congested, timeout_val);
lock_sock(sk);
- if (res)
- break;
} while (1);
exit:
@@ -636,6 +637,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct tipc_port *tport = tipc_sk_port(sk);
struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
+ long timeout_val;
int res;
/* Handle implied connection establishment */
@@ -650,6 +652,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
if (iocb)
lock_sock(sk);
+ timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+
do {
if (unlikely(sock->state != SS_CONNECTED)) {
if (sock->state == SS_DISCONNECTING)
@@ -663,16 +667,14 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
total_len);
if (likely(res != -ELINKCONG))
break;
- if (m->msg_flags & MSG_DONTWAIT) {
- res = -EWOULDBLOCK;
+ if (timeout_val <= 0L) {
+ res = timeout_val ? timeout_val : -EWOULDBLOCK;
break;
}
release_sock(sk);
- res = wait_event_interruptible(*sk_sleep(sk),
- (!tport->congested || !tport->connected));
+ timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
+ (!tport->congested || !tport->connected), timeout_val);
lock_sock(sk);
- if (res)
- break;
} while (1);
if (iocb)
@@ -1369,7 +1371,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
struct msghdr m = {NULL,};
struct sk_buff *buf;
struct tipc_msg *msg;
- long timeout;
+ unsigned int timeout;
int res;
lock_sock(sk);
@@ -1434,7 +1436,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
res = wait_event_interruptible_timeout(*sk_sleep(sk),
(!skb_queue_empty(&sk->sk_receive_queue) ||
(sock->state != SS_CONNECTING)),
- timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
+ timeout ? (long)msecs_to_jiffies(timeout)
+ : MAX_SCHEDULE_TIMEOUT);
lock_sock(sk);
if (res > 0) {
@@ -1480,9 +1483,7 @@ static int listen(struct socket *sock, int len)
lock_sock(sk);
- if (sock->state == SS_READY)
- res = -EOPNOTSUPP;
- else if (sock->state != SS_UNCONNECTED)
+ if (sock->state != SS_UNCONNECTED)
res = -EINVAL;
else {
sock->state = SS_LISTENING;
@@ -1510,10 +1511,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
lock_sock(sk);
- if (sock->state == SS_READY) {
- res = -EOPNOTSUPP;
- goto exit;
- }
if (sock->state != SS_LISTENING) {
res = -EINVAL;
goto exit;
@@ -1696,7 +1693,7 @@ static int setsockopt(struct socket *sock,
res = tipc_set_portunreturnable(tport->ref, value);
break;
case TIPC_CONN_TIMEOUT:
- tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
+ tipc_sk(sk)->conn_timeout = value;
/* no need to set "res", since already 0 at this point */
break;
default:
@@ -1752,7 +1749,7 @@ static int getsockopt(struct socket *sock,
res = tipc_portunreturnable(tport->ref, &value);
break;
case TIPC_CONN_TIMEOUT:
- value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
+ value = tipc_sk(sk)->conn_timeout;
/* no need to set "res", since already 0 at this point */
break;
case TIPC_NODE_RECVQ_DEPTH:
@@ -1790,11 +1787,11 @@ static const struct proto_ops msg_ops = {
.bind = bind,
.connect = connect,
.socketpair = sock_no_socketpair,
- .accept = accept,
+ .accept = sock_no_accept,
.getname = get_name,
.poll = poll,
.ioctl = sock_no_ioctl,
- .listen = listen,
+ .listen = sock_no_listen,
.shutdown = shutdown,
.setsockopt = setsockopt,
.getsockopt = getsockopt,
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 6cf7268..1983717 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -151,7 +151,7 @@ void tipc_subscr_report_overlap(struct subscription *sub,
if (!must && !(sub->filter & TIPC_SUB_PORTS))
return;
- sub->event_cb(sub, found_lower, found_upper, event, port_ref, node);
+ subscr_send_event(sub, found_lower, found_upper, event, port_ref, node);
}
/**
@@ -365,7 +365,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
subscr_terminate(subscriber);
return NULL;
}
- sub->event_cb = subscr_send_event;
INIT_LIST_HEAD(&sub->nameseq_list);
list_add(&sub->subscription_list, &subscriber->subscription_list);
sub->server_ref = subscriber->port_ref;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 45d89bf..4b06ef6 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -39,16 +39,11 @@
struct subscription;
-typedef void (*tipc_subscr_event) (struct subscription *sub,
- u32 found_lower, u32 found_upper,
- u32 event, u32 port_ref, u32 node);
-
/**
* struct subscription - TIPC network topology subscription object
* @seq: name sequence associated with subscription
* @timeout: duration of subscription (in ms)
* @filter: event filtering to be done for subscription
- * @event_cb: routine invoked when a subscription event is detected
* @timer: timer governing subscription duration (optional)
* @nameseq_list: adjacent subscriptions in name sequence's subscription list
* @subscription_list: adjacent subscriptions in subscriber's subscription list
@@ -61,7 +56,6 @@ struct subscription {
struct tipc_name_seq seq;
u32 timeout;
u32 filter;
- tipc_subscr_event event_cb;
struct timer_list timer;
struct list_head nameseq_list;
struct list_head subscription_list;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ec68e1c..466fbcc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
+
UNIXCB(skb).pid = get_pid(scm->pid);
- UNIXCB(skb).cred = get_cred(scm->cred);
+ if (scm->cred)
+ UNIXCB(skb).cred = get_cred(scm->cred);
UNIXCB(skb).fp = NULL;
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1392,6 +1394,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
}
/*
+ * Some apps rely on write() giving SCM_CREDENTIALS
+ * We include credentials if source or destination socket
+ * asserted SOCK_PASSCRED.
+ */
+static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
+ const struct sock *other)
+{
+ if (UNIXCB(skb).cred)
+ return;
+ if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ !other->sk_socket ||
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+ UNIXCB(skb).pid = get_pid(task_tgid(current));
+ UNIXCB(skb).cred = get_current_cred();
+ }
+}
+
+/*
* Send AF_UNIX data.
*/
@@ -1538,6 +1558,7 @@ restart:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
+ maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level;
@@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
(other->sk_shutdown & RCV_SHUTDOWN))
goto pipe_err_free;
+ maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
if (max_level > unix_sk(other)->recursion_level)
unix_sk(other)->recursion_level = max_level;
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 58064d9..791ab2e 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -462,8 +462,8 @@ static struct xfrm_algo_desc ealg_list[] = {
.desc = {
.sadb_alg_id = SADB_X_EALG_AESCTR,
.sadb_alg_ivlen = 8,
- .sadb_alg_minbits = 128,
- .sadb_alg_maxbits = 256
+ .sadb_alg_minbits = 160,
+ .sadb_alg_maxbits = 288
}
},
};
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index a026b0e..54a0dc2e 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -212,6 +212,11 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
+ if (async && x->repl->check(x, skb, seq)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
+ goto drop_unlock;
+ }
+
x->repl->advance(x, seq);
x->curlft.bytes += skb->len;
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index fc91ad7..f781b9a 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -70,26 +70,29 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
while ((scratch += len, dlen -= len) > 0) {
skb_frag_t *frag;
+ struct page *page;
err = -EMSGSIZE;
if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
goto out;
frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
- frag->page = alloc_page(GFP_ATOMIC);
+ page = alloc_page(GFP_ATOMIC);
err = -ENOMEM;
- if (!frag->page)
+ if (!page)
goto out;
+ __skb_frag_set_page(frag, page);
+
len = PAGE_SIZE;
if (dlen < len)
len = dlen;
- memcpy(page_address(frag->page), scratch, len);
-
frag->page_offset = 0;
frag->size = len;
+ memcpy(skb_frag_address(frag), scratch, len);
+
skb->truesize += len;
skb->data_len += len;
skb->len += len;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 94fdcc7..552df27 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1349,14 +1349,16 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
BUG();
}
xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
- memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry));
- xfrm_policy_put_afinfo(afinfo);
- if (likely(xdst))
+ if (likely(xdst)) {
+ memset(&xdst->u.rt6.rt6i_table, 0,
+ sizeof(*xdst) - sizeof(struct dst_entry));
xdst->flo.ops = &xfrm_bundle_fc_ops;
- else
+ } else
xdst = ERR_PTR(-ENOBUFS);
+ xfrm_policy_put_afinfo(afinfo);
+
return xdst;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0256b8a..d0a42df 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2927,7 +2927,7 @@ static int __net_init xfrm_user_net_init(struct net *net)
if (nlsk == NULL)
return -ENOMEM;
net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */
- rcu_assign_pointer(net->xfrm.nlsk, nlsk);
+ RCU_INIT_POINTER(net->xfrm.nlsk, nlsk);
return 0;
}
@@ -2935,7 +2935,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
{
struct net *net;
list_for_each_entry(net, net_exit_list, exit_list)
- rcu_assign_pointer(net->xfrm.nlsk, NULL);
+ RCU_INIT_POINTER(net->xfrm.nlsk, NULL);
synchronize_net();
list_for_each_entry(net, net_exit_list, exit_list)
netlink_kernel_release(net->xfrm.nlsk_stash);
OpenPOWER on IntegriCloud