Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/iphc.c677
-rw-r--r--net/6lowpan/nhc.c16
-rw-r--r--net/6lowpan/nhc.h12
-rw-r--r--net/6lowpan/nhc_udp.c22
-rw-r--r--net/8021q/vlan_core.c10
-rw-r--r--net/bluetooth/6lowpan.c84
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_conn.c167
-rw-r--r--net/bluetooth/hci_core.c95
-rw-r--r--net/bluetooth/hci_event.c7
-rw-r--r--net/bluetooth/hci_request.c103
-rw-r--r--net/bluetooth/hci_request.h4
-rw-r--r--net/bluetooth/hci_sock.c44
-rw-r--r--net/bluetooth/hidp/core.c14
-rw-r--r--net/bluetooth/l2cap_sock.c71
-rw-r--r--net/bluetooth/mgmt.c178
-rw-r--r--net/bluetooth/sco.c44
-rw-r--r--net/bluetooth/smp.c60
-rw-r--r--net/bluetooth/smp.h1
-rw-r--r--net/bridge/br_fdb.c17
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_vlan.c10
-rw-r--r--net/core/dev.c81
-rw-r--r--net/core/ptp_classifier.c16
-rw-r--r--net/core/rtnetlink.c24
-rw-r--r--net/core/sock.c42
-rw-r--r--net/core/tso.c18
-rw-r--r--net/dccp/dccp.h4
-rw-r--r--net/dccp/ipv4.c6
-rw-r--r--net/dccp/ipv6.c25
-rw-r--r--net/dccp/minisocks.c14
-rw-r--r--net/dccp/probe.c11
-rw-r--r--net/dsa/slave.c55
-rw-r--r--net/ieee802154/6lowpan/reassembly.c11
-rw-r--r--net/ieee802154/6lowpan/rx.c39
-rw-r--r--net/ieee802154/6lowpan/tx.c7
-rw-r--r--net/ipv4/fib_frontend.c14
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/fib_trie.c2
-rw-r--r--net/ipv4/gre_offload.c3
-rw-r--r--net/ipv4/inet_connection_sock.c33
-rw-r--r--net/ipv4/inet_fragment.c6
-rw-r--r--net/ipv4/inet_hashtables.c39
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_gre.c46
-rw-r--r--net/ipv4/ip_output.c9
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c4
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/tcp_dctcp.c2
-rw-r--r--net/ipv4/tcp_fastopen.c4
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv4/tcp_minisocks.c11
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv4/xfrm4_policy.c50
-rw-r--r--net/ipv6/addrconf.c46
-rw-r--r--net/ipv6/fib6_rules.c19
-rw-r--r--net/ipv6/icmp.c12
-rw-r--r--net/ipv6/ip6_fib.c12
-rw-r--r--net/ipv6/ip6_output.c80
-rw-r--r--net/ipv6/netfilter/Kconfig1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c13
-rw-r--r--net/ipv6/reassembly.c12
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/tcp_ipv6.c27
-rw-r--r--net/ipv6/xfrm6_output.c18
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/irda/irlmp.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/mac802154/llsec.c20
-rw-r--r--net/mpls/af_mpls.c636
-rw-r--r--net/mpls/internal.h74
-rw-r--r--net/netfilter/core.c2
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c2
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/nfc/core.c4
-rw-r--r--net/nfc/digital_core.c3
-rw-r--r--net/nfc/hci/core.c3
-rw-r--r--net/nfc/hci/llc.c2
-rw-r--r--net/nfc/nci/Kconfig2
-rw-r--r--net/nfc/nci/Makefile3
-rw-r--r--net/nfc/nci/core.c150
-rw-r--r--net/nfc/nci/data.c13
-rw-r--r--net/nfc/nci/hci.c167
-rw-r--r--net/nfc/nci/ntf.c3
-rw-r--r--net/nfc/nci/rsp.c1
-rw-r--r--net/nfc/nci/spi.c11
-rw-r--r--net/nfc/netlink.c8
-rw-r--r--net/nfc/nfc.h5
-rw-r--r--net/nfc/rawsock.c3
-rw-r--r--net/openvswitch/actions.c13
-rw-r--r--net/openvswitch/conntrack.c48
-rw-r--r--net/openvswitch/conntrack.h17
-rw-r--r--net/openvswitch/datapath.c5
-rw-r--r--net/openvswitch/datapath.h1
-rw-r--r--net/openvswitch/flow_netlink.c26
-rw-r--r--net/openvswitch/flow_netlink.h6
-rw-r--r--net/openvswitch/vport-geneve.c13
-rw-r--r--net/openvswitch/vport-gre.c8
-rw-r--r--net/openvswitch/vport-internal_dev.c46
-rw-r--r--net/openvswitch/vport-vxlan.c20
-rw-r--r--net/openvswitch/vport.c58
-rw-r--r--net/openvswitch/vport.h16
-rw-r--r--net/rds/af_rds.c10
-rw-r--r--net/rds/bind.c125
-rw-r--r--net/rds/iw_rdma.c6
-rw-r--r--net/rds/rds.h7
-rw-r--r--net/rds/tcp_recv.c11
-rw-r--r--net/switchdev/switchdev.c28
-rw-r--r--net/sysctl_net.c6
-rw-r--r--net/tipc/bcast.c996
-rw-r--r--net/tipc/bcast.h122
-rw-r--r--net/tipc/bearer.c94
-rw-r--r--net/tipc/bearer.h9
-rw-r--r--net/tipc/core.c9
-rw-r--r--net/tipc/core.h12
-rw-r--r--net/tipc/discover.c28
-rw-r--r--net/tipc/link.c775
-rw-r--r--net/tipc/link.h76
-rw-r--r--net/tipc/msg.c32
-rw-r--r--net/tipc/msg.h8
-rw-r--r--net/tipc/name_distr.c4
-rw-r--r--net/tipc/net.c6
-rw-r--r--net/tipc/node.c184
-rw-r--r--net/tipc/node.h41
-rw-r--r--net/tipc/socket.c4
-rw-r--r--net/tipc/udp_media.c17
-rw-r--r--net/unix/af_unix.c5
-rw-r--r--net/vmw_vsock/af_vsock.c44
-rw-r--r--net/vmw_vsock/vmci_transport.c177
-rw-r--r--net/vmw_vsock/vmci_transport.h4
-rw-r--r--net/xfrm/xfrm_input.c4
-rw-r--r--net/xfrm/xfrm_user.c9
134 files changed, 3792 insertions, 2964 deletions
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 78c8a49..346b5c1 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -49,36 +49,178 @@
#include <linux/bitops.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
+
#include <net/6lowpan.h>
#include <net/ipv6.h>
-#include <net/af_ieee802154.h>
+
+/* special link-layer handling */
+#include <net/mac802154.h>
#include "nhc.h"
+/* Values of fields within the IPHC encoding first byte */
+#define LOWPAN_IPHC_TF_MASK 0x18
+#define LOWPAN_IPHC_TF_00 0x00
+#define LOWPAN_IPHC_TF_01 0x08
+#define LOWPAN_IPHC_TF_10 0x10
+#define LOWPAN_IPHC_TF_11 0x18
+
+#define LOWPAN_IPHC_NH 0x04
+
+#define LOWPAN_IPHC_HLIM_MASK 0x03
+#define LOWPAN_IPHC_HLIM_00 0x00
+#define LOWPAN_IPHC_HLIM_01 0x01
+#define LOWPAN_IPHC_HLIM_10 0x02
+#define LOWPAN_IPHC_HLIM_11 0x03
+
+/* Values of fields within the IPHC encoding second byte */
+#define LOWPAN_IPHC_CID 0x80
+
+#define LOWPAN_IPHC_SAC 0x40
+
+#define LOWPAN_IPHC_SAM_MASK 0x30
+#define LOWPAN_IPHC_SAM_00 0x00
+#define LOWPAN_IPHC_SAM_01 0x10
+#define LOWPAN_IPHC_SAM_10 0x20
+#define LOWPAN_IPHC_SAM_11 0x30
+
+#define LOWPAN_IPHC_M 0x08
+
+#define LOWPAN_IPHC_DAC 0x04
+
+#define LOWPAN_IPHC_DAM_MASK 0x03
+#define LOWPAN_IPHC_DAM_00 0x00
+#define LOWPAN_IPHC_DAM_01 0x01
+#define LOWPAN_IPHC_DAM_10 0x02
+#define LOWPAN_IPHC_DAM_11 0x03
+
+/* ipv6 address based on mac
+ * second bit-flip (Universal/Local) is done according to RFC 2464
+ */
+#define is_addr_mac_addr_based(a, m) \
+ ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \
+ (((a)->s6_addr[9]) == (m)[1]) && \
+ (((a)->s6_addr[10]) == (m)[2]) && \
+ (((a)->s6_addr[11]) == (m)[3]) && \
+ (((a)->s6_addr[12]) == (m)[4]) && \
+ (((a)->s6_addr[13]) == (m)[5]) && \
+ (((a)->s6_addr[14]) == (m)[6]) && \
+ (((a)->s6_addr[15]) == (m)[7]))
+
+/* check whether we can compress the IID to 16 bits;
+ * this is possible only for unicast addresses whose first 49 bits are zero.
+ */
+#define lowpan_is_iid_16_bit_compressable(a) \
+ ((((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr[10]) == 0) && \
+ (((a)->s6_addr[11]) == 0xff) && \
+ (((a)->s6_addr[12]) == 0xfe) && \
+ (((a)->s6_addr[13]) == 0))
+
+/* check whether the 112-bit gid of the multicast address is mappable to: */
+
+/* 48 bits, FFXX::00XX:XXXX:XXXX */
+#define lowpan_is_mcast_addr_compressable48(a) \
+ ((((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr[10]) == 0))
+
+/* 32 bits, FFXX::00XX:XXXX */
+#define lowpan_is_mcast_addr_compressable32(a) \
+ ((((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr16[5]) == 0) && \
+ (((a)->s6_addr[12]) == 0))
+
+/* 8 bits, FF02::00XX */
+#define lowpan_is_mcast_addr_compressable8(a) \
+ ((((a)->s6_addr[1]) == 2) && \
+ (((a)->s6_addr16[1]) == 0) && \
+ (((a)->s6_addr16[2]) == 0) && \
+ (((a)->s6_addr16[3]) == 0) && \
+ (((a)->s6_addr16[4]) == 0) && \
+ (((a)->s6_addr16[5]) == 0) && \
+ (((a)->s6_addr16[6]) == 0) && \
+ (((a)->s6_addr[14]) == 0))
+
+static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ /* fe:80::XXXX:XXXX:XXXX:XXXX
+ * \_________________/
+ * hwaddr
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN);
+	/* second bit-flip (Universal/Local)
+	 * is done according to RFC 2464
+ */
+ ipaddr->s6_addr[8] ^= 0x02;
+}
+
+static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
+ const void *lladdr)
+{
+ const struct ieee802154_addr *addr = lladdr;
+ u8 eui64[EUI64_ADDR_LEN] = { };
+
+ switch (addr->mode) {
+ case IEEE802154_ADDR_LONG:
+ ieee802154_le64_to_be64(eui64, &addr->extended_addr);
+ iphc_uncompress_eui64_lladdr(ipaddr, eui64);
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+		 * Universal/Local bit is zero.
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ ieee802154_le16_to_be16(&ipaddr->s6_addr16[7],
+ &addr->short_addr);
+ break;
+ default:
+		/* should never be reached; filtered by 802154 6lowpan */
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
/* Uncompress address function for source and
* destination address(non-multicast).
*
- * address_mode is sam value or dam value.
+ * address_mode is the masked SAM or DAM value
*/
-static int uncompress_addr(struct sk_buff *skb,
- struct in6_addr *ipaddr, const u8 address_mode,
- const u8 *lladdr, const u8 addr_type,
- const u8 addr_len)
+static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
+ struct in6_addr *ipaddr, u8 address_mode,
+ const void *lladdr)
{
bool fail;
switch (address_mode) {
- case LOWPAN_IPHC_ADDR_00:
+ /* SAM and DAM are the same here */
+ case LOWPAN_IPHC_DAM_00:
/* for global link addresses */
fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
break;
- case LOWPAN_IPHC_ADDR_01:
+ case LOWPAN_IPHC_SAM_01:
+ case LOWPAN_IPHC_DAM_01:
/* fe:80::XXXX:XXXX:XXXX:XXXX */
ipaddr->s6_addr[0] = 0xFE;
ipaddr->s6_addr[1] = 0x80;
fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
break;
- case LOWPAN_IPHC_ADDR_02:
+ case LOWPAN_IPHC_SAM_10:
+ case LOWPAN_IPHC_DAM_10:
/* fe:80::ff:fe00:XXXX */
ipaddr->s6_addr[0] = 0xFE;
ipaddr->s6_addr[1] = 0x80;
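The iphc_uncompress_eui64_lladdr() helper introduced above builds a link-local address from an EUI-64 by prepending the fe80::/64 prefix and flipping the Universal/Local bit (RFC 2464/6282). A standalone userspace sketch of that construction, not part of the patch; the function name and the sample EUI-64 are illustrative only:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void lladdr_to_linklocal(uint8_t ip[16], const uint8_t eui64[8])
{
	memset(ip, 0, 16);
	ip[0] = 0xfe;			/* fe80::/64 link-local prefix   */
	ip[1] = 0x80;
	memcpy(&ip[8], eui64, 8);	/* EUI-64 becomes the IID        */
	ip[8] ^= 0x02;			/* flip the Universal/Local bit  */
}

int main(void)
{
	const uint8_t eui64[8] = { 0x02, 0x11, 0x22, 0xff, 0xfe, 0x33, 0x44, 0x55 };
	uint8_t ip[16];
	int i;

	lladdr_to_linklocal(ip, eui64);
	for (i = 0; i < 16; i++)
		printf("%02x%s", ip[i], (i & 1) && i != 15 ? ":" : "");
	printf("\n");	/* prints fe80:0000:0000:0000:0011:22ff:fe33:4455 */
	return 0;
}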
@@ -86,38 +228,16 @@ static int uncompress_addr(struct sk_buff *skb,
ipaddr->s6_addr[12] = 0xFE;
fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
break;
- case LOWPAN_IPHC_ADDR_03:
+ case LOWPAN_IPHC_SAM_11:
+ case LOWPAN_IPHC_DAM_11:
fail = false;
- switch (addr_type) {
- case IEEE802154_ADDR_LONG:
- /* fe:80::XXXX:XXXX:XXXX:XXXX
- * \_________________/
- * hwaddr
- */
- ipaddr->s6_addr[0] = 0xFE;
- ipaddr->s6_addr[1] = 0x80;
- memcpy(&ipaddr->s6_addr[8], lladdr, addr_len);
- /* second bit-flip (Universe/Local)
- * is done according RFC2464
- */
- ipaddr->s6_addr[8] ^= 0x02;
- break;
- case IEEE802154_ADDR_SHORT:
- /* fe:80::ff:fe00:XXXX
- * \__/
- * short_addr
- *
- * Universe/Local bit is zero.
- */
- ipaddr->s6_addr[0] = 0xFE;
- ipaddr->s6_addr[1] = 0x80;
- ipaddr->s6_addr[11] = 0xFF;
- ipaddr->s6_addr[12] = 0xFE;
- ipaddr->s6_addr16[7] = htons(*((u16 *)lladdr));
+ switch (lowpan_priv(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ iphc_uncompress_802154_lladdr(ipaddr, lladdr);
break;
default:
- pr_debug("Invalid addr_type set\n");
- return -EINVAL;
+ iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
+ break;
}
break;
default:
@@ -141,24 +261,25 @@ static int uncompress_addr(struct sk_buff *skb,
*/
static int uncompress_context_based_src_addr(struct sk_buff *skb,
struct in6_addr *ipaddr,
- const u8 sam)
+ u8 address_mode)
{
- switch (sam) {
- case LOWPAN_IPHC_ADDR_00:
+ switch (address_mode) {
+ case LOWPAN_IPHC_SAM_00:
/* unspec address ::
* Do nothing, address is already ::
*/
break;
- case LOWPAN_IPHC_ADDR_01:
+ case LOWPAN_IPHC_SAM_01:
/* TODO */
- case LOWPAN_IPHC_ADDR_02:
+ case LOWPAN_IPHC_SAM_10:
/* TODO */
- case LOWPAN_IPHC_ADDR_03:
+ case LOWPAN_IPHC_SAM_11:
/* TODO */
- netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
+ netdev_warn(skb->dev, "SAM value 0x%x not supported\n",
+ address_mode);
return -EINVAL;
default:
- pr_debug("Invalid sam value: 0x%x\n", sam);
+ pr_debug("Invalid sam value: 0x%x\n", address_mode);
return -EINVAL;
}
@@ -174,11 +295,11 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb,
*/
static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
struct in6_addr *ipaddr,
- const u8 dam)
+ u8 address_mode)
{
bool fail;
- switch (dam) {
+ switch (address_mode) {
case LOWPAN_IPHC_DAM_00:
/* 00: 128 bits. The full address
* is carried in-line.
@@ -210,7 +331,7 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
break;
default:
- pr_debug("DAM value has a wrong value: 0x%x\n", dam);
+ pr_debug("DAM value has a wrong value: 0x%x\n", address_mode);
return -EINVAL;
}
@@ -225,77 +346,142 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
return 0;
}
-/* TTL uncompression values */
-static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 };
-
-int
-lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
- const u8 *saddr, const u8 saddr_type,
- const u8 saddr_len, const u8 *daddr,
- const u8 daddr_type, const u8 daddr_len,
- u8 iphc0, u8 iphc1)
+/* get the ecn values from iphc tf format and set it to ipv6hdr */
+static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf)
{
- struct ipv6hdr hdr = {};
- u8 tmp, num_context = 0;
- int err;
+	/* get the two high bits, which are the ecn */
+ u8 ecn = tf[0] & 0xc0;
- raw_dump_table(__func__, "raw skb data dump uncompressed",
- skb->data, skb->len);
+ /* ECN takes 0x30 in hdr->flow_lbl[0] */
+ hdr->flow_lbl[0] |= (ecn >> 2);
+}
- /* another if the CID flag is set */
- if (iphc1 & LOWPAN_IPHC_CID) {
- pr_debug("CID flag is set, increase header with one\n");
- if (lowpan_fetch_skb(skb, &num_context, sizeof(num_context)))
- return -EINVAL;
- }
+/* get the dscp values from iphc tf format and set it to ipv6hdr */
+static inline void lowpan_iphc_tf_set_dscp(struct ipv6hdr *hdr, const u8 *tf)
+{
+ /* DSCP is at place after ECN */
+ u8 dscp = tf[0] & 0x3f;
- hdr.version = 6;
+ /* The four highest bits need to be set at hdr->priority */
+ hdr->priority |= ((dscp & 0x3c) >> 2);
+ /* The two lower bits is part of hdr->flow_lbl[0] */
+ hdr->flow_lbl[0] |= ((dscp & 0x03) << 6);
+}
- /* Traffic Class and Flow Label */
- switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) {
- /* Traffic Class and FLow Label carried in-line
- * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
+/* get the flow label values from iphc tf format and set it to ipv6hdr */
+static inline void lowpan_iphc_tf_set_lbl(struct ipv6hdr *hdr, const u8 *lbl)
+{
+	/* The flow label always starts in the lower nibble of flow_lbl[0]
+	 * and is followed by two more bytes. Inside the inline data the
+	 * position of the flow label can differ, which is handled by the
+	 * lbl pointer; e.g. in case "01" vs "00" the traffic class is
+	 * shifted by 8 bits, and the different lbl pointer accounts for that.
+	 *
+	 * The higher nibble of flow_lbl[0] is part of DSCP + ECN and is not
+	 * written here.
+	 */
- case 0: /* 00b */
- if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
+ hdr->flow_lbl[0] |= lbl[0] & 0x0f;
+ memcpy(&hdr->flow_lbl[1], &lbl[1], 2);
+}
+
+/* lowpan_iphc_tf_decompress - decompress the traffic class.
+ * This function will return zero on success, a value lower than zero if
+ * failed.
+ */
+static int lowpan_iphc_tf_decompress(struct sk_buff *skb, struct ipv6hdr *hdr,
+ u8 val)
+{
+ u8 tf[4];
+
+ /* Traffic Class and Flow Label */
+ switch (val) {
+ case LOWPAN_IPHC_TF_00:
+ /* ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) */
+ if (lowpan_fetch_skb(skb, tf, 4))
return -EINVAL;
- memcpy(&hdr.flow_lbl, &skb->data[0], 3);
- skb_pull(skb, 3);
- hdr.priority = ((tmp >> 2) & 0x0f);
- hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) |
- (hdr.flow_lbl[0] & 0x0f);
+ /* 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |ECN| DSCP | rsv | Flow Label |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+ lowpan_iphc_tf_set_ecn(hdr, tf);
+ lowpan_iphc_tf_set_dscp(hdr, tf);
+ lowpan_iphc_tf_set_lbl(hdr, &tf[1]);
break;
- /* Traffic class carried in-line
- * ECN + DSCP (1 byte), Flow Label is elided
- */
- case 2: /* 10b */
- if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
+ case LOWPAN_IPHC_TF_01:
+ /* ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided. */
+ if (lowpan_fetch_skb(skb, tf, 3))
return -EINVAL;
- hdr.priority = ((tmp >> 2) & 0x0f);
- hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
+ /* 1 2
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |ECN|rsv| Flow Label |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+ lowpan_iphc_tf_set_ecn(hdr, tf);
+ lowpan_iphc_tf_set_lbl(hdr, &tf[0]);
break;
- /* Flow Label carried in-line
- * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
- */
- case 1: /* 01b */
- if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
+ case LOWPAN_IPHC_TF_10:
+ /* ECN + DSCP (1 byte), Flow Label is elided. */
+ if (lowpan_fetch_skb(skb, tf, 1))
return -EINVAL;
- hdr.flow_lbl[0] = (tmp & 0x0F) | ((tmp >> 2) & 0x30);
- memcpy(&hdr.flow_lbl[1], &skb->data[0], 2);
- skb_pull(skb, 2);
+ /* 0 1 2 3 4 5 6 7
+ * +-+-+-+-+-+-+-+-+
+ * |ECN| DSCP |
+ * +-+-+-+-+-+-+-+-+
+ */
+ lowpan_iphc_tf_set_ecn(hdr, tf);
+ lowpan_iphc_tf_set_dscp(hdr, tf);
break;
- /* Traffic Class and Flow Label are elided */
- case 3: /* 11b */
+ case LOWPAN_IPHC_TF_11:
+ /* Traffic Class and Flow Label are elided */
break;
default:
- break;
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
+ return 0;
+}
+
+/* TTL uncompression values */
+static const u8 lowpan_ttl_values[] = {
+ [LOWPAN_IPHC_HLIM_01] = 1,
+ [LOWPAN_IPHC_HLIM_10] = 64,
+ [LOWPAN_IPHC_HLIM_11] = 255,
+};
+
+int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
+ const void *daddr, const void *saddr)
+{
+ struct ipv6hdr hdr = {};
+ u8 iphc0, iphc1;
+ int err;
+
+ raw_dump_table(__func__, "raw skb data dump uncompressed",
+ skb->data, skb->len);
+
+ if (lowpan_fetch_skb(skb, &iphc0, sizeof(iphc0)) ||
+ lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1)))
+ return -EINVAL;
+
+	/* an additional byte follows if the CID flag is set */
+ if (iphc1 & LOWPAN_IPHC_CID)
+ return -ENOTSUPP;
+
+ hdr.version = 6;
+
+ err = lowpan_iphc_tf_decompress(skb, &hdr,
+ iphc0 & LOWPAN_IPHC_TF_MASK);
+ if (err < 0)
+ return err;
+
/* Next Header */
- if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
+ if (!(iphc0 & LOWPAN_IPHC_NH)) {
/* Next header is carried inline */
if (lowpan_fetch_skb(skb, &hdr.nexthdr, sizeof(hdr.nexthdr)))
return -EINVAL;
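The TF "00" case above carries ECN, DSCP and the flow label in four inline bytes, and the new lowpan_iphc_tf_set_ecn/dscp/lbl() helpers scatter those bits back into priority and flow_lbl[]. A standalone sketch of that decode, using an illustrative struct in place of ipv6hdr and assuming the same bit positions as the helpers:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct hdrbits {
	uint8_t priority;	/* high 4 bits of the traffic class       */
	uint8_t flow_lbl[3];	/* low 4 bits of TC + 20-bit flow label   */
};

static void tf00_decode(struct hdrbits *h, const uint8_t tf[4])
{
	uint8_t ecn  = tf[0] & 0xc0;		/* top two bits            */
	uint8_t dscp = tf[0] & 0x3f;		/* remaining six bits      */

	h->flow_lbl[0] |= ecn >> 2;		/* ECN lands at 0x30       */
	h->priority    |= (dscp & 0x3c) >> 2;	/* DSCP high 4 bits        */
	h->flow_lbl[0] |= (dscp & 0x03) << 6;	/* DSCP low 2 bits         */
	h->flow_lbl[0] |= tf[1] & 0x0f;		/* flow label bits 19-16   */
	memcpy(&h->flow_lbl[1], &tf[2], 2);	/* flow label bits 15-0    */
}

int main(void)
{
	/* ECN=01, DSCP=46 (byte 0x6e) and flow label 0x12345 */
	const uint8_t tf[4] = { 0x6e, 0x01, 0x23, 0x45 };
	struct hdrbits h = { 0 };

	tf00_decode(&h, tf);
	printf("priority 0x%x flow_lbl %02x %02x %02x\n",
	       h.priority, h.flow_lbl[0], h.flow_lbl[1], h.flow_lbl[2]);
	/* prints: priority 0xb flow_lbl 91 23 45 */
	return 0;
}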
@@ -305,35 +491,30 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
}
/* Hop Limit */
- if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) {
- hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03];
+ if ((iphc0 & LOWPAN_IPHC_HLIM_MASK) != LOWPAN_IPHC_HLIM_00) {
+ hdr.hop_limit = lowpan_ttl_values[iphc0 & LOWPAN_IPHC_HLIM_MASK];
} else {
if (lowpan_fetch_skb(skb, &hdr.hop_limit,
sizeof(hdr.hop_limit)))
return -EINVAL;
}
- /* Extract SAM to the tmp variable */
- tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
-
if (iphc1 & LOWPAN_IPHC_SAC) {
/* Source address context based uncompression */
pr_debug("SAC bit is set. Handle context based source address.\n");
- err = uncompress_context_based_src_addr(skb, &hdr.saddr, tmp);
+ err = uncompress_context_based_src_addr(skb, &hdr.saddr,
+ iphc1 & LOWPAN_IPHC_SAM_MASK);
} else {
/* Source address uncompression */
pr_debug("source address stateless compression\n");
- err = uncompress_addr(skb, &hdr.saddr, tmp, saddr,
- saddr_type, saddr_len);
+ err = uncompress_addr(skb, dev, &hdr.saddr,
+ iphc1 & LOWPAN_IPHC_SAM_MASK, saddr);
}
/* Check on error of previous branch */
if (err)
return -EINVAL;
- /* Extract DAM to the tmp variable */
- tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03;
-
/* check for Multicast Compression */
if (iphc1 & LOWPAN_IPHC_M) {
if (iphc1 & LOWPAN_IPHC_DAC) {
@@ -341,22 +522,22 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
/* TODO: implement this */
} else {
err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
- tmp);
+ iphc1 & LOWPAN_IPHC_DAM_MASK);
if (err)
return -EINVAL;
}
} else {
- err = uncompress_addr(skb, &hdr.daddr, tmp, daddr,
- daddr_type, daddr_len);
+ err = uncompress_addr(skb, dev, &hdr.daddr,
+ iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
pr_debug("dest: stateless compression mode %d dest %pI6c\n",
- tmp, &hdr.daddr);
+ iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr);
if (err)
return -EINVAL;
}
/* Next header data uncompression */
- if (iphc0 & LOWPAN_IPHC_NH_C) {
+ if (iphc0 & LOWPAN_IPHC_NH) {
err = lowpan_nhc_do_uncompression(skb, dev, &hdr);
if (err < 0)
return err;
@@ -397,42 +578,176 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(lowpan_header_decompress);
-static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift,
- const struct in6_addr *ipaddr,
- const unsigned char *lladdr)
+static const u8 lowpan_iphc_dam_to_sam_value[] = {
+ [LOWPAN_IPHC_DAM_00] = LOWPAN_IPHC_SAM_00,
+ [LOWPAN_IPHC_DAM_01] = LOWPAN_IPHC_SAM_01,
+ [LOWPAN_IPHC_DAM_10] = LOWPAN_IPHC_SAM_10,
+ [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
+};
+
+static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
+ const unsigned char *lladdr, bool sam)
{
- u8 val = 0;
+ u8 dam = LOWPAN_IPHC_DAM_00;
if (is_addr_mac_addr_based(ipaddr, lladdr)) {
- val = 3; /* 0-bits */
+ dam = LOWPAN_IPHC_DAM_11; /* 0-bits */
pr_debug("address compression 0 bits\n");
} else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
/* compress IID to 16 bits xxxx::XXXX */
lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2);
- val = 2; /* 16-bits */
+ dam = LOWPAN_IPHC_DAM_10; /* 16-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)",
*hc_ptr - 2, 2);
} else {
/* do not compress IID => xxxx::IID */
lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8);
- val = 1; /* 64-bits */
+ dam = LOWPAN_IPHC_DAM_01; /* 64-bits */
raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)",
*hc_ptr - 8, 8);
}
- return rol8(val, shift);
+ if (sam)
+ return lowpan_iphc_dam_to_sam_value[dam];
+ else
+ return dam;
}
-int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, const void *_daddr,
- const void *_saddr, unsigned int len)
+/* lowpan_iphc_get_tc - get the ECN + DSCP fields in hc format */
+static inline u8 lowpan_iphc_get_tc(const struct ipv6hdr *hdr)
{
- u8 tmp, iphc0, iphc1, *hc_ptr;
+ u8 dscp, ecn;
+
+	/* hdr->priority contains the higher bits of dscp, the lower ones are
+	 * part of flow_lbl[0]. Note that ECN and DSCP are swapped in the ipv6 hdr.
+ */
+ dscp = (hdr->priority << 2) | ((hdr->flow_lbl[0] & 0xc0) >> 6);
+ /* ECN is at the two lower bits from first nibble of flow_lbl[0] */
+ ecn = (hdr->flow_lbl[0] & 0x30);
+ /* for pretty debug output, also shift ecn to get the ecn value */
+ pr_debug("ecn 0x%02x dscp 0x%02x\n", ecn >> 4, dscp);
+	/* ECN is at 0x30 now, shift it to have ECN + DSCP */
+ return (ecn << 2) | dscp;
+}
+
+/* lowpan_iphc_is_flow_lbl_zero - check if flow label is zero */
+static inline bool lowpan_iphc_is_flow_lbl_zero(const struct ipv6hdr *hdr)
+{
+ return ((!(hdr->flow_lbl[0] & 0x0f)) &&
+ !hdr->flow_lbl[1] && !hdr->flow_lbl[2]);
+}
+
+/* lowpan_iphc_tf_compress - compress the traffic class which is set by
+ * ipv6hdr. Return the corresponding format identifier which is used.
+ */
+static u8 lowpan_iphc_tf_compress(u8 **hc_ptr, const struct ipv6hdr *hdr)
+{
+ /* get ecn dscp data in a byteformat as: ECN(hi) + DSCP(lo) */
+ u8 tc = lowpan_iphc_get_tc(hdr), tf[4], val;
+
+ /* printout the traffic class in hc format */
+ pr_debug("tc 0x%02x\n", tc);
+
+ if (lowpan_iphc_is_flow_lbl_zero(hdr)) {
+ if (!tc) {
+ /* 11: Traffic Class and Flow Label are elided. */
+ val = LOWPAN_IPHC_TF_11;
+ } else {
+ /* 10: ECN + DSCP (1 byte), Flow Label is elided.
+ *
+ * 0 1 2 3 4 5 6 7
+ * +-+-+-+-+-+-+-+-+
+ * |ECN| DSCP |
+ * +-+-+-+-+-+-+-+-+
+ */
+ lowpan_push_hc_data(hc_ptr, &tc, sizeof(tc));
+ val = LOWPAN_IPHC_TF_10;
+ }
+ } else {
+ /* check if dscp is zero, it's after the first two bit */
+		/* check if dscp is zero; it comes after the first two bits (ECN) */
+ /* 01: ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
+ *
+ * 1 2
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |ECN|rsv| Flow Label |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+ memcpy(&tf[0], &hdr->flow_lbl[0], 3);
+			/* zero the highest 4 bits, which contain DSCP + ECN */
+ tf[0] &= ~0xf0;
+ /* set ECN */
+ tf[0] |= (tc & 0xc0);
+
+ lowpan_push_hc_data(hc_ptr, tf, 3);
+ val = LOWPAN_IPHC_TF_01;
+ } else {
+ /* 00: ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
+ *
+ * 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |ECN| DSCP | rsv | Flow Label |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+ memcpy(&tf[0], &tc, sizeof(tc));
+ /* highest nibble of flow_lbl[0] is part of DSCP + ECN
+ * which will be the 4-bit pad and will be filled with
+ * zeros afterwards.
+ */
+ memcpy(&tf[1], &hdr->flow_lbl[0], 3);
+ /* zero the 4-bit pad, which is reserved */
+ tf[1] &= ~0xf0;
+
+ lowpan_push_hc_data(hc_ptr, tf, 4);
+ val = LOWPAN_IPHC_TF_00;
+ }
+ }
+
+ return val;
+}
+
+static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr,
+ const struct in6_addr *ipaddr)
+{
+ u8 val;
+
+ if (lowpan_is_mcast_addr_compressable8(ipaddr)) {
+ pr_debug("compressed to 1 octet\n");
+ /* use last byte */
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[15], 1);
+ val = LOWPAN_IPHC_DAM_11;
+ } else if (lowpan_is_mcast_addr_compressable32(ipaddr)) {
+ pr_debug("compressed to 4 octets\n");
+ /* second byte + the last three */
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1);
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[13], 3);
+ val = LOWPAN_IPHC_DAM_10;
+ } else if (lowpan_is_mcast_addr_compressable48(ipaddr)) {
+ pr_debug("compressed to 6 octets\n");
+ /* second byte + the last five */
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1);
+ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[11], 5);
+ val = LOWPAN_IPHC_DAM_01;
+ } else {
+ pr_debug("using full address\n");
+ lowpan_push_hc_data(hc_ptr, ipaddr->s6_addr, 16);
+ val = LOWPAN_IPHC_DAM_00;
+ }
+
+ return val;
+}
+
+int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
+ const void *daddr, const void *saddr)
+{
+ u8 iphc0, iphc1, *hc_ptr;
struct ipv6hdr *hdr;
- u8 head[100] = {};
+ u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {};
int ret, addr_type;
- if (type != ETH_P_IPV6)
+ if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
hdr = ipv6_hdr(skb);
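lowpan_iphc_get_tc() above folds the DSCP bits split across priority/flow_lbl[0] and the ECN bits back into the single "ECN | DSCP" byte that the IPHC encoding uses. A minimal userspace sketch of the same arithmetic; fake_ipv6hdr and the sample values are illustrative only:

#include <stdio.h>
#include <stdint.h>

struct fake_ipv6hdr {
	uint8_t priority;	/* high 4 bits of the traffic class     */
	uint8_t flow_lbl[3];	/* low 4 bits of TC + 20-bit flow label */
};

static uint8_t iphc_tc(const struct fake_ipv6hdr *hdr)
{
	uint8_t dscp = (hdr->priority << 2) | ((hdr->flow_lbl[0] & 0xc0) >> 6);
	uint8_t ecn  = hdr->flow_lbl[0] & 0x30;

	return (ecn << 2) | dscp;	/* ECN in bits 7-6, DSCP in bits 5-0 */
}

int main(void)
{
	/* DSCP 46 (EF), ECN 01: the IPv6 traffic class byte is 0xb9 */
	struct fake_ipv6hdr hdr = { .priority = 0xb, .flow_lbl = { 0x90, 0, 0 } };

	printf("iphc tc byte: 0x%02x\n", iphc_tc(&hdr));	/* prints 0x6e */
	return 0;
}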
@@ -456,63 +771,26 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
/* TODO: context lookup */
- raw_dump_inline(__func__, "saddr",
- (unsigned char *)_saddr, IEEE802154_ADDR_LEN);
- raw_dump_inline(__func__, "daddr",
- (unsigned char *)_daddr, IEEE802154_ADDR_LEN);
+ raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
+ raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
raw_dump_table(__func__, "sending raw skb network uncompressed packet",
skb->data, skb->len);
- /* Traffic class, flow label
- * If flow label is 0, compress it. If traffic class is 0, compress it
- * We have to process both in the same time as the offset of traffic
- * class depends on the presence of version and flow label
- */
-
- /* hc format of TC is ECN | DSCP , original one is DSCP | ECN */
- tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4);
- tmp = ((tmp & 0x03) << 6) | (tmp >> 2);
-
- if (((hdr->flow_lbl[0] & 0x0F) == 0) &&
- (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) {
- /* flow label can be compressed */
- iphc0 |= LOWPAN_IPHC_FL_C;
- if ((hdr->priority == 0) &&
- ((hdr->flow_lbl[0] & 0xF0) == 0)) {
- /* compress (elide) all */
- iphc0 |= LOWPAN_IPHC_TC_C;
- } else {
- /* compress only the flow label */
- *hc_ptr = tmp;
- hc_ptr += 1;
- }
- } else {
- /* Flow label cannot be compressed */
- if ((hdr->priority == 0) &&
- ((hdr->flow_lbl[0] & 0xF0) == 0)) {
- /* compress only traffic class */
- iphc0 |= LOWPAN_IPHC_TC_C;
- *hc_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F);
- memcpy(hc_ptr + 1, &hdr->flow_lbl[1], 2);
- hc_ptr += 3;
- } else {
- /* compress nothing */
- memcpy(hc_ptr, hdr, 4);
- /* replace the top byte with new ECN | DSCP format */
- *hc_ptr = tmp;
- hc_ptr += 4;
- }
- }
+ /* Traffic Class, Flow Label compression */
+ iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr);
/* NOTE: payload length is always compressed */
/* Check if we provide the nhc format for nexthdr and compression
* functionality. If not nexthdr is handled inline and not compressed.
*/
- ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0);
- if (ret < 0)
- return ret;
+ ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr);
+ if (ret == -ENOENT)
+ lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr,
+ sizeof(hdr->nexthdr));
+ else
+ iphc0 |= LOWPAN_IPHC_NH;
/* Hop limit
* if 1: compress, encoding is 01
@@ -522,13 +800,13 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
*/
switch (hdr->hop_limit) {
case 1:
- iphc0 |= LOWPAN_IPHC_TTL_1;
+ iphc0 |= LOWPAN_IPHC_HLIM_01;
break;
case 64:
- iphc0 |= LOWPAN_IPHC_TTL_64;
+ iphc0 |= LOWPAN_IPHC_HLIM_10;
break;
case 255:
- iphc0 |= LOWPAN_IPHC_TTL_255;
+ iphc0 |= LOWPAN_IPHC_HLIM_11;
break;
default:
lowpan_push_hc_data(&hc_ptr, &hdr->hop_limit,
@@ -542,9 +820,8 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
iphc1 |= LOWPAN_IPHC_SAC;
} else {
if (addr_type & IPV6_ADDR_LINKLOCAL) {
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
- LOWPAN_IPHC_SAM_BIT,
- &hdr->saddr, _saddr);
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr,
+ saddr, true);
pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
&hdr->saddr, iphc1);
} else {
@@ -558,38 +835,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
if (addr_type & IPV6_ADDR_MULTICAST) {
pr_debug("destination address is multicast: ");
iphc1 |= LOWPAN_IPHC_M;
- if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) {
- pr_debug("compressed to 1 octet\n");
- iphc1 |= LOWPAN_IPHC_DAM_11;
- /* use last byte */
- lowpan_push_hc_data(&hc_ptr,
- &hdr->daddr.s6_addr[15], 1);
- } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) {
- pr_debug("compressed to 4 octets\n");
- iphc1 |= LOWPAN_IPHC_DAM_10;
- /* second byte + the last three */
- lowpan_push_hc_data(&hc_ptr,
- &hdr->daddr.s6_addr[1], 1);
- lowpan_push_hc_data(&hc_ptr,
- &hdr->daddr.s6_addr[13], 3);
- } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) {
- pr_debug("compressed to 6 octets\n");
- iphc1 |= LOWPAN_IPHC_DAM_01;
- /* second byte + the last five */
- lowpan_push_hc_data(&hc_ptr,
- &hdr->daddr.s6_addr[1], 1);
- lowpan_push_hc_data(&hc_ptr,
- &hdr->daddr.s6_addr[11], 5);
- } else {
- pr_debug("using full address\n");
- iphc1 |= LOWPAN_IPHC_DAM_00;
- lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16);
- }
+ iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr, &hdr->daddr);
} else {
if (addr_type & IPV6_ADDR_LINKLOCAL) {
/* TODO: context lookup */
- iphc1 |= lowpan_compress_addr_64(&hc_ptr,
- LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr);
+ iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr,
+ daddr, false);
pr_debug("dest address unicast link-local %pI6c "
"iphc1 0x%02x\n", &hdr->daddr, iphc1);
} else {
@@ -599,7 +850,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
}
/* next header compression */
- if (iphc0 & LOWPAN_IPHC_NH_C) {
+ if (iphc0 & LOWPAN_IPHC_NH) {
ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr);
if (ret < 0)
return ret;
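The new lowpan_iphc_mcast_addr_compress() picks a DAM value by testing how much of the multicast group ID is zero, via the lowpan_is_mcast_addr_compressable8/32/48 macros added earlier in this file. A standalone sketch of that classification; the return strings stand in for the DAM constants and the sample addresses are illustrative:

#include <stdio.h>
#include <stdint.h>

static const char *mcast_dam(const uint8_t a[16])
{
	int zeros = 0;

	/* count the zero bytes that follow the FF and scope bytes */
	while (zeros < 14 && a[2 + zeros] == 0)
		zeros++;

	if (a[1] == 2 && zeros >= 13)
		return "DAM_11: FF02::00XX, 1 octet inline";
	if (zeros >= 11)
		return "DAM_10: FFXX::00XX:XXXX, 4 octets inline";
	if (zeros >= 9)
		return "DAM_01: FFXX::00XX:XXXX:XXXX, 6 octets inline";
	return "DAM_00: full 16-octet address inline";
}

int main(void)
{
	const uint8_t all_nodes[16]  = { 0xff, 0x02, [15] = 0x01 };		/* ff02::1      */
	const uint8_t site_mcast[16] = { 0xff, 0x05, [13] = 0x12, 0x34, 0x56 };	/* ff05::12:3456 */

	printf("%s\n", mcast_dam(all_nodes));	/* DAM_11 */
	printf("%s\n", mcast_dam(site_mcast));	/* DAM_10 */
	return 0;
}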
diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c
index fd20fc5..7008d53 100644
--- a/net/6lowpan/nhc.c
+++ b/net/6lowpan/nhc.c
@@ -95,23 +95,20 @@ static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb)
}
int lowpan_nhc_check_compression(struct sk_buff *skb,
- const struct ipv6hdr *hdr, u8 **hc_ptr,
- u8 *iphc0)
+ const struct ipv6hdr *hdr, u8 **hc_ptr)
{
struct lowpan_nhc *nhc;
+ int ret = 0;
spin_lock_bh(&lowpan_nhc_lock);
nhc = lowpan_nexthdr_nhcs[hdr->nexthdr];
- if (nhc && nhc->compress)
- *iphc0 |= LOWPAN_IPHC_NH_C;
- else
- lowpan_push_hc_data(hc_ptr, &hdr->nexthdr,
- sizeof(hdr->nexthdr));
+ if (!(nhc && nhc->compress))
+ ret = -ENOENT;
spin_unlock_bh(&lowpan_nhc_lock);
- return 0;
+ return ret;
}
int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
@@ -157,7 +154,8 @@ out:
return ret;
}
-int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev,
+int lowpan_nhc_do_uncompression(struct sk_buff *skb,
+ const struct net_device *dev,
struct ipv6hdr *hdr)
{
struct lowpan_nhc *nhc;
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index c249f17..8030414 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -86,19 +86,16 @@ struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr);
/**
* lowpan_nhc_check_compression - checks if we support compression format. If
- * we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be
- * set. If we don't support nexthdr will be added as inline data to the
- * 6LoWPAN header.
+ * we support the nhc by nexthdr field, the function will return 0. If we
+ * don't support the nhc by nexthdr this function will return -ENOENT.
*
* @skb: skb of 6LoWPAN header to read nhc and replace header.
* @hdr: ipv6hdr to check the nexthdr value
* @hc_ptr: pointer for 6LoWPAN header which should increment at the end of
* replaced header.
- * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit
*/
int lowpan_nhc_check_compression(struct sk_buff *skb,
- const struct ipv6hdr *hdr, u8 **hc_ptr,
- u8 *iphc0);
+ const struct ipv6hdr *hdr, u8 **hc_ptr);
/**
* lowpan_nhc_do_compression - calling compress callback for nhc
@@ -119,7 +116,8 @@ int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr,
* @dev: netdevice for print logging information.
* @hdr: ipv6hdr for setting nexthdr value.
*/
-int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev,
+int lowpan_nhc_do_uncompression(struct sk_buff *skb,
+ const struct net_device *dev,
struct ipv6hdr *hdr);
/**
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 72d0b57..69537a2 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -17,7 +17,27 @@
#include "nhc.h"
-#define LOWPAN_NHC_UDP_IDLEN 1
+#define LOWPAN_NHC_UDP_MASK 0xF8
+#define LOWPAN_NHC_UDP_ID 0xF0
+#define LOWPAN_NHC_UDP_IDLEN 1
+
+#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0
+#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0
+#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000
+#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00
+
+/* values for port compression, _with checksum_ ie bit 5 set to 0 */
+
+/* all inline */
+#define LOWPAN_NHC_UDP_CS_P_00 0xF0
+/* source 16bit inline, dest = 0xF0 + 8 bit inline */
+#define LOWPAN_NHC_UDP_CS_P_01 0xF1
+/* source = 0xF0 + 8bit inline, dest = 16 bit inline */
+#define LOWPAN_NHC_UDP_CS_P_10 0xF2
+/* source & dest = 0xF0B + 4bit inline */
+#define LOWPAN_NHC_UDP_CS_P_11 0xF3
+/* checksum elided */
+#define LOWPAN_NHC_UDP_CS_C 0x04
static int udp_uncompress(struct sk_buff *skb, size_t needed)
{
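The LOWPAN_NHC_UDP_* constants added above describe the RFC 6282 UDP port compression modes; the compress path that consumes them is not visible in this hunk. A hedged sketch of how such constants are typically applied to pick a CS_P_xx mode (the function name and sample ports are illustrative):

#include <stdio.h>
#include <stdint.h>

static uint8_t nhc_udp_ports_mode(uint16_t sport, uint16_t dport)
{
	if ((sport & 0xfff0) == 0xf0b0 && (dport & 0xfff0) == 0xf0b0)
		return 0xf3;	/* CS_P_11: both ports as 4-bit nibbles */
	if ((sport & 0xff00) == 0xf000)
		return 0xf2;	/* CS_P_10: src 8 bits, dst 16 bits     */
	if ((dport & 0xff00) == 0xf000)
		return 0xf1;	/* CS_P_01: src 16 bits, dst 8 bits     */
	return 0xf0;		/* CS_P_00: both ports carried in full  */
}

int main(void)
{
	printf("0x%02x\n", nhc_udp_ports_mode(0xf0b1, 0xf0b2));	/* 0xf3 */
	printf("0x%02x\n", nhc_udp_ports_mode(0xf012, 5683));		/* 0xf2 */
	printf("0x%02x\n", nhc_udp_ports_mode(12345, 53));		/* 0xf0 */
	return 0;
}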
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 61bf2a0..496b275 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -206,7 +206,10 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
return -ENOMEM;
if (vlan_hw_filter_capable(dev, vid_info)) {
- err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+ if (netif_device_present(dev))
+ err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+ else
+ err = -ENODEV;
if (err) {
kfree(vid_info);
return err;
@@ -264,7 +267,10 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
int err;
if (vlan_hw_filter_capable(dev, vid_info)) {
- err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+ if (netif_device_present(dev))
+ err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+ else
+ err = -ENODEV;
if (err) {
pr_warn("failed to kill vid %04x/%d for device %s\n",
proto, vid, dev->name);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index db73b8a..9e9cca3 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -21,8 +21,6 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
-#include <net/af_ieee802154.h> /* to get the address type */
-
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
@@ -265,14 +263,13 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev)
if (!skb_cp)
return NET_RX_DROP;
- return netif_rx(skb_cp);
+ return netif_rx_ni(skb_cp);
}
static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
struct l2cap_chan *chan)
{
const u8 *saddr, *daddr;
- u8 iphc0, iphc1;
struct lowpan_dev *dev;
struct lowpan_peer *peer;
@@ -287,22 +284,7 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
saddr = peer->eui64_addr;
daddr = dev->netdev->dev_addr;
- /* at least two bytes will be used for the encoding */
- if (skb->len < 2)
- return -EINVAL;
-
- if (lowpan_fetch_skb_u8(skb, &iphc0))
- return -EINVAL;
-
- if (lowpan_fetch_skb_u8(skb, &iphc1))
- return -EINVAL;
-
- return lowpan_header_decompress(skb, netdev,
- saddr, IEEE802154_ADDR_LONG,
- EUI64_ADDR_LEN, daddr,
- IEEE802154_ADDR_LONG, EUI64_ADDR_LEN,
- iphc0, iphc1);
-
+ return lowpan_header_decompress(skb, netdev, daddr, saddr);
}
static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
@@ -314,15 +296,17 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
if (!netif_running(dev))
goto drop;
- if (dev->type != ARPHRD_6LOWPAN)
+ if (dev->type != ARPHRD_6LOWPAN || !skb->len)
goto drop;
+ skb_reset_network_header(skb);
+
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
goto drop;
/* check that it's our buffer */
- if (skb->data[0] == LOWPAN_DISPATCH_IPV6) {
+ if (lowpan_is_ipv6(*skb_network_header(skb))) {
/* Copy the packet so that the IPv6 header is
* properly aligned.
*/
@@ -334,7 +318,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
local_skb->protocol = htons(ETH_P_IPV6);
local_skb->pkt_type = PACKET_HOST;
- skb_reset_network_header(local_skb);
skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) {
@@ -347,38 +330,34 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
consume_skb(local_skb);
consume_skb(skb);
- } else {
- switch (skb->data[0] & 0xe0) {
- case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
- local_skb = skb_clone(skb, GFP_ATOMIC);
- if (!local_skb)
- goto drop;
+ } else if (lowpan_is_iphc(*skb_network_header(skb))) {
+ local_skb = skb_clone(skb, GFP_ATOMIC);
+ if (!local_skb)
+ goto drop;
- ret = iphc_decompress(local_skb, dev, chan);
- if (ret < 0) {
- kfree_skb(local_skb);
- goto drop;
- }
+ ret = iphc_decompress(local_skb, dev, chan);
+ if (ret < 0) {
+ kfree_skb(local_skb);
+ goto drop;
+ }
- local_skb->protocol = htons(ETH_P_IPV6);
- local_skb->pkt_type = PACKET_HOST;
- local_skb->dev = dev;
+ local_skb->protocol = htons(ETH_P_IPV6);
+ local_skb->pkt_type = PACKET_HOST;
+ local_skb->dev = dev;
- if (give_skb_to_upper(local_skb, dev)
- != NET_RX_SUCCESS) {
- kfree_skb(local_skb);
- goto drop;
- }
+ if (give_skb_to_upper(local_skb, dev)
+ != NET_RX_SUCCESS) {
+ kfree_skb(local_skb);
+ goto drop;
+ }
- dev->stats.rx_bytes += skb->len;
- dev->stats.rx_packets++;
+ dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_packets++;
- consume_skb(local_skb);
- consume_skb(skb);
- break;
- default:
- break;
- }
+ consume_skb(local_skb);
+ consume_skb(skb);
+ } else {
+ goto drop;
}
return NET_RX_SUCCESS;
@@ -492,8 +471,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev,
status = 1;
}
- lowpan_header_compress(skb, netdev, ETH_P_IPV6, daddr,
- dev->netdev->dev_addr, skb->len);
+ lowpan_header_compress(skb, netdev, daddr, dev->netdev->dev_addr);
err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0);
if (err < 0)
@@ -1135,7 +1113,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
return -ENOENT;
hci_dev_lock(hdev);
- hcon = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr);
+ hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
hci_dev_unlock(hdev);
if (!hcon)
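recv_pkt() now branches on lowpan_is_ipv6() and lowpan_is_iphc() applied to the first byte of the frame, replacing the old switch on (skb->data[0] & 0xe0). Assuming the usual RFC 4944/6282 dispatch values (0x41 for an uncompressed IPv6 datagram, 011xxxxx for IPHC), a standalone sketch of those checks:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_ipv6_dispatch(uint8_t b)
{
	return b == 0x41;		/* 01 000001: uncompressed IPv6 */
}

static bool is_iphc_dispatch(uint8_t b)
{
	return (b & 0xe0) == 0x60;	/* 011 xxxxx: IPHC compressed   */
}

int main(void)
{
	const uint8_t frame[] = { 0x7a, 0x33 /* ... IPHC payload ... */ };

	if (is_ipv6_dispatch(frame[0]))
		printf("uncompressed IPv6\n");
	else if (is_iphc_dispatch(frame[0]))
		printf("IPHC compressed\n");	/* taken: 0x7a = 0111 1010 */
	else
		printf("drop\n");
	return 0;
}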
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 70f9d94..a3bffd1 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -33,7 +33,7 @@
#include "selftest.h"
-#define VERSION "2.20"
+#define VERSION "2.21"
/* Bluetooth sockets */
#define BT_MAX_PROTO 8
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
BT_DBG("sock %p sk %p len %zu", sock, sk, len);
- if (flags & (MSG_OOB))
+ if (flags & MSG_OOB)
return -EOPNOTSUPP;
skb = skb_recv_datagram(sk, flags, noblock, &err);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 2dda439..85b82f7 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -59,15 +59,11 @@ static const struct sco_param esco_param_msbc[] = {
{ EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */
};
-static void hci_le_create_connection_cancel(struct hci_conn *conn)
-{
- hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
-}
-
/* This function requires the caller holds hdev->lock */
static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
{
struct hci_conn_params *params;
+ struct hci_dev *hdev = conn->hdev;
struct smp_irk *irk;
bdaddr_t *bdaddr;
u8 bdaddr_type;
@@ -76,14 +72,15 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
bdaddr_type = conn->dst_type;
/* Check if we need to convert to identity address */
- irk = hci_get_irk(conn->hdev, bdaddr, bdaddr_type);
+ irk = hci_get_irk(hdev, bdaddr, bdaddr_type);
if (irk) {
bdaddr = &irk->bdaddr;
bdaddr_type = irk->addr_type;
}
- params = hci_explicit_connect_lookup(conn->hdev, bdaddr, bdaddr_type);
- if (!params)
+ params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr,
+ bdaddr_type);
+ if (!params || !params->explicit_connect)
return;
/* The connection attempt was doing scan for new RPA, and is
@@ -97,21 +94,21 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
switch (params->auto_connect) {
case HCI_AUTO_CONN_EXPLICIT:
- hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type);
+ hci_conn_params_del(hdev, bdaddr, bdaddr_type);
/* return instead of break to avoid duplicate scan update */
return;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&params->action, &conn->hdev->pend_le_conns);
+ list_add(&params->action, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&params->action, &conn->hdev->pend_le_reports);
+ list_add(&params->action, &hdev->pend_le_reports);
break;
default:
break;
}
- hci_update_background_scan(conn->hdev);
+ hci_update_background_scan(hdev);
}
static void hci_conn_cleanup(struct hci_conn *conn)
@@ -137,18 +134,51 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hci_conn_put(conn);
}
-/* This function requires the caller holds hdev->lock */
-static void hci_connect_le_scan_remove(struct hci_conn *conn)
+static void le_scan_cleanup(struct work_struct *work)
{
- hci_connect_le_scan_cleanup(conn);
+ struct hci_conn *conn = container_of(work, struct hci_conn,
+ le_scan_cleanup);
+ struct hci_dev *hdev = conn->hdev;
+ struct hci_conn *c = NULL;
- /* We can't call hci_conn_del here since that would deadlock
- * with trying to call cancel_delayed_work_sync(&conn->disc_work).
- * Instead, call just hci_conn_cleanup() which contains the bare
- * minimum cleanup operations needed for a connection in this
- * state.
+ BT_DBG("%s hcon %p", hdev->name, conn);
+
+ hci_dev_lock(hdev);
+
+ /* Check that the hci_conn is still around */
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) {
+ if (c == conn)
+ break;
+ }
+ rcu_read_unlock();
+
+ if (c == conn) {
+ hci_connect_le_scan_cleanup(conn);
+ hci_conn_cleanup(conn);
+ }
+
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
+ hci_conn_put(conn);
+}
+
+static void hci_connect_le_scan_remove(struct hci_conn *conn)
+{
+ BT_DBG("%s hcon %p", conn->hdev->name, conn);
+
+ /* We can't call hci_conn_del/hci_conn_cleanup here since that
+ * could deadlock with another hci_conn_del() call that's holding
+ * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work).
+ * Instead, grab temporary extra references to the hci_dev and
+ * hci_conn and perform the necessary cleanup in a separate work
+ * callback.
*/
- hci_conn_cleanup(conn);
+
+ hci_dev_hold(conn->hdev);
+ hci_conn_get(conn);
+
+ schedule_work(&conn->le_scan_cleanup);
}
static void hci_acl_create_connection(struct hci_conn *conn)
@@ -194,33 +224,8 @@ static void hci_acl_create_connection(struct hci_conn *conn)
hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp);
}
-static void hci_acl_create_connection_cancel(struct hci_conn *conn)
-{
- struct hci_cp_create_conn_cancel cp;
-
- BT_DBG("hcon %p", conn);
-
- if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2)
- return;
-
- bacpy(&cp.bdaddr, &conn->dst);
- hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
-}
-
-static void hci_reject_sco(struct hci_conn *conn)
-{
- struct hci_cp_reject_sync_conn_req cp;
-
- cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
- bacpy(&cp.bdaddr, &conn->dst);
-
- hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp);
-}
-
int hci_disconnect(struct hci_conn *conn, __u8 reason)
{
- struct hci_cp_disconnect cp;
-
BT_DBG("hcon %p", conn);
/* When we are master of an established connection and it enters
@@ -228,7 +233,8 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
* current clock offset. Processing of the result is done
* within the event handling and hci_clock_offset_evt function.
*/
- if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER) {
+ if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER &&
+ (conn->state == BT_CONNECTED || conn->state == BT_CONFIG)) {
struct hci_dev *hdev = conn->hdev;
struct hci_cp_read_clock_offset clkoff_cp;
@@ -237,25 +243,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
&clkoff_cp);
}
- conn->state = BT_DISCONN;
-
- cp.handle = cpu_to_le16(conn->handle);
- cp.reason = reason;
- return hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
-}
-
-static void hci_amp_disconn(struct hci_conn *conn)
-{
- struct hci_cp_disconn_phy_link cp;
-
- BT_DBG("hcon %p", conn);
-
- conn->state = BT_DISCONN;
-
- cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
- cp.reason = hci_proto_disconn_ind(conn);
- hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp);
+ return hci_abort_conn(conn, reason);
}
static void hci_add_sco(struct hci_conn *conn, __u16 handle)
@@ -421,35 +409,14 @@ static void hci_conn_timeout(struct work_struct *work)
if (refcnt > 0)
return;
- switch (conn->state) {
- case BT_CONNECT:
- case BT_CONNECT2:
- if (conn->out) {
- if (conn->type == ACL_LINK)
- hci_acl_create_connection_cancel(conn);
- else if (conn->type == LE_LINK) {
- if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- hci_connect_le_scan_remove(conn);
- else
- hci_le_create_connection_cancel(conn);
- }
- } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
- hci_reject_sco(conn);
- }
- break;
- case BT_CONFIG:
- case BT_CONNECTED:
- if (conn->type == AMP_LINK) {
- hci_amp_disconn(conn);
- } else {
- __u8 reason = hci_proto_disconn_ind(conn);
- hci_disconnect(conn, reason);
- }
- break;
- default:
- conn->state = BT_CLOSED;
- break;
+ /* LE connections in scanning state need special handling */
+ if (conn->state == BT_CONNECT && conn->type == LE_LINK &&
+ test_bit(HCI_CONN_SCANNING, &conn->flags)) {
+ hci_connect_le_scan_remove(conn);
+ return;
}
+
+ hci_abort_conn(conn, hci_proto_disconn_ind(conn));
}
/* Enter sniff mode */
@@ -517,7 +484,7 @@ static void le_conn_timeout(struct work_struct *work)
return;
}
- hci_le_create_connection_cancel(conn);
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
}
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
@@ -580,6 +547,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
+ INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup);
atomic_set(&conn->refcnt, 0);
@@ -835,7 +803,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
* attempt, we simply update pending_sec_level and auth_type fields
* and return the object found.
*/
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type);
conn_unfinished = NULL;
if (conn) {
if (conn->state == BT_CONNECT &&
@@ -985,13 +953,10 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
{
struct hci_conn *conn;
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr);
+ conn = hci_conn_hash_lookup_le(hdev, addr, type);
if (!conn)
return false;
- if (conn->dst_type != type)
- return false;
-
if (conn->state != BT_CONNECTED)
return false;
@@ -1064,7 +1029,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
* attempt, we simply update pending_sec_level and auth_type fields
* and return the object found.
*/
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type);
if (conn) {
if (conn->pending_sec_level < sec_level)
conn->pending_sec_level = sec_level;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e4e53bd..83a6aac 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -65,13 +65,6 @@ static DEFINE_IDA(hci_index_ida);
#define hci_req_lock(d) mutex_lock(&d->req_lock)
#define hci_req_unlock(d) mutex_unlock(&d->req_lock)
-/* ---- HCI notifications ---- */
-
-static void hci_notify(struct hci_dev *hdev, int event)
-{
- hci_sock_dev_event(hdev, event);
-}
-
/* ---- HCI debugfs entries ---- */
static ssize_t dut_mode_read(struct file *file, char __user *user_buf,
@@ -162,6 +155,16 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
if (strtobool(buf, &enable))
return -EINVAL;
+ /* When the diagnostic flags are not persistent and the transport
+ * is not active, then there is no need for the vendor callback.
+ *
+ * Instead just store the desired value. If needed the setting
+ * will be programmed when the controller gets powered on.
+ */
+ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ !test_bit(HCI_RUNNING, &hdev->flags))
+ goto done;
+
hci_req_lock(hdev);
err = hdev->set_diag(hdev, enable);
hci_req_unlock(hdev);
@@ -169,6 +172,7 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
if (err < 0)
return err;
+done:
if (enable)
hci_dev_set_flag(hdev, HCI_VENDOR_DIAG);
else
@@ -1444,12 +1448,14 @@ static int hci_dev_do_open(struct hci_dev *hdev)
}
set_bit(HCI_RUNNING, &hdev->flags);
- hci_notify(hdev, HCI_DEV_OPEN);
+ hci_sock_dev_event(hdev, HCI_DEV_OPEN);
atomic_set(&hdev->cmd_cnt, 1);
set_bit(HCI_INIT, &hdev->flags);
if (hci_dev_test_flag(hdev, HCI_SETUP)) {
+ hci_sock_dev_event(hdev, HCI_DEV_SETUP);
+
if (hdev->setup)
ret = hdev->setup(hdev);
@@ -1490,17 +1496,28 @@ static int hci_dev_do_open(struct hci_dev *hdev)
if (!ret) {
if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
- !hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
+ !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
ret = __hci_init(hdev);
+ if (!ret && hdev->post_init)
+ ret = hdev->post_init(hdev);
+ }
}
+ /* If the HCI Reset command is clearing all diagnostic settings,
+ * then they need to be reprogrammed after the init procedure
+ * completed.
+ */
+ if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
+ ret = hdev->set_diag(hdev, true);
+
clear_bit(HCI_INIT, &hdev->flags);
if (!ret) {
hci_dev_hold(hdev);
hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
set_bit(HCI_UP, &hdev->flags);
- hci_notify(hdev, HCI_DEV_UP);
+ hci_sock_dev_event(hdev, HCI_DEV_UP);
if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
!hci_dev_test_flag(hdev, HCI_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
@@ -1528,7 +1545,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
}
clear_bit(HCI_RUNNING, &hdev->flags);
- hci_notify(hdev, HCI_DEV_CLOSE);
+ hci_sock_dev_event(hdev, HCI_DEV_CLOSE);
hdev->close(hdev);
hdev->flags &= BIT(HCI_RAW);
@@ -1684,7 +1701,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
smp_unregister(hdev);
- hci_notify(hdev, HCI_DEV_DOWN);
+ hci_sock_dev_event(hdev, HCI_DEV_DOWN);
if (hdev->flush)
hdev->flush(hdev);
@@ -1715,7 +1732,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
}
clear_bit(HCI_RUNNING, &hdev->flags);
- hci_notify(hdev, HCI_DEV_CLOSE);
+ hci_sock_dev_event(hdev, HCI_DEV_CLOSE);
/* After this point our queues are empty
* and no tasks are scheduled. */
@@ -2917,23 +2934,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
}
/* This function requires the caller holds hdev->lock */
-struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev,
- bdaddr_t *addr,
- u8 addr_type)
-{
- struct hci_conn_params *param;
-
- list_for_each_entry(param, &hdev->pend_le_conns, action) {
- if (bacmp(&param->addr, addr) == 0 &&
- param->addr_type == addr_type &&
- param->explicit_connect)
- return param;
- }
-
- return NULL;
-}
-
-/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
{
@@ -3407,7 +3407,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
- hci_notify(hdev, HCI_DEV_REG);
+ hci_sock_dev_event(hdev, HCI_DEV_REG);
hci_dev_hold(hdev);
queue_work(hdev->req_workqueue, &hdev->power_on);
@@ -3455,7 +3455,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
* pending list */
BUG_ON(!list_empty(&hdev->mgmt_pending));
- hci_notify(hdev, HCI_DEV_UNREG);
+ hci_sock_dev_event(hdev, HCI_DEV_UNREG);
if (hdev->rfkill) {
rfkill_unregister(hdev->rfkill);
@@ -3492,7 +3492,7 @@ EXPORT_SYMBOL(hci_unregister_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
{
- hci_notify(hdev, HCI_DEV_SUSPEND);
+ hci_sock_dev_event(hdev, HCI_DEV_SUSPEND);
return 0;
}
EXPORT_SYMBOL(hci_suspend_dev);
@@ -3500,7 +3500,7 @@ EXPORT_SYMBOL(hci_suspend_dev);
/* Resume HCI device */
int hci_resume_dev(struct hci_dev *hdev)
{
- hci_notify(hdev, HCI_DEV_RESUME);
+ hci_sock_dev_event(hdev, HCI_DEV_RESUME);
return 0;
}
EXPORT_SYMBOL(hci_resume_dev);
@@ -3555,14 +3555,15 @@ EXPORT_SYMBOL(hci_recv_frame);
/* Receive diagnostic message from HCI drivers */
int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
{
+ /* Mark as diagnostic packet */
+ bt_cb(skb)->pkt_type = HCI_DIAG_PKT;
+
/* Time stamp */
__net_timestamp(skb);
- /* Mark as diagnostic packet and send to monitor */
- bt_cb(skb)->pkt_type = HCI_DIAG_PKT;
- hci_send_to_monitor(hdev, skb);
+ skb_queue_tail(&hdev->rx_q, skb);
+ queue_work(hdev->workqueue, &hdev->rx_work);
- kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL(hci_recv_diag);
@@ -3642,7 +3643,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->hci.req_start = true;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -4339,7 +4340,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev)
if (!skb)
return true;
- return bt_cb(skb)->req.start;
+ return bt_cb(skb)->hci.req_start;
}
static void hci_resend_last(struct hci_dev *hdev)
@@ -4399,26 +4400,26 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
* callback would be found in hdev->sent_cmd instead of the
* command queue (hdev->cmd_q).
*/
- if (bt_cb(hdev->sent_cmd)->req.complete) {
- *req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+ if (bt_cb(hdev->sent_cmd)->hci.req_complete) {
+ *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete;
return;
}
- if (bt_cb(hdev->sent_cmd)->req.complete_skb) {
- *req_complete_skb = bt_cb(hdev->sent_cmd)->req.complete_skb;
+ if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) {
+ *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb;
return;
}
/* Remove all pending commands belonging to this request */
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
while ((skb = __skb_dequeue(&hdev->cmd_q))) {
- if (bt_cb(skb)->req.start) {
+ if (bt_cb(skb)->hci.req_start) {
__skb_queue_head(&hdev->cmd_q, skb);
break;
}
- *req_complete = bt_cb(skb)->req.complete;
- *req_complete_skb = bt_cb(skb)->req.complete_skb;
+ *req_complete = bt_cb(skb)->hci.req_complete;
+ *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
kfree_skb(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b4571d8..d57c11c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1915,7 +1915,8 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
hci_dev_lock(hdev);
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
+ conn = hci_conn_hash_lookup_le(hdev, &cp->peer_addr,
+ cp->peer_addr_type);
if (!conn)
goto unlock;
@@ -3137,7 +3138,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
* complete event).
*/
if (ev->status ||
- (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
+ (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->hci.req_event))
hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
req_complete_skb);
@@ -5208,7 +5209,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
u8 status = 0, event = hdr->evt, req_evt = 0;
u16 opcode = HCI_OP_NOP;
- if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
+ if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) {
struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data;
opcode = __le16_to_cpu(cmd_hdr->opcode);
hci_req_cmd_complete(hdev, opcode, status, &req_complete,
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b736922..981f8a2 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -56,8 +56,8 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete,
return -ENODATA;
skb = skb_peek_tail(&req->cmd_q);
- bt_cb(skb)->req.complete = complete;
- bt_cb(skb)->req.complete_skb = complete_skb;
+ bt_cb(skb)->hci.req_complete = complete;
+ bt_cb(skb)->hci.req_complete_skb = complete_skb;
spin_lock_irqsave(&hdev->cmd_q.lock, flags);
skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
@@ -99,7 +99,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
BT_DBG("skb len %d", skb->len);
bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
- bt_cb(skb)->opcode = opcode;
+ bt_cb(skb)->hci.opcode = opcode;
return skb;
}
@@ -128,9 +128,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
}
if (skb_queue_empty(&req->cmd_q))
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->hci.req_start = true;
- bt_cb(skb)->req.event = event;
+ bt_cb(skb)->hci.req_event = event;
skb_queue_tail(&req->cmd_q, skb);
}
@@ -564,3 +564,96 @@ void hci_update_background_scan(struct hci_dev *hdev)
if (err && err != -ENODATA)
BT_ERR("Failed to run HCI request: err %d", err);
}
+
+void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
+ u8 reason)
+{
+ switch (conn->state) {
+ case BT_CONNECTED:
+ case BT_CONFIG:
+ if (conn->type == AMP_LINK) {
+ struct hci_cp_disconn_phy_link cp;
+
+ cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
+ cp.reason = reason;
+ hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp),
+ &cp);
+ } else {
+ struct hci_cp_disconnect dc;
+
+ dc.handle = cpu_to_le16(conn->handle);
+ dc.reason = reason;
+ hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc);
+ }
+
+ conn->state = BT_DISCONN;
+
+ break;
+ case BT_CONNECT:
+ if (conn->type == LE_LINK) {
+ if (test_bit(HCI_CONN_SCANNING, &conn->flags))
+ break;
+ hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL,
+ 0, NULL);
+ } else if (conn->type == ACL_LINK) {
+ if (req->hdev->hci_ver < BLUETOOTH_VER_1_2)
+ break;
+ hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL,
+ 6, &conn->dst);
+ }
+ break;
+ case BT_CONNECT2:
+ if (conn->type == ACL_LINK) {
+ struct hci_cp_reject_conn_req rej;
+
+ bacpy(&rej.bdaddr, &conn->dst);
+ rej.reason = reason;
+
+ hci_req_add(req, HCI_OP_REJECT_CONN_REQ,
+ sizeof(rej), &rej);
+ } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
+ struct hci_cp_reject_sync_conn_req rej;
+
+ bacpy(&rej.bdaddr, &conn->dst);
+
+ /* SCO rejection has its own limited set of
+ * allowed error values (0x0D-0x0F) which isn't
+ * compatible with most values passed to this
+ * function. To be safe hard-code one of the
+ * values that's suitable for SCO.
+ */
+ rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES;
+
+ hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ,
+ sizeof(rej), &rej);
+ }
+ break;
+ default:
+ conn->state = BT_CLOSED;
+ break;
+ }
+}
+
+static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
+{
+ if (status)
+ BT_DBG("Failed to abort connection: status 0x%2.2x", status);
+}
+
+int hci_abort_conn(struct hci_conn *conn, u8 reason)
+{
+ struct hci_request req;
+ int err;
+
+ hci_req_init(&req, conn->hdev);
+
+ __hci_abort_conn(&req, conn, reason);
+
+ err = hci_req_run(&req, abort_conn_complete);
+ if (err && err != -ENODATA) {
+ BT_ERR("Failed to run HCI request: err %d", err);
+ return err;
+ }
+
+ return 0;
+}
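The hci_abort_conn()/__hci_abort_conn() helpers added above centralise the state-dependent teardown that hci_conn.c and mgmt.c used to open-code. As a rough, standalone illustration of the dispatch only (command names are printed as strings; this is not the in-kernel API), a userspace sketch:

#include <stdio.h>

enum conn_state { BT_CONNECTED, BT_CONFIG, BT_CONNECT, BT_CONNECT2, BT_OTHER };

/* Illustrative only: pick the HCI command used to abort a connection
 * based on how far it has progressed, mirroring __hci_abort_conn(). */
static const char *abort_cmd(enum conn_state state)
{
	switch (state) {
	case BT_CONNECTED:
	case BT_CONFIG:
		return "HCI_OP_DISCONNECT";
	case BT_CONNECT:
		return "HCI_OP_CREATE_CONN_CANCEL";
	case BT_CONNECT2:
		return "HCI_OP_REJECT_CONN_REQ";
	default:
		return "no command, state goes straight to BT_CLOSED";
	}
}

int main(void)
{
	printf("BT_CONNECT2 -> %s\n", abort_cmd(BT_CONNECT2));
	return 0;
}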
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index bf6df92..25c7f13 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -55,3 +55,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
void hci_update_background_scan(struct hci_dev *hdev);
void __hci_update_background_scan(struct hci_request *req);
+
+int hci_abort_conn(struct hci_conn *conn, u8 reason);
+void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
+ u8 reason);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 9a100c1..b1eb8c0 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -120,10 +120,7 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb)
/* Apply filter */
flt = &hci_pi(sk)->filter;
- if (bt_cb(skb)->pkt_type == HCI_VENDOR_PKT)
- flt_type = 0;
- else
- flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS;
+ flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS;
if (!test_bit(flt_type, &flt->type_mask))
return true;
@@ -173,6 +170,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
continue;
if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) {
+ if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT &&
+ bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
+ bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
+ bt_cb(skb)->pkt_type != HCI_SCODATA_PKT)
+ continue;
if (is_filtered_packet(sk, skb))
continue;
} else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
@@ -333,6 +335,12 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
opcode = cpu_to_le16(HCI_MON_DEL_INDEX);
break;
+ case HCI_DEV_SETUP:
+ if (hdev->manufacturer == 0xffff)
+ return NULL;
+
+ /* fall through */
+
case HCI_DEV_UP:
skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC);
if (!skb)
@@ -401,15 +409,17 @@ static void send_monitor_replay(struct sock *sk)
if (sock_queue_rcv_skb(sk, skb))
kfree_skb(skb);
- if (!test_bit(HCI_UP, &hdev->flags))
- continue;
-
- skb = create_monitor_event(hdev, HCI_DEV_UP);
- if (!skb)
- continue;
+ if (test_bit(HCI_UP, &hdev->flags))
+ skb = create_monitor_event(hdev, HCI_DEV_UP);
+ else if (hci_dev_test_flag(hdev, HCI_SETUP))
+ skb = create_monitor_event(hdev, HCI_DEV_SETUP);
+ else
+ skb = NULL;
- if (sock_queue_rcv_skb(sk, skb))
- kfree_skb(skb);
+ if (skb) {
+ if (sock_queue_rcv_skb(sk, skb))
+ kfree_skb(skb);
+ }
}
read_unlock(&hci_dev_list_lock);
@@ -991,7 +1001,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
BT_DBG("sock %p, sk %p", sock, sk);
- if (flags & (MSG_OOB))
+ if (flags & MSG_OOB)
return -EOPNOTSUPP;
if (sk->sk_state == BT_CLOSED)
@@ -1239,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
/* Stand-alone HCI commands must be flagged as
* single-command requests.
*/
- bt_cb(skb)->req.start = true;
+ bt_cb(skb)->hci.req_start = true;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -1250,6 +1260,12 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto drop;
}
+ if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
+ bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
+ err = -EINVAL;
+ goto drop;
+ }
+
skb_queue_tail(&hdev->raw_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f1a117f..0bec458 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg)
{
struct hidp_session *session = (struct hidp_session *) arg;
+ /* The HIDP user-space API only contains calls to add and remove
+ * devices. There is no way to forward events of any kind. Therefore,
+ * we have to forcefully disconnect a device on idle-timeouts. This is
+ * unfortunate and weird API design, but it is spec-compliant and
+ * required for backwards-compatibility. Hence, on idle-timeout, we
+ * signal driver-detach events, so poll() will be woken up with an
+ * error-condition on both sockets.
+ */
+
+ session->intr_sock->sk->sk_err = EUNATCH;
+ session->ctrl_sock->sk->sk_err = EUNATCH;
+ wake_up_interruptible(sk_sleep(session->intr_sock->sk));
+ wake_up_interruptible(sk_sleep(session->ctrl_sock->sk));
+
hidp_session_terminate(session);
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 586b3d5..1bb5515 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1111,53 +1111,76 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
if (!sk)
return 0;
+ lock_sock(sk);
+
+ if (sk->sk_shutdown)
+ goto shutdown_already;
+
+ BT_DBG("Handling sock shutdown");
+
/* prevent sk structure from being freed whilst unlocked */
sock_hold(sk);
chan = l2cap_pi(sk)->chan;
/* prevent chan structure from being freed whilst unlocked */
l2cap_chan_hold(chan);
- conn = chan->conn;
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
+ if (chan->mode == L2CAP_MODE_ERTM &&
+ chan->unacked_frames > 0 &&
+ chan->state == BT_CONNECTED) {
+ err = __l2cap_wait_ack(sk, chan);
+
+ /* After waiting for ACKs, check whether shutdown
+ * has already been actioned to close the L2CAP
+ * link such as by l2cap_disconnection_req().
+ */
+ if (sk->sk_shutdown)
+ goto has_shutdown;
+ }
+
+ sk->sk_shutdown = SHUTDOWN_MASK;
+ release_sock(sk);
+
+ l2cap_chan_lock(chan);
+ conn = chan->conn;
+ if (conn)
+ /* prevent conn structure from being freed */
+ l2cap_conn_get(conn);
+ l2cap_chan_unlock(chan);
+
if (conn)
+ /* mutex lock must be taken before l2cap_chan_lock() */
mutex_lock(&conn->chan_lock);
l2cap_chan_lock(chan);
- lock_sock(sk);
+ l2cap_chan_close(chan, 0);
+ l2cap_chan_unlock(chan);
- if (!sk->sk_shutdown) {
- if (chan->mode == L2CAP_MODE_ERTM &&
- chan->unacked_frames > 0 &&
- chan->state == BT_CONNECTED)
- err = __l2cap_wait_ack(sk, chan);
+ if (conn) {
+ mutex_unlock(&conn->chan_lock);
+ l2cap_conn_put(conn);
+ }
- sk->sk_shutdown = SHUTDOWN_MASK;
+ lock_sock(sk);
- release_sock(sk);
- l2cap_chan_close(chan, 0);
- lock_sock(sk);
+ if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ !(current->flags & PF_EXITING))
+ err = bt_sock_wait_state(sk, BT_CLOSED,
+ sk->sk_lingertime);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
- !(current->flags & PF_EXITING))
- err = bt_sock_wait_state(sk, BT_CLOSED,
- sk->sk_lingertime);
- }
+has_shutdown:
+ l2cap_chan_put(chan);
+ sock_put(sk);
+shutdown_already:
if (!err && sk->sk_err)
err = -sk->sk_err;
release_sock(sk);
- l2cap_chan_unlock(chan);
-
- if (conn)
- mutex_unlock(&conn->chan_lock);
-
- l2cap_chan_put(chan);
- sock_put(sk);
- BT_DBG("err: %d", err);
+ BT_DBG("Sock shutdown complete err: %d", err);
return err;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c4fe2fe..7f22119 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -268,6 +268,14 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len,
HCI_SOCK_TRUSTED, skip_sk);
}
+static u8 le_addr_type(u8 mgmt_addr_type)
+{
+ if (mgmt_addr_type == BDADDR_LE_PUBLIC)
+ return ADDR_LE_DEV_PUBLIC;
+ else
+ return ADDR_LE_DEV_RANDOM;
+}
+
static int read_version(struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len)
{
@@ -1631,35 +1639,8 @@ static int clean_up_hci_state(struct hci_dev *hdev)
discov_stopped = hci_stop_discovery(&req);
list_for_each_entry(conn, &hdev->conn_hash.list, list) {
- struct hci_cp_disconnect dc;
- struct hci_cp_reject_conn_req rej;
-
- switch (conn->state) {
- case BT_CONNECTED:
- case BT_CONFIG:
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = 0x15; /* Terminated due to Power Off */
- hci_req_add(&req, HCI_OP_DISCONNECT, sizeof(dc), &dc);
- break;
- case BT_CONNECT:
- if (conn->type == LE_LINK)
- hci_req_add(&req, HCI_OP_LE_CREATE_CONN_CANCEL,
- 0, NULL);
- else if (conn->type == ACL_LINK)
- hci_req_add(&req, HCI_OP_CREATE_CONN_CANCEL,
- 6, &conn->dst);
- break;
- case BT_CONNECT2:
- bacpy(&rej.bdaddr, &conn->dst);
- rej.reason = 0x15; /* Terminated due to Power Off */
- if (conn->type == ACL_LINK)
- hci_req_add(&req, HCI_OP_REJECT_CONN_REQ,
- sizeof(rej), &rej);
- else if (conn->type == SCO_LINK)
- hci_req_add(&req, HCI_OP_REJECT_SYNC_CONN_REQ,
- sizeof(rej), &rej);
- break;
- }
+ /* 0x15 == Terminated due to Power Off */
+ __hci_abort_conn(&req, conn, 0x15);
}
err = hci_req_run(&req, clean_up_hci_complete);
@@ -3044,9 +3025,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_unpair_device *cp = data;
struct mgmt_rp_unpair_device rp;
- struct hci_cp_disconnect dc;
+ struct hci_conn_params *params;
struct mgmt_pending_cmd *cmd;
struct hci_conn *conn;
+ u8 addr_type;
int err;
memset(&rp, 0, sizeof(rp));
@@ -3087,36 +3069,23 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
conn = NULL;
err = hci_remove_link_key(hdev, &cp->addr.bdaddr);
- } else {
- u8 addr_type;
-
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK,
- &cp->addr.bdaddr);
- if (conn) {
- /* Defer clearing up the connection parameters
- * until closing to give a chance of keeping
- * them if a repairing happens.
- */
- set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
-
- /* If disconnection is not requested, then
- * clear the connection variable so that the
- * link is not terminated.
- */
- if (!cp->disconnect)
- conn = NULL;
+ if (err < 0) {
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_UNPAIR_DEVICE,
+ MGMT_STATUS_NOT_PAIRED, &rp,
+ sizeof(rp));
+ goto unlock;
}
- if (cp->addr.type == BDADDR_LE_PUBLIC)
- addr_type = ADDR_LE_DEV_PUBLIC;
- else
- addr_type = ADDR_LE_DEV_RANDOM;
+ goto done;
+ }
- hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
+ /* LE address type */
+ addr_type = le_addr_type(cp->addr.type);
- err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);
- }
+ hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
+ err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);
if (err < 0) {
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE,
MGMT_STATUS_NOT_PAIRED, &rp,
@@ -3124,6 +3093,36 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
+ conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, addr_type);
+ if (!conn) {
+ hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type);
+ goto done;
+ }
+
+ /* Abort any ongoing SMP pairing */
+ smp_cancel_pairing(conn);
+
+ /* Defer clearing up the connection parameters until closing to
+ * give a chance of keeping them if re-pairing happens.
+ */
+ set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
+
+ /* Disable auto-connection parameters if present */
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type);
+ if (params) {
+ if (params->explicit_connect)
+ params->auto_connect = HCI_AUTO_CONN_EXPLICIT;
+ else
+ params->auto_connect = HCI_AUTO_CONN_DISABLED;
+ }
+
+ /* If disconnection is not requested, then clear the connection
+ * variable so that the link is not terminated.
+ */
+ if (!cp->disconnect)
+ conn = NULL;
+
+done:
/* If the connection variable is set, then termination of the
* link is requested.
*/
@@ -3143,9 +3142,7 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
cmd->cmd_complete = addr_cmd_complete;
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = 0x13; /* Remote User Terminated Connection */
- err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc);
+ err = hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -3193,7 +3190,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
&cp->addr.bdaddr);
else
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
+ conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) {
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT,
@@ -3544,16 +3542,9 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level,
auth_type);
} else {
- u8 addr_type;
+ u8 addr_type = le_addr_type(cp->addr.type);
struct hci_conn_params *p;
- /* Convert from L2CAP channel address type to HCI address type
- */
- if (cp->addr.type == BDADDR_LE_PUBLIC)
- addr_type = ADDR_LE_DEV_PUBLIC;
- else
- addr_type = ADDR_LE_DEV_RANDOM;
-
/* When pairing a new device, it is expected to remember
* this device for future connections. Adding the connection
* parameter information ahead of time allows tracking
@@ -3697,7 +3688,8 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
if (addr->type == BDADDR_BREDR)
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);
else
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr);
+ conn = hci_conn_hash_lookup_le(hdev, &addr->bdaddr,
+ le_addr_type(addr->type));
if (!conn) {
err = mgmt_cmd_complete(sk, hdev->id, mgmt_op,
@@ -5600,14 +5592,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
for (i = 0; i < irk_count; i++) {
struct mgmt_irk_info *irk = &cp->irks[i];
- u8 addr_type;
-
- if (irk->addr.type == BDADDR_LE_PUBLIC)
- addr_type = ADDR_LE_DEV_PUBLIC;
- else
- addr_type = ADDR_LE_DEV_RANDOM;
- hci_add_irk(hdev, &irk->addr.bdaddr, addr_type, irk->val,
+ hci_add_irk(hdev, &irk->addr.bdaddr,
+ le_addr_type(irk->addr.type), irk->val,
BDADDR_ANY);
}
@@ -5687,12 +5674,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
for (i = 0; i < key_count; i++) {
struct mgmt_ltk_info *key = &cp->keys[i];
- u8 type, addr_type, authenticated;
-
- if (key->addr.type == BDADDR_LE_PUBLIC)
- addr_type = ADDR_LE_DEV_PUBLIC;
- else
- addr_type = ADDR_LE_DEV_RANDOM;
+ u8 type, authenticated;
switch (key->type) {
case MGMT_LTK_UNAUTHENTICATED:
@@ -5718,9 +5700,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
continue;
}
- hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type,
- authenticated, key->val, key->enc_size, key->ediv,
- key->rand);
+ hci_add_ltk(hdev, &key->addr.bdaddr,
+ le_addr_type(key->addr.type), type, authenticated,
+ key->val, key->enc_size, key->ediv, key->rand);
}
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0,
@@ -6232,10 +6214,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
goto added;
}
- if (cp->addr.type == BDADDR_LE_PUBLIC)
- addr_type = ADDR_LE_DEV_PUBLIC;
- else
- addr_type = ADDR_LE_DEV_RANDOM;
+ addr_type = le_addr_type(cp->addr.type);
if (cp->action == 0x02)
auto_conn = HCI_AUTO_CONN_ALWAYS;
@@ -6364,10 +6343,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
goto complete;
}
- if (cp->addr.type == BDADDR_LE_PUBLIC)
- addr_type = ADDR_LE_DEV_PUBLIC;
- else
- addr_type = ADDR_LE_DEV_RANDOM;
+ addr_type = le_addr_type(cp->addr.type);
/* Kernel internally uses conn_params with resolvable private
* address, but Remove Device allows only identity addresses.
@@ -7873,27 +7849,13 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL);
}
-void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk)
+void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent)
{
struct mgmt_ev_new_irk ev;
memset(&ev, 0, sizeof(ev));
- /* For identity resolving keys from devices that are already
- * using a public address or static random address, do not
- * ask for storing this key. The identity resolving key really
- * is only mandatory for devices using resolvable random
- * addresses.
- *
- * Storing all identity resolving keys has the downside that
- * they will also be loaded on the next boot of the system. More
- * identity resolving keys means more time during scanning is
- * needed to actually resolve these addresses.
- */
- if (bacmp(&irk->rpa, BDADDR_ANY))
- ev.store_hint = 0x01;
- else
- ev.store_hint = 0x00;
+ ev.store_hint = persistent;
bacpy(&ev.rpa, &irk->rpa);
bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr);
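Much of the mgmt.c churn above simply routes the BDADDR_LE_PUBLIC/BDADDR_LE_RANDOM translation through the new le_addr_type() helper. A minimal userspace sketch of that mapping; the numeric constants are stand-ins for the Bluetooth header values and only illustrate the intent:

#include <stdio.h>

#define BDADDR_LE_PUBLIC   0x01	/* stand-in values for illustration */
#define BDADDR_LE_RANDOM   0x02
#define ADDR_LE_DEV_PUBLIC 0x00
#define ADDR_LE_DEV_RANDOM 0x01

/* Mirrors le_addr_type(): translate a mgmt address type into the LE
 * address type used by the connection and key tables. */
static unsigned char le_addr_type(unsigned char mgmt_addr_type)
{
	if (mgmt_addr_type == BDADDR_LE_PUBLIC)
		return ADDR_LE_DEV_PUBLIC;
	return ADDR_LE_DEV_RANDOM;
}

int main(void)
{
	printf("public -> %u, random -> %u\n",
	       le_addr_type(BDADDR_LE_PUBLIC), le_addr_type(BDADDR_LE_RANDOM));
	return 0;
}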
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index f315c8d..fe12966 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -74,7 +74,7 @@ struct sco_pinfo {
static void sco_sock_timeout(unsigned long arg)
{
- struct sock *sk = (struct sock *) arg;
+ struct sock *sk = (struct sock *)arg;
BT_DBG("sock %p state %d", sk, sk->sk_state);
@@ -170,18 +170,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
sco_conn_unlock(conn);
if (sk) {
+ sock_hold(sk);
bh_lock_sock(sk);
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
bh_unlock_sock(sk);
sco_sock_kill(sk);
+ sock_put(sk);
}
hcon->sco_data = NULL;
kfree(conn);
}
-static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent)
+static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
+ struct sock *parent)
{
BT_DBG("conn %p", conn);
@@ -414,8 +417,10 @@ static void __sco_sock_close(struct sock *sk)
if (sco_pi(sk)->conn->hcon) {
sk->sk_state = BT_DISCONN;
sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
+ sco_conn_lock(sco_pi(sk)->conn);
hci_conn_drop(sco_pi(sk)->conn->hcon);
sco_pi(sk)->conn->hcon = NULL;
+ sco_conn_unlock(sco_pi(sk)->conn);
} else
sco_chan_del(sk, ECONNRESET);
break;
@@ -459,7 +464,8 @@ static struct proto sco_proto = {
.obj_size = sizeof(struct sco_pinfo)
};
-static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
+static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
+ int proto, gfp_t prio, int kern)
{
struct sock *sk;
@@ -508,7 +514,8 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
return 0;
}
-static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
+ int addr_len)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -615,7 +622,8 @@ done:
return err;
}
-static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int sco_sock_accept(struct socket *sock, struct socket *newsock,
+ int flags)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
@@ -669,7 +677,8 @@ done:
return err;
}
-static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
+ int *len, int peer)
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
@@ -779,7 +788,8 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg,
return bt_sock_recvmsg(sock, msg, len, flags);
}
-static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
+static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int len, err = 0;
@@ -819,7 +829,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
voice.setting = sco_pi(sk)->setting;
len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_user((char *) &voice, optval, len)) {
+ if (copy_from_user((char *)&voice, optval, len)) {
err = -EFAULT;
break;
}
@@ -843,7 +853,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
return err;
}
-static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
+static int sco_sock_getsockopt_old(struct socket *sock, int optname,
+ char __user *optval, int __user *optlen)
{
struct sock *sk = sock->sk;
struct sco_options opts;
@@ -903,7 +914,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
return err;
}
-static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
+static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
{
struct sock *sk = sock->sk;
int len, err = 0;
@@ -928,7 +940,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
}
if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags),
- (u32 __user *) optval))
+ (u32 __user *)optval))
err = -EFAULT;
break;
@@ -961,7 +973,9 @@ static int sco_sock_shutdown(struct socket *sock, int how)
if (!sk)
return 0;
+ sock_hold(sk);
lock_sock(sk);
+
if (!sk->sk_shutdown) {
sk->sk_shutdown = SHUTDOWN_MASK;
sco_sock_clear_timer(sk);
@@ -972,7 +986,10 @@ static int sco_sock_shutdown(struct socket *sock, int how)
err = bt_sock_wait_state(sk, BT_CLOSED,
sk->sk_lingertime);
}
+
release_sock(sk);
+ sock_put(sk);
+
return err;
}
@@ -1016,6 +1033,11 @@ static void sco_conn_ready(struct sco_conn *conn)
} else {
sco_conn_lock(conn);
+ if (!conn->hcon) {
+ sco_conn_unlock(conn);
+ return;
+ }
+
parent = sco_get_sock_listen(&conn->hcon->src);
if (!parent) {
sco_conn_unlock(conn);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 25644e1..c913538 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -811,7 +811,6 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason)
smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason),
&reason);
- clear_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags);
mgmt_auth_failed(hcon, HCI_ERROR_AUTH_FAILURE);
if (chan->data)
@@ -1046,8 +1045,24 @@ static void smp_notify_keys(struct l2cap_conn *conn)
struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1];
bool persistent;
+ if (hcon->type == ACL_LINK) {
+ if (hcon->key_type == HCI_LK_DEBUG_COMBINATION)
+ persistent = false;
+ else
+ persistent = !test_bit(HCI_CONN_FLUSH_KEY,
+ &hcon->flags);
+ } else {
+ /* The LTKs, IRKs and CSRKs should be persistent only if
+ * both sides had the bonding bit set in their
+ * authentication requests.
+ */
+ persistent = !!((req->auth_req & rsp->auth_req) &
+ SMP_AUTH_BONDING);
+ }
+
if (smp->remote_irk) {
- mgmt_new_irk(hdev, smp->remote_irk);
+ mgmt_new_irk(hdev, smp->remote_irk, persistent);
+
/* Now that user space can be considered to know the
* identity address track the connection based on it
* from now on (assuming this is an LE link).
@@ -1075,21 +1090,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
}
- if (hcon->type == ACL_LINK) {
- if (hcon->key_type == HCI_LK_DEBUG_COMBINATION)
- persistent = false;
- else
- persistent = !test_bit(HCI_CONN_FLUSH_KEY,
- &hcon->flags);
- } else {
- /* The LTKs and CSRKs should be persistent only if both sides
- * had the bonding bit set in their authentication requests.
- */
- persistent = !!((req->auth_req & rsp->auth_req) &
- SMP_AUTH_BONDING);
- }
-
-
if (smp->csrk) {
smp->csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->csrk->bdaddr, &hcon->dst);
@@ -2380,6 +2380,32 @@ unlock:
return ret;
}
+void smp_cancel_pairing(struct hci_conn *hcon)
+{
+ struct l2cap_conn *conn = hcon->l2cap_data;
+ struct l2cap_chan *chan;
+ struct smp_chan *smp;
+
+ if (!conn)
+ return;
+
+ chan = conn->smp;
+ if (!chan)
+ return;
+
+ l2cap_chan_lock(chan);
+
+ smp = chan->data;
+ if (smp) {
+ if (test_bit(SMP_FLAG_COMPLETE, &smp->flags))
+ smp_failure(conn, 0);
+ else
+ smp_failure(conn, SMP_UNSPECIFIED);
+ }
+
+ l2cap_chan_unlock(chan);
+}
+
static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_encrypt_info *rp = (void *) skb->data;
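The persistence check was moved earlier in smp_notify_keys() so that mgmt_new_irk() can be told whether to store the key: LE keys persist only when both sides set the bonding bit in their AuthReq. A hedged standalone sketch of that test (the SMP_AUTH_BONDING value is assumed for illustration):

#include <stdio.h>
#include <stdbool.h>

#define SMP_AUTH_BONDING 0x01	/* stand-in for the SMP AuthReq bonding bit */

/* Mirrors the moved persistence check: LTKs, IRKs and CSRKs are stored
 * only when both sides requested bonding. */
static bool keys_persistent(unsigned char req_auth, unsigned char rsp_auth)
{
	return !!((req_auth & rsp_auth) & SMP_AUTH_BONDING);
}

int main(void)
{
	printf("both bond: %d, one side only: %d\n",
	       keys_persistent(0x01, 0x01), keys_persistent(0x01, 0x00));
	return 0;
}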
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 6cf8725..ffcc70b 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -180,6 +180,7 @@ enum smp_key_pref {
};
/* SMP Commands */
+void smp_cancel_pairing(struct hci_conn *hcon);
bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level,
enum smp_key_pref key_pref);
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c88bd8e..a642bb8 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -495,7 +495,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
struct net_bridge_port *source,
const unsigned char *addr,
- __u16 vid)
+ __u16 vid,
+ unsigned char is_local,
+ unsigned char is_static)
{
struct net_bridge_fdb_entry *fdb;
@@ -504,8 +506,8 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
memcpy(fdb->addr.addr, addr, ETH_ALEN);
fdb->dst = source;
fdb->vlan_id = vid;
- fdb->is_local = 0;
- fdb->is_static = 0;
+ fdb->is_local = is_local;
+ fdb->is_static = is_static;
fdb->added_by_user = 0;
fdb->added_by_external_learn = 0;
fdb->updated = fdb->used = jiffies;
@@ -536,11 +538,10 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
fdb_delete(br, fdb);
}
- fdb = fdb_create(head, source, addr, vid);
+ fdb = fdb_create(head, source, addr, vid, 1, 1);
if (!fdb)
return -ENOMEM;
- fdb->is_local = fdb->is_static = 1;
fdb_add_hw_addr(br, addr);
fdb_notify(br, fdb, RTM_NEWNEIGH);
return 0;
@@ -597,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
} else {
spin_lock(&br->hash_lock);
if (likely(!fdb_find(head, addr, vid))) {
- fdb = fdb_create(head, source, addr, vid);
+ fdb = fdb_create(head, source, addr, vid, 0, 0);
if (fdb) {
if (unlikely(added_by_user))
fdb->added_by_user = 1;
@@ -774,7 +775,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- fdb = fdb_create(head, source, addr, vid);
+ fdb = fdb_create(head, source, addr, vid, 0, 0);
if (!fdb)
return -ENOMEM;
@@ -1099,7 +1100,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
head = &br->hash[br_mac_hash(addr, vid)];
fdb = fdb_find(head, addr, vid);
if (!fdb) {
- fdb = fdb_create(head, p, addr, vid);
+ fdb = fdb_create(head, p, addr, vid, 0, 0);
if (!fdb) {
err = -ENOMEM;
goto err_unlock;
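fdb_create() now receives is_local and is_static up front, so fdb_insert() no longer flips those flags after the entry is already linked into the hash. A toy sketch of the construction-time initialisation (the struct below is illustrative, not the bridge's real layout):

#include <stdio.h>
#include <string.h>

struct fdb_entry {
	unsigned char addr[6];
	unsigned char is_local;
	unsigned char is_static;
};

/* Mirrors the reworked fdb_create(): the flags are set at creation time
 * instead of being patched in afterwards, so an observer never sees a
 * half-initialised entry. */
static void fdb_create(struct fdb_entry *f, const unsigned char *addr,
		       unsigned char is_local, unsigned char is_static)
{
	memcpy(f->addr, addr, sizeof(f->addr));
	f->is_local = is_local;
	f->is_static = is_static;
}

int main(void)
{
	static const unsigned char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	struct fdb_entry f;

	fdb_create(&f, mac, 1, 1);	/* fdb_insert() path: local + static */
	printf("local=%u static=%u\n", f.is_local, f.is_static);
	return 0;
}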
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a9d424e..fcdb86d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(br_deliver);
/* called with rcu_read_lock */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0)
{
- if (should_deliver(to, skb)) {
+ if (to && should_deliver(to, skb)) {
if (skb0)
deliver_clone(to, skb, __br_forward);
else
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 5f0d0cc..1394da6 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -180,11 +180,11 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
* devices. There are four possible calls to this function in terms of the
* vlan entry type:
* 1. vlan is being added on a port (no master flags, global entry exists)
- * 2. vlan is being added on a bridge (both master and brvlan flags)
+ * 2. vlan is being added on a bridge (both master and brentry flags)
* 3. vlan is being added on a port, but a global entry didn't exist which
- * is being created right now (master flag set, brvlan flag unset), the
+ * is being created right now (master flag set, brentry flag unset), the
* global entry is used for global per-vlan features, but not for filtering
- * 4. same as 3 but with both master and brvlan flags set so the entry
+ * 4. same as 3 but with both master and brentry flags set so the entry
* will be used for filtering in both the port and the bridge
*/
static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
@@ -482,7 +482,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
if (!br->vlan_enabled)
return true;
- vg = nbp_vlan_group(p);
+ vg = nbp_vlan_group_rcu(p);
if (!vg || !vg->num_vlans)
return false;
@@ -914,6 +914,8 @@ out:
return ret;
err_vlan_add:
+ RCU_INIT_POINTER(p->vlgrp, NULL);
+ synchronize_rcu();
rhashtable_destroy(&vg->vlan_hash);
err_rhtbl:
kfree(vg);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1225b4b..8ce3f74 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
@@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev)
EXPORT_SYMBOL(dev_get_iflink);
/**
+ * dev_fill_metadata_dst - Retrieve tunnel egress information.
+ * @dev: targeted interface
+ * @skb: The packet.
+ *
+ * For better visibility of tunnel traffic, OVS needs to retrieve
+ * egress tunnel information for a packet. The following API allows
+ * the user to get this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info;
+
+ if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+ return -EINVAL;
+
+ info = skb_tunnel_info_unclone(skb);
+ if (!info)
+ return -ENOMEM;
+ if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+ return -EINVAL;
+
+ return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
+/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
@@ -6261,6 +6288,48 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
+static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
+ struct net_device *upper, netdev_features_t features)
+{
+ netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
+ netdev_features_t feature;
+ int feature_bit;
+
+ for_each_netdev_feature(&upper_disables, feature_bit) {
+ feature = __NETIF_F_BIT(feature_bit);
+ if (!(upper->wanted_features & feature)
+ && (features & feature)) {
+ netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
+ &feature, upper->name);
+ features &= ~feature;
+ }
+ }
+
+ return features;
+}
+
+static void netdev_sync_lower_features(struct net_device *upper,
+ struct net_device *lower, netdev_features_t features)
+{
+ netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
+ netdev_features_t feature;
+ int feature_bit;
+
+ for_each_netdev_feature(&upper_disables, feature_bit) {
+ feature = __NETIF_F_BIT(feature_bit);
+ if (!(features & feature) && (lower->features & feature)) {
+ netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
+ &feature, lower->name);
+ lower->wanted_features &= ~feature;
+ netdev_update_features(lower);
+
+ if (unlikely(lower->features & feature))
+ netdev_WARN(upper, "failed to disable %pNF on %s!\n",
+ &feature, lower->name);
+ }
+ }
+}
+
static netdev_features_t netdev_fix_features(struct net_device *dev,
netdev_features_t features)
{
@@ -6330,7 +6399,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
int __netdev_update_features(struct net_device *dev)
{
+ struct net_device *upper, *lower;
netdev_features_t features;
+ struct list_head *iter;
int err = 0;
ASSERT_RTNL();
@@ -6343,6 +6414,10 @@ int __netdev_update_features(struct net_device *dev)
/* driver might be less strict about feature dependencies */
features = netdev_fix_features(dev, features);
+ /* some features can't be enabled if they're off on an upper device */
+ netdev_for_each_upper_dev_rcu(dev, upper, iter)
+ features = netdev_sync_upper_features(dev, upper, features);
+
if (dev->features == features)
return 0;
@@ -6359,6 +6434,12 @@ int __netdev_update_features(struct net_device *dev)
return -1;
}
+ /* some features must be disabled on lower devices when disabled
+ * on an upper device (think: bonding master or bridge)
+ */
+ netdev_for_each_lower_dev(dev, lower, iter)
+ netdev_sync_lower_features(dev, lower, features);
+
if (!err)
dev->features = features;
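netdev_sync_upper_features() and netdev_sync_lower_features() keep the NETIF_F_UPPER_DISABLES set consistent across stacked devices: a feature switched off on an upper device is also dropped from its lower devices. A rough standalone model of the upper-to-lower direction, using toy feature bits in place of netdev_features_t:

#include <stdio.h>

typedef unsigned long features_t;	/* toy stand-in for netdev_features_t */
#define FEAT_LRO  (1UL << 0)		/* illustrative bits, not NETIF_F_* */
#define FEAT_GRO  (1UL << 1)
#define UPPER_DISABLES FEAT_LRO		/* features an upper device can veto */

/* Mirrors netdev_sync_upper_features(): any vetoable feature the upper
 * device has turned off is also cleared from the lower device's set. */
static features_t sync_upper(features_t lower, features_t upper_wanted)
{
	return lower & ~(UPPER_DISABLES & ~upper_wanted);
}

int main(void)
{
	features_t lower = FEAT_LRO | FEAT_GRO;

	printf("0x%lx -> 0x%lx\n", lower, sync_upper(lower, FEAT_GRO));
	return 0;
}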
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 4eab4a9..703cf76 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -58,7 +58,7 @@
* jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
* ldh [18] ; reload payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2
+ * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2
* ret a ; return PTP class
*
* ; PTP over UDP over IPv4 over 802.1Q over Ethernet
@@ -73,7 +73,7 @@
* jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ?
* ldh [x + 26] ; load payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
+ * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
* ret a ; return PTP class
* drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE
*
@@ -86,7 +86,7 @@
* jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ?
* ldh [66] ; load payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
+ * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
* ret a ; return PTP class
* drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE
*
@@ -98,7 +98,7 @@
* jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
* ldh [14] ; reload payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x30 ; PTP_CLASS_L2
+ * or #0x40 ; PTP_CLASS_L2
* ret a ; return PTP class
* drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE
*/
@@ -150,7 +150,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 35, 0x00000000 },
{ 0x28, 0, 0, 0x00000012 },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000070 },
+ { 0x44, 0, 0, 0x000000c0 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x15, 0, 12, 0x00000800 },
{ 0x30, 0, 0, 0x0000001b },
@@ -162,7 +162,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 4, 0x0000013f },
{ 0x48, 0, 0, 0x0000001a },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000050 },
+ { 0x44, 0, 0, 0x00000090 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
{ 0x15, 0, 8, 0x000086dd },
@@ -172,7 +172,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 4, 0x0000013f },
{ 0x28, 0, 0, 0x00000042 },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000060 },
+ { 0x44, 0, 0, 0x000000a0 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
{ 0x15, 0, 7, 0x000088f7 },
@@ -181,7 +181,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 4, 0x00000000 },
{ 0x28, 0, 0, 0x0000000e },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000030 },
+ { 0x44, 0, 0, 0x00000040 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
};
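The classifier update reflects the new PTP_CLASS_* bit assignments. The values below are inferred from the updated comments in this hunk (PTP_CLASS_L2=0x40, PTP_CLASS_IPV4=0x10, PTP_CLASS_IPV6=0x20, PTP_CLASS_VLAN=0x80) and are shown only to check how the new "or" immediates are composed:

#include <stdio.h>

#define PTP_CLASS_IPV4 0x10	/* values inferred from this hunk's comments */
#define PTP_CLASS_IPV6 0x20
#define PTP_CLASS_L2   0x40
#define PTP_CLASS_VLAN 0x80

int main(void)
{
	printf("VLAN|L2   = 0x%02x\n", PTP_CLASS_VLAN | PTP_CLASS_L2);
	printf("VLAN|IPV4 = 0x%02x\n", PTP_CLASS_VLAN | PTP_CLASS_IPV4);
	printf("VLAN|IPV6 = 0x%02x\n", PTP_CLASS_VLAN | PTP_CLASS_IPV6);
	return 0;
}

The three results (0xc0, 0x90, 0xa0) match the immediates loaded by the updated BPF program above.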
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7c78b5a..504bd17 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -838,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
/* IFLA_VF_STATS_BROADCAST */
nla_total_size(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
- nla_total_size(sizeof(__u64)));
+ nla_total_size(sizeof(__u64)) +
+ nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
return 0;
@@ -1161,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_link_state vf_linkstate;
struct ifla_vf_rss_query_en vf_rss_query_en;
struct ifla_vf_stats vf_stats;
+ struct ifla_vf_trust vf_trust;
/*
* Not all SR-IOV capable drivers support the
@@ -1170,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
*/
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
+ ivi.trusted = -1;
memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1183,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_tx_rate.vf =
vf_spoofchk.vf =
vf_linkstate.vf =
- vf_rss_query_en.vf = ivi.vf;
+ vf_rss_query_en.vf =
+ vf_trust.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
@@ -1194,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
vf_rss_query_en.setting = ivi.rss_query_en;
+ vf_trust.setting = ivi.trusted;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf) {
nla_nest_cancel(skb, vfinfo);
@@ -1211,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
&vf_linkstate) ||
nla_put(skb, IFLA_VF_RSS_QUERY_EN,
sizeof(vf_rss_query_en),
- &vf_rss_query_en))
+ &vf_rss_query_en) ||
+ nla_put(skb, IFLA_VF_TRUST,
+ sizeof(vf_trust), &vf_trust))
goto nla_put_failure;
memset(&vf_stats, 0, sizeof(vf_stats));
if (dev->netdev_ops->ndo_get_vf_stats)
@@ -1348,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
[IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
[IFLA_VF_STATS] = { .type = NLA_NESTED },
+ [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
};
static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
@@ -1587,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
+ if (tb[IFLA_VF_TRUST]) {
+ struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_trust)
+ err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
+ if (err < 0)
+ return err;
+ }
+
return err;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index dcc7d62..7529eb9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name)
}
}
+static bool sock_needs_netstamp(const struct sock *sk)
+{
+ switch (sk->sk_family) {
+ case AF_UNSPEC:
+ case AF_UNIX:
+ return false;
+ default:
+ return true;
+ }
+}
+
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
if (sk->sk_flags & flags) {
sk->sk_flags &= ~flags;
- if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
+ if (sock_needs_netstamp(sk) &&
+ !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
net_disable_timestamp();
}
}
@@ -1582,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
- if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ if (sock_needs_netstamp(sk) &&
+ newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
}
out:
@@ -1643,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+#ifdef CONFIG_INET
+ if (unlikely(!sk_fullsock(sk))) {
+ skb->destructor = sock_edemux;
+ sock_hold(sk);
+ return;
+ }
+#endif
+ skb->destructor = sock_wfree;
+ skb_set_hash_from_sk(skb, sk);
+ /*
+ * We used to take a refcount on sk, but the following operation
+ * is enough to guarantee sk_free() won't free this sock until
+ * all in-flight packets are completed
+ */
+ atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+}
+EXPORT_SYMBOL(skb_set_owner_w);
+
void skb_orphan_partial(struct sk_buff *skb)
{
/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
@@ -2510,7 +2545,8 @@ void sock_enable_timestamp(struct sock *sk, int flag)
* time stamping, but time stamping might have been on
* already because of the other one
*/
- if (!(previous_flags & SK_FLAGS_TIMESTAMP))
+ if (sock_needs_netstamp(sk) &&
+ !(previous_flags & SK_FLAGS_TIMESTAMP))
net_enable_timestamp();
}
}
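sock_needs_netstamp() gates the global net_enable_timestamp()/net_disable_timestamp() machinery: kernel-internal (AF_UNSPEC) and AF_UNIX sockets never hit a NIC, so enabling network timestamping for them is pointless. A small userspace sketch of the same check:

#include <stdio.h>
#include <stdbool.h>
#include <sys/socket.h>

/* Mirrors sock_needs_netstamp(): only families whose traffic can leave
 * through a network device need the netstamp machinery. */
static bool sock_needs_netstamp(int family)
{
	switch (family) {
	case AF_UNSPEC:
	case AF_UNIX:
		return false;
	default:
		return true;
	}
}

int main(void)
{
	printf("AF_UNIX: %d, AF_INET: %d\n",
	       sock_needs_netstamp(AF_UNIX), sock_needs_netstamp(AF_INET));
	return 0;
}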
diff --git a/net/core/tso.c b/net/core/tso.c
index 630b30b..5dca7ce 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,4 +1,5 @@
#include <linux/export.h>
+#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/tso.h>
#include <asm/unaligned.h>
@@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs);
void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
{
- struct iphdr *iph;
struct tcphdr *tcph;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int mac_hdr_len = skb_network_offset(skb);
memcpy(hdr, skb->data, hdr_len);
- iph = (struct iphdr *)(hdr + mac_hdr_len);
- iph->id = htons(tso->ip_id);
- iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ if (!tso->ipv6) {
+ struct iphdr *iph = (void *)(hdr + mac_hdr_len);
+
+ iph->id = htons(tso->ip_id);
+ iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ tso->ip_id++;
+ } else {
+ struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
+
+ iph->payload_len = htons(size + tcp_hdrlen(skb));
+ }
tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
put_unaligned_be32(tso->tcp_seq, &tcph->seq);
- tso->ip_id++;
if (!is_last) {
/* Clear all special flags for not last packet */
@@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
tso->ip_id = ntohs(ip_hdr(skb)->id);
tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
tso->next_frag_idx = 0;
+ tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
/* Build first data */
tso->size = skb_headlen(skb) - hdr_len;
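tso_build_hdr() now patches the length field differently per address family: IPv4 gets tot_len (IP header + TCP header + payload) plus an incrementing IP ID, while IPv6 gets payload_len (TCP header + payload) only. A standalone sketch of just that arithmetic (parameter names are illustrative):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <arpa/inet.h>

/* Per-segment length field: IPv4 total length spans IP+TCP headers and
 * payload; IPv6 payload length excludes the fixed IPv6 header. */
static uint16_t seg_len_field(bool ipv6, int payload, int hdr_len,
			      int mac_hdr_len, int tcp_hdr_len)
{
	if (!ipv6)
		return htons(payload + hdr_len - mac_hdr_len);
	return htons(payload + tcp_hdr_len);
}

int main(void)
{
	/* 1400-byte payload, 14B MAC, 20B IPv4 / 40B IPv6, 20B TCP */
	printf("ipv4 tot_len=%u ipv6 payload_len=%u\n",
	       ntohs(seg_len_field(false, 1400, 54, 14, 20)),
	       ntohs(seg_len_field(true, 1400, 74, 14, 20)));
	return 0;
}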
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 923f5a1..b0e28d2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -278,7 +278,9 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst);
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req);
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 59bc180..5684e14 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -393,7 +393,9 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req)
{
struct inet_request_sock *ireq;
struct inet_sock *newinet;
@@ -426,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
- __inet_hash_nolisten(newsk, NULL);
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
return newsk;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d9cc731..db5fc24 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -380,7 +380,9 @@ drop:
static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp;
@@ -393,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
/*
* v6 mapped
*/
- newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
+ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
+ req_unhash, own_req);
if (newsk == NULL)
return NULL;
@@ -474,15 +477,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
- /* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (ireq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
- consume_skb(ireq->pktopts);
- ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -511,7 +506,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
dccp_done(newsk);
goto out;
}
- __inet_hash(newsk, NULL);
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ /* Clone pktoptions received with SYN, if we own the req */
+ if (*own_req && ireq->pktopts) {
+ newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
+ if (newnp->pktoptions)
+ skb_set_owner_r(newnp->pktoptions, newsk);
+ }
return newsk;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index d10aace..1994f8a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
{
struct sock *child = NULL;
struct dccp_request_sock *dreq = dccp_rsk(req);
+ bool own_req;
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
if (dccp_parse_options(sk, dreq, skb))
goto drop;
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
- if (child == NULL)
+ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+ req, &own_req);
+ if (!child)
goto listen_overflow;
- inet_csk_reqsk_queue_drop(sk, req);
- inet_csk_reqsk_queue_add(sk, req, child);
-out:
- return child;
+ return inet_csk_complete_hashdance(sk, child, req, own_req);
+
listen_overflow:
dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -198,7 +198,7 @@ drop:
req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req);
- goto out;
+ return NULL;
}
EXPORT_SYMBOL_GPL(dccp_check_req);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index d8346d0..3d3fda0 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/kfifo.h>
#include <linux/vmalloc.h>
+#include <linux/time64.h>
#include <linux/gfp.h>
#include <net/net_namespace.h>
@@ -47,20 +48,20 @@ static struct {
struct kfifo fifo;
spinlock_t lock;
wait_queue_head_t wait;
- struct timespec tstart;
+ struct timespec64 tstart;
} dccpw;
static void printl(const char *fmt, ...)
{
va_list args;
int len;
- struct timespec now;
+ struct timespec64 now;
char tbuf[256];
va_start(args, fmt);
- getnstimeofday(&now);
+ getnstimeofday64(&now);
- now = timespec_sub(now, dccpw.tstart);
+ now = timespec64_sub(now, dccpw.tstart);
len = sprintf(tbuf, "%lu.%06lu ",
(unsigned long) now.tv_sec,
@@ -110,7 +111,7 @@ static struct jprobe dccp_send_probe = {
static int dccpprobe_open(struct inode *inode, struct file *file)
{
kfifo_reset(&dccpw.fifo);
- getnstimeofday(&dccpw.tstart);
+ getnstimeofday64(&dccpw.tstart);
return 0;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b0b8da0..7bc787b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -247,11 +247,10 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- u16 vid;
int err;
if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set)
+ if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
return -EOPNOTSUPP;
/* If the requested port doesn't belong to the same bridge as
@@ -262,16 +261,14 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
vlan->vid_end);
if (err)
return err;
+
+ err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
+ if (err)
+ return err;
} else {
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
- err = ds->drv->port_vlan_add(ds, p->port, vid,
- vlan->flags &
- BRIDGE_VLAN_INFO_UNTAGGED);
- if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID)
- err = ds->drv->port_pvid_set(ds, p->port, vid);
- if (err)
- return err;
- }
+ err = ds->drv->port_vlan_add(ds, p->port, vlan, trans);
+ if (err)
+ return err;
}
return 0;
@@ -282,19 +279,11 @@ static int dsa_slave_port_vlan_del(struct net_device *dev,
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- u16 vid;
- int err;
if (!ds->drv->port_vlan_del)
return -EOPNOTSUPP;
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
- err = ds->drv->port_vlan_del(ds, p->port, vid);
- if (err)
- return err;
- }
-
- return 0;
+ return ds->drv->port_vlan_del(ds, p->port, vlan);
}
static int dsa_slave_port_vlan_dump(struct net_device *dev,
@@ -378,31 +367,11 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev,
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- unsigned char addr[ETH_ALEN] = { 0 };
- u16 vid = 0;
- int ret;
-
- if (!ds->drv->port_fdb_getnext)
- return -EOPNOTSUPP;
-
- for (;;) {
- bool is_static;
- ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid,
- &is_static);
- if (ret < 0)
- break;
-
- ether_addr_copy(fdb->addr, addr);
- fdb->vid = vid;
- fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
-
- ret = cb(&fdb->obj);
- if (ret < 0)
- break;
- }
+ if (ds->drv->port_fdb_dump)
+ return ds->drv->port_fdb_dump(ds, p->port, fdb, cb);
- return ret == -ENOENT ? 0 : ret;
+ return -EOPNOTSUPP;
}
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 12e8cf4..6b437e8 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -580,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&ieee802154_lowpan->frags);
-
- return lowpan_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res)
+ inet_frags_uninit_net(&ieee802154_lowpan->frags);
+ return res;
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index 65d55e0..ef185dd 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -90,36 +90,12 @@ static lowpan_rx_result lowpan_rx_h_frag(struct sk_buff *skb)
int lowpan_iphc_decompress(struct sk_buff *skb)
{
- struct ieee802154_addr_sa sa, da;
struct ieee802154_hdr hdr;
- u8 iphc0, iphc1;
- void *sap, *dap;
if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
return -EINVAL;
- raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len);
-
- if (lowpan_fetch_skb_u8(skb, &iphc0) ||
- lowpan_fetch_skb_u8(skb, &iphc1))
- return -EINVAL;
-
- ieee802154_addr_to_sa(&sa, &hdr.source);
- ieee802154_addr_to_sa(&da, &hdr.dest);
-
- if (sa.addr_type == IEEE802154_ADDR_SHORT)
- sap = &sa.short_addr;
- else
- sap = &sa.hwaddr;
-
- if (da.addr_type == IEEE802154_ADDR_SHORT)
- dap = &da.short_addr;
- else
- dap = &da.hwaddr;
-
- return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type,
- IEEE802154_ADDR_LEN, dap, da.addr_type,
- IEEE802154_ADDR_LEN, iphc0, iphc1);
+ return lowpan_header_decompress(skb, skb->dev, &hdr.dest, &hdr.source);
}
static lowpan_rx_result lowpan_rx_h_iphc(struct sk_buff *skb)
@@ -308,16 +284,16 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev,
if (wdev->type != ARPHRD_IEEE802154 ||
skb->pkt_type == PACKET_OTHERHOST ||
!lowpan_rx_h_check(skb))
- return NET_RX_DROP;
+ goto drop;
ldev = wdev->ieee802154_ptr->lowpan_dev;
if (!ldev || !netif_running(ldev))
- return NET_RX_DROP;
+ goto drop;
/* Replacing skb->dev and followed rx handlers will manipulate skb. */
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
- return NET_RX_DROP;
+ goto out;
skb->dev = ldev;
/* When receive frag1 it's likely that we manipulate the buffer.
@@ -328,10 +304,15 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev,
lowpan_is_iphc(*skb_network_header(skb))) {
skb = skb_unshare(skb, GFP_ATOMIC);
if (!skb)
- return NET_RX_DROP;
+ goto out;
}
return lowpan_invoke_rx_handlers(skb);
+
+drop:
+ kfree_skb(skb);
+out:
+ return NET_RX_DROP;
}
static struct packet_type lowpan_packet_type = {
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index 62a21f6..d4353fa 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -14,6 +14,9 @@
#include "6lowpan_i.h"
+#define LOWPAN_FRAG1_HEAD_SIZE 0x4
+#define LOWPAN_FRAGN_HEAD_SIZE 0x5
+
/* don't save pan id, it's intra pan */
struct lowpan_addr {
u8 mode;
@@ -218,7 +221,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
saddr = &info.saddr.u.extended_addr;
*dgram_size = skb->len;
- lowpan_header_compress(skb, ldev, ETH_P_IPV6, daddr, saddr, skb->len);
+ lowpan_header_compress(skb, ldev, daddr, saddr);
/* dgram_offset = (saved bytes after compression) + lowpan header len */
*dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb);
@@ -235,7 +238,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
/* if the destination address is the broadcast address, use the
* corresponding short address
*/
- if (lowpan_is_addr_broadcast((const u8 *)daddr)) {
+ if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) {
da.mode = IEEE802154_ADDR_SHORT;
da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
cb->ackreq = false;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d7c2bb0..e786873 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -867,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
- fib_magic(RTM_NEWROUTE,
- dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- prefix, ifa->ifa_prefixlen, prim);
+ if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, prim);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
@@ -914,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
}
} else if (!ipv4_is_zeronet(any) &&
(any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
- fib_magic(RTM_DELROUTE,
- dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
+ if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ any, ifa->ifa_prefixlen, prim);
subnet = 1;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 42778d9..f30df0e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1564,7 +1564,8 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
if (mp_hash < 0)
- mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr);
+ mp_hash = get_hash_from_flowi4(fl4) >> 1;
+
fib_select_multipath(res, mp_hash);
}
else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6c2af79..744e593 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1569,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
do {
/* record parent and next child index */
pn = n;
- cindex = key ? get_index(key, pn) : 0;
+ cindex = (key > pn->key) ? get_index(key, pn) : 0;
if (cindex >> pn->bits)
break;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 5aa46d4..5a8ee32 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
- SKB_GSO_IPIP)))
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT)))
goto out;
if (!skb->encapsulation)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8430bc8..1feb15f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -523,15 +523,15 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
struct request_sock *req)
{
struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
- spinlock_t *lock;
- bool found;
+ bool found = false;
- lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
-
- spin_lock(lock);
- found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
- spin_unlock(lock);
+ if (sk_hashed(req_to_sk(req))) {
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
+ spin_lock(lock);
+ found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+ spin_unlock(lock);
+ }
if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
reqsk_put(req);
return found;
@@ -811,6 +811,25 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+ struct request_sock *req, bool own_req)
+{
+ if (own_req) {
+ inet_csk_reqsk_queue_drop(sk, req);
+ reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
+ inet_csk_reqsk_queue_add(sk, req, child);
+ /* Warning: caller must not call reqsk_put(req);
+ * child stole last reference on it.
+ */
+ return child;
+ }
+ /* Too bad, another child took ownership of the request, undo. */
+ bh_unlock_sock(child);
+ sock_put(child);
+ return NULL;
+}
+EXPORT_SYMBOL(inet_csk_complete_hashdance);
+
/*
* This routine closes sockets which have been at least partially
* opened, but not yet accepted.
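inet_csk_complete_hashdance() added above lets only the caller that actually displaced the request socket (own_req) keep the child; a losing caller unlocks and drops its reference. A rough user-space sketch of the same claim-or-undo idea, using a C11 compare-and-swap as a stand-in for the ehash insertion that decides ownership:

#include <stdatomic.h>
#include <stdio.h>

struct request { atomic_int owned; };

/* Stand-in for the "own_req" outcome of inet_ehash_nolisten(): only the
 * first caller to claim the request wins; everyone else must undo. */
static int claim(struct request *req)
{
    int expected = 0;

    return atomic_compare_exchange_strong(&req->owned, &expected, 1);
}

/* Mirrors inet_csk_complete_hashdance(): the winner keeps the child
 * socket, the loser releases it and reports failure to its caller. */
static const char *complete_hashdance(struct request *req)
{
    if (claim(req))
        return "child kept, req moved to accept queue";
    return "lost the race, child released";
}

int main(void)
{
    struct request req = { .owned = 0 };

    puts(complete_hashdance(&req));  /* first caller wins */
    puts(complete_hashdance(&req));  /* second caller loses */
    return 0;
}
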
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index d0a7c03..fe144da 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -209,12 +209,6 @@ int inet_frags_init(struct inet_frags *f)
}
EXPORT_SYMBOL(inet_frags_init);
-void inet_frags_init_net(struct netns_frags *nf)
-{
- init_frag_mem_limit(nf);
-}
-EXPORT_SYMBOL(inet_frags_init_net);
-
void inet_frags_fini(struct inet_frags *f)
{
cancel_work_sync(&f->frags_work);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 958728a..ccc5980 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -407,13 +407,13 @@ static u32 inet_sk_port_offset(const struct sock *sk)
/* insert a socket into ehash, and eventually remove another one
* (The another one can be a SYN_RECV or TIMEWAIT
*/
-int inet_ehash_insert(struct sock *sk, struct sock *osk)
+bool inet_ehash_insert(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
struct inet_ehash_bucket *head;
spinlock_t *lock;
- int ret = 0;
+ bool ret = true;
WARN_ON_ONCE(!sk_unhashed(sk));
@@ -423,30 +423,41 @@ int inet_ehash_insert(struct sock *sk, struct sock *osk)
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock(lock);
- __sk_nulls_add_node_rcu(sk, list);
if (osk) {
- WARN_ON(sk->sk_hash != osk->sk_hash);
- sk_nulls_del_node_init_rcu(osk);
+ WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+ ret = sk_nulls_del_node_init_rcu(osk);
}
+ if (ret)
+ __sk_nulls_add_node_rcu(sk, list);
spin_unlock(lock);
return ret;
}
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
{
- inet_ehash_insert(sk, osk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ bool ok = inet_ehash_insert(sk, osk);
+
+ if (ok) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ } else {
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+ sk->sk_state = TCP_CLOSE;
+ sock_set_flag(sk, SOCK_DEAD);
+ inet_csk_destroy_sock(sk);
+ }
+ return ok;
}
-EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
void __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
- if (sk->sk_state != TCP_LISTEN)
- return __inet_hash_nolisten(sk, osk);
-
+ if (sk->sk_state != TCP_LISTEN) {
+ inet_ehash_nolisten(sk, osk);
+ return;
+ }
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -567,7 +578,7 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- __inet_hash_nolisten(sk, (struct sock *)tw);
+ inet_ehash_nolisten(sk, (struct sock *)tw);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
@@ -584,7 +595,7 @@ ok:
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- __inet_hash_nolisten(sk, NULL);
+ inet_ehash_nolisten(sk, NULL);
spin_unlock_bh(&head->lock);
return 0;
} else {
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 5482745..1fe55ae 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -839,6 +839,8 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
+ int res;
+
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -862,9 +864,13 @@ static int __net_init ipv4_frags_init_net(struct net *net)
*/
net->ipv4.frags.timeout = IP_FRAG_TIME;
- inet_frags_init_net(&net->ipv4.frags);
-
- return ip4_frags_ns_ctl_register(net);
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res)
+ inet_frags_uninit_net(&net->ipv4.frags);
+ return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bd0679d..6145214 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
+static struct rtable *gre_get_rt(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi4 *fl,
+ const struct ip_tunnel_key *key)
+{
+ struct net *net = dev_net(dev);
+
+ memset(fl, 0, sizeof(*fl));
+ fl->daddr = key->u.ipv4.dst;
+ fl->saddr = key->u.ipv4.src;
+ fl->flowi4_tos = RT_TOS(key->tos);
+ fl->flowi4_mark = skb->mark;
+ fl->flowi4_proto = IPPROTO_GRE;
+
+ return ip_route_output_key(net, fl);
+}
+
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
- struct net *net = dev_net(dev);
const struct ip_tunnel_key *key;
struct flowi4 fl;
struct rtable *rt;
@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- memset(&fl, 0, sizeof(fl));
- fl.daddr = key->u.ipv4.dst;
- fl.saddr = key->u.ipv4.src;
- fl.flowi4_tos = RT_TOS(key->tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_GRE;
-
- rt = ip_route_output_key(net, &fl);
+ rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
@@ -566,6 +575,24 @@ err_free_skb:
dev->stats.tx_dropped++;
}
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+
+ rt = gre_get_rt(skb, dev, &fl4, &info->key);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ ip_rt_put(rt);
+ info->key.u.ipv4.src = fl4.saddr;
+ return 0;
+}
+
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
};
static void ipgre_tap_setup(struct net_device *dev)
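gre_fill_metadata_dst() above performs a throwaway route lookup purely to learn the source address the stack would use, then stores it in the tunnel metadata. A user-space analogue of that idea, shown only as an illustration: a connected (but never used) UDP socket plus getsockname() reveals the source address the routing code would pick.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    struct sockaddr_in dst = { .sin_family = AF_INET,
                               .sin_port = htons(53) };
    struct sockaddr_in src;
    socklen_t len = sizeof(src);
    char buf[INET_ADDRSTRLEN];
    int fd;

    inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr); /* example destination */

    /* connect() on a datagram socket sends nothing; it only runs the
     * route lookup, which is exactly the information we want. */
    fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd < 0 || connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
        return 1;
    if (getsockname(fd, (struct sockaddr *)&src, &len) < 0)
        return 1;

    printf("source address toward 192.0.2.1: %s\n",
           inet_ntop(AF_INET, &src.sin_addr, buf, sizeof(buf)));
    close(fd);
    return 0;
}
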
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 50e2973..4233cbe 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -533,6 +533,11 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
dev = rt->dst.dev;
+ /* for offloaded checksums cleanup checksum before fragmentation */
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto fail;
+
/*
* Point into the IP datagram header.
*/
@@ -657,9 +662,6 @@ slow_path_clean:
}
slow_path:
- /* for offloaded checksums cleanup checksum before fragmentation */
- if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
- goto fail;
iph = ip_hdr(skb);
left = skb->len - hlen; /* Space per frame */
@@ -911,6 +913,7 @@ static int __ip_append_data(struct sock *sk,
if (transhdrlen &&
length + fragheaderlen <= mtu &&
rt->dst.dev->features & NETIF_F_V4_CSUM &&
+ !(flags & MSG_MORE) &&
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
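The ip_do_fragment()/__ip_append_data() hunks above resolve CHECKSUM_PARTIAL in software before fragmenting and skip offload when MSG_MORE means more data is coming. For reference, a minimal RFC 1071 style one's-complement sum, the kind of computation the software fallback ultimately has to finish before the datagram is split; this is an illustrative stand-alone function, not the kernel's csum implementation.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Fold a byte buffer into a 16-bit one's-complement checksum. */
static uint16_t csum16(const uint8_t *data, size_t len)
{
    uint32_t sum = 0;
    size_t i;

    for (i = 0; i + 1 < len; i += 2)
        sum += (uint32_t)(data[i] << 8 | data[i + 1]);
    if (len & 1)
        sum += (uint32_t)data[len - 1] << 8;
    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;
}

int main(void)
{
    uint8_t payload[] = { 0x45, 0x00, 0x00, 0x1c, 0xde, 0xad, 0xbe, 0xef };

    printf("checksum: 0x%04x\n", csum16(payload, sizeof(payload)));
    return 0;
}
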
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 690d27d..a355841 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -75,6 +75,7 @@ endif # NF_TABLES
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
help
This option enables the nf_dup_ipv4 core, which duplicates an IPv4
packet to be rerouted to another destination.
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 74dd667..78cc64e 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -60,9 +60,7 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
if (FIB_RES_DEV(res) == dev)
dev_match = true;
#endif
- if (dev_match || flags & XT_RPFILTER_LOOSE)
- return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
- return dev_match;
+ return dev_match || flags & XT_RPFILTER_LOOSE;
}
static bool rpfilter_is_local(const struct sk_buff *skb)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 4c0892b..4cbe9f0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -221,8 +221,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
+ bool own_req;
- child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
+ child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
+ NULL, &own_req);
if (child) {
atomic_set(&req->rsk_refcnt, 1);
sock_rps_save_rxhash(child, skb);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 7092a61..7e538f7 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -209,7 +209,7 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
/* alpha = (1 - g) * alpha + g * F */
- alpha -= alpha >> dctcp_shift_g;
+ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
if (bytes_ecn) {
/* If dctcp_shift_g == 1, a 32bit value would overflow
* after 8 Mbytes.
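The dctcp_update_alpha() fix above matters once alpha has decayed below 1 << dctcp_shift_g: the shifted term becomes zero and alpha can never reach zero. A stand-alone sketch of the clamped fixed-point decay, with min_not_zero() reimplemented locally:

#include <stdint.h>
#include <stdio.h>

#define SHIFT_G 4  /* g = 1/16, same default shift as dctcp_shift_g */

/* Local reimplementation of the kernel's min_not_zero(): smallest
 * non-zero of the two values, or 0 if both are 0. */
static uint32_t min_not_zero(uint32_t a, uint32_t b)
{
    if (!a)
        return b;
    if (!b)
        return a;
    return a < b ? a : b;
}

/* One fixed-point step of alpha = (1 - g) * alpha when no ECN marks were
 * seen.  A plain "alpha -= alpha >> SHIFT_G" gets stuck at a small
 * non-zero value; the clamp lets alpha decay all the way to 0. */
static uint32_t decay(uint32_t alpha)
{
    return alpha - min_not_zero(alpha, alpha >> SHIFT_G);
}

int main(void)
{
    uint32_t a = 1024;
    int i;

    for (i = 0; i < 200 && a; i++)
        a = decay(a);
    printf("alpha reached %u after %d steps\n", a, i);
    return 0;
}
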
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 93396bf..55be6ac 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -133,12 +133,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
struct sock *child;
u32 end_seq;
+ bool own_req;
req->num_retrans = 0;
req->num_timeout = 0;
req->sk = NULL;
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+ NULL, &own_req);
if (!child)
return NULL;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 30dd45c..1c2648b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1247,7 +1247,9 @@ EXPORT_SYMBOL(tcp_v4_conn_request);
*/
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req)
{
struct inet_request_sock *ireq;
struct inet_sock *newinet;
@@ -1323,7 +1325,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
- __inet_hash_nolisten(newsk, NULL);
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
return newsk;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1fd5d41..3575dd1 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -580,6 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool paws_reject = false;
+ bool own_req;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
@@ -767,18 +768,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* ESTABLISHED STATE. If it will be dropped after
* socket is created, wait for troubles.
*/
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
+ req, &own_req);
if (!child)
goto listen_overflow;
sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
- inet_csk_reqsk_queue_drop(sk, req);
- inet_csk_reqsk_queue_add(sk, req, child);
- /* Warning: caller must not call reqsk_put(req);
- * child stole last reference on it.
- */
- return child;
+ return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
if (!sysctl_tcp_abort_on_overflow) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f6f7f9b..cb7ca56 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2963,9 +2963,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
skb_reserve(skb, MAX_TCP_HEADER);
if (attach_req) {
- skb->destructor = sock_edemux;
- sock_hold(req_to_sk(req));
- skb->sk = req_to_sk(req);
+ skb_set_owner_w(skb, req_to_sk(req));
} else {
/* sk is a const pointer, because we want to express multiple
* cpu might call us concurrently.
@@ -3410,7 +3408,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
skb_mstamp_get(&skb->skb_mstamp);
- NET_INC_STATS_BH(sock_net(sk), mib);
+ NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 9f298d0..7ee6518 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,6 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
mtu = dst_mtu(skb_dst(skb));
if (skb->len > mtu) {
+ skb->protocol = htons(ETH_P_IP);
+
if (skb->sk)
xfrm_local_error(skb, mtu);
else
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f2606b9..1e0c3c8 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -127,7 +127,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
case IPPROTO_DCCP:
if (xprth + 4 < skb->data ||
pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be16 *ports = (__be16 *)xprth;
+ __be16 *ports;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ports = (__be16 *)xprth;
fl4->fl4_sport = ports[!!reverse];
fl4->fl4_dport = ports[!reverse];
@@ -135,8 +138,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
break;
case IPPROTO_ICMP:
- if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
- u8 *icmp = xprth;
+ if (xprth + 2 < skb->data ||
+ pskb_may_pull(skb, xprth + 2 - skb->data)) {
+ u8 *icmp;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ icmp = xprth;
fl4->fl4_icmp_type = icmp[0];
fl4->fl4_icmp_code = icmp[1];
@@ -144,33 +151,50 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
break;
case IPPROTO_ESP:
- if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be32 *ehdr = (__be32 *)xprth;
+ if (xprth + 4 < skb->data ||
+ pskb_may_pull(skb, xprth + 4 - skb->data)) {
+ __be32 *ehdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ehdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ehdr[0];
}
break;
case IPPROTO_AH:
- if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
- __be32 *ah_hdr = (__be32 *)xprth;
+ if (xprth + 8 < skb->data ||
+ pskb_may_pull(skb, xprth + 8 - skb->data)) {
+ __be32 *ah_hdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ah_hdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ah_hdr[1];
}
break;
case IPPROTO_COMP:
- if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be16 *ipcomp_hdr = (__be16 *)xprth;
+ if (xprth + 4 < skb->data ||
+ pskb_may_pull(skb, xprth + 4 - skb->data)) {
+ __be16 *ipcomp_hdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ipcomp_hdr = (__be16 *)xprth;
fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
}
break;
case IPPROTO_GRE:
- if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
- __be16 *greflags = (__be16 *)xprth;
- __be32 *gre_hdr = (__be32 *)xprth;
+ if (xprth + 12 < skb->data ||
+ pskb_may_pull(skb, xprth + 12 - skb->data)) {
+ __be16 *greflags;
+ __be32 *gre_hdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ greflags = (__be16 *)xprth;
+ gre_hdr = (__be32 *)xprth;
if (greflags[0] & GRE_KEY) {
if (greflags[0] & GRE_CSUM)
@@ -244,7 +268,7 @@ static struct dst_ops xfrm4_dst_ops = {
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
- .gc_thresh = 32768,
+ .gc_thresh = INT_MAX,
};
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
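All of the _decode_session4() hunks above share one point: pskb_may_pull() may move the packet data, so the transport-header pointer is recomputed from skb_network_header() after the pull instead of being reused. A generic sketch of that recompute-after-possible-reallocation rule, using realloc() as a stand-in for the pull:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct pkt {
    unsigned char *data;
    size_t len;
    size_t hdrlen;
};

/* Stand-in for pskb_may_pull(): make sure at least 'need' bytes are
 * available, possibly moving the buffer.  Any pointer computed before
 * this call may be stale afterwards. */
static int may_pull(struct pkt *p, size_t need)
{
    unsigned char *tmp;

    if (need <= p->len)
        return 1;
    tmp = realloc(p->data, need);
    if (!tmp)
        return 0;
    memset(tmp + p->len, 0, need - p->len);
    p->data = tmp;
    p->len = need;
    return 1;
}

int main(void)
{
    struct pkt p = { .data = calloc(20, 1), .len = 20, .hdrlen = 20 };
    unsigned char *xprth;

    if (!p.data)
        return 1;
    xprth = p.data + p.hdrlen;      /* pointer taken before the pull */

    if (may_pull(&p, p.hdrlen + 4)) {
        /* Recompute from the base, like the patch does with
         * skb_network_header(skb) + iph->ihl * 4, instead of trusting
         * the pre-pull xprth. */
        xprth = p.data + p.hdrlen;
        printf("first transport byte: %u\n", xprth[0]);
    }
    free(p.data);
    return 0;
}
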
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d0c685c..d72fa90 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3147,6 +3147,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
break;
+ case NETDEV_CHANGEMTU:
+ /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
+ if (dev->mtu < IPV6_MIN_MTU) {
+ addrconf_ifdown(dev, 1);
+ break;
+ }
+
+ if (idev) {
+ rt6_mtu_change(dev, dev->mtu);
+ idev->cnf.mtu6 = dev->mtu;
+ break;
+ }
+
+ /* allocate new idev */
+ idev = ipv6_add_dev(dev);
+ if (IS_ERR(idev))
+ break;
+
+ /* device is still not ready */
+ if (!(idev->if_flags & IF_READY))
+ break;
+
+ run_pending = 1;
+
+ /* fall through */
+
case NETDEV_UP:
case NETDEV_CHANGE:
if (dev->flags & IFF_SLAVE)
@@ -3170,7 +3196,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
idev->if_flags |= IF_READY;
run_pending = 1;
}
- } else {
+ } else if (event == NETDEV_CHANGE) {
if (!addrconf_qdisc_ok(dev)) {
/* device is still not ready. */
break;
@@ -3235,24 +3261,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
break;
- case NETDEV_CHANGEMTU:
- if (idev && dev->mtu >= IPV6_MIN_MTU) {
- rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
- break;
- }
-
- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
- idev = ipv6_add_dev(dev);
- if (!IS_ERR(idev))
- break;
- }
-
- /*
- * if MTU under IPV6_MIN_MTU.
- * Stop IPv6 on this interface.
- */
-
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
/*
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9f777ec..ed33abf 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,6 +32,7 @@ struct fib6_rule {
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
+ struct rt6_info *rt;
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.flags = FIB_LOOKUP_NOREF,
@@ -40,11 +41,21 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- if (arg.result)
- return arg.result;
+ rt = arg.result;
- dst_hold(&net->ipv6.ip6_null_entry->dst);
- return &net->ipv6.ip6_null_entry->dst;
+ if (!rt) {
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return &net->ipv6.ip6_null_entry->dst;
+ }
+
+ if (rt->rt6i_flags & RTF_REJECT &&
+ rt->dst.error == -EAGAIN) {
+ ip6_rt_put(rt);
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+
+ return &rt->dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
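Both fib6_rule_lookup() variants above now treat a reject route carrying -EAGAIN as "not found in this table" and substitute the held null entry. A toy sketch of that substitution, with plain reference counts standing in for dst_hold()/ip6_rt_put():

#include <errno.h>
#include <stdio.h>

#define RTF_REJECT 0x0200

struct rt { int flags; int error; int refcnt; const char *name; };

static struct rt null_entry = { .name = "null entry (no route)" };

static void rt_hold(struct rt *rt) { rt->refcnt++; }
static void rt_put(struct rt *rt)  { rt->refcnt--; }

/* Mirrors the patched lookup: a reject route whose error is -EAGAIN only
 * means "try the next table", so hand the caller the null entry instead
 * of a hard reject. */
static struct rt *rule_lookup(struct rt *found)
{
    if (!found) {
        rt_hold(&null_entry);
        return &null_entry;
    }
    if ((found->flags & RTF_REJECT) && found->error == -EAGAIN) {
        rt_put(found);
        rt_hold(&null_entry);
        return &null_entry;
    }
    return found;
}

int main(void)
{
    struct rt again = { .flags = RTF_REJECT, .error = -EAGAIN,
                        .refcnt = 1, .name = "table miss" };
    struct rt real = { .refcnt = 1, .name = "real route" };

    printf("%s\n", rule_lookup(&again)->name);
    printf("%s\n", rule_lookup(&real)->name);
    printf("%s\n", rule_lookup(NULL)->name);
    return 0;
}
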
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index efb1c00..36c5a98 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -453,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* and anycast addresses will be checked later.
*/
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
- net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n");
+ net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
+ &hdr->saddr, &hdr->daddr);
return;
}
@@ -461,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
* Never answer to a ICMP packet.
*/
if (is_ineligible(skb)) {
- net_dbg_ratelimited("icmp6_send: no reply to icmp error\n");
+ net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
+ &hdr->saddr, &hdr->daddr);
return;
}
@@ -513,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
len = skb->len - msg.offset;
len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
if (len < 0) {
- net_dbg_ratelimited("icmp: len problem\n");
+ net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
+ &hdr->saddr, &hdr->daddr);
goto out_dst_release;
}
@@ -785,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (type & ICMPV6_INFOMSG_MASK)
break;
- net_dbg_ratelimited("icmpv6: msg of unknown type\n");
+ net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
+ saddr, daddr);
/*
* error of unknown type.
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 09fddf7..0c7e276 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -286,7 +286,17 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
- return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ struct rt6_info *rt;
+
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ if (rt->rt6i_flags & RTF_REJECT &&
+ rt->dst.error == -EAGAIN) {
+ ip6_rt_put(rt);
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ }
+
+ return &rt->dst;
}
static void __net_init fib6_tables_init(struct net *net)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0c89671..e6a7bd15 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -596,11 +596,17 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (np->frag_size)
mtu = np->frag_size;
}
+ if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+ goto fail_toobig;
mtu -= hlen + sizeof(struct frag_hdr);
frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr);
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto fail;
+
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
int first_len = skb_pagelen(skb);
@@ -729,10 +735,6 @@ slow_path_clean:
}
slow_path:
- if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
- skb_checksum_help(skb))
- goto fail;
-
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
@@ -1268,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk,
struct rt6_info *rt = (struct rt6_info *)cork->dst;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
+ unsigned int maxnonfragsize, headersize;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1285,38 +1288,43 @@ static int __ip6_append_data(struct sock *sk,
maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
sizeof(struct frag_hdr);
- if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
- unsigned int maxnonfragsize, headersize;
-
- headersize = sizeof(struct ipv6hdr) +
- (opt ? opt->opt_flen + opt->opt_nflen : 0) +
- (dst_allfrag(&rt->dst) ?
- sizeof(struct frag_hdr) : 0) +
- rt->rt6i_nfheader_len;
-
- if (ip6_sk_ignore_df(sk))
- maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
- else
- maxnonfragsize = mtu;
+ headersize = sizeof(struct ipv6hdr) +
+ (opt ? opt->opt_flen + opt->opt_nflen : 0) +
+ (dst_allfrag(&rt->dst) ?
+ sizeof(struct frag_hdr) : 0) +
+ rt->rt6i_nfheader_len;
+
+ if (cork->length + length > mtu - headersize && dontfrag &&
+ (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
+ sizeof(struct ipv6hdr));
+ goto emsgsize;
+ }
- /* dontfrag active */
- if ((cork->length + length > mtu - headersize) && dontfrag &&
- (sk->sk_protocol == IPPROTO_UDP ||
- sk->sk_protocol == IPPROTO_RAW)) {
- ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
- sizeof(struct ipv6hdr));
- goto emsgsize;
- }
+ if (ip6_sk_ignore_df(sk))
+ maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+ else
+ maxnonfragsize = mtu;
- if (cork->length + length > maxnonfragsize - headersize) {
+ if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
- ipv6_local_error(sk, EMSGSIZE, fl6,
- mtu - headersize +
- sizeof(struct ipv6hdr));
- return -EMSGSIZE;
- }
+ ipv6_local_error(sk, EMSGSIZE, fl6,
+ mtu - headersize +
+ sizeof(struct ipv6hdr));
+ return -EMSGSIZE;
}
+ /* CHECKSUM_PARTIAL only with no extension headers and when
+ * we are not going to fragment
+ */
+ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
+ headersize == sizeof(struct ipv6hdr) &&
+ length < mtu - headersize &&
+ !(flags & MSG_MORE) &&
+ rt->dst.dev->features & NETIF_F_V6_CSUM)
+ csummode = CHECKSUM_PARTIAL;
+
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
sock_tx_timestamp(sk, &tx_flags);
if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
@@ -1324,16 +1332,6 @@ emsgsize:
tskey = sk->sk_tskey++;
}
- /* If this is the first and only packet and device
- * supports checksum offloading, let's use it.
- * Use transhdrlen, same as IPv4, because partial
- * sums only work when transhdrlen is set.
- */
- if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
- length + fragheaderlen < mtu &&
- rt->dst.dev->features & NETIF_F_V6_CSUM &&
- !exthdrlen)
- csummode = CHECKSUM_PARTIAL;
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 96833e4..f6a024e 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -58,6 +58,7 @@ endif # NF_TABLES
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
help
This option enables the nf_dup_ipv6 core, which duplicates an IPv6
packet to be rerouted to another destination.
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 056f5d4..d5efeb8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -644,15 +644,22 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb)
s = s2;
}
}
+EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res)
+ inet_frags_uninit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f1159bb..44e21a0 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -706,13 +706,19 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
+ int res;
+
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->ipv6.frags);
-
- return ip6_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res)
+ return res;
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res)
+ inet_frags_uninit_net(&net->ipv6.frags);
+ return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d061963..2701cb3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1171,6 +1171,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
{
struct dst_entry *dst;
int flags = 0;
+ bool any_src;
dst = l3mdev_rt6_dst_by_oif(net, fl6);
if (dst)
@@ -1178,11 +1179,12 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
- fl6->flowi6_oif)
+ (fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl6->saddr))
+ if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f495d18..ea2f4d5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -965,7 +965,9 @@ drop:
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst)
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req)
{
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
@@ -984,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
* v6 mapped
*/
- newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
+ newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
+ req_unhash, own_req);
if (!newsk)
return NULL;
@@ -1081,16 +1084,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
- /* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_atomic(sk, GFP_ATOMIC));
- consume_skb(ireq->pktopts);
- ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
newnp->opt = NULL;
newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -1145,7 +1139,16 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
tcp_done(newsk);
goto out;
}
- __inet_hash(newsk, NULL);
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ /* Clone pktoptions received with SYN, if we own the req */
+ if (*own_req && ireq->pktopts) {
+ newnp->pktoptions = skb_clone(ireq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
+ if (newnp->pktoptions)
+ skb_set_owner_r(newnp->pktoptions, newsk);
+ }
return newsk;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 9db067a..4d09ce6 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -79,6 +79,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (!skb->ignore_df && skb->len > mtu) {
skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
if (xfrm6_local_dontfrag(skb))
xfrm6_local_rxpmtu(skb, mtu);
@@ -143,6 +144,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
int mtu;
+ bool toobig;
#ifdef CONFIG_NETFILTER
if (!x) {
@@ -151,25 +153,29 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
#endif
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ goto skip_frag;
+
if (skb->protocol == htons(ETH_P_IPV6))
mtu = ip6_skb_dst_mtu(skb);
else
mtu = dst_mtu(skb_dst(skb));
- if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+ toobig = skb->len > mtu && !skb_is_gso(skb);
+
+ if (toobig && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
return -EMSGSIZE;
- } else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
+ } else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
return -EMSGSIZE;
}
- if (x->props.mode == XFRM_MODE_TUNNEL &&
- ((skb->len > mtu && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)))) {
+ if (toobig || dst_allfrag(skb_dst(skb)))
return ip6_fragment(net, sk, skb,
__xfrm6_output_finish);
- }
+
+skip_frag:
return x->outer_mode->afinfo->output_finish(sk, skb);
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 08c9c93..5643423 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -177,7 +177,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
return;
case IPPROTO_ICMPV6:
- if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
+ if (!onlyproto && (nh + offset + 2 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
u8 *icmp;
nh = skb_network_header(skb);
@@ -191,7 +192,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
offset += ipv6_optlen(exthdr);
- if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+ if (!onlyproto && (nh + offset + 3 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
struct ip6_mh *mh;
nh = skb_network_header(skb);
@@ -286,7 +288,7 @@ static struct dst_ops xfrm6_dst_ops = {
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = 32768,
+ .gc_thresh = INT_MAX,
};
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index a26c401..4396459 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1839,7 +1839,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
for (element = hashbin_get_first(iter->hashbin);
element != NULL;
element = hashbin_get_next(iter->hashbin)) {
- if (!off || *off-- == 0) {
+ if (!off || (*off)-- == 0) {
/* NB: hashbin left locked */
return element;
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 83a7068..f9c9ecb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -261,7 +261,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
- /* Error is cleare after succecful sending to at least one
+ /* Error is cleared after successful sending to at least one
* registered KM */
if ((broadcast_flags & BROADCAST_REGISTERED) && err)
err = err2;
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 7799d3c..a13d02b 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -55,7 +55,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec)
msl = container_of(sl, struct mac802154_llsec_seclevel, level);
list_del(&sl->list);
- kfree(msl);
+ kzfree(msl);
}
list_for_each_entry_safe(dev, dn, &sec->table.devices, list) {
@@ -72,7 +72,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec)
mkey = container_of(key->key, struct mac802154_llsec_key, key);
list_del(&key->list);
llsec_key_put(mkey);
- kfree(key);
+ kzfree(key);
}
}
@@ -161,7 +161,7 @@ err_tfm:
if (key->tfm[i])
crypto_free_aead(key->tfm[i]);
- kfree(key);
+ kzfree(key);
return NULL;
}
@@ -176,7 +176,7 @@ static void llsec_key_release(struct kref *ref)
crypto_free_aead(key->tfm[i]);
crypto_free_blkcipher(key->tfm0);
- kfree(key);
+ kzfree(key);
}
static struct mac802154_llsec_key*
@@ -267,7 +267,7 @@ int mac802154_llsec_key_add(struct mac802154_llsec *sec,
return 0;
fail:
- kfree(new);
+ kzfree(new);
return -ENOMEM;
}
@@ -347,10 +347,10 @@ static void llsec_dev_free(struct mac802154_llsec_device *dev)
devkey);
list_del(&pos->list);
- kfree(devkey);
+ kzfree(devkey);
}
- kfree(dev);
+ kzfree(dev);
}
int mac802154_llsec_dev_add(struct mac802154_llsec *sec,
@@ -681,7 +681,7 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
rc = crypto_aead_encrypt(req);
- kfree(req);
+ kzfree(req);
return rc;
}
@@ -881,7 +881,7 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec,
rc = crypto_aead_decrypt(req);
- kfree(req);
+ kzfree(req);
skb_trim(skb, skb->len - authlen);
return rc;
@@ -921,7 +921,7 @@ llsec_update_devkey_record(struct mac802154_llsec_device *dev,
if (!devkey)
list_add_rcu(&next->devkey.list, &dev->dev.keys);
else
- kfree(next);
+ kzfree(next);
spin_unlock_bh(&dev->lock);
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index bb185a2..c70d750 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -19,36 +19,13 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#endif
+#include <net/nexthop.h>
#include "internal.h"
-#define LABEL_NOT_SPECIFIED (1<<20)
-#define MAX_NEW_LABELS 2
-
-/* This maximum ha length copied from the definition of struct neighbour */
-#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
-
-enum mpls_payload_type {
- MPT_UNSPEC, /* IPv4 or IPv6 */
- MPT_IPV4 = 4,
- MPT_IPV6 = 6,
-
- /* Other types not implemented:
- * - Pseudo-wire with or without control word (RFC4385)
- * - GAL (RFC5586)
- */
-};
-
-struct mpls_route { /* next hop label forwarding entry */
- struct net_device __rcu *rt_dev;
- struct rcu_head rt_rcu;
- u32 rt_label[MAX_NEW_LABELS];
- u8 rt_protocol; /* routing protocol that set this entry */
- u8 rt_payload_type;
- u8 rt_labels;
- u8 rt_via_alen;
- u8 rt_via_table;
- u8 rt_via[0];
-};
+/* Maximum number of labels to look ahead at when selecting a path of
+ * a multipath route
+ */
+#define MAX_MP_SELECT_LABELS 4
static int zero = 0;
static int label_limit = (1 << 20) - 1;
@@ -80,10 +57,24 @@ bool mpls_output_possible(const struct net_device *dev)
}
EXPORT_SYMBOL_GPL(mpls_output_possible);
-static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
+static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
+{
+ u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
+ int nh_index = nh - rt->rt_nh;
+
+ return nh0_via + rt->rt_max_alen * nh_index;
+}
+
+static const u8 *mpls_nh_via(const struct mpls_route *rt,
+ const struct mpls_nh *nh)
+{
+ return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
+}
+
+static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
/* The size of the layer 2.5 labels to be added for this route */
- return rt->rt_labels * sizeof(struct mpls_shim_hdr);
+ return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}
unsigned int mpls_dev_mtu(const struct net_device *dev)
@@ -105,6 +96,80 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
+static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
+ struct sk_buff *skb, bool bos)
+{
+ struct mpls_entry_decoded dec;
+ struct mpls_shim_hdr *hdr;
+ bool eli_seen = false;
+ int label_index;
+ int nh_index = 0;
+ u32 hash = 0;
+
+ /* No need to look further into packet if there's only
+ * one path
+ */
+ if (rt->rt_nhn == 1)
+ goto out;
+
+ for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos;
+ label_index++) {
+ if (!pskb_may_pull(skb, sizeof(*hdr) * label_index))
+ break;
+
+ /* Read and decode the current label */
+ hdr = mpls_hdr(skb) + label_index;
+ dec = mpls_entry_decode(hdr);
+
+ /* RFC6790 - reserved labels MUST NOT be used as keys
+ * for the load-balancing function
+ */
+ if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
+ hash = jhash_1word(dec.label, hash);
+
+ /* The entropy label follows the entropy label
+ * indicator, so this means that the entropy
+ * label was just added to the hash - no need to
+ * go any deeper either in the label stack or in the
+ * payload
+ */
+ if (eli_seen)
+ break;
+ } else if (dec.label == MPLS_LABEL_ENTROPY) {
+ eli_seen = true;
+ }
+
+ bos = dec.bos;
+ if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index +
+ sizeof(struct iphdr))) {
+ const struct iphdr *v4hdr;
+
+ v4hdr = (const struct iphdr *)(mpls_hdr(skb) +
+ label_index);
+ if (v4hdr->version == 4) {
+ hash = jhash_3words(ntohl(v4hdr->saddr),
+ ntohl(v4hdr->daddr),
+ v4hdr->protocol, hash);
+ } else if (v4hdr->version == 6 &&
+ pskb_may_pull(skb, sizeof(*hdr) * label_index +
+ sizeof(struct ipv6hdr))) {
+ const struct ipv6hdr *v6hdr;
+
+ v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) +
+ label_index);
+
+ hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
+ hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
+ hash = jhash_1word(v6hdr->nexthdr, hash);
+ }
+ }
+ }
+
+ nh_index = hash % rt->rt_nhn;
+out:
+ return &rt->rt_nh[nh_index];
+}
+
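mpls_select_multipath() above hashes the label stack, skipping reserved labels per RFC 6790 and stopping once the label following an entropy label indicator has been mixed in, then reduces the hash modulo the number of next hops. A stand-alone sketch of that selection; the mixing function is a simple stand-in for the kernel's jhash:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define MPLS_LABEL_FIRST_UNRESERVED 16
#define MPLS_LABEL_ENTROPY          7

/* Tiny stand-in for jhash_1word(); any decent 32-bit mixer gives the
 * same load-balancing behaviour for a sketch. */
static uint32_t mix(uint32_t v, uint32_t seed)
{
    uint32_t h = seed ^ v;

    h *= 0x9e3779b1u;
    h ^= h >> 16;
    return h;
}

/* Pick a next hop index the way mpls_select_multipath() does: hash the
 * label stack, skip reserved labels, and stop once the label after an
 * entropy label indicator has been folded in. */
static size_t select_path(const uint32_t *labels, size_t depth, size_t num_nh)
{
    uint32_t hash = 0;
    int eli_seen = 0;
    size_t i;

    if (num_nh == 1)
        return 0;

    for (i = 0; i < depth; i++) {
        if (labels[i] >= MPLS_LABEL_FIRST_UNRESERVED) {
            hash = mix(labels[i], hash);
            if (eli_seen)
                break;      /* entropy label already hashed */
        } else if (labels[i] == MPLS_LABEL_ENTROPY) {
            eli_seen = 1;
        }
    }
    return hash % num_nh;
}

int main(void)
{
    uint32_t stack[] = { 100, 7, 4242 };    /* label, ELI, entropy label */

    printf("chosen next hop: %zu\n", select_path(stack, 3, 4));
    return 0;
}
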
static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
struct mpls_entry_decoded dec)
{
@@ -159,6 +224,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
struct net *net = dev_net(dev);
struct mpls_shim_hdr *hdr;
struct mpls_route *rt;
+ struct mpls_nh *nh;
struct mpls_entry_decoded dec;
struct net_device *out_dev;
struct mpls_dev *mdev;
@@ -196,8 +262,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
if (!rt)
goto drop;
+ nh = mpls_select_multipath(rt, skb, dec.bos);
+ if (!nh)
+ goto drop;
+
/* Find the output device */
- out_dev = rcu_dereference(rt->rt_dev);
+ out_dev = rcu_dereference(nh->nh_dev);
if (!mpls_output_possible(out_dev))
goto drop;
@@ -212,7 +282,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
dec.ttl -= 1;
/* Verify the destination can hold the packet */
- new_header_size = mpls_rt_header_size(rt);
+ new_header_size = mpls_nh_header_size(nh);
mtu = mpls_dev_mtu(out_dev);
if (mpls_pkt_too_big(skb, mtu - new_header_size))
goto drop;
@@ -240,13 +310,14 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
/* Push the new labels */
hdr = mpls_hdr(skb);
bos = dec.bos;
- for (i = rt->rt_labels - 1; i >= 0; i--) {
- hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
+ for (i = nh->nh_labels - 1; i >= 0; i--) {
+ hdr[i] = mpls_entry_encode(nh->nh_label[i],
+ dec.ttl, 0, bos);
bos = false;
}
}
- err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb);
+ err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb);
if (err)
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
__func__, err);
@@ -270,24 +341,33 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
struct mpls_route_config {
u32 rc_protocol;
u32 rc_ifindex;
- u16 rc_via_table;
- u16 rc_via_alen;
+ u8 rc_via_table;
+ u8 rc_via_alen;
u8 rc_via[MAX_VIA_ALEN];
u32 rc_label;
- u32 rc_output_labels;
+ u8 rc_output_labels;
u32 rc_output_label[MAX_NEW_LABELS];
u32 rc_nlflags;
enum mpls_payload_type rc_payload_type;
struct nl_info rc_nlinfo;
+ struct rtnexthop *rc_mp;
+ int rc_mp_len;
};
-static struct mpls_route *mpls_rt_alloc(size_t alen)
+static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
{
+ u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
struct mpls_route *rt;
- rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
- if (rt)
- rt->rt_via_alen = alen;
+ rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
+ VIA_ALEN_ALIGN) +
+ num_nh * max_alen_aligned,
+ GFP_KERNEL);
+ if (rt) {
+ rt->rt_nhn = num_nh;
+ rt->rt_max_alen = max_alen_aligned;
+ }
+
return rt;
}
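mpls_rt_alloc() above sizes one allocation to hold the route header, the nexthop array and, after alignment to VIA_ALEN_ALIGN, a fixed-size via slot per nexthop that __mpls_nh_via() indexes into. A stand-alone sketch of that single-allocation layout, with invented names and an 8-byte alignment standing in for VIA_ALEN_ALIGN:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define VIA_ALIGN 8     /* stand-in for VIA_ALEN_ALIGN */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

struct nh { uint8_t via_alen; };

struct route {
    int nhn;        /* number of next hops */
    int max_alen;   /* aligned bytes reserved per via address */
    struct nh nh[]; /* followed by nhn * max_alen bytes of via storage */
};

/* Same layout trick as mpls_rt_alloc()/__mpls_nh_via(): one allocation
 * holds the route, the nexthop array and all via addresses. */
static struct route *route_alloc(int nhn, int max_alen)
{
    size_t hdr = ALIGN_UP(sizeof(struct route) + nhn * sizeof(struct nh),
                          VIA_ALIGN);
    int alen = (int)ALIGN_UP(max_alen, VIA_ALIGN);
    struct route *rt = calloc(1, hdr + (size_t)nhn * alen);

    if (rt) {
        rt->nhn = nhn;
        rt->max_alen = alen;
    }
    return rt;
}

/* Address of the via slot belonging to nexthop i. */
static uint8_t *nh_via(struct route *rt, int i)
{
    uint8_t *base = (uint8_t *)rt +
        ALIGN_UP(sizeof(struct route) + rt->nhn * sizeof(struct nh),
                 VIA_ALIGN);

    return base + (size_t)i * rt->max_alen;
}

int main(void)
{
    struct route *rt = route_alloc(2, 6);   /* two hops, 6-byte via */
    uint8_t mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

    if (!rt)
        return 1;
    memcpy(nh_via(rt, 1), mac, sizeof(mac));
    printf("via[1][5] = %02x\n", nh_via(rt, 1)[5]);
    free(rt);
    return 0;
}
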
@@ -312,25 +392,22 @@ static void mpls_notify_route(struct net *net, unsigned index,
}
static void mpls_route_update(struct net *net, unsigned index,
- struct net_device *dev, struct mpls_route *new,
+ struct mpls_route *new,
const struct nl_info *info)
{
struct mpls_route __rcu **platform_label;
- struct mpls_route *rt, *old = NULL;
+ struct mpls_route *rt;
ASSERT_RTNL();
platform_label = rtnl_dereference(net->mpls.platform_label);
rt = rtnl_dereference(platform_label[index]);
- if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
- rcu_assign_pointer(platform_label[index], new);
- old = rt;
- }
+ rcu_assign_pointer(platform_label[index], new);
- mpls_notify_route(net, index, old, new, info);
+ mpls_notify_route(net, index, rt, new, info);
/* If we removed a route free it now */
- mpls_rt_free(old);
+ mpls_rt_free(rt);
}
static unsigned find_free_label(struct net *net)
@@ -350,7 +427,8 @@ static unsigned find_free_label(struct net *net)
}
#if IS_ENABLED(CONFIG_INET)
-static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
+static struct net_device *inet_fib_lookup_dev(struct net *net,
+ const void *addr)
{
struct net_device *dev;
struct rtable *rt;
@@ -369,14 +447,16 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
return dev;
}
#else
-static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
+static struct net_device *inet_fib_lookup_dev(struct net *net,
+ const void *addr)
{
return ERR_PTR(-EAFNOSUPPORT);
}
#endif
#if IS_ENABLED(CONFIG_IPV6)
-static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
+static struct net_device *inet6_fib_lookup_dev(struct net *net,
+ const void *addr)
{
struct net_device *dev;
struct dst_entry *dst;
@@ -399,47 +479,234 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
return dev;
}
#else
-static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
+static struct net_device *inet6_fib_lookup_dev(struct net *net,
+ const void *addr)
{
return ERR_PTR(-EAFNOSUPPORT);
}
#endif
static struct net_device *find_outdev(struct net *net,
- struct mpls_route_config *cfg)
+ struct mpls_route *rt,
+ struct mpls_nh *nh, int oif)
{
struct net_device *dev = NULL;
- if (!cfg->rc_ifindex) {
- switch (cfg->rc_via_table) {
+ if (!oif) {
+ switch (nh->nh_via_table) {
case NEIGH_ARP_TABLE:
- dev = inet_fib_lookup_dev(net, cfg->rc_via);
+ dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh));
break;
case NEIGH_ND_TABLE:
- dev = inet6_fib_lookup_dev(net, cfg->rc_via);
+ dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh));
break;
case NEIGH_LINK_TABLE:
break;
}
} else {
- dev = dev_get_by_index(net, cfg->rc_ifindex);
+ dev = dev_get_by_index(net, oif);
}
if (!dev)
return ERR_PTR(-ENODEV);
+ /* The caller is holding rtnl anyways, so release the dev reference */
+ dev_put(dev);
+
return dev;
}
+static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
+ struct mpls_nh *nh, int oif)
+{
+ struct net_device *dev = NULL;
+ int err = -ENODEV;
+
+ dev = find_outdev(net, rt, nh, oif);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ dev = NULL;
+ goto errout;
+ }
+
+ /* Ensure this is a supported device */
+ err = -EINVAL;
+ if (!mpls_dev_get(dev))
+ goto errout;
+
+ RCU_INIT_POINTER(nh->nh_dev, dev);
+
+ return 0;
+
+errout:
+ return err;
+}
+
+static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
+ struct mpls_route *rt)
+{
+ struct net *net = cfg->rc_nlinfo.nl_net;
+ struct mpls_nh *nh = rt->rt_nh;
+ int err;
+ int i;
+
+ if (!nh)
+ return -ENOMEM;
+
+ err = -EINVAL;
+ /* Ensure only a supported number of labels are present */
+ if (cfg->rc_output_labels > MAX_NEW_LABELS)
+ goto errout;
+
+ nh->nh_labels = cfg->rc_output_labels;
+ for (i = 0; i < nh->nh_labels; i++)
+ nh->nh_label[i] = cfg->rc_output_label[i];
+
+ nh->nh_via_table = cfg->rc_via_table;
+ memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
+ nh->nh_via_alen = cfg->rc_via_alen;
+
+ err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
+ if (err)
+ goto errout;
+
+ return 0;
+
+errout:
+ return err;
+}
+
+static int mpls_nh_build(struct net *net, struct mpls_route *rt,
+ struct mpls_nh *nh, int oif,
+ struct nlattr *via, struct nlattr *newdst)
+{
+ int err = -ENOMEM;
+
+ if (!nh)
+ goto errout;
+
+ if (newdst) {
+ err = nla_get_labels(newdst, MAX_NEW_LABELS,
+ &nh->nh_labels, nh->nh_label);
+ if (err)
+ goto errout;
+ }
+
+ err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
+ __mpls_nh_via(rt, nh));
+ if (err)
+ goto errout;
+
+ err = mpls_nh_assign_dev(net, rt, nh, oif);
+ if (err)
+ goto errout;
+
+ return 0;
+
+errout:
+ return err;
+}
+
+static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
+ u8 cfg_via_alen, u8 *max_via_alen)
+{
+ int nhs = 0;
+ int remaining = len;
+
+ if (!rtnh) {
+ *max_via_alen = cfg_via_alen;
+ return 1;
+ }
+
+ *max_via_alen = 0;
+
+ while (rtnh_ok(rtnh, remaining)) {
+ struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+ int attrlen;
+
+ attrlen = rtnh_attrlen(rtnh);
+ nla = nla_find(attrs, attrlen, RTA_VIA);
+ if (nla && nla_len(nla) >=
+ offsetof(struct rtvia, rtvia_addr)) {
+ int via_alen = nla_len(nla) -
+ offsetof(struct rtvia, rtvia_addr);
+
+ if (via_alen <= MAX_VIA_ALEN)
+ *max_via_alen = max_t(u16, *max_via_alen,
+ via_alen);
+ }
+
+ nhs++;
+ rtnh = rtnh_next(rtnh, &remaining);
+ }
+
+ /* leftover implies invalid nexthop configuration, discard it */
+ return remaining > 0 ? 0 : nhs;
+}
+
+static int mpls_nh_build_multi(struct mpls_route_config *cfg,
+ struct mpls_route *rt)
+{
+ struct rtnexthop *rtnh = cfg->rc_mp;
+ struct nlattr *nla_via, *nla_newdst;
+ int remaining = cfg->rc_mp_len;
+ int nhs = 0;
+ int err = 0;
+
+ change_nexthops(rt) {
+ int attrlen;
+
+ nla_via = NULL;
+ nla_newdst = NULL;
+
+ err = -EINVAL;
+ if (!rtnh_ok(rtnh, remaining))
+ goto errout;
+
+ /* neither weighted multipath nor any flags
+ * are supported
+ */
+ if (rtnh->rtnh_hops || rtnh->rtnh_flags)
+ goto errout;
+
+ attrlen = rtnh_attrlen(rtnh);
+ if (attrlen > 0) {
+ struct nlattr *attrs = rtnh_attrs(rtnh);
+
+ nla_via = nla_find(attrs, attrlen, RTA_VIA);
+ nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
+ }
+
+ if (!nla_via)
+ goto errout;
+
+ err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
+ rtnh->rtnh_ifindex, nla_via,
+ nla_newdst);
+ if (err)
+ goto errout;
+
+ rtnh = rtnh_next(rtnh, &remaining);
+ nhs++;
+ } endfor_nexthops(rt);
+
+ rt->rt_nhn = nhs;
+
+ return 0;
+
+errout:
+ return err;
+}
+
static int mpls_route_add(struct mpls_route_config *cfg)
{
struct mpls_route __rcu **platform_label;
struct net *net = cfg->rc_nlinfo.nl_net;
- struct net_device *dev = NULL;
struct mpls_route *rt, *old;
- unsigned index;
- int i;
int err = -EINVAL;
+ u8 max_via_alen;
+ unsigned index;
+ int nhs;
index = cfg->rc_label;
@@ -457,27 +724,6 @@ static int mpls_route_add(struct mpls_route_config *cfg)
if (index >= net->mpls.platform_labels)
goto errout;
- /* Ensure only a supported number of labels are present */
- if (cfg->rc_output_labels > MAX_NEW_LABELS)
- goto errout;
-
- dev = find_outdev(net, cfg);
- if (IS_ERR(dev)) {
- err = PTR_ERR(dev);
- dev = NULL;
- goto errout;
- }
-
- /* Ensure this is a supported device */
- err = -EINVAL;
- if (!mpls_dev_get(dev))
- goto errout;
-
- err = -EINVAL;
- if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
- (dev->addr_len != cfg->rc_via_alen))
- goto errout;
-
/* Append makes no sense with mpls */
err = -EOPNOTSUPP;
if (cfg->rc_nlflags & NLM_F_APPEND)
@@ -497,28 +743,34 @@ static int mpls_route_add(struct mpls_route_config *cfg)
if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
goto errout;
+ err = -EINVAL;
+ nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
+ cfg->rc_via_alen, &max_via_alen);
+ if (nhs == 0)
+ goto errout;
+
err = -ENOMEM;
- rt = mpls_rt_alloc(cfg->rc_via_alen);
+ rt = mpls_rt_alloc(nhs, max_via_alen);
if (!rt)
goto errout;
- rt->rt_labels = cfg->rc_output_labels;
- for (i = 0; i < rt->rt_labels; i++)
- rt->rt_label[i] = cfg->rc_output_label[i];
rt->rt_protocol = cfg->rc_protocol;
- RCU_INIT_POINTER(rt->rt_dev, dev);
rt->rt_payload_type = cfg->rc_payload_type;
- rt->rt_via_table = cfg->rc_via_table;
- memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
- mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo);
+ if (cfg->rc_mp)
+ err = mpls_nh_build_multi(cfg, rt);
+ else
+ err = mpls_nh_build_from_cfg(cfg, rt);
+ if (err)
+ goto freert;
+
+ mpls_route_update(net, index, rt, &cfg->rc_nlinfo);
- dev_put(dev);
return 0;
+freert:
+ mpls_rt_free(rt);
errout:
- if (dev)
- dev_put(dev);
return err;
}
@@ -538,7 +790,7 @@ static int mpls_route_del(struct mpls_route_config *cfg)
if (index >= net->mpls.platform_labels)
goto errout;
- mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo);
+ mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
err = 0;
errout:
@@ -635,9 +887,11 @@ static void mpls_ifdown(struct net_device *dev)
struct mpls_route *rt = rtnl_dereference(platform_label[index]);
if (!rt)
continue;
- if (rtnl_dereference(rt->rt_dev) != dev)
- continue;
- rt->rt_dev = NULL;
+ for_nexthops(rt) {
+ if (rtnl_dereference(nh->nh_dev) != dev)
+ continue;
+ nh->nh_dev = NULL;
+ } endfor_nexthops(rt);
}
mdev = mpls_dev_get(dev);
@@ -736,7 +990,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
EXPORT_SYMBOL_GPL(nla_put_labels);
int nla_get_labels(const struct nlattr *nla,
- u32 max_labels, u32 *labels, u32 label[])
+ u32 max_labels, u8 *labels, u32 label[])
{
unsigned len = nla_len(nla);
unsigned nla_labels;
@@ -781,6 +1035,48 @@ int nla_get_labels(const struct nlattr *nla,
}
EXPORT_SYMBOL_GPL(nla_get_labels);
+int nla_get_via(const struct nlattr *nla, u8 *via_alen,
+ u8 *via_table, u8 via_addr[])
+{
+ struct rtvia *via = nla_data(nla);
+ int err = -EINVAL;
+ int alen;
+
+ if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
+ goto errout;
+ alen = nla_len(nla) -
+ offsetof(struct rtvia, rtvia_addr);
+ if (alen > MAX_VIA_ALEN)
+ goto errout;
+
+ /* Validate the address family */
+ switch (via->rtvia_family) {
+ case AF_PACKET:
+ *via_table = NEIGH_LINK_TABLE;
+ break;
+ case AF_INET:
+ *via_table = NEIGH_ARP_TABLE;
+ if (alen != 4)
+ goto errout;
+ break;
+ case AF_INET6:
+ *via_table = NEIGH_ND_TABLE;
+ if (alen != 16)
+ goto errout;
+ break;
+ default:
+ /* Unsupported address family */
+ goto errout;
+ }
+
+ memcpy(via_addr, via->rtvia_addr, alen);
+ *via_alen = alen;
+ err = 0;
+
+errout:
+ return err;
+}
+
static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct mpls_route_config *cfg)
{
@@ -844,7 +1140,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
break;
case RTA_DST:
{
- u32 label_count;
+ u8 label_count;
if (nla_get_labels(nla, 1, &label_count,
&cfg->rc_label))
goto errout;
@@ -857,35 +1153,15 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
}
case RTA_VIA:
{
- struct rtvia *via = nla_data(nla);
- if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
- goto errout;
- cfg->rc_via_alen = nla_len(nla) -
- offsetof(struct rtvia, rtvia_addr);
- if (cfg->rc_via_alen > MAX_VIA_ALEN)
+ if (nla_get_via(nla, &cfg->rc_via_alen,
+ &cfg->rc_via_table, cfg->rc_via))
goto errout;
-
- /* Validate the address family */
- switch(via->rtvia_family) {
- case AF_PACKET:
- cfg->rc_via_table = NEIGH_LINK_TABLE;
- break;
- case AF_INET:
- cfg->rc_via_table = NEIGH_ARP_TABLE;
- if (cfg->rc_via_alen != 4)
- goto errout;
- break;
- case AF_INET6:
- cfg->rc_via_table = NEIGH_ND_TABLE;
- if (cfg->rc_via_alen != 16)
- goto errout;
- break;
- default:
- /* Unsupported address family */
- goto errout;
- }
-
- memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
+ break;
+ }
+ case RTA_MULTIPATH:
+ {
+ cfg->rc_mp = nla_data(nla);
+ cfg->rc_mp_len = nla_len(nla);
break;
}
default:
@@ -946,16 +1222,52 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
- if (rt->rt_labels &&
- nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
- goto nla_put_failure;
- if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
- goto nla_put_failure;
- dev = rtnl_dereference(rt->rt_dev);
- if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
- goto nla_put_failure;
if (nla_put_labels(skb, RTA_DST, 1, &label))
goto nla_put_failure;
+ if (rt->rt_nhn == 1) {
+ const struct mpls_nh *nh = rt->rt_nh;
+
+ if (nh->nh_labels &&
+ nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
+ nh->nh_label))
+ goto nla_put_failure;
+ if (nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
+ nh->nh_via_alen))
+ goto nla_put_failure;
+ dev = rtnl_dereference(nh->nh_dev);
+ if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
+ goto nla_put_failure;
+ } else {
+ struct rtnexthop *rtnh;
+ struct nlattr *mp;
+
+ mp = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp)
+ goto nla_put_failure;
+
+ for_nexthops(rt) {
+ rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
+ if (!rtnh)
+ goto nla_put_failure;
+
+ dev = rtnl_dereference(nh->nh_dev);
+ if (dev)
+ rtnh->rtnh_ifindex = dev->ifindex;
+ if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
+ nh->nh_labels,
+ nh->nh_label))
+ goto nla_put_failure;
+ if (nla_put_via(skb, nh->nh_via_table,
+ mpls_nh_via(rt, nh),
+ nh->nh_via_alen))
+ goto nla_put_failure;
+
+ /* length of rtnetlink header + attributes */
+ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
+ } endfor_nexthops(rt);
+
+ nla_nest_end(skb, mp);
+ }
nlmsg_end(skb, nlh);
return 0;
@@ -1000,12 +1312,30 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
size_t payload =
NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */
+ nla_total_size(4); /* RTA_DST */
- if (rt->rt_labels) /* RTA_NEWDST */
- payload += nla_total_size(rt->rt_labels * 4);
- if (rt->rt_dev) /* RTA_OIF */
- payload += nla_total_size(4);
+
+ if (rt->rt_nhn == 1) {
+ struct mpls_nh *nh = rt->rt_nh;
+
+ if (nh->nh_dev)
+ payload += nla_total_size(4); /* RTA_OIF */
+ payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */
+ if (nh->nh_labels) /* RTA_NEWDST */
+ payload += nla_total_size(nh->nh_labels * 4);
+ } else {
+ /* each nexthop is packed in an attribute */
+ size_t nhsize = 0;
+
+ for_nexthops(rt) {
+ nhsize += nla_total_size(sizeof(struct rtnexthop));
+ nhsize += nla_total_size(2 + nh->nh_via_alen);
+ if (nh->nh_labels)
+ nhsize += nla_total_size(nh->nh_labels * 4);
+ } endfor_nexthops(rt);
+ /* nested attribute */
+ payload += nla_total_size(nhsize);
+ }
+
return payload;
}
@@ -1057,25 +1387,29 @@ static int resize_platform_label_table(struct net *net, size_t limit)
/* In case the predefined labels need to be populated */
if (limit > MPLS_LABEL_IPV4NULL) {
struct net_device *lo = net->loopback_dev;
- rt0 = mpls_rt_alloc(lo->addr_len);
+ rt0 = mpls_rt_alloc(1, lo->addr_len);
if (!rt0)
goto nort0;
- RCU_INIT_POINTER(rt0->rt_dev, lo);
+ RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
rt0->rt_payload_type = MPT_IPV4;
- rt0->rt_via_table = NEIGH_LINK_TABLE;
- memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
+ rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
+ rt0->rt_nh->nh_via_alen = lo->addr_len;
+ memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
+ lo->addr_len);
}
if (limit > MPLS_LABEL_IPV6NULL) {
struct net_device *lo = net->loopback_dev;
- rt2 = mpls_rt_alloc(lo->addr_len);
+ rt2 = mpls_rt_alloc(1, lo->addr_len);
if (!rt2)
goto nort2;
- RCU_INIT_POINTER(rt2->rt_dev, lo);
+ RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
rt2->rt_payload_type = MPT_IPV6;
- rt2->rt_via_table = NEIGH_LINK_TABLE;
- memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
+ rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
+ rt2->rt_nh->nh_via_alen = lo->addr_len;
+ memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
+ lo->addr_len);
}
rtnl_lock();
@@ -1085,7 +1419,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
/* Free any labels beyond the new table */
for (index = limit; index < old_limit; index++)
- mpls_route_update(net, index, NULL, NULL, NULL);
+ mpls_route_update(net, index, NULL, NULL);
/* Copy over the old labels */
cp_size = size;
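
For reference, the RTA_MULTIPATH payload stashed in rc_mp/rc_mp_len above is the usual rtnetlink nexthop array: packed struct rtnexthop entries, each followed by its own nested attributes (RTA_VIA, RTA_NEWDST, RTA_OIF via rtnh_ifindex). A minimal user-space sketch of walking it with the standard uapi macros (illustrative only; this is not the kernel's mpls_nh_build_multi()):

	#include <linux/rtnetlink.h>

	/* Count the nexthops in an RTA_MULTIPATH payload of mp_len bytes. */
	static int count_nexthops(void *mp, int mp_len)
	{
		struct rtnexthop *rtnh = mp;
		int remaining = mp_len;
		int nhs = 0;

		while (RTNH_OK(rtnh, remaining)) {
			nhs++;
			remaining -= RTNH_ALIGN(rtnh->rtnh_len);
			rtnh = RTNH_NEXT(rtnh);
		}
		return nhs;
	}

Presumably mpls_nh_build_multi() makes a pass of this shape to size the route (the nhs argument to mpls_rt_alloc()) before filling in each struct mpls_nh.
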
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 2681a4b..bde52ce 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -21,6 +21,76 @@ struct mpls_dev {
struct sk_buff;
+#define LABEL_NOT_SPECIFIED (1 << 20)
+#define MAX_NEW_LABELS 2
+
+/* This maximum hardware address length is copied from struct neighbour */
+#define VIA_ALEN_ALIGN sizeof(unsigned long)
+#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN))
+
+enum mpls_payload_type {
+ MPT_UNSPEC, /* IPv4 or IPv6 */
+ MPT_IPV4 = 4,
+ MPT_IPV6 = 6,
+
+ /* Other types not implemented:
+ * - Pseudo-wire with or without control word (RFC4385)
+ * - GAL (RFC5586)
+ */
+};
+
+struct mpls_nh { /* next hop label forwarding entry */
+ struct net_device __rcu *nh_dev;
+ u32 nh_label[MAX_NEW_LABELS];
+ u8 nh_labels;
+ u8 nh_via_alen;
+ u8 nh_via_table;
+};
+
+/* The route, nexthops and vias are stored together in the same memory
+ * block:
+ *
+ * +----------------------+
+ * | mpls_route |
+ * +----------------------+
+ * | mpls_nh 0 |
+ * +----------------------+
+ * | ... |
+ * +----------------------+
+ * | mpls_nh n-1 |
+ * +----------------------+
+ * | alignment padding |
+ * +----------------------+
+ * | via[rt_max_alen] 0 |
+ * +----------------------+
+ * | ... |
+ * +----------------------+
+ * | via[rt_max_alen] n-1 |
+ * +----------------------+
+ */
+struct mpls_route { /* next hop label forwarding entry */
+ struct rcu_head rt_rcu;
+ u8 rt_protocol;
+ u8 rt_payload_type;
+ u8 rt_max_alen;
+ unsigned int rt_nhn;
+ struct mpls_nh rt_nh[0];
+};
+
+#define for_nexthops(rt) { \
+ int nhsel; struct mpls_nh *nh; \
+ for (nhsel = 0, nh = (rt)->rt_nh; \
+ nhsel < (rt)->rt_nhn; \
+ nh++, nhsel++)
+
+#define change_nexthops(rt) { \
+ int nhsel; struct mpls_nh *nh; \
+ for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \
+ nhsel < (rt)->rt_nhn; \
+ nh++, nhsel++)
+
+#define endfor_nexthops(rt) }
+
static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
{
return (struct mpls_shim_hdr *)skb_network_header(skb);
@@ -52,8 +122,10 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *
int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels,
+int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
u32 label[]);
+int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
+ u8 via[]);
bool mpls_output_possible(const struct net_device *dev);
unsigned int mpls_dev_mtu(const struct net_device *dev);
bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
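
The layout comment above places every via address in a single tail region of rt_max_alen-sized slots after the nexthop array. A rough standalone sketch of that pointer arithmetic (the types and the via_of() helper are illustrative, not the kernel's __mpls_nh_via()):

	#include <stddef.h>
	#include <stdint.h>

	#define VIA_ALIGN sizeof(unsigned long)

	struct nh {                 /* stand-in for struct mpls_nh */
		uint8_t via_alen;
	};

	struct route {              /* stand-in for struct mpls_route */
		uint8_t max_alen;       /* largest via_alen, rounded up to VIA_ALIGN */
		unsigned int nhn;       /* number of nexthops */
		struct nh nh[];         /* nhn entries, then padding, then via slots */
	};

	/* via address of nexthop i: skip the nexthop array, align, index the slot */
	static uint8_t *via_of(struct route *rt, unsigned int i)
	{
		uintptr_t end = (uintptr_t)&rt->nh[rt->nhn];
		uintptr_t vias = (end + VIA_ALIGN - 1) & ~(uintptr_t)(VIA_ALIGN - 1);

		return (uint8_t *)vias + (size_t)i * rt->max_alen;
	}

Keeping one fixed-size slot per nexthop is presumably what lets mpls_rt_alloc(nhs, max_via_alen) compute the whole allocation up front.
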
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 09e661c..f39276d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -152,6 +152,8 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
#endif
synchronize_net();
nf_queue_nf_hook_drop(net, &entry->ops);
+ /* another CPU may still be processing an nfqueue verdict that used reg */
+ synchronize_net();
kfree(entry);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a1fe537..5a30ce6 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_timeout_expired(ext_timeout(n, set))))
n = NULL;
- e = kzalloc(set->dsize, GFP_KERNEL);
+ e = kzalloc(set->dsize, GFP_ATOMIC);
if (!e)
return -ENOMEM;
e->id = d->id;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0a49a8c..fafe33b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2371,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
int pos, idx, shift;
err = 0;
- netlink_table_grab();
+ netlink_lock_table();
for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
if (len - pos < sizeof(u32))
break;
@@ -2386,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
}
if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
err = -EFAULT;
- netlink_table_ungrab();
+ netlink_unlock_table();
break;
}
case NETLINK_CAP_ACK:
diff --git a/net/nfc/core.c b/net/nfc/core.c
index cff3f16..1fe3d3b 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -449,7 +449,7 @@ error:
* @dev: The nfc device that found the target
* @target_idx: index of the target that must be deactivated
*/
-int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx)
+int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
{
int rc = 0;
@@ -476,7 +476,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx)
if (dev->ops->check_presence)
del_timer_sync(&dev->check_pres_timer);
- dev->ops->deactivate_target(dev, dev->active_target);
+ dev->ops->deactivate_target(dev, dev->active_target, mode);
dev->active_target = NULL;
error:
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 009bcf3..23c2a11 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -631,7 +631,8 @@ static int digital_activate_target(struct nfc_dev *nfc_dev,
}
static void digital_deactivate_target(struct nfc_dev *nfc_dev,
- struct nfc_target *target)
+ struct nfc_target *target,
+ u8 mode)
{
struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 6e061da..2b0f0ac 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -678,7 +678,8 @@ static int hci_activate_target(struct nfc_dev *nfc_dev,
}
static void hci_deactivate_target(struct nfc_dev *nfc_dev,
- struct nfc_target *target)
+ struct nfc_target *target,
+ u8 mode)
{
}
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 1b90c05..1399a03 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -144,11 +144,13 @@ inline int nfc_llc_start(struct nfc_llc *llc)
{
return llc->ops->start(llc);
}
+EXPORT_SYMBOL(nfc_llc_start);
inline int nfc_llc_stop(struct nfc_llc *llc)
{
return llc->ops->stop(llc);
}
+EXPORT_SYMBOL(nfc_llc_stop);
inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
{
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index 901c1dd..85d4819 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -12,7 +12,7 @@ config NFC_NCI
config NFC_NCI_SPI
depends on NFC_NCI && SPI
select CRC_CCITT
- bool "NCI over SPI protocol support"
+ tristate "NCI over SPI protocol support"
default n
help
NCI (NFC Controller Interface) is a communication protocol between
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index b4b85b8..0ca31d9 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFC_NCI) += nci.o
nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o
-nci-$(CONFIG_NFC_NCI_SPI) += spi.o
+nci_spi-y += spi.o
+obj-$(CONFIG_NFC_NCI_SPI) += nci_spi.o
nci_uart-y += uart.o
obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 943889b..10c99a5 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -64,6 +64,19 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
return NULL;
}
+int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id)
+{
+ struct nci_conn_info *conn_info;
+
+ list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
+ if (conn_info->id == id)
+ return conn_info->conn_id;
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(nci_get_conn_info_by_id);
+
/* ---- NCI requests ---- */
void nci_req_complete(struct nci_dev *ndev, int result)
@@ -325,32 +338,46 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
sizeof(struct nci_rf_deactivate_cmd), &cmd);
}
-struct nci_prop_cmd_param {
+struct nci_cmd_param {
__u16 opcode;
size_t len;
__u8 *payload;
};
-static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
{
- struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt;
+ struct nci_cmd_param *param =
+ (struct nci_cmd_param *)opt;
nci_send_cmd(ndev, param->opcode, param->len, param->payload);
}
int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
{
- struct nci_prop_cmd_param param;
+ struct nci_cmd_param param;
param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid);
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_prop_cmd);
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
+{
+ struct nci_cmd_param param;
+
+ param.opcode = opcode;
+ param.len = len;
+ param.payload = payload;
+
+ return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+ msecs_to_jiffies(NCI_CMD_TIMEOUT));
+}
+EXPORT_SYMBOL(nci_core_cmd);
+
int nci_core_reset(struct nci_dev *ndev)
{
return __nci_request(ndev, nci_reset_req, 0,
@@ -402,9 +429,8 @@ static int nci_open_device(struct nci_dev *ndev)
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
- if (ndev->ops->post_setup) {
+ if (!rc && ndev->ops->post_setup)
rc = ndev->ops->post_setup(ndev);
- }
if (!rc) {
rc = __nci_request(ndev, nci_init_complete_req, 0,
@@ -540,7 +566,7 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
{
- return nci_request(ndev, nci_nfcee_discover_req, action,
+ return __nci_request(ndev, nci_nfcee_discover_req, action,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_discover);
@@ -561,8 +587,9 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
cmd.nfcee_id = nfcee_id;
cmd.nfcee_mode = nfcee_mode;
- return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd,
- msecs_to_jiffies(NCI_CMD_TIMEOUT));
+ return __nci_request(ndev, nci_nfcee_mode_set_req,
+ (unsigned long)&cmd,
+ msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_mode_set);
@@ -588,12 +615,19 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
if (!cmd)
return -ENOMEM;
+ if (!number_destination_params)
+ return -EINVAL;
+
cmd->destination_type = destination_type;
cmd->number_destination_params = number_destination_params;
memcpy(cmd->params, params, params_len);
data.cmd = cmd;
- ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX];
+
+ if (params->length > 0)
+ ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX];
+ else
+ ndev->cur_id = 0;
r = __nci_request(ndev, nci_core_conn_create_req,
(unsigned long)&data,
@@ -612,8 +646,8 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
{
- return nci_request(ndev, nci_core_conn_close_req, conn_id,
- msecs_to_jiffies(NCI_CMD_TIMEOUT));
+ return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+ msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_conn_close);
@@ -801,9 +835,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
}
static void nci_deactivate_target(struct nfc_dev *nfc_dev,
- struct nfc_target *target)
+ struct nfc_target *target,
+ __u8 mode)
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
+ u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
pr_debug("entry\n");
@@ -814,9 +850,14 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
ndev->target_active_prot = 0;
+ switch (mode) {
+ case NFC_TARGET_MODE_SLEEP:
+ nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE;
+ break;
+ }
+
if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
- nci_request(ndev, nci_rf_deactivate_req,
- NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ nci_request(ndev, nci_rf_deactivate_req, nci_mode,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
}
@@ -850,7 +891,7 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
pr_debug("entry\n");
if (nfc_dev->rf_mode == NFC_RF_INITIATOR) {
- nci_deactivate_target(nfc_dev, NULL);
+ nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE);
} else {
if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
atomic_read(&ndev->state) == NCI_DISCOVERY) {
@@ -1177,7 +1218,7 @@ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb)
}
EXPORT_SYMBOL(nci_recv_frame);
-static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
+int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
{
pr_debug("len %d\n", skb->len);
@@ -1195,6 +1236,7 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
return ndev->ops->send(ndev, skb);
}
+EXPORT_SYMBOL(nci_send_frame);
/* Send NCI command */
int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
@@ -1226,48 +1268,80 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
return 0;
}
+EXPORT_SYMBOL(nci_send_cmd);
/* Proprietary commands API */
-static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev,
- __u16 opcode)
+static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
+ size_t n_ops,
+ __u16 opcode)
{
size_t i;
- struct nci_prop_ops *prop_op;
+ struct nci_driver_ops *op;
- if (!ndev->ops->prop_ops || !ndev->ops->n_prop_ops)
+ if (!ops || !n_ops)
return NULL;
- for (i = 0; i < ndev->ops->n_prop_ops; i++) {
- prop_op = &ndev->ops->prop_ops[i];
- if (prop_op->opcode == opcode)
- return prop_op;
+ for (i = 0; i < n_ops; i++) {
+ op = &ops[i];
+ if (op->opcode == opcode)
+ return op;
}
return NULL;
}
-int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
- struct sk_buff *skb)
+static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
+ struct sk_buff *skb, struct nci_driver_ops *ops,
+ size_t n_ops)
{
- struct nci_prop_ops *prop_op;
+ struct nci_driver_ops *op;
- prop_op = prop_cmd_lookup(ndev, rsp_opcode);
- if (!prop_op || !prop_op->rsp)
+ op = ops_cmd_lookup(ops, n_ops, rsp_opcode);
+ if (!op || !op->rsp)
return -ENOTSUPP;
- return prop_op->rsp(ndev, skb);
+ return op->rsp(ndev, skb);
}
-int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
- struct sk_buff *skb)
+static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
+ struct sk_buff *skb, struct nci_driver_ops *ops,
+ size_t n_ops)
{
- struct nci_prop_ops *prop_op;
+ struct nci_driver_ops *op;
- prop_op = prop_cmd_lookup(ndev, ntf_opcode);
- if (!prop_op || !prop_op->ntf)
+ op = ops_cmd_lookup(ops, n_ops, ntf_opcode);
+ if (!op || !op->ntf)
return -ENOTSUPP;
- return prop_op->ntf(ndev, skb);
+ return op->ntf(ndev, skb);
+}
+
+int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode,
+ struct sk_buff *skb)
+{
+ return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops,
+ ndev->ops->n_prop_ops);
+}
+
+int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode,
+ struct sk_buff *skb)
+{
+ return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops,
+ ndev->ops->n_prop_ops);
+}
+
+int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode,
+ struct sk_buff *skb)
+{
+ return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops,
+ ndev->ops->n_core_ops);
+}
+
+int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
+ struct sk_buff *skb)
+{
+ return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops,
+ ndev->ops->n_core_ops);
}
/* ---- NCI TX Data worker thread ---- */
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 566466d..dbd2425 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -90,6 +90,18 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
nci_pbf_set((__u8 *)hdr, pbf);
}
+int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id)
+{
+ struct nci_conn_info *conn_info;
+
+ conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
+ if (!conn_info)
+ return -EPROTO;
+
+ return conn_info->max_pkt_payload_len;
+}
+EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size);
+
static int nci_queue_tx_data_frags(struct nci_dev *ndev,
__u8 conn_id,
struct sk_buff *skb) {
@@ -203,6 +215,7 @@ free_exit:
exit:
return rc;
}
+EXPORT_SYMBOL(nci_send_data);
/* ----------------- NCI RX Data ----------------- */
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 609f922..2aedac1 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -70,6 +70,7 @@ struct nci_hcp_packet {
#define NCI_HCI_ANY_SET_PARAMETER 0x01
#define NCI_HCI_ANY_GET_PARAMETER 0x02
#define NCI_HCI_ANY_CLOSE_PIPE 0x04
+#define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14
#define NCI_HFP_NO_CHAINING 0x80
@@ -78,6 +79,8 @@ struct nci_hcp_packet {
#define NCI_EVT_HOT_PLUG 0x03
#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01
+#define NCI_HCI_ADM_CREATE_PIPE 0x10
+#define NCI_HCI_ADM_DELETE_PIPE 0x11
/* HCP headers */
#define NCI_HCI_HCP_PACKET_HEADER_LEN 1
@@ -101,6 +104,20 @@ struct nci_hcp_packet {
#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f)
#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f)
+static int nci_hci_result_to_errno(u8 result)
+{
+ switch (result) {
+ case NCI_HCI_ANY_OK:
+ return 0;
+ case NCI_HCI_ANY_E_REG_PAR_UNKNOWN:
+ return -EOPNOTSUPP;
+ case NCI_HCI_ANY_E_TIMEOUT:
+ return -ETIME;
+ default:
+ return -1;
+ }
+}
+
/* HCI core */
static void nci_hci_reset_pipes(struct nci_hci_dev *hdev)
{
@@ -146,18 +163,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
if (!conn_info)
return -EPROTO;
- skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len +
+ i = 0;
+ skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
NCI_DATA_HDR_SIZE, GFP_KERNEL);
if (!skb)
return -ENOMEM;
- skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE);
+ skb_reserve(skb, NCI_DATA_HDR_SIZE + 2);
*skb_push(skb, 1) = data_type;
- i = 0;
- len = conn_info->max_pkt_payload_len;
-
do {
+ len = conn_info->max_pkt_payload_len;
+
/* If last packet add NCI_HFP_NO_CHAINING */
if (i + conn_info->max_pkt_payload_len -
(skb->len + 1) >= data_len) {
@@ -177,9 +194,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
return r;
i += len;
+
if (i < data_len) {
- skb_trim(skb, 0);
- skb_pull(skb, len);
+ skb = nci_skb_alloc(ndev,
+ conn_info->max_pkt_payload_len +
+ NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, NCI_DATA_HDR_SIZE + 1);
}
} while (i < data_len);
@@ -212,7 +235,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
const u8 *param, size_t param_len,
struct sk_buff **skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_hcp_message *message;
+ struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -232,14 +256,34 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
-
- if (r == NCI_STATUS_OK && skb)
- *skb = conn_info->rx_skb;
+ if (r == NCI_STATUS_OK) {
+ message = (struct nci_hcp_message *)conn_info->rx_skb->data;
+ r = nci_hci_result_to_errno(
+ NCI_HCP_MSG_GET_CMD(message->header));
+ skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
+
+ if (!r && skb)
+ *skb = conn_info->rx_skb;
+ }
return r;
}
EXPORT_SYMBOL(nci_hci_send_cmd);
+int nci_hci_clear_all_pipes(struct nci_dev *ndev)
+{
+ int r;
+
+ r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
+ NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL);
+ if (r < 0)
+ return r;
+
+ nci_hci_reset_pipes(ndev->hci_dev);
+ return r;
+}
+EXPORT_SYMBOL(nci_hci_clear_all_pipes);
+
static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe,
u8 event, struct sk_buff *skb)
{
@@ -328,9 +372,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
struct nci_conn_info *conn_info;
u8 status = result;
- if (result != NCI_HCI_ANY_OK)
- goto exit;
-
conn_info = ndev->hci_dev->conn_info;
if (!conn_info) {
status = NCI_STATUS_REJECTED;
@@ -340,7 +381,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
conn_info->rx_skb = skb;
exit:
- nci_req_complete(ndev, status);
+ nci_req_complete(ndev, NCI_STATUS_OK);
}
/* Receive hcp message for pipe, with type and cmd.
@@ -366,7 +407,7 @@ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe,
break;
}
- nci_req_complete(ndev, 0);
+ nci_req_complete(ndev, NCI_STATUS_OK);
}
static void nci_hci_msg_rx_work(struct work_struct *work)
@@ -378,7 +419,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work)
u8 pipe, type, instruction;
while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) {
- pipe = skb->data[0];
+ pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]);
skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN);
message = (struct nci_hcp_message *)skb->data;
type = NCI_HCP_MSG_GET_TYPE(message->header);
@@ -395,7 +436,7 @@ void nci_hci_data_received_cb(void *context,
{
struct nci_dev *ndev = (struct nci_dev *)context;
struct nci_hcp_packet *packet;
- u8 pipe, type, instruction;
+ u8 pipe, type;
struct sk_buff *hcp_skb;
struct sk_buff *frag_skb;
int msg_len;
@@ -415,7 +456,7 @@ void nci_hci_data_received_cb(void *context,
/* it's the last fragment. Does it need re-aggregation? */
if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) {
- pipe = packet->header & NCI_HCI_FRAGMENT;
+ pipe = NCI_HCP_MSG_GET_PIPE(packet->header);
skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb);
msg_len = 0;
@@ -434,7 +475,7 @@ void nci_hci_data_received_cb(void *context,
*skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe;
skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) {
- msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
+ msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
memcpy(skb_put(hcp_skb, msg_len), frag_skb->data +
NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len);
}
@@ -452,11 +493,10 @@ void nci_hci_data_received_cb(void *context,
packet = (struct nci_hcp_packet *)hcp_skb->data;
type = NCI_HCP_MSG_GET_TYPE(packet->message.header);
if (type == NCI_HCI_HCP_RESPONSE) {
- pipe = packet->header;
- instruction = NCI_HCP_MSG_GET_CMD(packet->message.header);
- skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN +
- NCI_HCI_HCP_MESSAGE_HEADER_LEN);
- nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb);
+ pipe = NCI_HCP_MSG_GET_PIPE(packet->header);
+ skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN);
+ nci_hci_hcp_message_rx(ndev, pipe, type,
+ NCI_STATUS_OK, hcp_skb);
} else {
skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb);
schedule_work(&ndev->hci_dev->msg_rx_work);
@@ -485,9 +525,47 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
}
EXPORT_SYMBOL(nci_hci_open_pipe);
+static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
+ u8 dest_gate, int *result)
+{
+ u8 pipe;
+ struct sk_buff *skb;
+ struct nci_hci_create_pipe_params params;
+ struct nci_hci_create_pipe_resp *resp;
+
+ pr_debug("gate=%d\n", dest_gate);
+
+ params.src_gate = NCI_HCI_ADMIN_GATE;
+ params.dest_host = dest_host;
+ params.dest_gate = dest_gate;
+
+ *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
+ NCI_HCI_ADM_CREATE_PIPE,
+ (u8 *)&params, sizeof(params), &skb);
+ if (*result < 0)
+ return NCI_HCI_INVALID_PIPE;
+
+ resp = (struct nci_hci_create_pipe_resp *)skb->data;
+ pipe = resp->pipe;
+ kfree_skb(skb);
+
+ pr_debug("pipe created=%d\n", pipe);
+
+ return pipe;
+}
+
+static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe)
+{
+ pr_debug("\n");
+
+ return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE,
+ NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL);
+}
+
int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
const u8 *param, size_t param_len)
{
+ struct nci_hcp_message *message;
struct nci_conn_info *conn_info;
struct nci_data data;
int r;
@@ -520,6 +598,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
r = nci_request(ndev, nci_hci_send_data_req,
(unsigned long)&data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
+ if (r == NCI_STATUS_OK) {
+ message = (struct nci_hcp_message *)conn_info->rx_skb->data;
+ r = nci_hci_result_to_errno(
+ NCI_HCP_MSG_GET_CMD(message->header));
+ skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
+ }
kfree(tmp);
return r;
@@ -529,6 +613,7 @@ EXPORT_SYMBOL(nci_hci_set_param);
int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
struct sk_buff **skb)
{
+ struct nci_hcp_message *message;
struct nci_conn_info *conn_info;
struct nci_data data;
int r;
@@ -553,8 +638,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
- if (r == NCI_STATUS_OK)
- *skb = conn_info->rx_skb;
+ if (r == NCI_STATUS_OK) {
+ message = (struct nci_hcp_message *)conn_info->rx_skb->data;
+ r = nci_hci_result_to_errno(
+ NCI_HCP_MSG_GET_CMD(message->header));
+ skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
+
+ if (!r && skb)
+ *skb = conn_info->rx_skb;
+ }
return r;
}
@@ -563,6 +655,7 @@ EXPORT_SYMBOL(nci_hci_get_param);
int nci_hci_connect_gate(struct nci_dev *ndev,
u8 dest_host, u8 dest_gate, u8 pipe)
{
+ bool pipe_created = false;
int r;
if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE)
@@ -581,12 +674,26 @@ int nci_hci_connect_gate(struct nci_dev *ndev,
case NCI_HCI_ADMIN_GATE:
pipe = NCI_HCI_ADMIN_PIPE;
break;
+ default:
+ pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r);
+ if (pipe < 0)
+ return r;
+ pipe_created = true;
+ break;
}
open_pipe:
r = nci_hci_open_pipe(ndev, pipe);
- if (r < 0)
+ if (r < 0) {
+ if (pipe_created) {
+ if (nci_hci_delete_pipe(ndev, pipe) < 0) {
+ /* TODO: Cannot clean by deleting pipe...
+ * -> inconsistent state
+ */
+ }
+ }
return r;
+ }
ndev->hci_dev->pipes[pipe].gate = dest_gate;
ndev->hci_dev->pipes[pipe].host = dest_host;
@@ -653,6 +760,10 @@ int nci_hci_dev_session_init(struct nci_dev *ndev)
/* Restore gate<->pipe table from some proprietary location. */
r = ndev->ops->hci_load_session(ndev);
} else {
+ r = nci_hci_clear_all_pipes(ndev);
+ if (r < 0)
+ goto exit;
+
r = nci_hci_dev_connect_gates(ndev,
ndev->hci_dev->init_data.gate_count,
ndev->hci_dev->init_data.gates);
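
Several fixes above replace raw skb->data[0] reads with NCI_HCP_MSG_GET_PIPE(). Going by the header macros in this file, the HCP packet header keeps the no-chaining flag (NCI_HFP_NO_CHAINING) in the top bit and the pipe id in the low seven bits, while the message header packs type and instruction. A small plain-C decode sketch, names hypothetical:

	#include <stdint.h>
	#include <stdio.h>

	/* Packet header: bit 7 = no-chaining (final fragment), bits 6..0 = pipe.
	 * Message header: bits 7..6 = type, bits 5..0 = instruction. */
	static void hcp_decode(uint8_t pkt_hdr, uint8_t msg_hdr)
	{
		uint8_t last_frag = pkt_hdr >> 7;
		uint8_t pipe  = pkt_hdr & 0x7f; /* NCI_HCP_MSG_GET_PIPE() */
		uint8_t type  = msg_hdr >> 6;   /* NCI_HCP_MSG_GET_TYPE() */
		uint8_t instr = msg_hdr & 0x3f; /* NCI_HCP_MSG_GET_CMD()  */

		printf("last_frag=%u pipe=%u type=%u instruction=%u\n",
		       last_frag, pipe, type, instr);
	}

That masking is presumably why the raw pipe = skb->data[0] reads were wrong whenever the no-chaining bit is set in the header byte.
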
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 5d1c2e3..2ada2b3 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -759,7 +759,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
skb_pull(skb, NCI_CTRL_HDR_SIZE);
if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) {
- if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) {
+ if (nci_prop_ntf_packet(ndev, ntf_opcode, skb) == -ENOTSUPP) {
pr_err("unsupported ntf opcode 0x%x\n",
ntf_opcode);
}
@@ -805,6 +805,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb)
break;
}
+ nci_core_ntf_packet(ndev, ntf_opcode, skb);
end:
kfree_skb(skb);
}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 408bd8f..9b6eb91 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -355,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
break;
}
+ nci_core_rsp_packet(ndev, rsp_opcode, skb);
end:
kfree_skb(skb);
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index ec250e7..d904cd2 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -18,6 +18,8 @@
#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__
+#include <linux/module.h>
+
#include <linux/export.h>
#include <linux/spi/spi.h>
#include <linux/crc-ccitt.h>
@@ -56,6 +58,7 @@ static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb,
}
t.cs_change = cs_change;
t.delay_usecs = nspi->xfer_udelay;
+ t.speed_hz = nspi->xfer_speed_hz;
spi_message_init(&m);
spi_message_add_tail(&t, &m);
@@ -142,7 +145,8 @@ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi,
nspi->acknowledge_mode = acknowledge_mode;
nspi->xfer_udelay = delay;
-
+ /* Use controller max SPI speed by default */
+ nspi->xfer_speed_hz = 0;
nspi->spi = spi;
nspi->ndev = ndev;
init_completion(&nspi->req_completion);
@@ -195,12 +199,14 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi)
tx.tx_buf = req;
tx.len = 2;
tx.cs_change = 0;
+ tx.speed_hz = nspi->xfer_speed_hz;
spi_message_add_tail(&tx, &m);
memset(&rx, 0, sizeof(struct spi_transfer));
rx.rx_buf = resp_hdr;
rx.len = 2;
rx.cs_change = 1;
+ rx.speed_hz = nspi->xfer_speed_hz;
spi_message_add_tail(&rx, &m);
ret = spi_sync(nspi->spi, &m);
@@ -224,6 +230,7 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi)
rx.len = rx_len;
rx.cs_change = 0;
rx.delay_usecs = nspi->xfer_udelay;
+ rx.speed_hz = nspi->xfer_speed_hz;
spi_message_add_tail(&rx, &m);
ret = spi_sync(nspi->spi, &m);
@@ -320,3 +327,5 @@ done:
return skb;
}
EXPORT_SYMBOL_GPL(nci_spi_read);
+
+MODULE_LICENSE("GPL");
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 853172c..f58c1fb 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -885,7 +885,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]);
- nfc_deactivate_target(dev, target_idx);
+ nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
rc = nfc_activate_target(dev, target_idx, protocol);
nfc_put_device(dev);
@@ -1109,10 +1109,8 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
- if (!dev) {
- rc = -ENODEV;
- goto exit;
- }
+ if (!dev)
+ return -ENODEV;
device_lock(&dev->dev);
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 5c93e84..c20b784 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -25,6 +25,9 @@
#include <net/nfc/nfc.h>
#include <net/sock.h>
+#define NFC_TARGET_MODE_IDLE 0
+#define NFC_TARGET_MODE_SLEEP 1
+
struct nfc_protocol {
int id;
struct proto *proto;
@@ -147,7 +150,7 @@ int nfc_dep_link_down(struct nfc_dev *dev);
int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol);
-int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx);
+int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode);
int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
data_exchange_cb_t cb, void *cb_context);
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e9a9148..e386e6c 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -321,7 +321,8 @@ static void rawsock_destruct(struct sock *sk)
if (sk->sk_state == TCP_ESTABLISHED) {
nfc_deactivate_target(nfc_rawsock(sk)->dev,
- nfc_rawsock(sk)->target_idx);
+ nfc_rawsock(sk)->target_idx,
+ NFC_TARGET_MODE_IDLE);
nfc_put_device(nfc_rawsock(sk)->dev);
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c608723..c88d0f2 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -769,7 +769,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
- struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
@@ -797,11 +796,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
if (vport) {
int err;
- upcall.egress_tun_info = &info;
- err = ovs_vport_get_egress_tun_info(vport, skb,
- &upcall);
- if (err)
- upcall.egress_tun_info = NULL;
+ err = dev_fill_metadata_dst(vport->dev, skb);
+ if (!err)
+ upcall.egress_tun_info = skb_tunnel_info(skb);
}
break;
@@ -1113,8 +1110,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
nla_data(a));
/* Hide stolen IP fragments from user space. */
- if (err == -EINPROGRESS)
- return 0;
+ if (err)
+ return err == -EINPROGRESS ? 0 : err;
break;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 9ed833e..c2cc111 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -151,6 +151,8 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
state = ovs_ct_get_state(ctinfo);
+ if (!nf_ct_is_confirmed(ct))
+ state |= OVS_CS_F_NEW;
if (ct->master)
state |= OVS_CS_F_RELATED;
zone = nf_ct_zone(ct);
@@ -222,9 +224,6 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
struct nf_conn *ct;
int err;
- if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
- return -ENOTSUPP;
-
/* The connection could be invalid, in which case set_label is no-op.*/
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
@@ -294,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
return helper->help(skb, protoff, ct, ctinfo);
}
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
static int handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, struct sk_buff *skb)
{
@@ -309,8 +311,8 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
return err;
ovs_cb.mru = IPCB(skb)->frag_max_size;
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
struct sk_buff *reasm;
@@ -319,17 +321,25 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
if (!reasm)
return -EINPROGRESS;
- if (skb == reasm)
+ if (skb == reasm) {
+ kfree_skb(skb);
return -EINVAL;
+ }
+
+ /* Don't free 'skb' even though it is one of the original
+ * fragments, as we're going to morph it into the head.
+ */
+ skb_get(skb);
+ nf_ct_frag6_consume_orig(reasm);
key->ip.proto = ipv6_hdr(reasm)->nexthdr;
skb_morph(skb, reasm);
+ skb->next = reasm->next;
consume_skb(reasm);
ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#else
- return -EPFNOSUPPORT;
#endif
} else {
+ kfree_skb(skb);
return -EPFNOSUPPORT;
}
@@ -377,7 +387,7 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
return true;
}
-static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
+static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
@@ -408,6 +418,8 @@ static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
}
}
+ ovs_ct_update_key(skb, key, true);
+
return 0;
}
@@ -430,8 +442,6 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
err = __ovs_ct_lookup(net, key, info, skb);
if (err)
return err;
-
- ovs_ct_update_key(skb, key, true);
}
return 0;
@@ -460,8 +470,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
if (nf_conntrack_confirm(skb) != NF_ACCEPT)
return -EINVAL;
- ovs_ct_update_key(skb, key, true);
-
return 0;
}
@@ -476,6 +484,9 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
return false;
}
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
int ovs_ct_execute(struct net *net, struct sk_buff *skb,
struct sw_flow_key *key,
const struct ovs_conntrack_info *info)
@@ -511,6 +522,8 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
&info->labels.mask);
err:
skb_push(skb, nh_ofs);
+ if (err)
+ kfree_skb(skb);
return err;
}
@@ -587,6 +600,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
case OVS_CT_ATTR_MARK: {
struct md_mark *mark = nla_data(a);
+ if (!mark->mask) {
+ OVS_NLERR(log, "ct_mark mask cannot be 0");
+ return -EINVAL;
+ }
info->mark = *mark;
break;
}
@@ -595,6 +612,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
case OVS_CT_ATTR_LABELS: {
struct md_labels *labels = nla_data(a);
+ if (!labels_nonzero(&labels->mask)) {
+ OVS_NLERR(log, "ct_labels mask cannot be 0");
+ return -EINVAL;
+ }
info->labels = *labels;
break;
}
@@ -705,11 +726,12 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
return -EMSGSIZE;
- if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
&ct_info->mark))
return -EMSGSIZE;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ labels_nonzero(&ct_info->labels.mask) &&
nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
&ct_info->labels))
return -EMSGSIZE;
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index da87149..a7544f4 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -35,12 +35,9 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
void ovs_ct_free_action(const struct nlattr *a);
-static inline bool ovs_ct_state_supported(u32 state)
-{
- return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED |
- OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR |
- OVS_CS_F_INVALID | OVS_CS_F_TRACKED));
-}
+#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
+ OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
+ OVS_CS_F_INVALID | OVS_CS_F_TRACKED)
#else
#include <linux/errno.h>
@@ -53,11 +50,6 @@ static inline bool ovs_ct_verify(struct net *net, int attr)
return false;
}
-static inline bool ovs_ct_state_supported(u32 state)
-{
- return false;
-}
-
static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
const struct sw_flow_key *key,
struct sw_flow_actions **acts, bool log)
@@ -75,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
struct sw_flow_key *key,
const struct ovs_conntrack_info *info)
{
+ kfree_skb(skb);
return -ENOTSUPP;
}
@@ -94,5 +87,7 @@ static inline int ovs_ct_put_key(const struct sw_flow_key *key,
}
static inline void ovs_ct_free_action(const struct nlattr *a) { }
+
+#define CT_SUPPORTED_MASK 0
#endif /* CONFIG_NF_CONNTRACK */
#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a758280..5633172 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -489,9 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
- err = ovs_nla_put_egress_tunnel_key(user_skb,
- upcall_info->egress_tun_info,
- upcall_info->egress_tun_opts);
+ err = ovs_nla_put_tunnel_info(user_skb,
+ upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index f88038a..67bdecd 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -117,7 +117,6 @@ struct ovs_skb_cb {
*/
struct dp_upcall_info {
struct ip_tunnel_info *egress_tun_info;
- const void *egress_tun_opts;
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 80e1f09..907d6fd 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -764,7 +764,7 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
- if (tun_opts) {
+ if (swkey_tun_opts_len) {
if (output->tun_flags & TUNNEL_GENEVE_OPT &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
@@ -798,14 +798,13 @@ static int ip_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
- const struct ip_tunnel_info *egress_tun_info,
- const void *egress_tun_opts)
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
{
- return __ip_tun_to_nlattr(skb, &egress_tun_info->key,
- egress_tun_opts,
- egress_tun_info->options_len,
- ip_tunnel_info_af(egress_tun_info));
+ return __ip_tun_to_nlattr(skb, &tun_info->key,
+ ip_tunnel_info_opts(tun_info),
+ tun_info->options_len,
+ ip_tunnel_info_af(tun_info));
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
@@ -866,7 +865,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
- if (!is_mask && !ovs_ct_state_supported(ct_state)) {
+ if (ct_state & ~CT_SUPPORTED_MASK) {
OVS_NLERR(log, "ct_state flags %08x unsupported",
ct_state);
return -EINVAL;
@@ -1149,6 +1148,9 @@ static void nlattr_set(struct nlattr *attr, u8 val,
} else {
memset(nla_data(nla), val, nla_len(nla));
}
+
+ if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
+ *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
}
}
@@ -2432,11 +2434,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ip_tun_to_nlattr(skb, &tun_info->key,
- tun_info->options_len ?
- ip_tunnel_info_opts(tun_info) : NULL,
- tun_info->options_len,
- ip_tunnel_info_af(tun_info));
+ err = ovs_nla_put_tunnel_info(skb, tun_info);
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6ca3f0b..47dd142 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_get_match(struct net *, struct sw_flow_match *,
const struct nlattr *key, const struct nlattr *mask,
bool log);
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
- const struct ip_tunnel_info *,
- const void *egress_tun_opts);
+
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info);
bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 7a568ca..efb736b 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,
return 0;
}
-static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dport = htons(geneve_port->port_no);
- __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
-
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_UDP, sport, dport);
-}
-
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
@@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
.get_options = geneve_get_options,
.send = dev_queue_xmit,
.owner = THIS_MODULE,
- .get_egress_tun_info = geneve_get_egress_tun_info,
};
static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index cdb758a..c3257d7 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
-static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_GRE, 0, 0);
-}
-
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.send = dev_queue_xmit,
- .get_egress_tun_info = gre_get_egress_tun_info,
.destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 7f0a8bd..ec76398 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -106,12 +106,45 @@ static void internal_dev_destructor(struct net_device *dev)
free_netdev(dev);
}
+static struct rtnl_link_stats64 *
+internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ stats->rx_errors = dev->stats.rx_errors;
+ stats->tx_errors = dev->stats.tx_errors;
+ stats->tx_dropped = dev->stats.tx_dropped;
+ stats->rx_dropped = dev->stats.rx_dropped;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *percpu_stats;
+ struct pcpu_sw_netstats local_stats;
+ unsigned int start;
+
+ percpu_stats = per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
+ local_stats = *percpu_stats;
+ } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
+
+ stats->rx_bytes += local_stats.rx_bytes;
+ stats->rx_packets += local_stats.rx_packets;
+ stats->tx_bytes += local_stats.tx_bytes;
+ stats->tx_packets += local_stats.tx_packets;
+ }
+
+ return stats;
+}
+
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
+ .ndo_get_stats64 = internal_get_stats,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -161,6 +194,11 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_vport;
}
+ vport->dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!vport->dev->tstats) {
+ err = -ENOMEM;
+ goto error_free_netdev;
+ }
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);
@@ -173,7 +211,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
rtnl_lock();
err = register_netdevice(vport->dev);
if (err)
- goto error_free_netdev;
+ goto error_unlock;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
@@ -181,8 +219,10 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
return vport;
-error_free_netdev:
+error_unlock:
rtnl_unlock();
+ free_percpu(vport->dev->tstats);
+error_free_netdev:
free_netdev(vport->dev);
error_free_vport:
ovs_vport_free(vport);
@@ -198,7 +238,7 @@ static void internal_dev_destroy(struct vport *vport)
/* unregister_netdevice() waits for an RCU grace period. */
unregister_netdevice(vport->dev);
-
+ free_percpu(vport->dev->tstats);
rtnl_unlock();
}
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 6f70071..1605691 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -146,32 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
return ovs_netdev_link(vport, parms->name);
}
-static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- struct vxlan_dev *vxlan = netdev_priv(vport->dev);
- struct net *net = ovs_dp_get_net(vport->dp);
- unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
- __be16 dst_port = vxlan_dev_dst_port(vxlan, family);
- __be16 src_port;
- int port_min;
- int port_max;
-
- inet_get_local_port_range(net, &port_min, &port_max);
- src_port = udp_flow_src_port(net, skb, 0, 0, true);
-
- return ovs_tunnel_get_egress_info(upcall, net,
- skb, IPPROTO_UDP,
- src_port, dst_port);
-}
-
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options,
.send = dev_queue_xmit,
- .get_egress_tun_info = vxlan_get_egress_tun_info,
};
static int __init ovs_vxlan_tnl_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index ef19d0b..0ac0fd0 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -480,64 +480,6 @@ void ovs_vport_deferred_free(struct vport *vport)
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *skb,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst)
-{
- struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
- const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
- const struct ip_tunnel_key *tun_key;
- u32 skb_mark = skb->mark;
- struct rtable *rt;
- struct flowi4 fl;
-
- if (unlikely(!tun_info))
- return -EINVAL;
- if (ip_tunnel_info_af(tun_info) != AF_INET)
- return -EINVAL;
-
- tun_key = &tun_info->key;
-
- /* Route lookup to get srouce IP address.
- * The process may need to be changed if the corresponding process
- * in vports ops changed.
- */
- rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
-
- /* Generate egress_tun_info based on tun_info,
- * saddr, tp_src and tp_dst
- */
- ip_tunnel_key_init(&egress_tun_info->key,
- fl.saddr, tun_key->u.ipv4.dst,
- tun_key->tos,
- tun_key->ttl,
- tp_src, tp_dst,
- tun_key->tun_id,
- tun_key->tun_flags);
- egress_tun_info->options_len = tun_info->options_len;
- egress_tun_info->mode = tun_info->mode;
- upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- /* get_egress_tun_info() is only implemented on tunnel ports. */
- if (unlikely(!vport->ops->get_egress_tun_info))
- return -EINVAL;
-
- return vport->ops->get_egress_tun_info(vport, skb, upcall);
-}
-
static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 885607f..bdfd82a 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -27,7 +27,6 @@
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
-#include <net/route.h>
#include "datapath.h"
@@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall);
-
/**
* struct vport_portids - array of netlink portids of a vport.
* must be protected by rcu.
@@ -140,8 +129,6 @@ struct vport_parms {
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
- * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
- * a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
@@ -154,9 +141,6 @@ struct vport_ops {
int (*get_options)(const struct vport *, struct sk_buff *);
netdev_tx_t (*send) (struct sk_buff *skb);
- int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
- struct dp_upcall_info *upcall);
-
struct module *owner;
struct list_head list;
};
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 384ea1e..b5476aeb 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -573,6 +573,7 @@ static void rds_exit(void)
rds_threads_exit();
rds_stats_exit();
rds_page_exit();
+ rds_bind_lock_destroy();
rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
}
@@ -582,11 +583,14 @@ static int rds_init(void)
{
int ret;
- rds_bind_lock_init();
+ ret = rds_bind_lock_init();
+ if (ret)
+ goto out;
ret = rds_conn_init();
if (ret)
- goto out;
+ goto out_bind;
+
ret = rds_threads_init();
if (ret)
goto out_conn;
@@ -620,6 +624,8 @@ out_conn:
rds_conn_exit();
rds_cong_exit();
rds_page_exit();
+out_bind:
+ rds_bind_lock_destroy();
out:
return ret;
}
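
The bind.c conversion below keys the new rhashtable on a single u64 (rs_bound_key) built from the bound IPv4 address and port, both kept in network byte order. A trivial standalone sketch of that packing (helper name is illustrative):

	#include <stdint.h>

	/* Mirrors "((u64)addr << 32) | port" from the hunk that follows. */
	static uint64_t rds_bind_key(uint32_t addr_be, uint16_t port_be)
	{
		return ((uint64_t)addr_be << 32) | port_be;
	}

With a fixed 8-byte key, rhashtable_lookup_fast() replaces the old per-bucket rwlock and hlist walk.
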
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 6192566..b22ea95 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -38,54 +38,17 @@
#include <linux/ratelimit.h>
#include "rds.h"
-struct bind_bucket {
- rwlock_t lock;
- struct hlist_head head;
+static struct rhashtable bind_hash_table;
+
+static struct rhashtable_params ht_parms = {
+ .nelem_hint = 768,
+ .key_len = sizeof(u64),
+ .key_offset = offsetof(struct rds_sock, rs_bound_key),
+ .head_offset = offsetof(struct rds_sock, rs_bound_node),
+ .max_size = 16384,
+ .min_size = 1024,
};
-#define BIND_HASH_SIZE 1024
-static struct bind_bucket bind_hash_table[BIND_HASH_SIZE];
-
-static struct bind_bucket *hash_to_bucket(__be32 addr, __be16 port)
-{
- return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
- (BIND_HASH_SIZE - 1));
-}
-
-/* must hold either read or write lock (write lock for insert != NULL) */
-static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket,
- __be32 addr, __be16 port,
- struct rds_sock *insert)
-{
- struct rds_sock *rs;
- struct hlist_head *head = &bucket->head;
- u64 cmp;
- u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
-
- hlist_for_each_entry(rs, head, rs_bound_node) {
- cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
- be16_to_cpu(rs->rs_bound_port);
-
- if (cmp == needle) {
- rds_sock_addref(rs);
- return rs;
- }
- }
-
- if (insert) {
- /*
- * make sure our addr and port are set before
- * we are added to the list.
- */
- insert->rs_bound_addr = addr;
- insert->rs_bound_port = port;
- rds_sock_addref(insert);
-
- hlist_add_head(&insert->rs_bound_node, head);
- }
- return NULL;
-}
-
/*
* Return the rds_sock bound at the given local address.
*
@@ -94,18 +57,14 @@ static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket,
*/
struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
{
+ u64 key = ((u64)addr << 32) | port;
struct rds_sock *rs;
- unsigned long flags;
- struct bind_bucket *bucket = hash_to_bucket(addr, port);
-
- read_lock_irqsave(&bucket->lock, flags);
- rs = rds_bind_lookup(bucket, addr, port, NULL);
- read_unlock_irqrestore(&bucket->lock, flags);
- if (rs && sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) {
- rds_sock_put(rs);
+ rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
+ if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
+ rds_sock_addref(rs);
+ else
rs = NULL;
- }
rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
ntohs(port));
@@ -116,10 +75,9 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
/* returns -ve errno or +ve port */
static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
{
- unsigned long flags;
int ret = -EADDRINUSE;
u16 rover, last;
- struct bind_bucket *bucket;
+ u64 key;
if (*port != 0) {
rover = be16_to_cpu(*port);
@@ -130,22 +88,29 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
}
do {
- struct rds_sock *rrs;
if (rover == 0)
rover++;
- bucket = hash_to_bucket(addr, cpu_to_be16(rover));
- write_lock_irqsave(&bucket->lock, flags);
- rrs = rds_bind_lookup(bucket, addr, cpu_to_be16(rover), rs);
- write_unlock_irqrestore(&bucket->lock, flags);
- if (!rrs) {
+ key = ((u64)addr << 32) | cpu_to_be16(rover);
+ if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
+ continue;
+
+ rs->rs_bound_key = key;
+ rs->rs_bound_addr = addr;
+ rs->rs_bound_port = cpu_to_be16(rover);
+ rs->rs_bound_node.next = NULL;
+ rds_sock_addref(rs);
+ if (!rhashtable_insert_fast(&bind_hash_table,
+ &rs->rs_bound_node, ht_parms)) {
*port = rs->rs_bound_port;
ret = 0;
rdsdebug("rs %p binding to %pI4:%d\n",
rs, &addr, (int)ntohs(*port));
break;
} else {
- rds_sock_put(rrs);
+ rds_sock_put(rs);
+ ret = -ENOMEM;
+ break;
}
} while (rover++ != last);
@@ -154,23 +119,17 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
void rds_remove_bound(struct rds_sock *rs)
{
- unsigned long flags;
- struct bind_bucket *bucket =
- hash_to_bucket(rs->rs_bound_addr, rs->rs_bound_port);
-
- write_lock_irqsave(&bucket->lock, flags);
- if (rs->rs_bound_addr) {
- rdsdebug("rs %p unbinding from %pI4:%d\n",
- rs, &rs->rs_bound_addr,
- ntohs(rs->rs_bound_port));
+ if (!rs->rs_bound_addr)
+ return;
- hlist_del_init(&rs->rs_bound_node);
- rds_sock_put(rs);
- rs->rs_bound_addr = 0;
- }
+ rdsdebug("rs %p unbinding from %pI4:%d\n",
+ rs, &rs->rs_bound_addr,
+ ntohs(rs->rs_bound_port));
- write_unlock_irqrestore(&bucket->lock, flags);
+ rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
+ rds_sock_put(rs);
+ rs->rs_bound_addr = 0;
}
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -224,10 +183,12 @@ out:
return ret;
}
-void rds_bind_lock_init(void)
+void rds_bind_lock_destroy(void)
{
- int i;
+ rhashtable_destroy(&bind_hash_table);
+}
- for (i = 0; i < BIND_HASH_SIZE; i++)
- rwlock_init(&bind_hash_table[i].lock);
+int rds_bind_lock_init(void)
+{
+ return rhashtable_init(&bind_hash_table, &ht_parms);
}
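The rhashtable conversion above replaces the 1024-bucket rwlock table with a single resizable hash keyed on one 64-bit value built from the bound address and port. A minimal userspace sketch of how that key is composed (the function name and the main() harness are illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Illustrative only: mirror how rds_add_bound() builds rs_bound_key,
 * with the network-order IPv4 address in the upper 32 bits and the
 * network-order port in the low bits.
 */
static uint64_t rds_bind_key(uint32_t addr_be, uint16_t port_be)
{
	return ((uint64_t)addr_be << 32) | port_be;
}

int main(void)
{
	uint32_t addr = inet_addr("192.0.2.1");	/* example address */
	uint16_t port = htons(4096);		/* example port */

	printf("bind key = 0x%016llx\n",
	       (unsigned long long)rds_bind_key(addr, port));
	return 0;
}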
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 6a8fbd6..d3d4454f 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -75,7 +75,7 @@ struct rds_iw_mr_pool {
int max_pages;
};
-static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
+static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
@@ -479,14 +479,13 @@ void rds_iw_sync_mr(void *trans_private, int direction)
* If the number of MRs allocated exceeds the limit, we also try
* to free as many MRs as needed to get back to this limit.
*/
-static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
+static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
{
struct rds_iw_mr *ibmr, *next;
LIST_HEAD(unmap_list);
LIST_HEAD(kill_list);
unsigned long flags;
unsigned int nfreed = 0, ncleaned = 0, unpinned = 0;
- int ret = 0;
rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -538,7 +537,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
atomic_sub(nfreed, &pool->item_count);
mutex_unlock(&pool->flush_lock);
- return ret;
}
static void rds_iw_mr_pool_flush_worker(struct work_struct *work)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 543c308..0e2797b 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -7,6 +7,7 @@
#include <rdma/rdma_cm.h>
#include <linux/mutex.h>
#include <linux/rds.h>
+#include <linux/rhashtable.h>
#include "info.h"
@@ -474,7 +475,8 @@ struct rds_sock {
* bound_addr used for both incoming and outgoing, no INADDR_ANY
* support.
*/
- struct hlist_node rs_bound_node;
+ struct rhash_head rs_bound_node;
+ u64 rs_bound_key;
__be32 rs_bound_addr;
__be32 rs_conn_addr;
__be16 rs_bound_port;
@@ -605,7 +607,8 @@ extern wait_queue_head_t rds_poll_waitq;
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
void rds_remove_bound(struct rds_sock *rs);
struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
-void rds_bind_lock_init(void);
+int rds_bind_lock_init(void);
+void rds_bind_lock_destroy(void);
/* cong.c */
int rds_cong_get_maps(struct rds_connection *conn);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index fbc5ef8..27a9921 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
to_copy = min(tc->t_tinc_data_rem, left);
- pskb_pull(clone, offset);
- pskb_trim(clone, to_copy);
+ if (!pskb_pull(clone, offset) ||
+ pskb_trim(clone, to_copy)) {
+ pr_warn("rds_tcp_data_recv: pull/trim failed "
+ "left %zu data_rem %zu skb_len %d\n",
+ left, tc->t_tinc_data_rem, skb->len);
+ kfree_skb(clone);
+ desc->error = -ENOMEM;
+ goto out;
+ }
skb_queue_tail(&tinc->ti_skb_list, clone);
rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 1eb76956..6dfd19e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -337,6 +337,21 @@ int switchdev_port_attr_set(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
+static size_t switchdev_obj_size(const struct switchdev_obj *obj)
+{
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ return sizeof(struct switchdev_obj_port_vlan);
+ case SWITCHDEV_OBJ_ID_IPV4_FIB:
+ return sizeof(struct switchdev_obj_ipv4_fib);
+ case SWITCHDEV_OBJ_ID_PORT_FDB:
+ return sizeof(struct switchdev_obj_port_fdb);
+ default:
+ BUG();
+ }
+ return 0;
+}
+
static int __switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
@@ -422,7 +437,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
static int switchdev_port_obj_add_defer(struct net_device *dev,
const struct switchdev_obj *obj)
{
- return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+ return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
switchdev_port_obj_add_deferred);
}
@@ -490,7 +505,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev,
static int switchdev_port_obj_del_defer(struct net_device *dev,
const struct switchdev_obj *obj)
{
- return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+ return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
switchdev_port_obj_del_deferred);
}
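The switch to switchdev_obj_size() matters because the deferred work item must copy the whole type-specific object, while obj points at the common header embedded at its start; copying only sizeof(*obj) would truncate the per-type fields. A small hypothetical illustration of that pitfall (struct names invented, not the kernel types):

#include <stdio.h>
#include <string.h>

/* Invented structures standing in for switchdev_obj and one of its
 * type-specific containers: the generic header sits first, the
 * per-type payload follows it.
 */
struct obj_hdr {
	int id;
};

struct obj_vlan {
	struct obj_hdr obj;	/* must be first */
	unsigned short vid_begin;
	unsigned short vid_end;
};

int main(void)
{
	struct obj_vlan src = { .obj = { .id = 1 }, .vid_begin = 10, .vid_end = 20 };
	struct obj_vlan dst;

	memset(&dst, 0, sizeof(dst));

	/* Copying only the header size loses the VLAN range. */
	memcpy(&dst, &src, sizeof(struct obj_hdr));
	printf("header-sized copy: vid %u-%u\n", dst.vid_begin, dst.vid_end);

	/* Copying the full, type-specific size keeps it. */
	memcpy(&dst, &src, sizeof(struct obj_vlan));
	printf("full-sized copy:   vid %u-%u\n", dst.vid_begin, dst.vid_end);
	return 0;
}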
@@ -746,7 +761,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u16 mode = BRIDGE_MODE_UNDEF;
- u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
+ u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
int err;
err = switchdev_port_attr_get(dev, &attr);
@@ -817,6 +832,9 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
err = switchdev_port_br_setflag(dev, attr,
BR_LEARNING_SYNC);
break;
+ case IFLA_BRPORT_UNICAST_FLOOD:
+ err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -866,7 +884,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
- memset(&vlan, 0, sizeof(vlan));
+ vlan.vid_begin = 0;
} else {
if (vlan.vid_begin)
return -EINVAL;
@@ -875,7 +893,7 @@ static int switchdev_port_br_afspec(struct net_device *dev,
err = f(dev, &vlan.obj);
if (err)
return err;
- memset(&vlan, 0, sizeof(vlan));
+ vlan.vid_begin = 0;
}
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index e7000be..ed98c1f 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -94,10 +94,14 @@ __init int net_sysctl_init(void)
goto out;
ret = register_pernet_subsys(&sysctl_pernet_ops);
if (ret)
- goto out;
+ goto out1;
register_sysctl_root(&net_sysctl_root);
out:
return ret;
+out1:
+ unregister_sysctl_table(net_header);
+ net_header = NULL;
+ goto out;
}
struct ctl_table_header *register_net_sysctl(struct net *net,
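The hunk above gives the register_pernet_subsys() failure its own unwind label so the already-registered net_header table is torn down before returning. A generic userspace sketch of that setup/undo pairing, with invented step names, assuming each completed step has a matching undo executed in reverse order:

#include <stdio.h>

/* Invented stand-ins for the registration steps. */
static int register_header(void)    { puts("register header"); return 0; }
static void unregister_header(void) { puts("unregister header"); }
static int register_subsys(void)    { puts("register subsys: fail"); return -1; }

static int init(void)
{
	int ret;

	ret = register_header();
	if (ret)
		goto out;

	ret = register_subsys();
	if (ret)
		goto out_header;	/* undo only what already succeeded */

	return 0;

out_header:
	unregister_header();
out:
	return ret;
}

int main(void)
{
	return init() ? 1 : 0;
}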
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 41042de..9dc239d 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -35,741 +35,301 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/tipc_config.h>
#include "socket.h"
#include "msg.h"
#include "bcast.h"
#include "name_distr.h"
-#include "core.h"
+#include "link.h"
+#include "node.h"
-#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
-#define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */
+#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */
+#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
-static void tipc_nmap_diff(struct tipc_node_map *nm_a,
- struct tipc_node_map *nm_b,
- struct tipc_node_map *nm_diff);
-static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
-static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
-
-static void tipc_bclink_lock(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- spin_lock_bh(&tn->bclink->lock);
-}
-
-static void tipc_bclink_unlock(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- spin_unlock_bh(&tn->bclink->lock);
-}
-
-void tipc_bclink_input(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq);
-}
-
-uint tipc_bclink_get_mtu(void)
-{
- return MAX_PKT_DEFAULT_MCAST;
-}
-
-static u32 bcbuf_acks(struct sk_buff *buf)
-{
- return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle;
-}
-
-static void bcbuf_set_acks(struct sk_buff *buf, u32 acks)
-{
- TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks;
-}
-
-static void bcbuf_decr_acks(struct sk_buff *buf)
-{
- bcbuf_set_acks(buf, bcbuf_acks(buf) - 1);
-}
+/**
+ * struct tipc_bc_base - base structure for keeping broadcast send state
+ * @link: broadcast send link structure
+ * @inputq: data input queue; will only carry SOCK_WAKEUP messages
+ * @dests: array keeping number of reachable destinations per bearer
+ * @primary_bearer: a bearer having links to all broadcast destinations, if any
+ */
+struct tipc_bc_base {
+ struct tipc_link *link;
+ struct sk_buff_head inputq;
+ int dests[MAX_BEARERS];
+ int primary_bearer;
+};
-void tipc_bclink_add_node(struct net *net, u32 addr)
+static struct tipc_bc_base *tipc_bc_base(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- tipc_bclink_lock(net);
- tipc_nmap_add(&tn->bclink->bcast_nodes, addr);
- tipc_bclink_unlock(net);
+ return tipc_net(net)->bcbase;
}
-void tipc_bclink_remove_node(struct net *net, u32 addr)
+int tipc_bcast_get_mtu(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- tipc_bclink_lock(net);
- tipc_nmap_remove(&tn->bclink->bcast_nodes, addr);
-
- /* Last node? => reset backlog queue */
- if (!tn->bclink->bcast_nodes.count)
- tipc_link_purge_backlog(&tn->bclink->link);
-
- tipc_bclink_unlock(net);
+ return tipc_link_mtu(tipc_bc_sndlink(net));
}
-static void bclink_set_last_sent(struct net *net)
+/* tipc_bcbase_select_primary(): find a bearer with links to all destinations,
+ * if any, and make it primary bearer
+ */
+static void tipc_bcbase_select_primary(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+ int all_dests = tipc_link_bc_peers(bb->link);
+ int i, mtu;
- bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1);
-}
+ bb->primary_bearer = INVALID_BEARER_ID;
-u32 tipc_bclink_get_last_sent(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ if (!all_dests)
+ return;
- return tn->bcl->silent_intv_cnt;
-}
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if (!bb->dests[i])
+ continue;
-static void bclink_update_last_sent(struct tipc_node *node, u32 seqno)
-{
- node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ?
- seqno : node->bclink.last_sent;
-}
+ mtu = tipc_bearer_mtu(net, i);
+ if (mtu < tipc_link_mtu(bb->link))
+ tipc_link_set_mtu(bb->link, mtu);
-/**
- * tipc_bclink_retransmit_to - get most recent node to request retransmission
- *
- * Called with bclink_lock locked
- */
-struct tipc_node *tipc_bclink_retransmit_to(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- return tn->bclink->retransmit_to;
-}
+ if (bb->dests[i] < all_dests)
+ continue;
-/**
- * bclink_retransmit_pkt - retransmit broadcast packets
- * @after: sequence number of last packet to *not* retransmit
- * @to: sequence number of last packet to retransmit
- *
- * Called with bclink_lock locked
- */
-static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to)
-{
- struct sk_buff *skb;
- struct tipc_link *bcl = tn->bcl;
+ bb->primary_bearer = i;
- skb_queue_walk(&bcl->transmq, skb) {
- if (more(buf_seqno(skb), after)) {
- tipc_link_retransmit(bcl, skb, mod(to - after));
+ /* Reduce risk that all nodes select same primary */
+ if ((i ^ tipc_own_addr(net)) & 1)
break;
- }
}
}
-/**
- * bclink_prepare_wakeup - prepare users for wakeup after congestion
- * @bcl: broadcast link
- * @resultq: queue for users which can be woken up
- * Move a number of waiting users, as permitted by available space in
- * the send queue, from link wait queue to specified queue for wakeup
- */
-static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq)
+void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id)
{
- int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,};
- int imp, lim;
- struct sk_buff *skb, *tmp;
-
- skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) {
- imp = TIPC_SKB_CB(skb)->chain_imp;
- lim = bcl->window + bcl->backlog[imp].limit;
- pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
- if ((pnd[imp] + bcl->backlog[imp].len) >= lim)
- continue;
- skb_unlink(skb, &bcl->wakeupq);
- skb_queue_tail(resultq, skb);
- }
-}
+ struct tipc_bc_base *bb = tipc_bc_base(net);
-/**
- * tipc_bclink_wakeup_users - wake up pending users
- *
- * Called with no locks taken
- */
-void tipc_bclink_wakeup_users(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
- struct sk_buff_head resultq;
-
- skb_queue_head_init(&resultq);
- bclink_prepare_wakeup(bcl, &resultq);
- tipc_sk_rcv(net, &resultq);
+ tipc_bcast_lock(net);
+ bb->dests[bearer_id]++;
+ tipc_bcbase_select_primary(net);
+ tipc_bcast_unlock(net);
}
-/**
- * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets
- * @n_ptr: node that sent acknowledgement info
- * @acked: broadcast sequence # that has been acknowledged
- *
- * Node is locked, bclink_lock unlocked.
- */
-void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
+void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id)
{
- struct sk_buff *skb, *tmp;
- unsigned int released = 0;
- struct net *net = n_ptr->net;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- if (unlikely(!n_ptr->bclink.recv_permitted))
- return;
-
- tipc_bclink_lock(net);
-
- /* Bail out if tx queue is empty (no clean up is required) */
- skb = skb_peek(&tn->bcl->transmq);
- if (!skb)
- goto exit;
-
- /* Determine which messages need to be acknowledged */
- if (acked == INVALID_LINK_SEQ) {
- /*
- * Contact with specified node has been lost, so need to
- * acknowledge sent messages only (if other nodes still exist)
- * or both sent and unsent messages (otherwise)
- */
- if (tn->bclink->bcast_nodes.count)
- acked = tn->bcl->silent_intv_cnt;
- else
- acked = tn->bcl->snd_nxt;
- } else {
- /*
- * Bail out if specified sequence number does not correspond
- * to a message that has been sent and not yet acknowledged
- */
- if (less(acked, buf_seqno(skb)) ||
- less(tn->bcl->silent_intv_cnt, acked) ||
- less_eq(acked, n_ptr->bclink.acked))
- goto exit;
- }
-
- /* Skip over packets that node has previously acknowledged */
- skb_queue_walk(&tn->bcl->transmq, skb) {
- if (more(buf_seqno(skb), n_ptr->bclink.acked))
- break;
- }
-
- /* Update packets that node is now acknowledging */
- skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- bcbuf_decr_acks(skb);
- bclink_set_last_sent(net);
- if (bcbuf_acks(skb) == 0) {
- __skb_unlink(skb, &tn->bcl->transmq);
- kfree_skb(skb);
- released = 1;
- }
- }
- n_ptr->bclink.acked = acked;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
- /* Try resolving broadcast link congestion, if necessary */
- if (unlikely(skb_peek(&tn->bcl->backlogq))) {
- tipc_link_push_packets(tn->bcl);
- bclink_set_last_sent(net);
- }
- if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq)))
- n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS;
-exit:
- tipc_bclink_unlock(net);
+ tipc_bcast_lock(net);
+ bb->dests[bearer_id]--;
+ tipc_bcbase_select_primary(net);
+ tipc_bcast_unlock(net);
}
-/**
- * tipc_bclink_update_link_state - update broadcast link state
+/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers
*
- * RCU and node lock set
+ * Note that number of reachable destinations, as indicated in the dests[]
+ * array, may transitionally differ from the number of destinations indicated
+ * in each sent buffer. We can sustain this. Excess destination nodes will
+ * drop and never acknowledge the unexpected packets, and missing destinations
+ * will either require retransmission (if they are just about to be added to
+ * the bearer), or be removed from the buffer's 'ackers' counter (if they
+ * just went down)
*/
-void tipc_bclink_update_link_state(struct tipc_node *n_ptr,
- u32 last_sent)
+static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
{
- struct sk_buff *buf;
- struct net *net = n_ptr->net;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ int bearer_id;
+ struct tipc_bc_base *bb = tipc_bc_base(net);
+ struct sk_buff *skb, *_skb;
+ struct sk_buff_head _xmitq;
- /* Ignore "stale" link state info */
- if (less_eq(last_sent, n_ptr->bclink.last_in))
+ if (skb_queue_empty(xmitq))
return;
- /* Update link synchronization state; quit if in sync */
- bclink_update_last_sent(n_ptr, last_sent);
-
- if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in)
+ /* The typical case: at least one bearer has links to all nodes */
+ bearer_id = bb->primary_bearer;
+ if (bearer_id >= 0) {
+ tipc_bearer_bc_xmit(net, bearer_id, xmitq);
return;
-
- /* Update out-of-sync state; quit if loss is still unconfirmed */
- if ((++n_ptr->bclink.oos_state) == 1) {
- if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2))
- return;
- n_ptr->bclink.oos_state++;
}
- /* Don't NACK if one has been recently sent (or seen) */
- if (n_ptr->bclink.oos_state & 0x1)
- return;
-
- /* Send NACK */
- buf = tipc_buf_acquire(INT_H_SIZE);
- if (buf) {
- struct tipc_msg *msg = buf_msg(buf);
- struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq);
- u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent;
-
- tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG,
- INT_H_SIZE, n_ptr->addr);
- msg_set_non_seq(msg, 1);
- msg_set_mc_netid(msg, tn->net_id);
- msg_set_bcast_ack(msg, n_ptr->bclink.last_in);
- msg_set_bcgap_after(msg, n_ptr->bclink.last_in);
- msg_set_bcgap_to(msg, to);
-
- tipc_bclink_lock(net);
- tipc_bearer_send(net, MAX_BEARERS, buf, NULL);
- tn->bcl->stats.sent_nacks++;
- tipc_bclink_unlock(net);
- kfree_skb(buf);
-
- n_ptr->bclink.oos_state++;
- }
-}
-
-void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr)
-{
- u16 last = msg_last_bcast(hdr);
- int mtyp = msg_type(hdr);
+ /* We have to transmit across all bearers */
+ skb_queue_head_init(&_xmitq);
+ for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+ if (!bb->dests[bearer_id])
+ continue;
- if (unlikely(msg_user(hdr) != LINK_PROTOCOL))
- return;
- if (mtyp == STATE_MSG) {
- tipc_bclink_update_link_state(n, last);
- return;
+ skb_queue_walk(xmitq, skb) {
+ _skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ break;
+ __skb_queue_tail(&_xmitq, _skb);
+ }
+ tipc_bearer_bc_xmit(net, bearer_id, &_xmitq);
}
- /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization,
- * and transfer synch info in LINK_PROTOCOL messages.
- */
- if (tipc_node_is_up(n))
- return;
- if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG))
- return;
- n->bclink.last_sent = last;
- n->bclink.last_in = last;
- n->bclink.oos_state = 0;
+ __skb_queue_purge(xmitq);
+ __skb_queue_purge(&_xmitq);
}
-/**
- * bclink_peek_nack - monitor retransmission requests sent by other nodes
- *
- * Delay any upcoming NACK by this node if another node has already
- * requested the first message this node is going to ask for.
- */
-static void bclink_peek_nack(struct net *net, struct tipc_msg *msg)
-{
- struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg));
-
- if (unlikely(!n_ptr))
- return;
-
- tipc_node_lock(n_ptr);
- if (n_ptr->bclink.recv_permitted &&
- (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) &&
- (n_ptr->bclink.last_in == msg_bcgap_after(msg)))
- n_ptr->bclink.oos_state = 2;
- tipc_node_unlock(n_ptr);
- tipc_node_put(n_ptr);
-}
-
-/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster
+/* tipc_bcast_xmit - deliver buffer chain to all nodes in cluster
* and to identified node local sockets
* @net: the applicable net namespace
* @list: chain of buffers containing message
* Consumes the buffer chain, except when returning -ELINKCONG
* Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
*/
-int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list)
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
- struct tipc_bclink *bclink = tn->bclink;
+ struct tipc_link *l = tipc_bc_sndlink(net);
+ struct sk_buff_head xmitq, inputq, rcvq;
int rc = 0;
- int bc = 0;
- struct sk_buff *skb;
- struct sk_buff_head arrvq;
- struct sk_buff_head inputq;
- /* Prepare clone of message for local node */
- skb = tipc_msg_reassemble(list);
- if (unlikely(!skb))
- return -EHOSTUNREACH;
+ __skb_queue_head_init(&rcvq);
+ __skb_queue_head_init(&xmitq);
+ skb_queue_head_init(&inputq);
- /* Broadcast to all nodes */
- if (likely(bclink)) {
- tipc_bclink_lock(net);
- if (likely(bclink->bcast_nodes.count)) {
- rc = __tipc_link_xmit(net, bcl, list);
- if (likely(!rc)) {
- u32 len = skb_queue_len(&bcl->transmq);
-
- bclink_set_last_sent(net);
- bcl->stats.queue_sz_counts++;
- bcl->stats.accu_queue_sz += len;
- }
- bc = 1;
- }
- tipc_bclink_unlock(net);
- }
+ /* Prepare message clone for local node */
+ if (unlikely(!tipc_msg_reassemble(list, &rcvq)))
+ return -EHOSTUNREACH;
- if (unlikely(!bc))
- __skb_queue_purge(list);
+ tipc_bcast_lock(net);
+ if (tipc_link_bc_peers(l))
+ rc = tipc_link_xmit(l, list, &xmitq);
+ tipc_bcast_unlock(net);
+ /* Don't send to local node if adding to link failed */
if (unlikely(rc)) {
- kfree_skb(skb);
+ __skb_queue_purge(&rcvq);
return rc;
}
- /* Deliver message clone */
- __skb_queue_head_init(&arrvq);
- skb_queue_head_init(&inputq);
- __skb_queue_tail(&arrvq, skb);
- tipc_sk_mcast_rcv(net, &arrvq, &inputq);
- return rc;
-}
-/**
- * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet
- *
- * Called with both sending node's lock and bclink_lock taken.
- */
-static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
-{
- struct tipc_net *tn = net_generic(node->net, tipc_net_id);
-
- bclink_update_last_sent(node, seqno);
- node->bclink.last_in = seqno;
- node->bclink.oos_state = 0;
- tn->bcl->stats.recv_info++;
-
- /*
- * Unicast an ACK periodically, ensuring that
- * all nodes in the cluster don't ACK at the same time
- */
- if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) {
- tipc_link_proto_xmit(node_active_link(node, node->addr),
- STATE_MSG, 0, 0, 0, 0);
- tn->bcl->stats.sent_acks++;
- }
+ /* Broadcast to all nodes, including local node */
+ tipc_bcbase_xmit(net, &xmitq);
+ tipc_sk_mcast_rcv(net, &rcvq, &inputq);
+ __skb_queue_purge(list);
+ return 0;
}
-/**
- * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards
+/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link
*
* RCU is locked, no other locks set
*/
-void tipc_bclink_rcv(struct net *net, struct sk_buff *buf)
+int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
- struct tipc_msg *msg = buf_msg(buf);
- struct tipc_node *node;
- u32 next_in;
- u32 seqno;
- int deferred = 0;
- int pos = 0;
- struct sk_buff *iskb;
- struct sk_buff_head *arrvq, *inputq;
-
- /* Screen out unwanted broadcast messages */
- if (msg_mc_netid(msg) != tn->net_id)
- goto exit;
-
- node = tipc_node_find(net, msg_prevnode(msg));
- if (unlikely(!node))
- goto exit;
-
- tipc_node_lock(node);
- if (unlikely(!node->bclink.recv_permitted))
- goto unlock;
-
- /* Handle broadcast protocol message */
- if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
- if (msg_type(msg) != STATE_MSG)
- goto unlock;
- if (msg_destnode(msg) == tn->own_addr) {
- tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
- tipc_bclink_lock(net);
- bcl->stats.recv_nacks++;
- tn->bclink->retransmit_to = node;
- bclink_retransmit_pkt(tn, msg_bcgap_after(msg),
- msg_bcgap_to(msg));
- tipc_bclink_unlock(net);
- tipc_node_unlock(node);
- } else {
- tipc_node_unlock(node);
- bclink_peek_nack(net, msg);
- }
- tipc_node_put(node);
- goto exit;
- }
-
- /* Handle in-sequence broadcast message */
- seqno = msg_seqno(msg);
- next_in = mod(node->bclink.last_in + 1);
- arrvq = &tn->bclink->arrvq;
- inputq = &tn->bclink->inputq;
-
- if (likely(seqno == next_in)) {
-receive:
- /* Deliver message to destination */
- if (likely(msg_isdata(msg))) {
- tipc_bclink_lock(net);
- bclink_accept_pkt(node, seqno);
- spin_lock_bh(&inputq->lock);
- __skb_queue_tail(arrvq, buf);
- spin_unlock_bh(&inputq->lock);
- node->action_flags |= TIPC_BCAST_MSG_EVT;
- tipc_bclink_unlock(net);
- tipc_node_unlock(node);
- } else if (msg_user(msg) == MSG_BUNDLER) {
- tipc_bclink_lock(net);
- bclink_accept_pkt(node, seqno);
- bcl->stats.recv_bundles++;
- bcl->stats.recv_bundled += msg_msgcnt(msg);
- pos = 0;
- while (tipc_msg_extract(buf, &iskb, &pos)) {
- spin_lock_bh(&inputq->lock);
- __skb_queue_tail(arrvq, iskb);
- spin_unlock_bh(&inputq->lock);
- }
- node->action_flags |= TIPC_BCAST_MSG_EVT;
- tipc_bclink_unlock(net);
- tipc_node_unlock(node);
- } else if (msg_user(msg) == MSG_FRAGMENTER) {
- tipc_bclink_lock(net);
- bclink_accept_pkt(node, seqno);
- tipc_buf_append(&node->bclink.reasm_buf, &buf);
- if (unlikely(!buf && !node->bclink.reasm_buf)) {
- tipc_bclink_unlock(net);
- goto unlock;
- }
- bcl->stats.recv_fragments++;
- if (buf) {
- bcl->stats.recv_fragmented++;
- msg = buf_msg(buf);
- tipc_bclink_unlock(net);
- goto receive;
- }
- tipc_bclink_unlock(net);
- tipc_node_unlock(node);
- } else {
- tipc_bclink_lock(net);
- bclink_accept_pkt(node, seqno);
- tipc_bclink_unlock(net);
- tipc_node_unlock(node);
- kfree_skb(buf);
- }
- buf = NULL;
-
- /* Determine new synchronization state */
- tipc_node_lock(node);
- if (unlikely(!tipc_node_is_up(node)))
- goto unlock;
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct sk_buff_head xmitq;
+ int rc;
- if (node->bclink.last_in == node->bclink.last_sent)
- goto unlock;
+ __skb_queue_head_init(&xmitq);
- if (skb_queue_empty(&node->bclink.deferdq)) {
- node->bclink.oos_state = 1;
- goto unlock;
- }
-
- msg = buf_msg(skb_peek(&node->bclink.deferdq));
- seqno = msg_seqno(msg);
- next_in = mod(next_in + 1);
- if (seqno != next_in)
- goto unlock;
-
- /* Take in-sequence message from deferred queue & deliver it */
- buf = __skb_dequeue(&node->bclink.deferdq);
- goto receive;
- }
-
- /* Handle out-of-sequence broadcast message */
- if (less(next_in, seqno)) {
- deferred = tipc_link_defer_pkt(&node->bclink.deferdq,
- buf);
- bclink_update_last_sent(node, seqno);
- buf = NULL;
+ if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) {
+ kfree_skb(skb);
+ return 0;
}
- tipc_bclink_lock(net);
-
- if (deferred)
- bcl->stats.deferred_recv++;
+ tipc_bcast_lock(net);
+ if (msg_user(hdr) == BCAST_PROTOCOL)
+ rc = tipc_link_bc_nack_rcv(l, skb, &xmitq);
else
- bcl->stats.duplicates++;
+ rc = tipc_link_rcv(l, skb, NULL);
+ tipc_bcast_unlock(net);
- tipc_bclink_unlock(net);
+ tipc_bcbase_xmit(net, &xmitq);
-unlock:
- tipc_node_unlock(node);
- tipc_node_put(node);
-exit:
- kfree_skb(buf);
-}
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
-u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
-{
- return (n_ptr->bclink.recv_permitted &&
- (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked));
+ return rc;
}
-
-/**
- * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer
- *
- * Send packet over as many bearers as necessary to reach all nodes
- * that have joined the broadcast link.
+/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge
*
- * Returns 0 (packet sent successfully) under all circumstances,
- * since the broadcast link's pseudo-bearer never blocks
+ * RCU is locked, no other locks set
*/
-static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf,
- struct tipc_bearer *unused1,
- struct tipc_media_addr *unused2)
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked)
{
- int bp_index;
- struct tipc_msg *msg = buf_msg(buf);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bcbearer *bcbearer = tn->bcbearer;
- struct tipc_bclink *bclink = tn->bclink;
-
- /* Prepare broadcast link message for reliable transmission,
- * if first time trying to send it;
- * preparation is skipped for broadcast link protocol messages
- * since they are sent in an unreliable manner and don't need it
- */
- if (likely(!msg_non_seq(buf_msg(buf)))) {
- bcbuf_set_acks(buf, bclink->bcast_nodes.count);
- msg_set_non_seq(msg, 1);
- msg_set_mc_netid(msg, tn->net_id);
- tn->bcl->stats.sent_info++;
- if (WARN_ON(!bclink->bcast_nodes.count)) {
- dump_stack();
- return 0;
- }
- }
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct sk_buff_head xmitq;
- /* Send buffer over bearers until all targets reached */
- bcbearer->remains = bclink->bcast_nodes;
-
- for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
- struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
- struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
- struct tipc_bearer *bp[2] = {p, s};
- struct tipc_bearer *b = bp[msg_link_selector(msg)];
- struct sk_buff *tbuf;
-
- if (!p)
- break; /* No more bearers to try */
- if (!b)
- b = p;
- tipc_nmap_diff(&bcbearer->remains, &b->nodes,
- &bcbearer->remains_new);
- if (bcbearer->remains_new.count == bcbearer->remains.count)
- continue; /* Nothing added by bearer pair */
-
- if (bp_index == 0) {
- /* Use original buffer for first bearer */
- tipc_bearer_send(net, b->identity, buf, &b->bcast_addr);
- } else {
- /* Avoid concurrent buffer access */
- tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
- if (!tbuf)
- break;
- tipc_bearer_send(net, b->identity, tbuf,
- &b->bcast_addr);
- kfree_skb(tbuf); /* Bearer keeps a clone */
- }
- if (bcbearer->remains_new.count == 0)
- break; /* All targets reached */
+ __skb_queue_head_init(&xmitq);
- bcbearer->remains = bcbearer->remains_new;
- }
+ tipc_bcast_lock(net);
+ tipc_link_bc_ack_rcv(l, acked, &xmitq);
+ tipc_bcast_unlock(net);
- return 0;
+ tipc_bcbase_xmit(net, &xmitq);
+
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
}
-/**
- * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer
+/* tipc_bcast_sync_rcv - check and update rcv link with peer's send state
+ *
+ * RCU is locked, no other locks set
*/
-void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr,
- u32 node, bool action)
+void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bcbearer *bcbearer = tn->bcbearer;
- struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
- struct tipc_bcbearer_pair *bp_curr;
- struct tipc_bearer *b;
- int b_index;
- int pri;
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct sk_buff_head xmitq;
- tipc_bclink_lock(net);
+ __skb_queue_head_init(&xmitq);
- if (action)
- tipc_nmap_add(nm_ptr, node);
- else
- tipc_nmap_remove(nm_ptr, node);
+ tipc_bcast_lock(net);
+ if (msg_type(hdr) == STATE_MSG) {
+ tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
+ tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+ } else {
+ tipc_link_bc_init_rcv(l, hdr);
+ }
+ tipc_bcast_unlock(net);
- /* Group bearers by priority (can assume max of two per priority) */
- memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
+ tipc_bcbase_xmit(net, &xmitq);
- rcu_read_lock();
- for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
- b = rcu_dereference_rtnl(tn->bearer_list[b_index]);
- if (!b || !b->nodes.count)
- continue;
-
- if (!bp_temp[b->priority].primary)
- bp_temp[b->priority].primary = b;
- else
- bp_temp[b->priority].secondary = b;
- }
- rcu_read_unlock();
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
+}
- /* Create array of bearer pairs for broadcasting */
- bp_curr = bcbearer->bpairs;
- memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs));
+/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer
+ *
+ * RCU is locked, node lock is set
+ */
+void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_link *snd_l = tipc_bc_sndlink(net);
- for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) {
+ tipc_bcast_lock(net);
+ tipc_link_add_bc_peer(snd_l, uc_l, xmitq);
+ tipc_bcbase_select_primary(net);
+ tipc_bcast_unlock(net);
+}
- if (!bp_temp[pri].primary)
- continue;
+/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer
+ *
+ * RCU is locked, node lock is set
+ */
+void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
+{
+ struct tipc_link *snd_l = tipc_bc_sndlink(net);
+ struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct sk_buff_head xmitq;
- bp_curr->primary = bp_temp[pri].primary;
+ __skb_queue_head_init(&xmitq);
- if (bp_temp[pri].secondary) {
- if (tipc_nmap_equal(&bp_temp[pri].primary->nodes,
- &bp_temp[pri].secondary->nodes)) {
- bp_curr->secondary = bp_temp[pri].secondary;
- } else {
- bp_curr++;
- bp_curr->primary = bp_temp[pri].secondary;
- }
- }
+ tipc_bcast_lock(net);
+ tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq);
+ tipc_bcbase_select_primary(net);
+ tipc_bcast_unlock(net);
- bp_curr++;
- }
+ tipc_bcbase_xmit(net, &xmitq);
- tipc_bclink_unlock(net);
+ /* Any socket wakeup messages ? */
+ if (!skb_queue_empty(inputq))
+ tipc_sk_rcv(net, inputq);
}
static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb,
@@ -835,7 +395,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
if (!bcl)
return 0;
- tipc_bclink_lock(net);
+ tipc_bcast_lock(net);
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
NLM_F_MULTI, TIPC_NL_LINK_GET);
@@ -870,7 +430,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
if (err)
goto attr_msg_full;
- tipc_bclink_unlock(net);
+ tipc_bcast_unlock(net);
nla_nest_end(msg->skb, attrs);
genlmsg_end(msg->skb, hdr);
@@ -881,7 +441,7 @@ prop_msg_full:
attr_msg_full:
nla_nest_cancel(msg->skb, attrs);
msg_full:
- tipc_bclink_unlock(net);
+ tipc_bcast_unlock(net);
genlmsg_cancel(msg->skb, hdr);
return -EMSGSIZE;
@@ -895,25 +455,25 @@ int tipc_bclink_reset_stats(struct net *net)
if (!bcl)
return -ENOPROTOOPT;
- tipc_bclink_lock(net);
+ tipc_bcast_lock(net);
memset(&bcl->stats, 0, sizeof(bcl->stats));
- tipc_bclink_unlock(net);
+ tipc_bcast_unlock(net);
return 0;
}
-int tipc_bclink_set_queue_limits(struct net *net, u32 limit)
+static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *bcl = tn->bcl;
+ struct tipc_link *l = tipc_bc_sndlink(net);
- if (!bcl)
+ if (!l)
return -ENOPROTOOPT;
- if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN))
+ if (limit < BCLINK_WIN_MIN)
+ limit = BCLINK_WIN_MIN;
+ if (limit > TIPC_MAX_LINK_WIN)
return -EINVAL;
-
- tipc_bclink_lock(net);
- tipc_link_set_queue_limits(bcl, limit);
- tipc_bclink_unlock(net);
+ tipc_bcast_lock(net);
+ tipc_link_set_queue_limits(l, limit);
+ tipc_bcast_unlock(net);
return 0;
}
@@ -935,123 +495,51 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[])
win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
- return tipc_bclink_set_queue_limits(net, win);
+ return tipc_bc_link_set_queue_limits(net, win);
}
-int tipc_bclink_init(struct net *net)
+int tipc_bcast_init(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bcbearer *bcbearer;
- struct tipc_bclink *bclink;
- struct tipc_link *bcl;
-
- bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
- if (!bcbearer)
- return -ENOMEM;
-
- bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
- if (!bclink) {
- kfree(bcbearer);
- return -ENOMEM;
- }
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bc_base *bb = NULL;
+ struct tipc_link *l = NULL;
- bcl = &bclink->link;
- bcbearer->bearer.media = &bcbearer->media;
- bcbearer->media.send_msg = tipc_bcbearer_send;
- sprintf(bcbearer->media.name, "tipc-broadcast");
-
- spin_lock_init(&bclink->lock);
- __skb_queue_head_init(&bcl->transmq);
- __skb_queue_head_init(&bcl->backlogq);
- __skb_queue_head_init(&bcl->deferdq);
- skb_queue_head_init(&bcl->wakeupq);
- bcl->snd_nxt = 1;
- spin_lock_init(&bclink->node.lock);
- __skb_queue_head_init(&bclink->arrvq);
- skb_queue_head_init(&bclink->inputq);
- bcl->owner = &bclink->node;
- bcl->owner->net = net;
- bcl->mtu = MAX_PKT_DEFAULT_MCAST;
- tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
- bcl->bearer_id = MAX_BEARERS;
- rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer);
- bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg;
- msg_set_prevnode(bcl->pmsg, tn->own_addr);
- strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
- tn->bcbearer = bcbearer;
- tn->bclink = bclink;
- tn->bcl = bcl;
- return 0;
-}
+ bb = kzalloc(sizeof(*bb), GFP_ATOMIC);
+ if (!bb)
+ goto enomem;
+ tn->bcbase = bb;
+ spin_lock_init(&tipc_net(net)->bclock);
-void tipc_bclink_stop(struct net *net)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
-
- tipc_bclink_lock(net);
- tipc_link_purge_queues(tn->bcl);
- tipc_bclink_unlock(net);
-
- RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL);
- synchronize_net();
- kfree(tn->bcbearer);
- kfree(tn->bclink);
+ if (!tipc_link_bc_create(net, 0, 0,
+ U16_MAX,
+ BCLINK_WIN_DEFAULT,
+ 0,
+ &bb->inputq,
+ NULL,
+ NULL,
+ &l))
+ goto enomem;
+ bb->link = l;
+ tn->bcl = l;
+ return 0;
+enomem:
+ kfree(bb);
+ kfree(l);
+ return -ENOMEM;
}
-/**
- * tipc_nmap_add - add a node to a node map
- */
-static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
+void tipc_bcast_reinit(struct net *net)
{
- int n = tipc_node(node);
- int w = n / WSIZE;
- u32 mask = (1 << (n % WSIZE));
+ struct tipc_bc_base *b = tipc_bc_base(net);
- if ((nm_ptr->map[w] & mask) == 0) {
- nm_ptr->count++;
- nm_ptr->map[w] |= mask;
- }
+ msg_set_prevnode(b->link->pmsg, tipc_own_addr(net));
}
-/**
- * tipc_nmap_remove - remove a node from a node map
- */
-static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
+void tipc_bcast_stop(struct net *net)
{
- int n = tipc_node(node);
- int w = n / WSIZE;
- u32 mask = (1 << (n % WSIZE));
-
- if ((nm_ptr->map[w] & mask) != 0) {
- nm_ptr->map[w] &= ~mask;
- nm_ptr->count--;
- }
-}
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
-/**
- * tipc_nmap_diff - find differences between node maps
- * @nm_a: input node map A
- * @nm_b: input node map B
- * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
- */
-static void tipc_nmap_diff(struct tipc_node_map *nm_a,
- struct tipc_node_map *nm_b,
- struct tipc_node_map *nm_diff)
-{
- int stop = ARRAY_SIZE(nm_a->map);
- int w;
- int b;
- u32 map;
-
- memset(nm_diff, 0, sizeof(*nm_diff));
- for (w = 0; w < stop; w++) {
- map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
- nm_diff->map[w] = map;
- if (map != 0) {
- for (b = 0 ; b < WSIZE; b++) {
- if (map & (1 << b))
- nm_diff->count++;
- }
- }
- }
+ synchronize_net();
+ kfree(tn->bcbase);
+ kfree(tn->bcl);
}
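tipc_bcbase_select_primary() above prefers a bearer that reaches every broadcast destination and uses the parity of (bearer_id ^ own node address) as a tie-break, so different nodes tend to pick different bearers. A hedged userspace sketch of just that selection loop (the MTU adjustment is omitted, -1 stands in for INVALID_BEARER_ID, and the names are illustrative):

#include <stdio.h>
#include <stdint.h>

#define MAX_BEARERS 3

/* Illustrative restatement of the selection loop: pick a bearer that
 * reaches all destinations, breaking early when its id has different
 * parity from our own address so that peers spread their choices.
 */
static int select_primary(const int dests[MAX_BEARERS], int all_dests,
			  uint32_t own_addr)
{
	int primary = -1;

	if (!all_dests)
		return primary;

	for (int i = 0; i < MAX_BEARERS; i++) {
		if (!dests[i] || dests[i] < all_dests)
			continue;
		primary = i;
		if ((i ^ own_addr) & 1)
			break;
	}
	return primary;
}

int main(void)
{
	int dests[MAX_BEARERS] = { 2, 2, 0 };	/* bearers 0 and 1 reach both peers */

	printf("node 0x1001001 -> bearer %d\n", select_primary(dests, 2, 0x1001001));
	printf("node 0x1001002 -> bearer %d\n", select_primary(dests, 2, 0x1001002));
	return 0;
}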
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index d74c69b..2855b93 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -37,102 +37,44 @@
#ifndef _TIPC_BCAST_H
#define _TIPC_BCAST_H
-#include <linux/tipc_config.h>
-#include "link.h"
-#include "node.h"
+#include "core.h"
-/**
- * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link
- * @primary: pointer to primary bearer
- * @secondary: pointer to secondary bearer
- *
- * Bearers must have same priority and same set of reachable destinations
- * to be paired.
- */
-
-struct tipc_bcbearer_pair {
- struct tipc_bearer *primary;
- struct tipc_bearer *secondary;
-};
-
-#define BCBEARER MAX_BEARERS
-
-/**
- * struct tipc_bcbearer - bearer used by broadcast link
- * @bearer: (non-standard) broadcast bearer structure
- * @media: (non-standard) broadcast media structure
- * @bpairs: array of bearer pairs
- * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort()
- * @remains: temporary node map used by tipc_bcbearer_send()
- * @remains_new: temporary node map used tipc_bcbearer_send()
- *
- * Note: The fields labelled "temporary" are incorporated into the bearer
- * to avoid consuming potentially limited stack space through the use of
- * large local variables within multicast routines. Concurrent access is
- * prevented through use of the spinlock "bclink_lock".
- */
-struct tipc_bcbearer {
- struct tipc_bearer bearer;
- struct tipc_media media;
- struct tipc_bcbearer_pair bpairs[MAX_BEARERS];
- struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1];
- struct tipc_node_map remains;
- struct tipc_node_map remains_new;
-};
+struct tipc_node;
+struct tipc_msg;
+struct tipc_nl_msg;
+struct tipc_node_map;
-/**
- * struct tipc_bclink - link used for broadcast messages
- * @lock: spinlock governing access to structure
- * @link: (non-standard) broadcast link structure
- * @node: (non-standard) node structure representing b'cast link's peer node
- * @bcast_nodes: map of broadcast-capable nodes
- * @retransmit_to: node that most recently requested a retransmit
- *
- * Handles sequence numbering, fragmentation, bundling, etc.
- */
-struct tipc_bclink {
- spinlock_t lock;
- struct tipc_link link;
- struct tipc_node node;
- struct sk_buff_head arrvq;
- struct sk_buff_head inputq;
- struct tipc_node_map bcast_nodes;
- struct tipc_node *retransmit_to;
-};
+int tipc_bcast_init(struct net *net);
+void tipc_bcast_reinit(struct net *net);
+void tipc_bcast_stop(struct net *net);
+void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl);
+void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
+void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
+int tipc_bcast_get_mtu(struct net *net);
+int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
+int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
+void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr);
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
+int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
+int tipc_bclink_reset_stats(struct net *net);
-struct tipc_node;
-extern const char tipc_bclink_name[];
+static inline void tipc_bcast_lock(struct net *net)
+{
+ spin_lock_bh(&tipc_net(net)->bclock);
+}
-/**
- * tipc_nmap_equal - test for equality of node maps
- */
-static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
- struct tipc_node_map *nm_b)
+static inline void tipc_bcast_unlock(struct net *net)
{
- return !memcmp(nm_a, nm_b, sizeof(*nm_a));
+ spin_unlock_bh(&tipc_net(net)->bclock);
}
-int tipc_bclink_init(struct net *net);
-void tipc_bclink_stop(struct net *net);
-void tipc_bclink_add_node(struct net *net, u32 addr);
-void tipc_bclink_remove_node(struct net *net, u32 addr);
-struct tipc_node *tipc_bclink_retransmit_to(struct net *tn);
-void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
-void tipc_bclink_rcv(struct net *net, struct sk_buff *buf);
-u32 tipc_bclink_get_last_sent(struct net *net);
-u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
-void tipc_bclink_update_link_state(struct tipc_node *node,
- u32 last_sent);
-int tipc_bclink_reset_stats(struct net *net);
-int tipc_bclink_set_queue_limits(struct net *net, u32 limit);
-void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr,
- u32 node, bool action);
-uint tipc_bclink_get_mtu(void);
-int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list);
-void tipc_bclink_wakeup_users(struct net *net);
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
-int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
-void tipc_bclink_input(struct net *net);
-void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg);
+static inline struct tipc_link *tipc_bc_sndlink(struct net *net)
+{
+ return tipc_net(net)->bcl;
+}
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 82b2786..648f2a6 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -193,10 +193,8 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
rcu_read_lock();
b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (b_ptr) {
- tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true);
+ if (b_ptr)
tipc_disc_add_dest(b_ptr->link_req);
- }
rcu_read_unlock();
}
@@ -207,10 +205,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
rcu_read_lock();
b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (b_ptr) {
- tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false);
+ if (b_ptr)
tipc_disc_remove_dest(b_ptr->link_req);
- }
rcu_read_unlock();
}
@@ -418,10 +414,9 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
* @b_ptr: the bearer through which the packet is to be sent
* @dest: peer destination address
*/
-int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
+int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
struct tipc_bearer *b, struct tipc_media_addr *dest)
{
- struct sk_buff *clone;
struct net_device *dev;
int delta;
@@ -429,42 +424,48 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
if (!dev)
return 0;
- clone = skb_clone(buf, GFP_ATOMIC);
- if (!clone)
- return 0;
-
- delta = dev->hard_header_len - skb_headroom(buf);
+ delta = dev->hard_header_len - skb_headroom(skb);
if ((delta > 0) &&
- pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(clone);
+ pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
return 0;
}
- skb_reset_network_header(clone);
- clone->dev = dev;
- clone->protocol = htons(ETH_P_TIPC);
- dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
- dev->dev_addr, clone->len);
- dev_queue_xmit(clone);
+ skb_reset_network_header(skb);
+ skb->dev = dev;
+ skb->protocol = htons(ETH_P_TIPC);
+ dev_hard_header(skb, dev, ETH_P_TIPC, dest->value,
+ dev->dev_addr, skb->len);
+ dev_queue_xmit(skb);
return 0;
}
-/* tipc_bearer_send- sends buffer to destination over bearer
- *
- * IMPORTANT:
- * The media send routine must not alter the buffer being passed in
- * as it may be needed for later retransmission!
+int tipc_bearer_mtu(struct net *net, u32 bearer_id)
+{
+ int mtu = 0;
+ struct tipc_bearer *b;
+
+ rcu_read_lock();
+ b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]);
+ if (b)
+ mtu = b->mtu;
+ rcu_read_unlock();
+ return mtu;
+}
+
+/* tipc_bearer_xmit_skb - sends buffer to destination over bearer
*/
-void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
- struct tipc_media_addr *dest)
+void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+ struct sk_buff *skb,
+ struct tipc_media_addr *dest)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_bearer *b_ptr;
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_bearer *b;
rcu_read_lock();
- b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
- if (likely(b_ptr))
- b_ptr->media->send_msg(net, buf, b_ptr, dest);
+ b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+ if (likely(b))
+ b->media->send_msg(net, skb, b, dest);
rcu_read_unlock();
}
@@ -487,8 +488,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
skb_queue_walk_safe(xmitq, skb, tmp) {
__skb_dequeue(xmitq);
b->media->send_msg(net, skb, b, dst);
- /* Until we remove cloning in tipc_l2_send_msg(): */
- kfree_skb(skb);
+ }
+ }
+ rcu_read_unlock();
+}
+
+/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations
+ */
+void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_net *tn = tipc_net(net);
+ int net_id = tn->net_id;
+ struct tipc_bearer *b;
+ struct sk_buff *skb, *tmp;
+ struct tipc_msg *hdr;
+
+ rcu_read_lock();
+ b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+ if (likely(b)) {
+ skb_queue_walk_safe(xmitq, skb, tmp) {
+ hdr = buf_msg(skb);
+ msg_set_non_seq(hdr, 1);
+ msg_set_mc_netid(hdr, net_id);
+ __skb_dequeue(xmitq);
+ b->media->send_msg(net, skb, b, &b->bcast_addr);
}
}
rcu_read_unlock();
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6426f24..552185b 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -163,6 +163,7 @@ struct tipc_bearer {
u32 identity;
struct tipc_link_req *link_req;
char net_plane;
+ int node_cnt;
struct tipc_node_map nodes;
};
@@ -215,10 +216,14 @@ struct tipc_media *tipc_media_find(const char *name);
int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net);
-void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
- struct tipc_media_addr *dest);
+int tipc_bearer_mtu(struct net *net, u32 bearer_id);
+void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
+ struct sk_buff *skb,
+ struct tipc_media_addr *dest);
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
struct tipc_media_addr *dst);
+void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq);
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 005ba5e..03a8428 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -42,6 +42,7 @@
#include "bearer.h"
#include "net.h"
#include "socket.h"
+#include "bcast.h"
#include <linux/module.h>
@@ -71,8 +72,15 @@ static int __net_init tipc_init_net(struct net *net)
err = tipc_topsrv_start(net);
if (err)
goto out_subscr;
+
+ err = tipc_bcast_init(net);
+ if (err)
+ goto out_bclink;
+
return 0;
+out_bclink:
+ tipc_bcast_stop(net);
out_subscr:
tipc_nametbl_stop(net);
out_nametbl:
@@ -85,6 +93,7 @@ static void __net_exit tipc_exit_net(struct net *net)
{
tipc_topsrv_stop(net);
tipc_net_stop(net);
+ tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index b96b41e..18e95a8 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -62,8 +62,7 @@
struct tipc_node;
struct tipc_bearer;
-struct tipc_bcbearer;
-struct tipc_bclink;
+struct tipc_bc_base;
struct tipc_link;
struct tipc_name_table;
struct tipc_server;
@@ -93,8 +92,8 @@ struct tipc_net {
struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
/* Broadcast link */
- struct tipc_bcbearer *bcbearer;
- struct tipc_bclink *bclink;
+ spinlock_t bclock;
+ struct tipc_bc_base *bcbase;
struct tipc_link *bcl;
/* Socket hash table */
@@ -114,6 +113,11 @@ static inline struct tipc_net *tipc_net(struct net *net)
return net_generic(net, tipc_net_id);
}
+static inline int tipc_netid(struct net *net)
+{
+ return tipc_net(net)->net_id;
+}
+
static inline u16 mod(u16 x)
{
return x & 0xffffu;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index d14e0a4..afe8c47 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -89,7 +89,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
MAX_H_SIZE, dest_domain);
msg_set_non_seq(msg, 1);
msg_set_node_sig(msg, tn->random);
- msg_set_node_capabilities(msg, 0);
+ msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
msg_set_dest_domain(msg, dest_domain);
msg_set_bc_netid(msg, tn->net_id);
b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr);
@@ -167,11 +167,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
rskb = tipc_buf_acquire(MAX_H_SIZE);
- if (rskb) {
- tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
- tipc_bearer_send(net, bearer->identity, rskb, &maddr);
- kfree_skb(rskb);
- }
+ if (!rskb)
+ return;
+ tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
+ tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr);
}
}
@@ -225,6 +224,7 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
static void disc_timeout(unsigned long data)
{
struct tipc_link_req *req = (struct tipc_link_req *)data;
+ struct sk_buff *skb;
int max_delay;
spin_lock_bh(&req->lock);
@@ -242,9 +242,9 @@ static void disc_timeout(unsigned long data)
* hold at fast polling rate if don't have any associated nodes,
* otherwise hold at slow polling rate
*/
- tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest);
-
-
+ skb = skb_clone(req->buf, GFP_ATOMIC);
+ if (skb)
+ tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest);
req->timer_intv *= 2;
if (req->num_nodes)
max_delay = TIPC_LINK_REQ_SLOW;
@@ -271,6 +271,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
struct tipc_media_addr *dest)
{
struct tipc_link_req *req;
+ struct sk_buff *skb;
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
@@ -292,7 +293,9 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
setup_timer(&req->timer, disc_timeout, (unsigned long)req);
mod_timer(&req->timer, jiffies + req->timer_intv);
b_ptr->link_req = req;
- tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest);
+ skb = skb_clone(req->buf, GFP_ATOMIC);
+ if (skb)
+ tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
return 0;
}
@@ -316,6 +319,7 @@ void tipc_disc_delete(struct tipc_link_req *req)
void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr)
{
struct tipc_link_req *req = b_ptr->link_req;
+ struct sk_buff *skb;
spin_lock_bh(&req->lock);
tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr);
@@ -325,6 +329,8 @@ void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr)
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
mod_timer(&req->timer, jiffies + req->timer_intv);
- tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest);
+ skb = skb_clone(req->buf, GFP_ATOMIC);
+ if (skb)
+ tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
spin_unlock_bh(&req->lock);
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ff9b0b9..9efbdbd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -50,6 +50,7 @@
*/
static const char *link_co_err = "Link tunneling error, ";
static const char *link_rst_msg = "Resetting link ";
+static const char tipc_bclink_name[] = "broadcast-link";
static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
[TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
@@ -75,6 +76,14 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
[TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
};
+/* Send states for broadcast NACKs
+ */
+enum {
+ BC_NACK_SND_CONDITIONAL,
+ BC_NACK_SND_UNCONDITIONAL,
+ BC_NACK_SND_SUPPRESS,
+};
+
/*
* Interval between NACKs when packets arrive out of order
*/
@@ -110,7 +119,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
struct sk_buff_head *xmitq);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
-static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
+static void tipc_link_build_nack_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+static void tipc_link_build_bc_init_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
/*
* Simple non-static link routines (i.e. referenced outside this file)
@@ -150,11 +163,66 @@ bool tipc_link_is_blocked(struct tipc_link *l)
return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER);
}
+static bool link_is_bc_sndlink(struct tipc_link *l)
+{
+ return !l->bc_sndlink;
+}
+
+static bool link_is_bc_rcvlink(struct tipc_link *l)
+{
+ return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l));
+}
+
int tipc_link_is_active(struct tipc_link *l)
{
- struct tipc_node *n = l->owner;
+ return l->active;
+}
+
+void tipc_link_set_active(struct tipc_link *l, bool active)
+{
+ l->active = active;
+}
+
+void tipc_link_add_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *uc_l,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_link *rcv_l = uc_l->bc_rcvlink;
- return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l);
+ snd_l->ackers++;
+ rcv_l->acked = snd_l->snd_nxt - 1;
+ tipc_link_build_bc_init_msg(uc_l, xmitq);
+}
+
+void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *rcv_l,
+ struct sk_buff_head *xmitq)
+{
+ u16 ack = snd_l->snd_nxt - 1;
+
+ snd_l->ackers--;
+ tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+ tipc_link_reset(rcv_l);
+ rcv_l->state = LINK_RESET;
+ if (!snd_l->ackers) {
+ tipc_link_reset(snd_l);
+ __skb_queue_purge(xmitq);
+ }
+}
+
+int tipc_link_bc_peers(struct tipc_link *l)
+{
+ return l->ackers;
+}
+
+void tipc_link_set_mtu(struct tipc_link *l, int mtu)
+{
+ l->mtu = mtu;
+}
+
+int tipc_link_mtu(struct tipc_link *l)
+{
+ return l->mtu;
}
static u32 link_own_addr(struct tipc_link *l)
@@ -165,57 +233,72 @@ static u32 link_own_addr(struct tipc_link *l)
/**
* tipc_link_create - create a new link
* @n: pointer to associated node
- * @b: pointer to associated bearer
+ * @if_name: associated interface name
+ * @bearer_id: id (index) of associated bearer
+ * @tolerance: link tolerance to be used by link
+ * @net_plane: network plane (A,B,c..) this link belongs to
+ * @mtu: mtu to be advertised by link
+ * @priority: priority to be used by link
+ * @window: send window to be used by link
+ * @session: session to be used by link
* @ownnode: identity of own node
- * @peer: identity of peer node
- * @maddr: media address to be used
+ * @peer: node id of peer node
+ * @peer_caps: bitmap describing peer node capabilities
+ * @bc_sndlink: the namespace global link used for broadcast sending
+ * @bc_rcvlink: the peer specific link used for broadcast reception
* @inputq: queue to put messages ready for delivery
* @namedq: queue to put binding table update messages ready for delivery
* @link: return value, pointer to put the created link
*
* Returns true if link was created, otherwise false
*/
-bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
- u32 ownnode, u32 peer, struct tipc_media_addr *maddr,
- struct sk_buff_head *inputq, struct sk_buff_head *namedq,
+bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
+ int tolerance, char net_plane, u32 mtu, int priority,
+ int window, u32 session, u32 ownnode, u32 peer,
+ u16 peer_caps,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link *bc_rcvlink,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
struct tipc_link **link)
{
struct tipc_link *l;
struct tipc_msg *hdr;
- char *if_name;
l = kzalloc(sizeof(*l), GFP_ATOMIC);
if (!l)
return false;
*link = l;
+ l->pmsg = (struct tipc_msg *)&l->proto_msg;
+ hdr = l->pmsg;
+ tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
+ msg_set_size(hdr, sizeof(l->proto_msg));
+ msg_set_session(hdr, session);
+ msg_set_bearer_id(hdr, l->bearer_id);
/* Note: peer i/f name is completed by reset/activate message */
- if_name = strchr(b->name, ':') + 1;
sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+ strcpy((char *)msg_data(hdr), if_name);
l->addr = peer;
- l->media_addr = maddr;
- l->owner = n;
+ l->peer_caps = peer_caps;
+ l->net = net;
l->peer_session = WILDCARD_SESSION;
- l->bearer_id = b->identity;
- l->tolerance = b->tolerance;
- l->net_plane = b->net_plane;
- l->advertised_mtu = b->mtu;
- l->mtu = b->mtu;
- l->priority = b->priority;
- tipc_link_set_queue_limits(l, b->window);
+ l->bearer_id = bearer_id;
+ l->tolerance = tolerance;
+ l->net_plane = net_plane;
+ l->advertised_mtu = mtu;
+ l->mtu = mtu;
+ l->priority = priority;
+ tipc_link_set_queue_limits(l, window);
+ l->ackers = 1;
+ l->bc_sndlink = bc_sndlink;
+ l->bc_rcvlink = bc_rcvlink;
l->inputq = inputq;
l->namedq = namedq;
l->state = LINK_RESETTING;
- l->pmsg = (struct tipc_msg *)&l->proto_msg;
- hdr = l->pmsg;
- tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
- msg_set_size(hdr, sizeof(l->proto_msg));
- msg_set_session(hdr, session);
- msg_set_bearer_id(hdr, l->bearer_id);
- strcpy((char *)msg_data(hdr), if_name);
__skb_queue_head_init(&l->transmq);
__skb_queue_head_init(&l->backlogq);
__skb_queue_head_init(&l->deferdq);
@@ -224,27 +307,43 @@ bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
return true;
}
-/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints.
+/**
+ * tipc_link_bc_create - create new link to be used for broadcast
+ * @n: pointer to associated node
+ * @mtu: mtu to be used
+ * @window: send window to be used
+ * @inputq: queue to put messages ready for delivery
+ * @namedq: queue to put binding table update messages ready for delivery
+ * @link: return value, pointer to put the created link
*
- * Give a newly added peer node the sequence number where it should
- * start receiving and acking broadcast packets.
+ * Returns true if link was created, otherwise false
*/
-void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq)
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+ int mtu, int window, u16 peer_caps,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link **link)
{
- struct sk_buff *skb;
- struct sk_buff_head list;
- u16 last_sent;
+ struct tipc_link *l;
- skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
- 0, l->addr, link_own_addr(l), 0, 0, 0);
- if (!skb)
- return;
- last_sent = tipc_bclink_get_last_sent(l->owner->net);
- msg_set_last_bcast(buf_msg(skb), last_sent);
- __skb_queue_head_init(&list);
- __skb_queue_tail(&list, skb);
- tipc_link_xmit(l, &list, xmitq);
+ if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
+ 0, ownnode, peer, peer_caps, bc_sndlink,
+ NULL, inputq, namedq, link))
+ return false;
+
+ l = *link;
+ strcpy(l->name, tipc_bclink_name);
+ tipc_link_reset(l);
+ l->state = LINK_RESET;
+ l->ackers = 0;
+ l->bc_rcvlink = l;
+
+ /* Broadcast send link is always up */
+ if (link_is_bc_sndlink(l))
+ l->state = LINK_ESTABLISHED;
+
+ return true;
}
/**
@@ -451,12 +550,17 @@ static void link_profile_stats(struct tipc_link *l)
/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
{
int rc = 0;
int mtyp = STATE_MSG;
bool xmit = false;
bool prb = false;
+ u16 bc_snt = l->bc_sndlink->snd_nxt - 1;
+ u16 bc_acked = l->bc_rcvlink->acked;
+ bool bc_up = link_is_up(l->bc_rcvlink);
link_profile_stats(l);
@@ -464,7 +568,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
case LINK_ESTABLISHED:
case LINK_SYNCHING:
if (!l->silent_intv_cnt) {
- if (tipc_bclink_acks_missing(l->owner))
+ if (bc_up && (bc_acked != bc_snt))
xmit = true;
} else if (l->silent_intv_cnt <= l->abort_limit) {
xmit = true;
@@ -555,38 +659,6 @@ void link_prepare_wakeup(struct tipc_link *l)
}
}
-/**
- * tipc_link_reset_fragments - purge link's inbound message fragments queue
- * @l_ptr: pointer to link
- */
-void tipc_link_reset_fragments(struct tipc_link *l_ptr)
-{
- kfree_skb(l_ptr->reasm_buf);
- l_ptr->reasm_buf = NULL;
-}
-
-void tipc_link_purge_backlog(struct tipc_link *l)
-{
- __skb_queue_purge(&l->backlogq);
- l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
- l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
- l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
- l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
- l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
-}
-
-/**
- * tipc_link_purge_queues - purge all pkt queues associated with link
- * @l_ptr: pointer to link
- */
-void tipc_link_purge_queues(struct tipc_link *l_ptr)
-{
- __skb_queue_purge(&l_ptr->deferdq);
- __skb_queue_purge(&l_ptr->transmq);
- tipc_link_purge_backlog(l_ptr);
- tipc_link_reset_fragments(l_ptr);
-}
-
void tipc_link_reset(struct tipc_link *l)
{
/* Link is down, accept any session */
@@ -598,12 +670,16 @@ void tipc_link_reset(struct tipc_link *l)
/* Prepare for renewed mtu size negotiation */
l->mtu = l->advertised_mtu;
- /* Clean up all queues: */
+ /* Clean up all queues and counters: */
__skb_queue_purge(&l->transmq);
__skb_queue_purge(&l->deferdq);
skb_queue_splice_init(&l->wakeupq, l->inputq);
-
- tipc_link_purge_backlog(l);
+ __skb_queue_purge(&l->backlogq);
+ l->backlog[TIPC_LOW_IMPORTANCE].len = 0;
+ l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0;
+ l->backlog[TIPC_HIGH_IMPORTANCE].len = 0;
+ l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0;
+ l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0;
kfree_skb(l->reasm_buf);
kfree_skb(l->failover_reasm_skb);
l->reasm_buf = NULL;
@@ -611,81 +687,15 @@ void tipc_link_reset(struct tipc_link *l)
l->rcv_unacked = 0;
l->snd_nxt = 1;
l->rcv_nxt = 1;
+ l->acked = 0;
l->silent_intv_cnt = 0;
l->stats.recv_info = 0;
l->stale_count = 0;
+ l->bc_peer_is_up = false;
link_reset_statistics(l);
}
/**
- * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked
- * @link: link to use
- * @list: chain of buffers containing message
- *
- * Consumes the buffer chain, except when returning an error code,
- * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
- * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- */
-int __tipc_link_xmit(struct net *net, struct tipc_link *link,
- struct sk_buff_head *list)
-{
- struct tipc_msg *msg = buf_msg(skb_peek(list));
- unsigned int maxwin = link->window;
- unsigned int i, imp = msg_importance(msg);
- uint mtu = link->mtu;
- u16 ack = mod(link->rcv_nxt - 1);
- u16 seqno = link->snd_nxt;
- u16 bc_last_in = link->owner->bclink.last_in;
- struct tipc_media_addr *addr = link->media_addr;
- struct sk_buff_head *transmq = &link->transmq;
- struct sk_buff_head *backlogq = &link->backlogq;
- struct sk_buff *skb, *bskb;
-
- /* Match msg importance against this and all higher backlog limits: */
- for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
- if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
- return link_schedule_user(link, list);
- }
- if (unlikely(msg_size(msg) > mtu))
- return -EMSGSIZE;
-
- /* Prepare each packet for sending, and add to relevant queue: */
- while (skb_queue_len(list)) {
- skb = skb_peek(list);
- msg = buf_msg(skb);
- msg_set_seqno(msg, seqno);
- msg_set_ack(msg, ack);
- msg_set_bcast_ack(msg, bc_last_in);
-
- if (likely(skb_queue_len(transmq) < maxwin)) {
- __skb_dequeue(list);
- __skb_queue_tail(transmq, skb);
- tipc_bearer_send(net, link->bearer_id, skb, addr);
- link->rcv_unacked = 0;
- seqno++;
- continue;
- }
- if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) {
- kfree_skb(__skb_dequeue(list));
- link->stats.sent_bundled++;
- continue;
- }
- if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) {
- kfree_skb(__skb_dequeue(list));
- __skb_queue_tail(backlogq, bskb);
- link->backlog[msg_importance(buf_msg(bskb))].len++;
- link->stats.sent_bundled++;
- link->stats.sent_bundles++;
- continue;
- }
- link->backlog[imp].len += skb_queue_len(list);
- skb_queue_splice_tail_init(list, backlogq);
- }
- link->snd_nxt = seqno;
- return 0;
-}
-
-/**
* tipc_link_xmit(): enqueue buffer list according to queue situation
* @link: link to use
* @list: chain of buffers containing message
@@ -705,7 +715,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
unsigned int mtu = l->mtu;
u16 ack = l->rcv_nxt - 1;
u16 seqno = l->snd_nxt;
- u16 bc_last_in = l->owner->bclink.last_in;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
struct sk_buff_head *transmq = &l->transmq;
struct sk_buff_head *backlogq = &l->backlogq;
struct sk_buff *skb, *_skb, *bskb;
@@ -724,7 +734,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
hdr = buf_msg(skb);
msg_set_seqno(hdr, seqno);
msg_set_ack(hdr, ack);
- msg_set_bcast_ack(hdr, bc_last_in);
+ msg_set_bcast_ack(hdr, bc_ack);
if (likely(skb_queue_len(transmq) < maxwin)) {
_skb = skb_clone(skb, GFP_ATOMIC);
@@ -733,6 +743,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
__skb_dequeue(list);
__skb_queue_tail(transmq, skb);
__skb_queue_tail(xmitq, _skb);
+ TIPC_SKB_CB(skb)->ackers = l->ackers;
l->rcv_unacked = 0;
seqno++;
continue;
@@ -757,62 +768,13 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return 0;
}
-/*
- * tipc_link_sync_rcv - synchronize broadcast link endpoints.
- * Receive the sequence number where we should start receiving and
- * acking broadcast packets from a newly added peer node, and open
- * up for reception of such packets.
- *
- * Called with node locked
- */
-static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf)
-{
- struct tipc_msg *msg = buf_msg(buf);
-
- n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg);
- n->bclink.recv_permitted = true;
- kfree_skb(buf);
-}
-
-/*
- * tipc_link_push_packets - push unsent packets to bearer
- *
- * Push out the unsent messages of a link where congestion
- * has abated. Node is locked.
- *
- * Called with node locked
- */
-void tipc_link_push_packets(struct tipc_link *link)
-{
- struct sk_buff *skb;
- struct tipc_msg *msg;
- u16 seqno = link->snd_nxt;
- u16 ack = mod(link->rcv_nxt - 1);
-
- while (skb_queue_len(&link->transmq) < link->window) {
- skb = __skb_dequeue(&link->backlogq);
- if (!skb)
- break;
- msg = buf_msg(skb);
- link->backlog[msg_importance(msg)].len--;
- msg_set_ack(msg, ack);
- msg_set_seqno(msg, seqno);
- seqno = mod(seqno + 1);
- msg_set_bcast_ack(msg, link->owner->bclink.last_in);
- link->rcv_unacked = 0;
- __skb_queue_tail(&link->transmq, skb);
- tipc_bearer_send(link->owner->net, link->bearer_id,
- skb, link->media_addr);
- }
- link->snd_nxt = seqno;
-}
-
void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
{
struct sk_buff *skb, *_skb;
struct tipc_msg *hdr;
u16 seqno = l->snd_nxt;
u16 ack = l->rcv_nxt - 1;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
while (skb_queue_len(&l->transmq) < l->window) {
skb = skb_peek(&l->backlogq);
@@ -826,96 +788,35 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
l->backlog[msg_importance(hdr)].len--;
__skb_queue_tail(&l->transmq, skb);
__skb_queue_tail(xmitq, _skb);
- msg_set_ack(hdr, ack);
+ TIPC_SKB_CB(skb)->ackers = l->ackers;
msg_set_seqno(hdr, seqno);
- msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
l->rcv_unacked = 0;
seqno++;
}
l->snd_nxt = seqno;
}
-static void link_retransmit_failure(struct tipc_link *l_ptr,
- struct sk_buff *buf)
-{
- struct tipc_msg *msg = buf_msg(buf);
- struct net *net = l_ptr->owner->net;
-
- pr_warn("Retransmission failure on link <%s>\n", l_ptr->name);
-
- if (l_ptr->addr) {
- /* Handle failure on standard link */
- link_print(l_ptr, "Resetting link ");
- pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
- msg_user(msg), msg_type(msg), msg_size(msg),
- msg_errcode(msg));
- pr_info("sqno %u, prev: %x, src: %x\n",
- msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
- } else {
- /* Handle failure on broadcast link */
- struct tipc_node *n_ptr;
- char addr_string[16];
-
- pr_info("Msg seq number: %u, ", msg_seqno(msg));
- pr_cont("Outstanding acks: %lu\n",
- (unsigned long) TIPC_SKB_CB(buf)->handle);
-
- n_ptr = tipc_bclink_retransmit_to(net);
-
- tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_info("Broadcast link info for %s\n", addr_string);
- pr_info("Reception permitted: %d, Acked: %u\n",
- n_ptr->bclink.recv_permitted,
- n_ptr->bclink.acked);
- pr_info("Last in: %u, Oos state: %u, Last sent: %u\n",
- n_ptr->bclink.last_in,
- n_ptr->bclink.oos_state,
- n_ptr->bclink.last_sent);
-
- n_ptr->action_flags |= TIPC_BCAST_RESET;
- l_ptr->stale_count = 0;
- }
-}
-
-void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
- u32 retransmits)
+static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
{
- struct tipc_msg *msg;
-
- if (!skb)
- return;
-
- msg = buf_msg(skb);
-
- /* Detect repeated retransmit failures */
- if (l_ptr->last_retransm == msg_seqno(msg)) {
- if (++l_ptr->stale_count > 100) {
- link_retransmit_failure(l_ptr, skb);
- return;
- }
- } else {
- l_ptr->last_retransm = msg_seqno(msg);
- l_ptr->stale_count = 1;
- }
+ struct tipc_msg *hdr = buf_msg(skb);
- skb_queue_walk_from(&l_ptr->transmq, skb) {
- if (!retransmits)
- break;
- msg = buf_msg(skb);
- msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb,
- l_ptr->media_addr);
- retransmits--;
- l_ptr->stats.retransmitted++;
- }
+ pr_warn("Retransmission failure on link <%s>\n", l->name);
+ link_print(l, "Resetting link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+ pr_info("sqno %u, prev: %x, src: %x\n",
+ msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
}
-static int tipc_link_retransm(struct tipc_link *l, int retransm,
- struct sk_buff_head *xmitq)
+int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
+ struct sk_buff_head *xmitq)
{
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
struct tipc_msg *hdr;
+ u16 ack = l->rcv_nxt - 1;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
if (!skb)
return 0;
@@ -928,19 +829,25 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm,
link_retransmit_failure(l, skb);
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
+
+ /* Move forward to where retransmission should start */
skb_queue_walk(&l->transmq, skb) {
- if (!retransm)
- return 0;
+ if (!less(buf_seqno(skb), from))
+ break;
+ }
+
+ skb_queue_walk_from(&l->transmq, skb) {
+ if (more(buf_seqno(skb), to))
+ break;
hdr = buf_msg(skb);
_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
if (!_skb)
return 0;
hdr = buf_msg(_skb);
- msg_set_ack(hdr, l->rcv_nxt - 1);
- msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
- retransm--;
l->stats.retransmitted++;
}
return 0;
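The rewritten retransmission helper takes an explicit [from, to] sequence range rather than a packet count, and walks the transmit queue with wrap-around safe 16-bit comparisons. A minimal userspace sketch (not part of the patch) of that range test, written in the spirit of TIPC's less()/more() helpers; the sequence numbers below are made up:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seq_less(uint16_t a, uint16_t b)	/* a precedes b, mod 2^16 */
{
	return (int16_t)(a - b) < 0;
}

static bool seq_more(uint16_t a, uint16_t b)	/* a follows b, mod 2^16 */
{
	return seq_less(b, a);
}

/* true if seqno falls inside the NACKed range [from, to] */
static bool in_retrans_range(uint16_t seqno, uint16_t from, uint16_t to)
{
	return !seq_less(seqno, from) && !seq_more(seqno, to);
}

int main(void)
{
	/* a retransmit window straddling the 16-bit wrap point */
	printf("%d\n", in_retrans_range(65534, 65530, 3));	/* 1 */
	printf("%d\n", in_retrans_range(2, 65530, 3));		/* 1 */
	printf("%d\n", in_retrans_range(10, 65530, 3));		/* 0 */
	return 0;
}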
@@ -951,11 +858,9 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm,
* Consumes buffer if message is of right type
* Node lock must be held
*/
-static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
+static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *inputq)
{
- struct tipc_node *node = link->owner;
-
switch (msg_user(buf_msg(skb))) {
case TIPC_LOW_IMPORTANCE:
case TIPC_MEDIUM_IMPORTANCE:
@@ -965,8 +870,8 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
skb_queue_tail(inputq, skb);
return true;
case NAME_DISTRIBUTOR:
- node->bclink.recv_permitted = true;
- skb_queue_tail(link->namedq, skb);
+ l->bc_rcvlink->state = LINK_ESTABLISHED;
+ skb_queue_tail(l->namedq, skb);
return true;
case MSG_BUNDLER:
case TUNNEL_PROTOCOL:
@@ -987,7 +892,6 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *inputq)
{
- struct tipc_node *node = l->owner;
struct tipc_msg *hdr = buf_msg(skb);
struct sk_buff **reasm_skb = &l->reasm_buf;
struct sk_buff *iskb;
@@ -1028,13 +932,15 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
if (tipc_buf_append(reasm_skb, &skb)) {
l->stats.recv_fragmented++;
tipc_data_input(l, skb, inputq);
- } else if (!*reasm_skb) {
+ } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) {
+ pr_warn_ratelimited("Unable to build fragment list\n");
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
return 0;
} else if (usr == BCAST_PROTOCOL) {
- tipc_link_sync_rcv(node, skb);
- return 0;
+ tipc_bcast_lock(l->net);
+ tipc_link_bc_init_rcv(l->bc_rcvlink, hdr);
+ tipc_bcast_unlock(l->net);
}
drop:
kfree_skb(skb);
@@ -1057,12 +963,28 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
}
/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission
+ *
+ * Note that sending of broadcast ack is coordinated among nodes, to reduce
+ * risk of ack storms towards the sender
*/
-void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
+int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
{
+ if (!l)
+ return 0;
+
+ /* Broadcast ACK must be sent via a unicast link => defer to caller */
+ if (link_is_bc_rcvlink(l)) {
+ if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf)
+ return 0;
+ l->rcv_unacked = 0;
+ return TIPC_LINK_SND_BC_ACK;
+ }
+
+ /* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+ return 0;
}
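The test above only lets a broadcast receiver emit its deferred ACK when the low nibble of rcv_nxt XOR-ed with its own node address is all ones, so each peer acks at a different offset within every 16-packet window; that is what spreads the acks out and avoids the ack storm mentioned in the comment. A standalone sketch (not from the patch; the node addresses are hypothetical) showing which packet numbers trigger an ACK for a few peers:

#include <stdint.h>
#include <stdio.h>

/* mirrors the check in tipc_link_build_ack_msg() */
static int bc_ack_due(uint16_t rcv_nxt, uint32_t own_addr)
{
	return ((rcv_nxt ^ own_addr) & 0xf) == 0xf;
}

int main(void)
{
	uint32_t addrs[] = { 0x1001001, 0x1001002, 0x1001003 }; /* made-up nodes */

	for (int i = 0; i < 3; i++) {
		printf("node %#x acks when rcv_nxt %% 16 ==", (unsigned)addrs[i]);
		for (uint16_t s = 0; s < 16; s++)
			if (bc_ack_due(s, addrs[i]))
				printf(" %u", (unsigned)s);
		printf("\n");
	}
	return 0;
}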
/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message
@@ -1084,6 +1006,9 @@ static void tipc_link_build_nack_msg(struct tipc_link *l,
{
u32 def_cnt = ++l->stats.deferred_recv;
+ if (link_is_bc_rcvlink(l))
+ return;
+
if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
}
@@ -1144,12 +1069,11 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
l->rcv_nxt++;
l->stats.recv_info++;
if (!tipc_data_input(l, skb, l->inputq))
- rc = tipc_link_input(l, skb, l->inputq);
- if (unlikely(rc))
- break;
+ rc |= tipc_link_input(l, skb, l->inputq);
if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
- tipc_link_build_ack_msg(l, xmitq);
-
+ rc |= tipc_link_build_ack_msg(l, xmitq);
+ if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK))
+ break;
} while ((skb = __skb_dequeue(defq)));
return rc;
@@ -1158,45 +1082,6 @@ drop:
return rc;
}
-/**
- * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue
- *
- * Returns increase in queue length (i.e. 0 or 1)
- */
-u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
-{
- struct sk_buff *skb1;
- u16 seq_no = buf_seqno(skb);
-
- /* Empty queue ? */
- if (skb_queue_empty(list)) {
- __skb_queue_tail(list, skb);
- return 1;
- }
-
- /* Last ? */
- if (less(buf_seqno(skb_peek_tail(list)), seq_no)) {
- __skb_queue_tail(list, skb);
- return 1;
- }
-
- /* Locate insertion point in queue, then insert; discard if duplicate */
- skb_queue_walk(list, skb1) {
- u16 curr_seqno = buf_seqno(skb1);
-
- if (seq_no == curr_seqno) {
- kfree_skb(skb);
- return 0;
- }
-
- if (less(seq_no, curr_seqno))
- break;
- }
-
- __skb_queue_before(list, skb1, skb);
- return 1;
-}
-
/*
* Send protocol message to the other endpoint.
*/
@@ -1212,23 +1097,17 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg,
skb = __skb_dequeue(&xmitq);
if (!skb)
return;
- tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr);
+ tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr);
l->rcv_unacked = 0;
- kfree_skb(skb);
}
-/* tipc_link_build_proto_msg: prepare link protocol message for transmission
- */
static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq)
{
struct sk_buff *skb = NULL;
struct tipc_msg *hdr = l->pmsg;
- u16 snd_nxt = l->snd_nxt;
- u16 rcv_nxt = l->rcv_nxt;
- u16 rcv_last = rcv_nxt - 1;
- int node_up = l->owner->bclink.recv_permitted;
+ bool node_up = link_is_up(l->bc_rcvlink);
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1236,33 +1115,34 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_type(hdr, mtyp);
msg_set_net_plane(hdr, l->net_plane);
- msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
- msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net));
+ msg_set_next_sent(hdr, l->snd_nxt);
+ msg_set_ack(hdr, l->rcv_nxt - 1);
+ msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1);
+ msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
msg_set_link_tolerance(hdr, tolerance);
msg_set_linkprio(hdr, priority);
msg_set_redundant_link(hdr, node_up);
msg_set_seq_gap(hdr, 0);
/* Compatibility: created msg must not be in sequence with pkt flow */
- msg_set_seqno(hdr, snd_nxt + U16_MAX / 2);
+ msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
if (mtyp == STATE_MSG) {
if (!tipc_link_is_up(l))
return;
- msg_set_next_sent(hdr, snd_nxt);
/* Override rcvgap if there are packets in deferred queue */
if (!skb_queue_empty(&l->deferdq))
- rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt;
+ rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt;
if (rcvgap) {
msg_set_seq_gap(hdr, rcvgap);
l->stats.sent_nacks++;
}
- msg_set_ack(hdr, rcv_last);
msg_set_probe(hdr, probe);
if (probe)
l->stats.sent_probes++;
l->stats.sent_states++;
+ l->rcv_unacked = 0;
} else {
/* RESET_MSG or ACTIVATE_MSG */
msg_set_max_pkt(hdr, l->advertised_mtu);
@@ -1354,7 +1234,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
{
struct tipc_msg *hdr = buf_msg(skb);
u16 rcvgap = 0;
- u16 nacked_gap = msg_seq_gap(hdr);
+ u16 ack = msg_ack(hdr);
+ u16 gap = msg_seq_gap(hdr);
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
@@ -1363,7 +1244,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
char *if_name;
int rc = 0;
- if (tipc_link_is_blocked(l))
+ if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
if (link_own_addr(l) > msg_prevnode(hdr))
@@ -1433,11 +1314,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (rcvgap || (msg_probe(hdr)))
tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
0, 0, xmitq);
- tipc_link_release_pkts(l, msg_ack(hdr));
+ tipc_link_release_pkts(l, ack);
/* If NACK, retransmit will now start at right position */
- if (nacked_gap) {
- rc = tipc_link_retransm(l, nacked_gap, xmitq);
+ if (gap) {
+ rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq);
l->stats.recv_nacks++;
}
@@ -1450,6 +1331,188 @@ exit:
return rc;
}
+/* tipc_link_build_bc_proto_msg() - create broadcast protocol message
+ */
+static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast,
+ u16 peers_snd_nxt,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+ struct sk_buff *dfrd_skb = skb_peek(&l->deferdq);
+ u16 ack = l->rcv_nxt - 1;
+ u16 gap_to = peers_snd_nxt - 1;
+
+ skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
+ 0, l->addr, link_own_addr(l), 0, 0, 0);
+ if (!skb)
+ return false;
+ hdr = buf_msg(skb);
+ msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
+ msg_set_bcast_ack(hdr, ack);
+ msg_set_bcgap_after(hdr, ack);
+ if (dfrd_skb)
+ gap_to = buf_seqno(dfrd_skb) - 1;
+ msg_set_bcgap_to(hdr, gap_to);
+ msg_set_non_seq(hdr, bcast);
+ __skb_queue_tail(xmitq, skb);
+ return true;
+}
+
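tipc_link_build_bc_proto_msg() above encodes the missing packet range into the bcgap_after/bcgap_to fields: the gap opens right after the last in-sequence packet and closes just before the first packet parked in the deferred queue, or before the peer's next send number when nothing is deferred. A small sketch (not part of the patch) of that derivation with invented sequence numbers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static void nack_range(uint16_t rcv_nxt, bool have_deferred,
		       uint16_t first_deferred, uint16_t peers_snd_nxt,
		       uint16_t *after, uint16_t *to)
{
	*after = rcv_nxt - 1;				/* msg_set_bcgap_after() */
	*to = have_deferred ? first_deferred - 1	/* msg_set_bcgap_to() */
			    : peers_snd_nxt - 1;
}

int main(void)
{
	uint16_t after, to;

	nack_range(10, true, 15, 20, &after, &to);	/* packets 10..14 lost */
	printf("gap after %u up to %u\n", after, to);
	nack_range(10, false, 0, 20, &after, &to);	/* packets 10..19 lost */
	printf("gap after %u up to %u\n", after, to);
	return 0;
}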
+/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints.
+ *
+ * Give a newly added peer node the sequence number where it should
+ * start receiving and acking broadcast packets.
+ */
+static void tipc_link_build_bc_init_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head list;
+
+ __skb_queue_head_init(&list);
+ if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list))
+ return;
+ tipc_link_xmit(l, &list, xmitq);
+}
+
+/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer
+ */
+void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ int mtyp = msg_type(hdr);
+ u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
+
+ if (link_is_up(l))
+ return;
+
+ if (msg_user(hdr) == BCAST_PROTOCOL) {
+ l->rcv_nxt = peers_snd_nxt;
+ l->state = LINK_ESTABLISHED;
+ return;
+ }
+
+ if (l->peer_caps & TIPC_BCAST_SYNCH)
+ return;
+
+ if (msg_peer_node_is_up(hdr))
+ return;
+
+ /* Compatibility: accept older, less safe initial synch data */
+ if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG))
+ l->rcv_nxt = peers_snd_nxt;
+}
+
+/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
+ */
+void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+ struct sk_buff_head *xmitq)
+{
+ u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
+
+ if (!link_is_up(l))
+ return;
+
+ if (!msg_peer_node_is_up(hdr))
+ return;
+
+ l->bc_peer_is_up = true;
+
+ /* Ignore if peers_snd_nxt goes beyond receive window */
+ if (more(peers_snd_nxt, l->rcv_nxt + l->window))
+ return;
+
+ if (!more(peers_snd_nxt, l->rcv_nxt)) {
+ l->nack_state = BC_NACK_SND_CONDITIONAL;
+ return;
+ }
+
+ /* Don't NACK if one was recently sent or peeked */
+ if (l->nack_state == BC_NACK_SND_SUPPRESS) {
+ l->nack_state = BC_NACK_SND_UNCONDITIONAL;
+ return;
+ }
+
+ /* Conditionally delay NACK sending until next synch rcv */
+ if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
+ l->nack_state = BC_NACK_SND_UNCONDITIONAL;
+ if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
+ return;
+ }
+
+ /* Send NACK now but suppress next one */
+ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
+ l->nack_state = BC_NACK_SND_SUPPRESS;
+}
+
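The nack_state field keeps every receiver from NACKing the same loss at once: a node that just saw or sent a NACK suppresses its own, and a node with only a small gap defers one synch round before speaking up. A hypothetical pure-function restatement (not from the patch) of the decision taken in tipc_link_bc_sync_rcv():

#include <stdbool.h>
#include <stdio.h>

enum { SND_CONDITIONAL, SND_UNCONDITIONAL, SND_SUPPRESS };
#define MIN_WIN 4	/* stands in for TIPC_MIN_LINK_WIN */

/* gap = packets the peer has sent beyond what we have received */
static bool decide_nack(int *state, unsigned int gap)
{
	if (!gap) {				/* nothing missing */
		*state = SND_CONDITIONAL;
		return false;
	}
	if (*state == SND_SUPPRESS) {		/* a NACK was just sent or overheard */
		*state = SND_UNCONDITIONAL;
		return false;
	}
	if (*state == SND_CONDITIONAL && gap < MIN_WIN) {
		*state = SND_UNCONDITIONAL;	/* small gap: wait one more synch */
		return false;
	}
	*state = SND_SUPPRESS;			/* NACK now, hold back next time */
	return true;
}

int main(void)
{
	int state = SND_CONDITIONAL;
	unsigned int gaps[] = { 0, 2, 2, 6, 6 };

	for (int i = 0; i < 5; i++)
		printf("gap %u -> %s\n", gaps[i],
		       decide_nack(&state, gaps[i]) ? "send NACK" : "stay quiet");
	return 0;
}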
+void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb, *tmp;
+ struct tipc_link *snd_l = l->bc_sndlink;
+
+ if (!link_is_up(l) || !l->bc_peer_is_up)
+ return;
+
+ if (!more(acked, l->acked))
+ return;
+
+ /* Skip over packets peer has already acked */
+ skb_queue_walk(&snd_l->transmq, skb) {
+ if (more(buf_seqno(skb), l->acked))
+ break;
+ }
+
+ /* Update/release the packets peer is acking now */
+ skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) {
+ if (more(buf_seqno(skb), acked))
+ break;
+ if (!--TIPC_SKB_CB(skb)->ackers) {
+ __skb_unlink(skb, &snd_l->transmq);
+ kfree_skb(skb);
+ }
+ }
+ l->acked = acked;
+ tipc_link_advance_backlog(snd_l, xmitq);
+ if (unlikely(!skb_queue_empty(&snd_l->wakeupq)))
+ link_prepare_wakeup(snd_l);
+}
+
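Every packet placed on the broadcast send link's transmit queue is stamped with the current peer count (TIPC_SKB_CB(skb)->ackers, set in tipc_link_xmit() and tipc_link_advance_backlog()); the function above decrements that count for each newly acking peer and only unlinks and frees a packet once all peers have acked it. A toy model (not part of the patch) of that release rule:

#include <stdio.h>

#define NPKTS 4

int main(void)
{
	int peers = 3;				/* 'ackers' stamped on each packet */
	int refcnt[NPKTS];
	int acked_upto[3] = { 1, 3, 2 };	/* highest pkt acked by each peer */

	for (int i = 0; i < NPKTS; i++)
		refcnt[i] = peers;

	for (int p = 0; p < peers; p++)
		for (int i = 0; i <= acked_upto[p]; i++)
			refcnt[i]--;

	for (int i = 0; i < NPKTS; i++)
		printf("pkt %d: %s\n", i,
		       refcnt[i] ? "still on transmq" : "released");
	return 0;
}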
+/* tipc_link_bc_nack_rcv(): receive broadcast nack message
+ */
+int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 dnode = msg_destnode(hdr);
+ int mtyp = msg_type(hdr);
+ u16 acked = msg_bcast_ack(hdr);
+ u16 from = acked + 1;
+ u16 to = msg_bcgap_to(hdr);
+ u16 peers_snd_nxt = to + 1;
+ int rc = 0;
+
+ kfree_skb(skb);
+
+ if (!tipc_link_is_up(l) || !l->bc_peer_is_up)
+ return 0;
+
+ if (mtyp != STATE_MSG)
+ return 0;
+
+ if (dnode == link_own_addr(l)) {
+ tipc_link_bc_ack_rcv(l, acked, xmitq);
+ rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
+ l->stats.recv_nacks++;
+ return rc;
+ }
+
+ /* Msg for other node => suppress own NACK at next sync if applicable */
+ if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from))
+ l->nack_state = BC_NACK_SND_SUPPRESS;
+
+ return 0;
+}
+
void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
{
int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
@@ -1514,7 +1577,7 @@ static void link_reset_statistics(struct tipc_link *l_ptr)
static void link_print(struct tipc_link *l, const char *str)
{
struct sk_buff *hskb = skb_peek(&l->transmq);
- u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt;
+ u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1;
u16 tail = l->snd_nxt - 1;
pr_info("%s Link <%s> state %x\n", str, l->name, l->state);
@@ -1738,7 +1801,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
if (tipc_link_is_up(link))
if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP))
goto attr_msg_full;
- if (tipc_link_is_active(link))
+ if (link->active)
if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE))
goto attr_msg_full;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 0201212..66d859b 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -66,7 +66,8 @@ enum {
*/
enum {
TIPC_LINK_UP_EVT = 1,
- TIPC_LINK_DOWN_EVT = (1 << 1)
+ TIPC_LINK_DOWN_EVT = (1 << 1),
+ TIPC_LINK_SND_BC_ACK = (1 << 2)
};
/* Starting value for maximum packet size negotiation on unicast links
@@ -110,7 +111,7 @@ struct tipc_stats {
* @name: link name character string
* @media_addr: media address to use when sending messages over link
* @timer: link timer
- * @owner: pointer to peer node
+ * @net: pointer to namespace struct
* @refcnt: reference counter for permanent references (owner node & timer)
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
@@ -119,6 +120,7 @@ struct tipc_stats {
* @keepalive_intv: link keepalive timer interval
* @abort_limit: # of unacknowledged continuity probes needed to reset link
* @state: current state of link FSM
+ * @peer_caps: bitmap describing capabilities of peer node
* @silent_intv_cnt: # of timer intervals without any reception from peer
* @proto_msg: template for control messages generated by link
* @pmsg: convenience pointer to "proto_msg" field
@@ -134,6 +136,8 @@ struct tipc_stats {
* @snt_nxt: next sequence number to use for outbound messages
* @last_retransmitted: sequence number of most recently retransmitted message
* @stale_count: # of identical retransmit requests made by peer
+ * @ackers: # of peers that needs to ack each packet before it can be released
+ * @acked: # last packet acked by a certain peer. Used for broadcast.
* @rcv_nxt: next sequence number to expect for inbound messages
* @deferred_queue: deferred queue saved OOS b'cast message received from node
* @unacked_window: # of inbound messages rx'd without ack'ing back to peer
@@ -143,13 +147,14 @@ struct tipc_stats {
* @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
* @long_msg_seq_no: next identifier to use for outbound fragmented messages
* @reasm_buf: head of partially reassembled inbound message fragments
+ * @bc_rcvr: marks that this is a broadcast receiver link
* @stats: collects statistics regarding link activity
*/
struct tipc_link {
u32 addr;
char name[TIPC_MAX_LINK_NAME];
struct tipc_media_addr *media_addr;
- struct tipc_node *owner;
+ struct net *net;
/* Management and link supervision data */
u32 peer_session;
@@ -159,6 +164,8 @@ struct tipc_link {
unsigned long keepalive_intv;
u32 abort_limit;
u32 state;
+ u16 peer_caps;
+ bool active;
u32 silent_intv_cnt;
struct {
unchar hdr[INT_H_SIZE];
@@ -201,18 +208,35 @@ struct tipc_link {
/* Fragmentation/reassembly */
struct sk_buff *reasm_buf;
+ /* Broadcast */
+ u16 ackers;
+ u16 acked;
+ struct tipc_link *bc_rcvlink;
+ struct tipc_link *bc_sndlink;
+ int nack_state;
+ bool bc_peer_is_up;
+
/* Statistics */
struct tipc_stats stats;
};
-bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
- u32 ownnode, u32 peer, struct tipc_media_addr *maddr,
- struct sk_buff_head *inputq, struct sk_buff_head *namedq,
+bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
+ int tolerance, char net_plane, u32 mtu, int priority,
+ int window, u32 session, u32 ownnode, u32 peer,
+ u16 peer_caps,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link *bc_rcvlink,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
struct tipc_link **link);
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+ int mtu, int window, u16 peer_caps,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *namedq,
+ struct tipc_link *bc_sndlink,
+ struct tipc_link **link);
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq);
-void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq);
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
@@ -223,23 +247,11 @@ bool tipc_link_is_establishing(struct tipc_link *l);
bool tipc_link_is_synching(struct tipc_link *l);
bool tipc_link_is_failingover(struct tipc_link *l);
bool tipc_link_is_blocked(struct tipc_link *l);
-int tipc_link_is_active(struct tipc_link *l_ptr);
-void tipc_link_purge_queues(struct tipc_link *l_ptr);
-void tipc_link_purge_backlog(struct tipc_link *l);
+void tipc_link_set_active(struct tipc_link *l, bool active);
void tipc_link_reset(struct tipc_link *l_ptr);
-int __tipc_link_xmit(struct net *net, struct tipc_link *link,
- struct sk_buff_head *list);
int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
struct sk_buff_head *xmitq);
-void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
- u32 gap, u32 tolerance, u32 priority);
-void tipc_link_push_packets(struct tipc_link *l_ptr);
-u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf);
-void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window);
-void tipc_link_retransmit(struct tipc_link *l_ptr,
- struct sk_buff *start, u32 retransmits);
-struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list,
- const struct sk_buff *skb);
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
@@ -249,5 +261,23 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
-
+int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
+void tipc_link_add_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *uc_l,
+ struct sk_buff_head *xmitq);
+void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
+ struct tipc_link *rcv_l,
+ struct sk_buff_head *xmitq);
+int tipc_link_bc_peers(struct tipc_link *l);
+void tipc_link_set_mtu(struct tipc_link *l, int mtu);
+int tipc_link_mtu(struct tipc_link *l);
+void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
+ struct sk_buff_head *xmitq);
+void tipc_link_build_bc_sync_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
+void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+ struct sk_buff_head *xmitq);
+int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
#endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 454f5ec..8740930 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -121,7 +121,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
{
struct sk_buff *head = *headbuf;
struct sk_buff *frag = *buf;
- struct sk_buff *tail;
+ struct sk_buff *tail = NULL;
struct tipc_msg *msg;
u32 fragid;
int delta;
@@ -141,9 +141,15 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (unlikely(skb_unclone(frag, GFP_ATOMIC)))
goto err;
head = *headbuf = frag;
- skb_frag_list_init(head);
- TIPC_SKB_CB(head)->tail = NULL;
*buf = NULL;
+ TIPC_SKB_CB(head)->tail = NULL;
+ if (skb_is_nonlinear(head)) {
+ skb_walk_frags(head, tail) {
+ TIPC_SKB_CB(head)->tail = tail;
+ }
+ } else {
+ skb_frag_list_init(head);
+ }
return 0;
}
@@ -176,7 +182,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
*buf = NULL;
return 0;
err:
- pr_warn_ratelimited("Unable to build fragment list\n");
kfree_skb(*buf);
kfree_skb(*headbuf);
*buf = *headbuf = NULL;
@@ -559,18 +564,22 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
/* tipc_msg_reassemble() - clone a buffer chain of fragments and
* reassemble the clones into one message
*/
-struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list)
+bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq)
{
- struct sk_buff *skb;
+ struct sk_buff *skb, *_skb;
struct sk_buff *frag = NULL;
struct sk_buff *head = NULL;
- int hdr_sz;
+ int hdr_len;
/* Copy header if single buffer */
if (skb_queue_len(list) == 1) {
skb = skb_peek(list);
- hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
- return __pskb_copy(skb, hdr_sz, GFP_ATOMIC);
+ hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb));
+ _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC);
+ if (!_skb)
+ return false;
+ __skb_queue_tail(rcvq, _skb);
+ return true;
}
/* Clone all fragments and reassemble */
@@ -584,11 +593,12 @@ struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list)
if (!head)
goto error;
}
- return frag;
+ __skb_queue_tail(rcvq, frag);
+ return true;
error:
pr_warn("Failed do clone local mcast rcv buffer\n");
kfree_skb(head);
- return NULL;
+ return false;
}
/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 9f0ef54..55778a0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -112,6 +112,7 @@ struct tipc_skb_cb {
bool wakeup_pending;
u16 chain_sz;
u16 chain_imp;
+ u16 ackers;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@ -600,6 +601,11 @@ static inline u32 msg_last_bcast(struct tipc_msg *m)
return msg_bits(m, 4, 16, 0xffff);
}
+static inline u32 msg_bc_snd_nxt(struct tipc_msg *m)
+{
+ return msg_last_bcast(m) + 1;
+}
+
static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 4, 16, 0xffff, n);
@@ -789,7 +795,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
-struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
+bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index e6018b7..c07612b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -102,7 +102,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb)
if (!oskb)
break;
msg_set_destnode(buf_msg(oskb), dnode);
- tipc_node_xmit_skb(net, oskb, dnode, dnode);
+ tipc_node_xmit_skb(net, oskb, dnode, 0);
}
rcu_read_unlock();
@@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
&tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
rcu_read_unlock();
- tipc_node_xmit(net, &head, dnode, dnode);
+ tipc_node_xmit(net, &head, dnode, 0);
}
static void tipc_publ_subscribe(struct net *net, struct publication *publ,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index d6d1399..77bf911 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,14 +112,11 @@ int tipc_net_start(struct net *net, u32 addr)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
char addr_string[16];
- int res;
tn->own_addr = addr;
tipc_named_reinit(net);
tipc_sk_reinit(net);
- res = tipc_bclink_init(net);
- if (res)
- return res;
+ tipc_bcast_reinit(net);
tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
TIPC_ZONE_SCOPE, 0, tn->own_addr);
@@ -142,7 +139,6 @@ void tipc_net_stop(struct net *net)
tn->own_addr);
rtnl_lock();
tipc_bearer_stop(net);
- tipc_bclink_stop(net);
tipc_node_stop(net);
rtnl_unlock();
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2670751..20cddec 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -72,7 +72,6 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
bool delete);
static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
-static void node_established_contact(struct tipc_node *n_ptr);
static void tipc_node_delete(struct tipc_node *node);
static void tipc_node_timeout(unsigned long data);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
@@ -165,8 +164,10 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->publ_list);
INIT_LIST_HEAD(&n_ptr->conn_sks);
- skb_queue_head_init(&n_ptr->bclink.namedq);
- __skb_queue_head_init(&n_ptr->bclink.deferdq);
+ skb_queue_head_init(&n_ptr->bc_entry.namedq);
+ skb_queue_head_init(&n_ptr->bc_entry.inputq1);
+ __skb_queue_head_init(&n_ptr->bc_entry.arrvq);
+ skb_queue_head_init(&n_ptr->bc_entry.inputq2);
hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
if (n_ptr->addr < temp_node->addr)
@@ -177,6 +178,18 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
n_ptr->signature = INVALID_NODE_SIG;
n_ptr->active_links[0] = INVALID_BEARER_ID;
n_ptr->active_links[1] = INVALID_BEARER_ID;
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr,
+ U16_MAX, tipc_bc_sndlink(net)->window,
+ n_ptr->capabilities,
+ &n_ptr->bc_entry.inputq1,
+ &n_ptr->bc_entry.namedq,
+ tipc_bc_sndlink(net),
+ &n_ptr->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ kfree(n_ptr);
+ n_ptr = NULL;
+ goto exit;
+ }
tipc_node_get(n_ptr);
setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
n_ptr->keepalive_intv = U32_MAX;
@@ -203,6 +216,7 @@ static void tipc_node_delete(struct tipc_node *node)
{
list_del_rcu(&node->list);
hlist_del_rcu(&node->hash);
+ kfree(node->bc_entry.link);
kfree_rcu(node, rcu);
}
@@ -332,6 +346,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE;
tipc_bearer_add_dest(n->net, bearer_id, n->addr);
+ tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
pr_debug("Established link <%s> on network plane %c\n",
nl->name, nl->net_plane);
@@ -340,8 +355,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
if (!ol) {
*slot0 = bearer_id;
*slot1 = bearer_id;
- tipc_link_build_bcast_sync_msg(nl, xmitq);
- node_established_contact(n);
+ tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
+ n->action_flags |= TIPC_NOTIFY_NODE_UP;
+ tipc_bcast_add_peer(n->net, nl, xmitq);
return;
}
@@ -350,8 +366,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
pr_debug("Old link <%s> becomes standby\n", ol->name);
*slot0 = bearer_id;
*slot1 = bearer_id;
+ tipc_link_set_active(nl, true);
+ tipc_link_set_active(ol, false);
} else if (nl->priority == ol->priority) {
- *slot0 = bearer_id;
+ tipc_link_set_active(nl, true);
+ *slot1 = bearer_id;
} else {
pr_debug("New link <%s> is standby\n", nl->name);
}
@@ -428,8 +447,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
tipc_link_build_reset_msg(l, xmitq);
*maddr = &n->links[*bearer_id].maddr;
node_lost_contact(n, &le->inputq);
+ tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
return;
}
+ tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
/* There is still a working link => initiate failover */
tnl = node_active_link(n, 0);
@@ -493,6 +514,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
bool link_up = false;
bool accept_addr = false;
bool reset = true;
+ char *if_name;
*dupl_addr = false;
*respond = false;
@@ -579,9 +601,15 @@ void tipc_node_check_dest(struct net *net, u32 onode,
pr_warn("Cannot establish 3rd link to %x\n", n->addr);
goto exit;
}
- if (!tipc_link_create(n, b, mod(tipc_net(net)->random),
- tipc_own_addr(net), onode, &le->maddr,
- &le->inputq, &n->bclink.namedq, &l)) {
+ if_name = strchr(b->name, ':') + 1;
+ if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
+ b->net_plane, b->mtu, b->priority,
+ b->window, mod(tipc_net(net)->random),
+ tipc_own_addr(net), onode,
+ n->capabilities,
+ tipc_bc_sndlink(n->net), n->bc_entry.link,
+ &le->inputq,
+ &n->bc_entry.namedq, &l)) {
*respond = false;
goto exit;
}
@@ -824,58 +852,36 @@ bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr)
return true;
}
-static void node_established_contact(struct tipc_node *n_ptr)
-{
- tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT);
- n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
- n_ptr->bclink.oos_state = 0;
- n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
- tipc_bclink_add_node(n_ptr->net, n_ptr->addr);
-}
-
-static void node_lost_contact(struct tipc_node *n_ptr,
+static void node_lost_contact(struct tipc_node *n,
struct sk_buff_head *inputq)
{
char addr_string[16];
struct tipc_sock_conn *conn, *safe;
struct tipc_link *l;
- struct list_head *conns = &n_ptr->conn_sks;
+ struct list_head *conns = &n->conn_sks;
struct sk_buff *skb;
- struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
uint i;
pr_debug("Lost contact with %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+ tipc_addr_string_fill(addr_string, n->addr));
- /* Flush broadcast link info associated with lost node */
- if (n_ptr->bclink.recv_permitted) {
- __skb_queue_purge(&n_ptr->bclink.deferdq);
-
- if (n_ptr->bclink.reasm_buf) {
- kfree_skb(n_ptr->bclink.reasm_buf);
- n_ptr->bclink.reasm_buf = NULL;
- }
-
- tipc_bclink_remove_node(n_ptr->net, n_ptr->addr);
- tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ);
-
- n_ptr->bclink.recv_permitted = false;
- }
+ /* Clean up broadcast state */
+ tipc_bcast_remove_peer(n->net, n->bc_entry.link);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
- l = n_ptr->links[i].link;
+ l = n->links[i].link;
if (l)
tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
}
/* Notify publications from this node */
- n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
+ n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
- SHORT_H_SIZE, 0, tn->own_addr,
+ SHORT_H_SIZE, 0, tipc_own_addr(n->net),
conn->peer_node, conn->port,
conn->peer_port, TIPC_ERR_NO_NODE);
if (likely(skb))
@@ -937,18 +943,13 @@ void tipc_node_unlock(struct tipc_node *node)
publ_list = &node->publ_list;
node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
- TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
- TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
- TIPC_BCAST_RESET);
+ TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP);
spin_unlock_bh(&node->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
tipc_publ_notify(net, publ_list, addr);
- if (flags & TIPC_WAKEUP_BCAST_USERS)
- tipc_bclink_wakeup_users(net);
-
if (flags & TIPC_NOTIFY_NODE_UP)
tipc_named_node_up(net, addr);
@@ -960,11 +961,6 @@ void tipc_node_unlock(struct tipc_node *node)
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
link_id, addr);
- if (flags & TIPC_BCAST_MSG_EVT)
- tipc_bclink_input(net);
-
- if (flags & TIPC_BCAST_RESET)
- tipc_node_reset_links(node);
}
/* Caller should hold node lock for the passed node */
@@ -1080,6 +1076,67 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
}
/**
+ * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
+ * @net: the applicable net namespace
+ * @skb: TIPC packet
+ * @bearer_id: id of bearer message arrived on
+ *
+ * Invoked with no locks held.
+ */
+static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id)
+{
+ int rc;
+ struct sk_buff_head xmitq;
+ struct tipc_bclink_entry *be;
+ struct tipc_link_entry *le;
+ struct tipc_msg *hdr = buf_msg(skb);
+ int usr = msg_user(hdr);
+ u32 dnode = msg_destnode(hdr);
+ struct tipc_node *n;
+
+ __skb_queue_head_init(&xmitq);
+
+ /* If NACK for other node, let rcv link for that node peek into it */
+ if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net)))
+ n = tipc_node_find(net, dnode);
+ else
+ n = tipc_node_find(net, msg_prevnode(hdr));
+ if (!n) {
+ kfree_skb(skb);
+ return;
+ }
+ be = &n->bc_entry;
+ le = &n->links[bearer_id];
+
+ rc = tipc_bcast_rcv(net, be->link, skb);
+
+ /* Broadcast link reset may happen at reassembly failure */
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_node_reset_links(n);
+
+ /* Broadcast ACKs are sent on a unicast link */
+ if (rc & TIPC_LINK_SND_BC_ACK) {
+ tipc_node_lock(n);
+ tipc_link_build_ack_msg(le->link, &xmitq);
+ tipc_node_unlock(n);
+ }
+
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+
+ /* Deliver. 'arrvq' is under inputq2's lock protection */
+ if (!skb_queue_empty(&be->inputq1)) {
+ spin_lock_bh(&be->inputq2.lock);
+ spin_lock_bh(&be->inputq1.lock);
+ skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
+ spin_unlock_bh(&be->inputq1.lock);
+ spin_unlock_bh(&be->inputq2.lock);
+ tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2);
+ }
+ tipc_node_put(n);
+}
+
+/**
* tipc_node_check_state - check and if necessary update node state
* @skb: TIPC packet
* @bearer_id: identity of bearer delivering the packet
@@ -1221,6 +1278,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
int usr = msg_user(hdr);
int bearer_id = b->identity;
struct tipc_link_entry *le;
+ u16 bc_ack = msg_bcast_ack(hdr);
int rc = 0;
__skb_queue_head_init(&xmitq);
@@ -1229,13 +1287,12 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
if (unlikely(!tipc_msg_validate(skb)))
goto discard;
- /* Handle arrival of a non-unicast link packet */
+ /* Handle arrival of discovery or broadcast packet */
if (unlikely(msg_non_seq(hdr))) {
- if (usr == LINK_CONFIG)
- tipc_disc_rcv(net, skb, b);
+ if (unlikely(usr == LINK_CONFIG))
+ return tipc_disc_rcv(net, skb, b);
else
- tipc_bclink_rcv(net, skb);
- return;
+ return tipc_node_bc_rcv(net, skb, bearer_id);
}
/* Locate neighboring node that sent packet */
@@ -1244,19 +1301,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
goto discard;
le = &n->links[bearer_id];
+ /* Ensure broadcast reception is in synch with peer's send state */
+ if (unlikely(usr == LINK_PROTOCOL))
+ tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
+ else if (unlikely(n->bc_entry.link->acked != bc_ack))
+ tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
+
tipc_node_lock(n);
/* Is reception permitted at the moment ? */
if (!tipc_node_filter_pkt(n, hdr))
goto unlock;
- if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
- tipc_bclink_sync_state(n, hdr);
-
- /* Release acked broadcast packets */
- if (unlikely(n->bclink.acked != msg_bcast_ack(hdr)))
- tipc_bclink_acknowledge(n, msg_bcast_ack(hdr));
-
/* Check and if necessary update node state */
if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) {
rc = tipc_link_rcv(le->link, skb, &xmitq);
@@ -1271,8 +1327,8 @@ unlock:
if (unlikely(rc & TIPC_LINK_DOWN_EVT))
tipc_node_link_down(n, bearer_id, false);
- if (unlikely(!skb_queue_empty(&n->bclink.namedq)))
- tipc_named_rcv(net, &n->bclink.namedq);
+ if (unlikely(!skb_queue_empty(&n->bc_entry.namedq)))
+ tipc_named_rcv(net, &n->bc_entry.namedq);
if (!skb_queue_empty(&le->inputq))
tipc_sk_rcv(net, &le->inputq);
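
In the tipc/node.c changes above, tipc_rcv() now routes non-sequenced packets either to discovery or to the new tipc_node_bc_rcv(), and the broadcast delivery step splices everything pending on inputq1 onto arrvq while holding inputq2's lock first, so consumers serialized on inputq2.lock never observe a partial splice. A minimal sketch of that splice-under-two-locks step, assuming illustrative queue names rather than the TIPC structures themselves:

    #include <linux/skbuff.h>
    #include <linux/spinlock.h>

    /* Sketch only: move all buffers from srcq onto arrvq, taking the
     * consumer-side lock (dstq->lock, which also guards arrvq) before
     * the producer-side lock, matching the order used in the patch. */
    static void splice_and_deliver(struct sk_buff_head *srcq,
                                   struct sk_buff_head *arrvq,
                                   struct sk_buff_head *dstq)
    {
            if (skb_queue_empty(srcq))
                    return;

            spin_lock_bh(&dstq->lock);
            spin_lock_bh(&srcq->lock);
            skb_queue_splice_tail_init(srcq, arrvq);
            spin_unlock_bh(&srcq->lock);
            spin_unlock_bh(&dstq->lock);

            /* arrvq is then drained toward the sockets' receive queue,
             * e.g. via tipc_sk_mcast_rcv() in the patch above */
    }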
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 344b3e7..6734562 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -55,36 +55,18 @@
enum {
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
TIPC_NOTIFY_NODE_UP = (1 << 4),
- TIPC_WAKEUP_BCAST_USERS = (1 << 5),
TIPC_NOTIFY_LINK_UP = (1 << 6),
- TIPC_NOTIFY_LINK_DOWN = (1 << 7),
- TIPC_BCAST_MSG_EVT = (1 << 9),
- TIPC_BCAST_RESET = (1 << 10)
+ TIPC_NOTIFY_LINK_DOWN = (1 << 7)
};
-/**
- * struct tipc_node_bclink - TIPC node bclink structure
- * @acked: sequence # of last outbound b'cast message acknowledged by node
- * @last_in: sequence # of last in-sequence b'cast message received from node
- * @last_sent: sequence # of last b'cast message sent by node
- * @oos_state: state tracker for handling OOS b'cast messages
- * @deferred_queue: deferred queue saved OOS b'cast message received from node
- * @reasm_buf: broadcast reassembly queue head from node
- * @inputq_map: bitmap indicating which inqueues should be kicked
- * @recv_permitted: true if node is allowed to receive b'cast messages
+/* Optional capabilities supported by this code version
*/
-struct tipc_node_bclink {
- u32 acked;
- u32 last_in;
- u32 last_sent;
- u32 oos_state;
- u32 deferred_size;
- struct sk_buff_head deferdq;
- struct sk_buff *reasm_buf;
- struct sk_buff_head namedq;
- bool recv_permitted;
+enum {
+ TIPC_BCAST_SYNCH = (1 << 1)
};
+#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH
+
struct tipc_link_entry {
struct tipc_link *link;
u32 mtu;
@@ -92,6 +74,14 @@ struct tipc_link_entry {
struct tipc_media_addr maddr;
};
+struct tipc_bclink_entry {
+ struct tipc_link *link;
+ struct sk_buff_head inputq1;
+ struct sk_buff_head arrvq;
+ struct sk_buff_head inputq2;
+ struct sk_buff_head namedq;
+};
+
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
@@ -104,7 +94,6 @@ struct tipc_link_entry {
* @active_links: bearer ids of active links, used as index into links[] array
* @links: array containing references to all links to node
* @action_flags: bit mask of different types of node actions
- * @bclink: broadcast-related info
* @state: connectivity state vs peer node
* @sync_point: sequence number where synch/failover is finished
* @list: links to adjacent nodes in sorted list of cluster's nodes
@@ -124,8 +113,8 @@ struct tipc_node {
struct hlist_node hash;
int active_links[2];
struct tipc_link_entry links[MAX_BEARERS];
+ struct tipc_bclink_entry bc_entry;
int action_flags;
- struct tipc_node_bclink bclink;
struct list_head list;
int state;
u16 sync_point;
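
The node.h hunk drops the per-node tipc_node_bclink state in favour of a tipc_bclink_entry and introduces a capability bit, TIPC_BCAST_SYNCH, exported through TIPC_NODE_CAPABILITIES. A hedged sketch of how a peer's advertised capability word would typically be tested; the field and helper names below are illustrative, not taken from the patch:

    #include <linux/types.h>

    enum {
            TIPC_BCAST_SYNCH = (1 << 1)
    };
    #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH

    /* Illustrative helper: true if the peer advertised broadcast link
     * synchronization support during neighbor discovery. */
    static inline bool peer_supports_bcast_synch(u16 peer_capabilities)
    {
            return !!(peer_capabilities & TIPC_BCAST_SYNCH);
    }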
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1060d52..552dbab 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -689,13 +689,13 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
new_mtu:
- mtu = tipc_bclink_get_mtu();
+ mtu = tipc_bcast_get_mtu(net);
rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
if (unlikely(rc < 0))
return rc;
do {
- rc = tipc_bclink_xmit(net, pktchain);
+ rc = tipc_bcast_xmit(net, pktchain);
if (likely(!rc))
return dsz;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 9bc0b1e..816914e 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -52,6 +52,8 @@
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
+#define UDP_MIN_HEADROOM 28
+
static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
[TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
[TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
@@ -153,11 +155,12 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
struct udp_bearer *ub;
struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
- struct sk_buff *clone;
struct rtable *rt;
- clone = skb_clone(skb, GFP_ATOMIC);
- skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM)
+ pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+
+ skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
if (!ub) {
err = -ENODEV;
@@ -167,7 +170,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
struct flowi4 fl = {
.daddr = dst->ipv4.s_addr,
.saddr = src->ipv4.s_addr,
- .flowi4_mark = clone->mark,
+ .flowi4_mark = skb->mark,
.flowi4_proto = IPPROTO_UDP
};
rt = ip_route_output_key(net, &fl);
@@ -176,7 +179,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
goto tx_error;
}
ttl = ip4_dst_hoplimit(&rt->dst);
- err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone,
+ err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb,
src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0,
src->udp_port, dst->udp_port,
@@ -199,7 +202,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
if (err)
goto tx_error;
ttl = ip6_dst_hoplimit(ndst);
- err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone,
+ err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
ndst->dev, &src->ipv6,
&dst->ipv6, 0, ttl, src->udp_port,
dst->udp_port, false);
@@ -208,7 +211,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
return err;
tx_error:
- kfree_skb(clone);
+ kfree_skb(skb);
return err;
}
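
The udp_media.c hunk stops cloning the skb for each transmit and instead grows its headroom in place when fewer than UDP_MIN_HEADROOM (28) bytes are available for the UDP/IP encapsulation headers. A defensive sketch of the same check, assuming a hypothetical helper name and additionally propagating pskb_expand_head()'s return value:

    #include <linux/skbuff.h>

    #define UDP_MIN_HEADROOM 28

    /* Sketch: make sure the tunnel headers will fit in front of the data.
     * pskb_expand_head() may reallocate the skb head, so callers must not
     * keep stale pointers into the old header area afterwards. */
    static int tipc_udp_ensure_headroom(struct sk_buff *skb)
    {
            if (skb_headroom(skb) >= UDP_MIN_HEADROOM)
                    return 0;
            return pskb_expand_head(skb, UDP_MIN_HEADROOM - skb_headroom(skb),
                                    0, GFP_ATOMIC);
    }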
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 94f6582..aaa0b58 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,9 +326,10 @@ found:
return s;
}
-static inline int unix_writable(struct sock *sk)
+static int unix_writable(const struct sock *sk)
{
- return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+ return sk->sk_state != TCP_LISTEN &&
+ (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
static void unix_write_space(struct sock *sk)
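
With this change a listening AF_UNIX socket is never reported writable, and the existing memory test is kept: (sk_wmem_alloc << 2) <= sk_sndbuf is simply sk_wmem_alloc <= sk_sndbuf / 4, i.e. the socket counts as writable only while at most a quarter of its send buffer is allocated. For example, with a common default sk_sndbuf of 212992 bytes, the socket stops polling writable once roughly 53248 bytes of write memory are outstanding.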
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index df5fc6b..7fd1220 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,19 +36,20 @@
* not support simultaneous connects (two "client" sockets connecting).
*
* - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the SS_LISTEN state. When a connection
- * request is received (the second kind of socket mentioned above), we create a
- * new socket and refer to it as a pending socket. These pending sockets are
- * placed on the pending connection list of the listener socket. When future
- * packets are received for the address the listener socket is bound to, we
- * check if the source of the packet is from one that has an existing pending
- * connection. If it does, we process the packet for the pending socket. When
- * that socket reaches the connected state, it is removed from the listener
- * socket's pending list and enqueued in the listener socket's accept queue.
- * Callers of accept(2) will accept connected sockets from the listener socket's
- * accept queue. If the socket cannot be accepted for some reason then it is
- * marked rejected. Once the connection is accepted, it is owned by the user
- * process and the responsibility for cleanup falls with that user process.
+ * implementation because they are in the VSOCK_SS_LISTEN state. When a
+ * connection request is received (the second kind of socket mentioned above),
+ * we create a new socket and refer to it as a pending socket. These pending
+ * sockets are placed on the pending connection list of the listener socket.
+ * When future packets are received for the address the listener socket is
+ * bound to, we check if the source of the packet is from one that has an
+ * existing pending connection. If it does, we process the packet for the
+ * pending socket. When that socket reaches the connected state, it is removed
+ * from the listener socket's pending list and enqueued in the listener
+ * socket's accept queue. Callers of accept(2) will accept connected sockets
+ * from the listener socket's accept queue. If the socket cannot be accepted
+ * for some reason then it is marked rejected. Once the connection is
+ * accepted, it is owned by the user process and the responsibility for cleanup
+ * falls with that user process.
*
* - It is possible that these pending sockets will never reach the connected
* state; in fact, we may never receive another packet after the connection
@@ -114,8 +115,6 @@ static struct proto vsock_proto = {
*/
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
-#define SS_LISTEN 255
-
static const struct vsock_transport *transport;
static DEFINE_MUTEX(vsock_register_mutex);
@@ -887,7 +886,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
/* Listening sockets that have connections in their accept
* queue can be read.
*/
- if (sk->sk_state == SS_LISTEN
+ if (sk->sk_state == VSOCK_SS_LISTEN
&& !vsock_is_accept_queue_empty(sk))
mask |= POLLIN | POLLRDNORM;
@@ -1144,7 +1143,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = -EALREADY;
break;
default:
- if ((sk->sk_state == SS_LISTEN) ||
+ if ((sk->sk_state == VSOCK_SS_LISTEN) ||
vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
err = -EINVAL;
goto out;
@@ -1256,7 +1255,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
goto out;
}
- if (listener->sk_state != SS_LISTEN) {
+ if (listener->sk_state != VSOCK_SS_LISTEN) {
err = -EINVAL;
goto out;
}
@@ -1348,7 +1347,7 @@ static int vsock_listen(struct socket *sock, int backlog)
}
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = SS_LISTEN;
+ sk->sk_state = VSOCK_SS_LISTEN;
err = 0;
@@ -1948,13 +1947,13 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
err = misc_register(&vsock_device);
if (err) {
pr_err("Failed to register misc device\n");
- return -ENOENT;
+ goto err_reset_transport;
}
err = proto_register(&vsock_proto, 1); /* we want our slab */
if (err) {
pr_err("Cannot register vsock protocol\n");
- goto err_misc_deregister;
+ goto err_deregister_misc;
}
err = sock_register(&vsock_family_ops);
@@ -1969,8 +1968,9 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
err_unregister_proto:
proto_unregister(&vsock_proto);
-err_misc_deregister:
+err_deregister_misc:
misc_deregister(&vsock_device);
+err_reset_transport:
transport = NULL;
err_busy:
mutex_unlock(&vsock_register_mutex);
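
The __vsock_core_init() error path now unwinds through dedicated labels, so a failure at any registration step undoes exactly the steps that already succeeded and clears the module-level transport pointer. A generic sketch of that goto-ladder style, with stub functions standing in for misc_register()/proto_register() and their teardown counterparts:

    /* Illustrative stubs; in the patch these are misc_register(),
     * proto_register(), misc_deregister() and 'transport = NULL'. */
    static int register_step_a(void) { return 0; }
    static int register_step_b(void) { return 0; }
    static void unregister_step_a(void) { }
    static void reset_module_state(void) { }

    static int example_core_init(void)
    {
            int err;

            err = register_step_a();
            if (err)
                    goto err_reset_state;

            err = register_step_b();
            if (err)
                    goto err_undo_a;

            return 0;

    err_undo_a:
            unregister_step_a();
    err_reset_state:
            reset_module_state();
            return err;
    }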
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1f63daf..400d872 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -40,13 +40,11 @@
static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
-static void vmci_transport_peer_attach_cb(u32 sub_id,
- const struct vmci_event_data *ed,
- void *client_data);
static void vmci_transport_peer_detach_cb(u32 sub_id,
const struct vmci_event_data *ed,
void *client_data);
static void vmci_transport_recv_pkt_work(struct work_struct *work);
+static void vmci_transport_cleanup(struct work_struct *work);
static int vmci_transport_recv_listen(struct sock *sk,
struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_server(
@@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info {
struct vmci_transport_packet pkt;
};
+static LIST_HEAD(vmci_transport_cleanup_list);
+static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
+static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
+
static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
VMCI_INVALID_ID };
static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
@@ -90,8 +92,6 @@ static int PROTOCOL_OVERRIDE = -1;
*/
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
-#define SS_LISTEN 255
-
/* Helper function to convert from a VMCI error code to a VSock error code. */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -791,44 +791,6 @@ out:
return err;
}
-static void vmci_transport_peer_attach_cb(u32 sub_id,
- const struct vmci_event_data *e_data,
- void *client_data)
-{
- struct sock *sk = client_data;
- const struct vmci_event_payload_qp *e_payload;
- struct vsock_sock *vsk;
-
- e_payload = vmci_event_data_const_payload(e_data);
-
- vsk = vsock_sk(sk);
-
- /* We don't ask for delayed CBs when we subscribe to this event (we
- * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
- * guarantees in that case about what context we might be running in,
- * so it could be BH or process, blockable or non-blockable. So we
- * need to account for all possible contexts here.
- */
- local_bh_disable();
- bh_lock_sock(sk);
-
- /* XXX This is lame, we should provide a way to lookup sockets by
- * qp_handle.
- */
- if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
- e_payload->handle)) {
- /* XXX This doesn't do anything, but in the future we may want
- * to set a flag here to verify the attach really did occur and
- * we weren't just sent a datagram claiming it was.
- */
- goto out;
- }
-
-out:
- bh_unlock_sock(sk);
- local_bh_enable();
-}
-
static void vmci_transport_handle_detach(struct sock *sk)
{
struct vsock_sock *vsk;
@@ -871,28 +833,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
const struct vmci_event_data *e_data,
void *client_data)
{
- struct sock *sk = client_data;
+ struct vmci_transport *trans = client_data;
const struct vmci_event_payload_qp *e_payload;
- struct vsock_sock *vsk;
e_payload = vmci_event_data_const_payload(e_data);
- vsk = vsock_sk(sk);
- if (vmci_handle_is_invalid(e_payload->handle))
- return;
-
- /* Same rules for locking as for peer_attach_cb(). */
- local_bh_disable();
- bh_lock_sock(sk);
/* XXX This is lame, we should provide a way to lookup sockets by
* qp_handle.
*/
- if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
- e_payload->handle))
- vmci_transport_handle_detach(sk);
+ if (vmci_handle_is_invalid(e_payload->handle) ||
+ !vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
+ return;
- bh_unlock_sock(sk);
- local_bh_enable();
+ /* We don't ask for delayed CBs when we subscribe to this event (we
+ * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
+ * guarantees in that case about what context we might be running in,
+ * so it could be BH or process, blockable or non-blockable. So we
+ * need to account for all possible contexts here.
+ */
+ spin_lock_bh(&trans->lock);
+ if (!trans->sk)
+ goto out;
+
+ /* Apart from here, trans->lock is only grabbed as part of sk destruct,
+ * where trans->sk isn't locked.
+ */
+ bh_lock_sock(trans->sk);
+
+ vmci_transport_handle_detach(trans->sk);
+
+ bh_unlock_sock(trans->sk);
+ out:
+ spin_unlock_bh(&trans->lock);
}
static void vmci_transport_qp_resumed_cb(u32 sub_id,
@@ -919,7 +891,7 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
switch (sk->sk_state) {
- case SS_LISTEN:
+ case VSOCK_SS_LISTEN:
vmci_transport_recv_listen(sk, pkt);
break;
case SS_CONNECTING:
@@ -1181,7 +1153,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
*/
err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
vmci_transport_peer_detach_cb,
- pending, &detach_sub_id);
+ vmci_trans(vpending), &detach_sub_id);
if (err < VMCI_SUCCESS) {
vmci_transport_send_reset(pending, pkt);
err = vmci_transport_error_to_vsock_error(err);
@@ -1321,7 +1293,6 @@ vmci_transport_recv_connecting_client(struct sock *sk,
|| vmci_trans(vsk)->qpair
|| vmci_trans(vsk)->produce_size != 0
|| vmci_trans(vsk)->consume_size != 0
- || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
|| vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
skerr = EPROTO;
err = -EINVAL;
@@ -1389,7 +1360,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
struct vsock_sock *vsk;
struct vmci_handle handle;
struct vmci_qp *qpair;
- u32 attach_sub_id;
u32 detach_sub_id;
bool is_local;
u32 flags;
@@ -1399,7 +1369,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
vsk = vsock_sk(sk);
handle = VMCI_INVALID_HANDLE;
- attach_sub_id = VMCI_INVALID_ID;
detach_sub_id = VMCI_INVALID_ID;
/* If we have gotten here then we should be past the point where old
@@ -1444,23 +1413,15 @@ static int vmci_transport_recv_connecting_client_negotiate(
goto destroy;
}
- /* Subscribe to attach and detach events first.
+ /* Subscribe to detach events first.
*
* XXX We attach once for each queue pair created for now so it is easy
* to find the socket (it's provided), but later we should only
* subscribe once and add a way to lookup sockets by queue pair handle.
*/
- err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
- vmci_transport_peer_attach_cb,
- sk, &attach_sub_id);
- if (err < VMCI_SUCCESS) {
- err = vmci_transport_error_to_vsock_error(err);
- goto destroy;
- }
-
err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
vmci_transport_peer_detach_cb,
- sk, &detach_sub_id);
+ vmci_trans(vsk), &detach_sub_id);
if (err < VMCI_SUCCESS) {
err = vmci_transport_error_to_vsock_error(err);
goto destroy;
@@ -1496,7 +1457,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
pkt->u.size;
- vmci_trans(vsk)->attach_sub_id = attach_sub_id;
vmci_trans(vsk)->detach_sub_id = detach_sub_id;
vmci_trans(vsk)->notify_ops->process_negotiate(sk);
@@ -1504,9 +1464,6 @@ static int vmci_transport_recv_connecting_client_negotiate(
return 0;
destroy:
- if (attach_sub_id != VMCI_INVALID_ID)
- vmci_event_unsubscribe(attach_sub_id);
-
if (detach_sub_id != VMCI_INVALID_ID)
vmci_event_unsubscribe(detach_sub_id);
@@ -1607,9 +1564,11 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
vmci_trans(vsk)->qpair = NULL;
vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
- vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
- VMCI_INVALID_ID;
+ vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
vmci_trans(vsk)->notify_ops = NULL;
+ INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
+ vmci_trans(vsk)->sk = &vsk->sk;
+ spin_lock_init(&vmci_trans(vsk)->lock);
if (psk) {
vmci_trans(vsk)->queue_pair_size =
vmci_trans(psk)->queue_pair_size;
@@ -1629,29 +1588,57 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
return 0;
}
-static void vmci_transport_destruct(struct vsock_sock *vsk)
+static void vmci_transport_free_resources(struct list_head *transport_list)
{
- if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
- vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
- vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
- }
+ while (!list_empty(transport_list)) {
+ struct vmci_transport *transport =
+ list_first_entry(transport_list, struct vmci_transport,
+ elem);
+ list_del(&transport->elem);
- if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
- vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
- vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
- }
+ if (transport->detach_sub_id != VMCI_INVALID_ID) {
+ vmci_event_unsubscribe(transport->detach_sub_id);
+ transport->detach_sub_id = VMCI_INVALID_ID;
+ }
- if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
- vmci_qpair_detach(&vmci_trans(vsk)->qpair);
- vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
- vmci_trans(vsk)->produce_size = 0;
- vmci_trans(vsk)->consume_size = 0;
+ if (!vmci_handle_is_invalid(transport->qp_handle)) {
+ vmci_qpair_detach(&transport->qpair);
+ transport->qp_handle = VMCI_INVALID_HANDLE;
+ transport->produce_size = 0;
+ transport->consume_size = 0;
+ }
+
+ kfree(transport);
}
+}
+
+static void vmci_transport_cleanup(struct work_struct *work)
+{
+ LIST_HEAD(pending);
+
+ spin_lock_bh(&vmci_transport_cleanup_lock);
+ list_replace_init(&vmci_transport_cleanup_list, &pending);
+ spin_unlock_bh(&vmci_transport_cleanup_lock);
+ vmci_transport_free_resources(&pending);
+}
+
+static void vmci_transport_destruct(struct vsock_sock *vsk)
+{
+ /* Ensure that the detach callback doesn't use the sk/vsk
+ * we are about to destruct.
+ */
+ spin_lock_bh(&vmci_trans(vsk)->lock);
+ vmci_trans(vsk)->sk = NULL;
+ spin_unlock_bh(&vmci_trans(vsk)->lock);
if (vmci_trans(vsk)->notify_ops)
vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
- kfree(vsk->trans);
+ spin_lock_bh(&vmci_transport_cleanup_lock);
+ list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
+ spin_unlock_bh(&vmci_transport_cleanup_lock);
+ schedule_work(&vmci_transport_cleanup_work);
+
vsk->trans = NULL;
}
@@ -2146,6 +2133,9 @@ module_init(vmci_transport_init);
static void __exit vmci_transport_exit(void)
{
+ cancel_work_sync(&vmci_transport_cleanup_work);
+ vmci_transport_free_resources(&vmci_transport_cleanup_list);
+
if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
if (vmci_datagram_destroy_handle(
vmci_transport_stream_handle) != VMCI_SUCCESS)
@@ -2164,6 +2154,7 @@ module_exit(vmci_transport_exit);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
+MODULE_VERSION("1.0.2.0-k");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("vmware_vsock");
MODULE_ALIAS_NETPROTO(PF_VSOCK);
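
vmci_transport_destruct() above no longer unsubscribes events and frees the transport inline; it clears the socket back-pointer under the transport lock, queues the transport on a spinlock-protected global list, and lets a work item release the resources in process context (with the module exit path flushing that work). A condensed sketch of the defer-to-workqueue pattern, using hypothetical structure and function names around the real list/workqueue APIs:

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>
    #include <linux/workqueue.h>

    struct deferred_item {
            struct list_head elem;
            /* resources that must only be released in process context */
    };

    static LIST_HEAD(cleanup_list);
    static DEFINE_SPINLOCK(cleanup_lock);

    static void cleanup_fn(struct work_struct *work)
    {
            LIST_HEAD(pending);
            struct deferred_item *item, *tmp;

            /* take the whole backlog in one shot, then free it unlocked */
            spin_lock_bh(&cleanup_lock);
            list_replace_init(&cleanup_list, &pending);
            spin_unlock_bh(&cleanup_lock);

            list_for_each_entry_safe(item, tmp, &pending, elem) {
                    list_del(&item->elem);
                    kfree(item);
            }
    }
    static DECLARE_WORK(cleanup_work, cleanup_fn);

    /* Called from a context (e.g. socket destruct) that must not block */
    static void defer_free(struct deferred_item *item)
    {
            spin_lock_bh(&cleanup_lock);
            list_add(&item->elem, &cleanup_list);
            spin_unlock_bh(&cleanup_lock);
            schedule_work(&cleanup_work);
    }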
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index ce6c962..2ad46f3 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -119,10 +119,12 @@ struct vmci_transport {
u64 queue_pair_size;
u64 queue_pair_min_size;
u64 queue_pair_max_size;
- u32 attach_sub_id;
u32 detach_sub_id;
union vmci_transport_notify notify;
struct vmci_transport_notify_ops *notify_ops;
+ struct list_head elem;
+ struct sock *sk;
+ spinlock_t lock; /* protects sk. */
};
int vmci_transport_register(void);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 60ce701..ad7f5b3 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -330,8 +330,10 @@ resume:
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
- if (inner_mode == NULL)
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
goto drop;
+ }
}
if (inner_mode->input(x, skb)) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a8de9e3..805681a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -31,6 +31,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#endif
+#include <asm/unaligned.h>
static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
{
@@ -728,7 +729,9 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
- memcpy(&p->stats, &x->stats, sizeof(p->stats));
+ put_unaligned(x->stats.replay_window, &p->stats.replay_window);
+ put_unaligned(x->stats.replay, &p->stats.replay);
+ put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed);
memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr));
p->mode = x->props.mode;
p->replay_window = x->props.replay_window;
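
copy_to_user_state() switches from a single memcpy of the stats block to per-field put_unaligned() stores, since the corresponding members of struct xfrm_usersa_info need not be naturally aligned in the netlink payload on every architecture. A small sketch of the same idea against a deliberately packed destination; the structures below are illustrative, not the uapi ones:

    #include <asm/unaligned.h>
    #include <linux/types.h>

    struct demo_stats {
            __u32 replay_window;
            __u32 replay;
            __u32 integrity_failed;
    };

    struct demo_stats_packed {
            __u8  pad;              /* pushes the __u32 members off alignment */
            __u32 replay_window;
            __u32 replay;
            __u32 integrity_failed;
    } __attribute__((packed));

    static void demo_copy_stats(struct demo_stats_packed *dst,
                                const struct demo_stats *src)
    {
            put_unaligned(src->replay_window, &dst->replay_window);
            put_unaligned(src->replay, &dst->replay);
            put_unaligned(src->integrity_failed, &dst->integrity_failed);
    }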
@@ -1928,8 +1931,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
+ struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
+ struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
- if (!lt && !rp && !re)
+ if (!lt && !rp && !re && !et && !rt)
return err;
/* pedantic mode - thou shalt sayeth replaceth */