From 09605cc12c07830659a19b266503795c511a2060 Mon Sep 17 00:00:00 2001 From: Bastian Stender Date: Fri, 13 Nov 2015 11:40:34 +0100 Subject: net ipv4: use preferred log methods Replace printk calls with preferred unconditional log method calls to keep kernel messages clean. Added newline to "too small MTU" message. Signed-off-by: Bastian Stender Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 73 ++++++++++---------------- net/ipv4/netfilter/arp_tables.c | 6 +-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_snmp_basic.c | 22 ++++---- 4 files changed, 44 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 0bc7412..e86e8a9 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -65,15 +65,6 @@ #include #include -/* Define this to allow debugging output */ -#undef IPCONFIG_DEBUG - -#ifdef IPCONFIG_DEBUG -#define DBG(x) printk x -#else -#define DBG(x) do { } while(0) -#endif - #if defined(CONFIG_IP_PNP_DHCP) #define IPCONFIG_DHCP #endif @@ -227,7 +218,7 @@ static int __init ic_open_devs(void) if (dev->mtu >= 364) able |= IC_BOOTP; else - pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small", + pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small\n", dev->name, dev->mtu); if (!(dev->flags & IFF_NOARP)) able |= IC_RARP; @@ -254,8 +245,8 @@ static int __init ic_open_devs(void) else d->xid = 0; ic_proto_have_if |= able; - DBG(("IP-Config: %s UP (able=%d, xid=%08x)\n", - dev->name, able, d->xid)); + pr_debug("IP-Config: %s UP (able=%d, xid=%08x)\n", + dev->name, able, d->xid); } } @@ -311,7 +302,7 @@ static void __init ic_close_devs(void) next = d->next; dev = d->dev; if (dev != ic_dev && !netdev_uses_dsa(dev)) { - DBG(("IP-Config: Downing %s\n", dev->name)); + pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } kfree(d); @@ -464,7 +455,8 @@ static int __init ic_defaults(void) &ic_myaddr); return -1; } - printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask); + pr_notice("IP-Config: Guessing netmask %pI4\n", + &ic_netmask); } return 0; @@ -675,9 +667,7 @@ ic_dhcp_init_options(u8 *options) u8 *e = options; int len; -#ifdef IPCONFIG_DEBUG - printk("DHCP: Sending message type %d\n", mt); -#endif + pr_debug("DHCP: Sending message type %d\n", mt); memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */ e += 4; @@ -847,7 +837,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d else if (dev->type == ARPHRD_FDDI) b->htype = ARPHRD_ETHER; else { - printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name); + pr_warn("Unknown ARP type 0x%04x for device %s\n", dev->type, + dev->name); b->htype = dev->type; /* can cause undefined behavior */ } @@ -904,14 +895,12 @@ static void __init ic_do_bootp_ext(u8 *ext) int i; __be16 mtu; -#ifdef IPCONFIG_DEBUG u8 *c; - printk("DHCP/BOOTP: Got extension %d:",*ext); + pr_debug("DHCP/BOOTP: Got extension %d:", *ext); for (c=ext+2; cyour_ip; ic_servaddr = server_id; -#ifdef IPCONFIG_DEBUG - printk("DHCP: Offered address %pI4 by server %pI4\n", - &ic_myaddr, &b->iph.saddr); -#endif + pr_debug("DHCP: Offered address %pI4 by server %pI4\n", + &ic_myaddr, &b->iph.saddr); /* The DHCP indicated server address takes * precedence over the bootp header one if * they are different. @@ -1254,13 +1239,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - pr_cont(","); + pr_notice(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - pr_cont(" OK\n"); + pr_notice(" OK\n"); break; } @@ -1268,7 +1253,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - pr_cont(" timed out!\n"); + pr_notice(" timed out!\n"); break; } @@ -1278,7 +1263,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - pr_cont("."); + pr_notice("."); } #ifdef IPCONFIG_BOOTP @@ -1295,11 +1280,11 @@ static int __init ic_dynamic(void) return -1; } - printk("IP-Config: Got %s answer from %pI4, ", + pr_info("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" - : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_addrservaddr); - pr_cont("my address is %pI4\n", &ic_myaddr); + : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), + &ic_addrservaddr); + pr_info("my address is %pI4\n", &ic_myaddr); return 0; } @@ -1426,7 +1411,7 @@ static int __init ip_auto_config(void) if (!ic_enable) return 0; - DBG(("IP-Config: Entered.\n")); + pr_debug("IP-Config: Entered.\n"); #ifdef IPCONFIG_DYNAMIC try_try_again: #endif @@ -1548,8 +1533,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_cont("\n"); + pr_info(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_info("\n"); #endif /* !SILENT */ return 0; @@ -1585,7 +1570,7 @@ static int __init ic_proto_name(char *name) return 1; *v = 0; if (kstrtou8(client_id, 0, dhcp_client_identifier)) - DBG("DHCP: Invalid client identifier type\n"); + pr_debug("DHCP: Invalid client identifier type\n"); strncpy(dhcp_client_identifier + 1, v + 1, 251); *v = ','; } @@ -1644,7 +1629,7 @@ static int __init ip_auto_config_setup(char *addrs) if ((cp = strchr(ip, ':'))) *cp++ = '\0'; if (strlen(ip) > 0) { - DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip)); + pr_debug("IP-Config: Parameter #%d: `%s'\n", num, ip); switch (num) { case 0: if ((ic_myaddr = in_aton(ip)) == ANY) @@ -1716,7 +1701,7 @@ static int __init vendor_class_identifier_setup(char *addrs) if (strlcpy(vendor_class_identifier, addrs, sizeof(vendor_class_identifier)) >= sizeof(vendor_class_identifier)) - pr_warn("DHCP: vendorclass too long, truncated to \"%s\"", + pr_warn("DHCP: vendorclass too long, truncated to \"%s\"\n", vendor_class_identifier); return 1; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 11dccba..b488cac 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -38,13 +38,13 @@ MODULE_DESCRIPTION("arptables core"); /*#define DEBUG_ARP_TABLES_USER*/ #ifdef DEBUG_ARP_TABLES -#define dprintf(format, args...) printk(format , ## args) +#define dprintf(format, args...) pr_debug(format, ## args) #else #define dprintf(format, args...) #endif #ifdef DEBUG_ARP_TABLES_USER -#define duprintf(format, args...) printk(format , ## args) +#define duprintf(format, args...) pr_debug(format, ## args) #else #define duprintf(format, args...) #endif @@ -1905,7 +1905,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); + pr_info("arp_tables: (C) 2002 David S. Miller\n"); return 0; err4: diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 461ca92..e3c46e8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -451,7 +451,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { - printk(KERN_ERR "Unable to register netfilter socket option\n"); + pr_err("Unable to register netfilter socket option\n"); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ddb894a..c9b52c3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1048,7 +1048,7 @@ static int snmp_parse_mangle(unsigned char *msg, if (!asn1_uint_decode (&ctx, end, &vers)) return 0; if (debug > 1) - printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1); + pr_debug("bsalg: snmp version: %u\n", vers + 1); if (vers > 1) return 1; @@ -1064,10 +1064,10 @@ static int snmp_parse_mangle(unsigned char *msg, if (debug > 1) { unsigned int i; - printk(KERN_DEBUG "bsalg: community: "); + pr_debug("bsalg: community: "); for (i = 0; i < comm.len; i++) - printk("%c", comm.data[i]); - printk("\n"); + pr_cont("%c", comm.data[i]); + pr_cont("\n"); } kfree(comm.data); @@ -1091,9 +1091,9 @@ static int snmp_parse_mangle(unsigned char *msg, }; if (pdutype > SNMP_PDU_TRAP2) - printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype); + pr_debug("bsalg: bad pdu type %u\n", pdutype); else - printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]); + pr_debug("bsalg: pdu: %s\n", pdus[pdutype]); } if (pdutype != SNMP_PDU_RESPONSE && pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) @@ -1119,7 +1119,7 @@ static int snmp_parse_mangle(unsigned char *msg, return 0; if (debug > 1) - printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u " + pr_debug("bsalg: request: id=0x%lx error_status=%u " "error_index=%u\n", req.id, req.error_status, req.error_index); } @@ -1145,13 +1145,13 @@ static int snmp_parse_mangle(unsigned char *msg, } if (debug > 1) { - printk(KERN_DEBUG "bsalg: object: "); + pr_debug("bsalg: object: "); for (i = 0; i < obj->id_len; i++) { if (i > 0) - printk("."); - printk("%lu", obj->id[i]); + pr_cont("."); + pr_cont("%lu", obj->id[i]); } - printk(": type=%u\n", obj->type); + pr_cont(": type=%u\n", obj->type); } -- cgit v1.1 From 52bd2d62ce6758d811edcbd2256eb9ea7f6a56cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:50 -0800 Subject: net: better skb->sender_cpu and skb->napi_id cohabitation skb->sender_cpu and skb->napi_id share a common storage, and we had various bugs about this. We had to call skb_sender_cpu_clear() in some places to not leave a prior skb->napi_id and fool netdev_pick_tx() As suggested by Alexei, we could split the space so that these errors can not happen. 0 value being reserved as the common (not initialized) value, let's reserve [1 .. NR_CPUS] range for valid sender_cpu, and [NR_CPUS+1 .. ~0U] for valid napi_id. This will allow proper busy polling support over tunnels. Signed-off-by: Eric Dumazet Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/dev.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ae00b89..2582c24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -182,7 +182,7 @@ EXPORT_SYMBOL(dev_base_lock); /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); -static unsigned int napi_gen_id; +static unsigned int napi_gen_id = NR_CPUS; static DEFINE_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; @@ -3021,7 +3021,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, int queue_index = 0; #ifdef CONFIG_XPS - if (skb->sender_cpu == 0) + u32 sender_cpu = skb->sender_cpu - 1; + + if (sender_cpu >= (u32)NR_CPUS) skb->sender_cpu = raw_smp_processor_id() + 1; #endif @@ -4676,25 +4678,22 @@ EXPORT_SYMBOL_GPL(napi_by_id); void napi_hash_add(struct napi_struct *napi) { - if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + return; - spin_lock(&napi_hash_lock); + spin_lock(&napi_hash_lock); - /* 0 is not a valid id, we also skip an id that is taken - * we expect both events to be extremely rare - */ - napi->napi_id = 0; - while (!napi->napi_id) { - napi->napi_id = ++napi_gen_id; - if (napi_by_id(napi->napi_id)) - napi->napi_id = 0; - } + /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + do { + if (unlikely(++napi_gen_id < NR_CPUS + 1)) + napi_gen_id = NR_CPUS + 1; + } while (napi_by_id(napi_gen_id)); + napi->napi_id = napi_gen_id; - hlist_add_head_rcu(&napi->napi_hash_node, - &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); - spin_unlock(&napi_hash_lock); - } + spin_unlock(&napi_hash_lock); } EXPORT_SYMBOL_GPL(napi_hash_add); -- cgit v1.1 From 02d62e86fe892c59a1259d089d4d16ac76977a37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:52 -0800 Subject: net: un-inline sk_busy_loop() There is really little gain from inlining this big function. We'll soon make it even bigger in following patches. This means we no longer need to export napi_by_id() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2582c24..74a816b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -4663,7 +4664,7 @@ void napi_complete_done(struct napi_struct *n, int work_done) EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *napi_by_id(unsigned int napi_id) +static struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; @@ -4674,7 +4675,52 @@ struct napi_struct *napi_by_id(unsigned int napi_id) return NULL; } -EXPORT_SYMBOL_GPL(napi_by_id); + +#if defined(CONFIG_NET_RX_BUSY_POLL) +bool sk_busy_loop(struct sock *sk, int nonblock) +{ + unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + const struct net_device_ops *ops; + struct napi_struct *napi; + int rc = false; + + /* + * rcu read lock for napi hash + * bh so we don't race with net_rx_action + */ + rcu_read_lock_bh(); + + napi = napi_by_id(sk->sk_napi_id); + if (!napi) + goto out; + + ops = napi->dev->netdev_ops; + if (!ops->ndo_busy_poll) + goto out; + + do { + rc = ops->ndo_busy_poll(napi); + + if (rc == LL_FLUSH_FAILED) + break; /* permanent failure */ + + if (rc > 0) + /* local bh are disabled so it is ok to use _BH */ + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + cpu_relax(); + + } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && + !need_resched() && !busy_loop_timeout(end_time)); + + rc = !skb_queue_empty(&sk->sk_receive_queue); +out: + rcu_read_unlock_bh(); + return rc; +} +EXPORT_SYMBOL(sk_busy_loop); + +#endif /* CONFIG_NET_RX_BUSY_POLL */ void napi_hash_add(struct napi_struct *napi) { -- cgit v1.1 From 2a028ecb76497d05e5cd4e3e8b09d965cac2e3f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:53 -0800 Subject: net: allow BH servicing in sk_busy_loop() Instead of blocking BH in whole sk_busy_loop(), block them only around ->ndo_busy_poll() calls. This has many benefits. 1) allow tunneled traffic to use busy poll as well as native traffic. Tunnels handlers usually call netif_rx() and depend on net_rx_action() being run (from sofirq handler) 2) allow RFS/RPS being used (sending IPI to other cpus if needed) 3) use the 'lets burn cpu cycles' budget to do useful work (like TX completions, timers, RCU callbacks...) 4) reduce BH latencies, making busy poll a better citizen. Tested: Tested with SIT tunnel lpaa5:~# echo 0 >/proc/sys/net/core/busy_read lpaa5:~# ./netperf -H 2002:af6:786::1 -t TCP_RR MIGRATED TCP REQUEST/RESPONSE TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:786::1 () port 0 AF_INET6 : first burst 0 Local /Remote Socket Size Request Resp. Elapsed Trans. Send Recv Size Size Time Rate bytes Bytes bytes bytes secs. per sec 16384 87380 1 1 10.00 37373.93 16384 87380 Now enable busy poll on both hosts lpaa5:~# echo 70 >/proc/sys/net/core/busy_read lpaa6:~# echo 70 >/proc/sys/net/core/busy_read lpaa5:~# ./netperf -H 2002:af6:786::1 -t TCP_RR MIGRATED TCP REQUEST/RESPONSE TEST from ::0 (::) port 0 AF_INET6 to 2002:af6:786::1 () port 0 AF_INET6 : first burst 0 Local /Remote Socket Size Request Resp. Elapsed Trans. Send Recv Size Size Time Rate bytes Bytes bytes bytes secs. per sec 16384 87380 1 1 10.00 58314.77 16384 87380 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 74a816b..2002eec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4684,11 +4684,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock) struct napi_struct *napi; int rc = false; - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); + rcu_read_lock(); napi = napi_by_id(sk->sk_napi_id); if (!napi) @@ -4699,23 +4695,23 @@ bool sk_busy_loop(struct sock *sk, int nonblock) goto out; do { + local_bh_disable(); rc = ops->ndo_busy_poll(napi); + if (rc > 0) + NET_ADD_STATS_BH(sock_net(sk), + LINUX_MIB_BUSYPOLLRXPACKETS, rc); + local_bh_enable(); if (rc == LL_FLUSH_FAILED) break; /* permanent failure */ - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); cpu_relax(); - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); rc = !skb_queue_empty(&sk->sk_receive_queue); out: - rcu_read_unlock_bh(); + rcu_read_unlock(); return rc; } EXPORT_SYMBOL(sk_busy_loop); -- cgit v1.1 From ce6aea93f7510437dde625b77a7a2f4d20b72660 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:54 -0800 Subject: net: network drivers no longer need to implement ndo_busy_poll() Instead of having to implement complex ndo_busy_poll() method, drivers can simply rely on NAPI poll logic. Busy polling gains are mainly coming from polling itself, not on exact details on how we poll the device. ndo_busy_poll() if implemented can avoid touching napi state, but it adds extra synchronization between normal napi->poll() and busy poll handler, slowing down the common path (non busy polling) with extra atomic operations. In practice few drivers ever got busy poll because of the complexity. We could go one step further, and make busy polling available for all NAPI drivers, but this would require that all netif_napi_del() calls are done in process context so that we can call synchronize_rcu(). Full audit would be required. Before this is done, a driver still needs to call : - skb_mark_napi_id() for each skb provided to the stack. - napi_hash_add() and napi_hash_del() to allocate a napi_id per napi struct. - Make sure RCU grace period is respected after napi_hash_del() before memory containing napi structure is freed. Followup patch implements busy poll for mlx5 driver as an example. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2002eec..9300961 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4677,10 +4677,11 @@ static struct napi_struct *napi_by_id(unsigned int napi_id) } #if defined(CONFIG_NET_RX_BUSY_POLL) +#define BUSY_POLL_BUDGET 8 bool sk_busy_loop(struct sock *sk, int nonblock) { unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; + int (*busy_poll)(struct napi_struct *dev); struct napi_struct *napi; int rc = false; @@ -4690,13 +4691,27 @@ bool sk_busy_loop(struct sock *sk, int nonblock) if (!napi) goto out; - ops = napi->dev->netdev_ops; - if (!ops->ndo_busy_poll) - goto out; + /* Note: ndo_busy_poll method is optional in linux-4.5 */ + busy_poll = napi->dev->netdev_ops->ndo_busy_poll; do { + rc = 0; local_bh_disable(); - rc = ops->ndo_busy_poll(napi); + if (busy_poll) { + rc = busy_poll(napi); + } else if (napi_schedule_prep(napi)) { + void *have = netpoll_poll_lock(napi); + + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + rc = napi->poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi); + if (rc == BUSY_POLL_BUDGET) { + napi_complete_done(napi, rc); + napi_schedule(napi); + } + } + netpoll_poll_unlock(have); + } if (rc > 0) NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_BUSYPOLLRXPACKETS, rc); -- cgit v1.1 From 93f93a4404159ecf7e9148f5ad0718ec702ac4cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:59 -0800 Subject: net: move skb_mark_napi_id() into core networking stack We would like to automatically provide busy polling support to all NAPI drivers, without them having to implement anything. skb_mark_napi_id() can be called from napi_gro_receive() and napi_get_frags(). Few drivers are still calling skb_mark_napi_id() because they use netif_receive_skb(). They should eventually call napi_gro_receive() instead. I will leave this to drivers maintainers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9300961..83b4874 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4356,6 +4356,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + skb_mark_napi_id(skb, napi); trace_napi_gro_receive_entry(skb); skb_gro_reset_offset(skb); @@ -4390,6 +4391,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); napi->skb = skb; + skb_mark_napi_id(skb, napi); } return skb; } -- cgit v1.1 From d64b5e85bfe2fe4c790abcbd16d9ae32391ddd7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:00 -0800 Subject: net: add netif_tx_napi_add() netif_tx_napi_add() is a variant of netif_napi_add() It should be used by drivers that use a napi structure to exclusively poll TX. We do not want to add this kind of napi in napi_hash[] in following patches, adding generic busy polling to all NAPI drivers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 83b4874..ff58a8b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4737,7 +4737,8 @@ EXPORT_SYMBOL(sk_busy_loop); void napi_hash_add(struct napi_struct *napi) { - if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || + test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) return; spin_lock(&napi_hash_lock); -- cgit v1.1 From 6180d9de61a5c461f9e3efef5417a844701dbbb2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:01 -0800 Subject: net: move napi_hash[] into read mostly section We do not often add/delete a napi context. Moving napi_hash[] into read mostly section avoids potential false sharing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ff58a8b..02dfbd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -184,7 +184,7 @@ EXPORT_SYMBOL(dev_base_lock); static DEFINE_SPINLOCK(napi_hash_lock); static unsigned int napi_gen_id = NR_CPUS; -static DEFINE_HASHTABLE(napi_hash, 8); +static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; -- cgit v1.1 From 34cbe27e811c591c854a39c0dee1b461bb796953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:02 -0800 Subject: net: napi_hash_del() returns a boolean status napi_hash_del() will soon be used from both drivers (if they want) or core networking stack. Callers are responsibles to ensure an RCU grace period is respected before freeing napi structure : napi_hash_del() can signal if this RCU grace period is needed or not. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 02dfbd9..59dddac 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4760,14 +4760,18 @@ EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi */ -void napi_hash_del(struct napi_struct *napi) +bool napi_hash_del(struct napi_struct *napi) { + bool rcu_sync_needed = false; + spin_lock(&napi_hash_lock); - if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) { + rcu_sync_needed = true; hlist_del_rcu(&napi->napi_hash_node); - + } spin_unlock(&napi_hash_lock); + return rcu_sync_needed; } EXPORT_SYMBOL_GPL(napi_hash_del); -- cgit v1.1 From 93d05d4a320cb16712bb3d57a9658f395d8cecb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:31:03 -0800 Subject: net: provide generic busy polling to all NAPI drivers NAPI drivers no longer need to observe a particular protocol to benefit from busy polling (CONFIG_NET_RX_BUSY_POLL=y) napi_hash_add() and napi_hash_del() are automatically called from core networking stack, respectively from netif_napi_add() and netif_napi_del() This patch depends on free_netdev() and netif_napi_del() being called from process context, which seems to be the norm. Drivers might still prefer to call napi_hash_del() on their own, since they might combine all the rcu grace periods into a single one, knowing their NAPI structures lifetime, while core networking stack has no idea of a possible combining. Once this patch proves to not bring serious regressions, we will cleanup drivers to either remove napi_hash_del() or provide appropriate rcu grace periods combining. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 59dddac..41cef3e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4807,6 +4807,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->poll_owner = -1; #endif set_bit(NAPI_STATE_SCHED, &napi->state); + napi_hash_add(napi); } EXPORT_SYMBOL(netif_napi_add); @@ -4826,8 +4827,12 @@ void napi_disable(struct napi_struct *n) } EXPORT_SYMBOL(napi_disable); +/* Must be called in process context */ void netif_napi_del(struct napi_struct *napi) { + might_sleep(); + if (napi_hash_del(napi)) + synchronize_net(); list_del_init(&napi->dev_list); napi_free_frags(napi); @@ -7227,11 +7232,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs); * This function does the last stage of destroying an allocated device * interface. The reference to the device object is released. * If this is the last reference then it will be freed. + * Must be called in process context. */ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + might_sleep(); netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); -- cgit v1.1 From 70f56aa2ee7142a53a8c5285a685c55987a1a990 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:39:49 +0100 Subject: Bluetooth: Move BR/EDR default events behind its features There are some BR/EDR default events for Bluetooth 1.2 or later controllers that are not conditional on their features being present. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 62edbf1..db42365 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -531,10 +531,6 @@ static void hci_setup_event_mask(struct hci_request *req) if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); @@ -555,6 +551,14 @@ static void hci_setup_event_mask(struct hci_request *req) if (lmp_inq_rssi_capable(hdev)) events[4] |= 0x02; /* Inquiry Result with RSSI */ + if (lmp_ext_feat_capable(hdev)) + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + + if (lmp_esco_capable(hdev)) { + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ -- cgit v1.1 From 7d26f5c4be620a384c3c9c7590cae2828d50626f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:39:51 +0100 Subject: Bluetooth: Build LE event mask based on supported commands The LE event mask should be created based on the commands that are actually supported by the controller. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db42365..ea95075 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -781,7 +781,6 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) u8 events[8]; memset(events, 0, sizeof(events)); - events[0] = 0x0f; if (hdev->le_features[0] & HCI_LE_ENCRYPTION) events[0] |= 0x10; /* LE Long Term Key Request */ @@ -808,6 +807,34 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) * Report */ + /* If the controller supports the LE Set Scan Enable command, + * enable the corresponding advertising report event. + */ + if (hdev->commands[26] & 0x08) + events[0] |= 0x02; /* LE Advertising Report */ + + /* If the controller supports the LE Create Connection + * command, enable the corresponding event. + */ + if (hdev->commands[26] & 0x10) + events[0] |= 0x01; /* LE Connection Complete */ + + /* If the controller supports the LE Connection Update + * command, enable the corresponding event. + */ + if (hdev->commands[27] & 0x04) + events[0] |= 0x04; /* LE Connection Update + * Complete + */ + + /* If the controller supports the LE Read Remote Used Features + * command, enable the corresponding event. + */ + if (hdev->commands[27] & 0x20) + events[0] |= 0x08; /* LE Read Remote Used + * Features Complete + */ + /* If the controller supports the LE Read Local P-256 * Public Key command, enable the corresponding event. */ -- cgit v1.1 From 9fe759ceedcdc0c43234382425a158c3f31e6909 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 1 Nov 2015 09:45:22 +0100 Subject: Bluetooth: Fix issue with HCI_QUIRK_FIXUP_INQUIRY_MODE and event mask When setting the event mask, the HCI_QUIRK_FIXUP_INQUIRY_MODE quirk is required to be checked so that the Inquiry Result with RSSI event gets actually enabled. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ea95075..556c173 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -548,7 +548,8 @@ static void hci_setup_event_mask(struct hci_request *req) } } - if (lmp_inq_rssi_capable(hdev)) + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) events[4] |= 0x02; /* Inquiry Result with RSSI */ if (lmp_ext_feat_capable(hdev)) -- cgit v1.1 From 5c3d3b4c4f3df584a90301b944580bf4c1974f12 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Nov 2015 07:17:23 +0100 Subject: Bluetooth: Make LE only events conditional on supported commands For the LE only controllers, there are events that should not be enabled if the corresponding command is not supported. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 556c173..97734ca 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -534,13 +534,27 @@ static void hci_setup_event_mask(struct hci_request *req) } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); - events[0] |= 0x10; /* Disconnection Complete */ - events[1] |= 0x08; /* Read Remote Version Information Complete */ events[1] |= 0x20; /* Command Complete */ events[1] |= 0x40; /* Command Status */ events[1] |= 0x80; /* Hardware Error */ - events[2] |= 0x04; /* Number of Completed Packets */ - events[3] |= 0x02; /* Data Buffer Overflow */ + + /* If the controller supports the Disconnect command, enable + * the corresponding event. In addition enable packet flow + * control related events. + */ + if (hdev->commands[0] & 0x20) { + events[0] |= 0x10; /* Disconnection Complete */ + events[2] |= 0x04; /* Number of Completed Packets */ + events[3] |= 0x02; /* Data Buffer Overflow */ + } + + /* If the controller supports the Read Remote Version + * Information command, enable the corresponding event. + */ + if (hdev->commands[2] & 0x80) + events[1] |= 0x08; /* Read Remote Version Information + * Complete + */ if (hdev->le_features[0] & HCI_LE_ENCRYPTION) { events[0] |= 0x80; /* Encryption Change */ -- cgit v1.1 From d79f34e32b833cb8651dfd4209d36cf99c89d1d3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 5 Nov 2015 07:10:00 +0100 Subject: Bluetooth: Use new hci_skb_pkt_* wrappers for core packet handling The new hci_skb_pkt_* wrappers only help if they are used consistently in the Bluetooth subsystem. So first convert the core packet handling. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 23 ++++++++++++----------- net/bluetooth/hci_request.c | 4 ++-- net/bluetooth/hci_sock.c | 38 +++++++++++++++++++------------------- 3 files changed, 33 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 97734ca..db26cbd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3566,7 +3566,7 @@ int hci_reset_dev(struct hci_dev *hdev) if (!skb) return -ENOMEM; - bt_cb(skb)->pkt_type = HCI_EVENT_PKT; + hci_skb_pkt_type(skb) = HCI_EVENT_PKT; memcpy(skb_put(skb, 3), hw_err, 3); /* Send Hardware Error to upper stack */ @@ -3583,9 +3583,9 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) return -ENXIO; } - if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) { kfree_skb(skb); return -EINVAL; } @@ -3607,7 +3607,7 @@ EXPORT_SYMBOL(hci_recv_frame); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb) { /* Mark as diagnostic packet */ - bt_cb(skb)->pkt_type = HCI_DIAG_PKT; + hci_skb_pkt_type(skb) = HCI_DIAG_PKT; /* Time stamp */ __net_timestamp(skb); @@ -3649,7 +3649,8 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { int err; - BT_DBG("%s type %d len %d", hdev->name, bt_cb(skb)->pkt_type, skb->len); + BT_DBG("%s type %d len %d", hdev->name, hci_skb_pkt_type(skb), + skb->len); /* Time stamp */ __net_timestamp(skb); @@ -3762,7 +3763,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, skb->len = skb_headlen(skb); skb->data_len = 0; - bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; + hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; switch (hdev->dev_type) { case HCI_BREDR: @@ -3802,7 +3803,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, do { skb = list; list = list->next; - bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; + hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); @@ -3840,7 +3841,7 @@ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) skb_reset_transport_header(skb); memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE); - bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; + hci_skb_pkt_type(skb) = HCI_SCODATA_PKT; skb_queue_tail(&conn->data_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); @@ -4499,7 +4500,7 @@ static void hci_rx_work(struct work_struct *work) if (test_bit(HCI_INIT, &hdev->flags)) { /* Don't process data packets in this states. */ - switch (bt_cb(skb)->pkt_type) { + switch (hci_skb_pkt_type(skb)) { case HCI_ACLDATA_PKT: case HCI_SCODATA_PKT: kfree_skb(skb); @@ -4508,7 +4509,7 @@ static void hci_rx_work(struct work_struct *work) } /* Process frame */ - switch (bt_cb(skb)->pkt_type) { + switch (hci_skb_pkt_type(skb)) { case HCI_EVENT_PKT: BT_DBG("%s Event packet", hdev->name); hci_event_packet(hdev, skb); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 981f8a2..bdb17099 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -98,8 +98,8 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, BT_DBG("skb len %d", skb->len); - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->hci.opcode = opcode; + hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; + hci_skb_opcode(skb) = opcode; return skb; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b1eb8c0..235ad0f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -120,13 +120,13 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) /* Apply filter */ flt = &hci_pi(sk)->filter; - flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; + flt_type = hci_skb_pkt_type(skb) & HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; /* Extra filter for event packets only */ - if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT) + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT) return false; flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); @@ -170,19 +170,19 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { - if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_EVENT_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) + if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && + hci_skb_pkt_type(skb) != HCI_EVENT_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { if (!bt_cb(skb)->incoming) continue; - if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) + if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) continue; } else { /* Don't send frame to other channel types */ @@ -196,7 +196,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; /* Put type byte before the data */ - memcpy(skb_push(skb_copy, 1), &bt_cb(skb)->pkt_type, 1); + memcpy(skb_push(skb_copy, 1), &hci_skb_pkt_type(skb), 1); } nskb = skb_clone(skb_copy, GFP_ATOMIC); @@ -262,7 +262,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("hdev %p len %d", hdev, skb->len); - switch (bt_cb(skb)->pkt_type) { + switch (hci_skb_pkt_type(skb)) { case HCI_COMMAND_PKT: opcode = cpu_to_le16(HCI_MON_COMMAND_PKT); break; @@ -447,7 +447,7 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) bt_cb(skb)->incoming = 1; __net_timestamp(skb); - bt_cb(skb)->pkt_type = HCI_EVENT_PKT; + hci_skb_pkt_type(skb) = HCI_EVENT_PKT; hci_send_to_sock(hdev, skb); kfree_skb(skb); } @@ -1211,7 +1211,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } - bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); + hci_skb_pkt_type(skb) = *((unsigned char *) skb->data); skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { @@ -1220,16 +1220,16 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, * * However check that the packet type is valid. */ - if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + if (hci_skb_pkt_type(skb) != HCI_COMMAND_PKT && + hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) { err = -EINVAL; goto drop; } skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); - } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + } else if (hci_skb_pkt_type(skb) == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -1260,8 +1260,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } - if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + if (hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && + hci_skb_pkt_type(skb) != HCI_SCODATA_PKT) { err = -EINVAL; goto drop; } -- cgit v1.1 From 44d271377479c4d4fe7f2d07d188656684773fbd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 5 Nov 2015 09:31:40 +0200 Subject: Bluetooth: Compress the size of struct hci_ctrl We can reduce the size of the hci_ctrl struct by converting 'bool req_start' to 'u8 req_flags' and making the two function pointers a union (since only one is ever set at a time). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 14 +++++++------- net/bluetooth/hci_request.c | 10 +++++++--- net/bluetooth/hci_sock.c | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db26cbd..bc97fc6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3695,7 +3695,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -4392,7 +4392,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev) if (!skb) return true; - return bt_cb(skb)->hci.req_start; + return (bt_cb(skb)->hci.req_flags & HCI_REQ_START); } static void hci_resend_last(struct hci_dev *hdev) @@ -4452,20 +4452,20 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { - if (bt_cb(skb)->hci.req_start) { + if (bt_cb(skb)->hci.req_flags & HCI_REQ_START) { __skb_queue_head(&hdev->cmd_q, skb); break; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index bdb17099..5ba27c3 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -56,8 +56,12 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete, return -ENODATA; skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->hci.req_complete = complete; - bt_cb(skb)->hci.req_complete_skb = complete_skb; + if (complete) { + bt_cb(skb)->hci.req_complete = complete; + } else if (complete_skb) { + bt_cb(skb)->hci.req_complete_skb = complete_skb; + bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; + } spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -128,7 +132,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, } if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; bt_cb(skb)->hci.req_event = event; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 235ad0f..19b2301 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1249,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); -- cgit v1.1 From 1982162bbe20672941897566f2f42d51a306a155 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Nov 2015 07:42:20 +0100 Subject: Bluetooth: Add missing hci_skb_opcode for raw socket commands When HCI commands are injected via the raw socket, the core was not including the decoded opcode value. So ensure that it is actually set. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 19b2301..32caa62 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1242,6 +1242,11 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } + /* Since the opcode has already been extracted here, store + * a copy of the value for later use by the drivers. + */ + hci_skb_opcode(skb) = opcode; + if (ogf == 0x3f) { skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); -- cgit v1.1 From 0ebc181884e8f538c4786840ed4abef828d4dc9b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 6 Nov 2015 13:35:33 +0200 Subject: Bluetooth: Add clarifying comment why schedule_work is used It's not obvious why schedule_work is used instead of queue_work. Add a comment explaining why. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 85b82f7..fd6120a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -178,6 +178,10 @@ static void hci_connect_le_scan_remove(struct hci_conn *conn) hci_dev_hold(conn->hdev); hci_conn_get(conn); + /* Even though we hold a reference to the hdev, many other + * things might get cleaned up meanwhile, including the hdev's + * own workqueue, so we can't use that for scheduling. + */ schedule_work(&conn->le_scan_cleanup); } -- cgit v1.1 From 8528d3f738386706a6d2af05d7bdb542594bc95c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:11 +0100 Subject: Bluetooth: Fix casting coding style within HCI sockets The HCI sockets code has still some old casting coding style. Fix this to match with the rest of the code. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 32caa62..18a41ea 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -294,7 +294,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) return; /* Put header before the data */ - hdr = (void *) skb_push(skb_copy, HCI_MON_HDR_SIZE); + hdr = (void *)skb_push(skb_copy, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len); @@ -375,7 +375,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) __net_timestamp(skb); - hdr = (void *) skb_push(skb, HCI_MON_HDR_SIZE); + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); hdr->opcode = opcode; hdr->index = cpu_to_le16(hdev->id); hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); @@ -436,11 +436,11 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) if (!skb) return; - hdr = (void *) skb_put(skb, HCI_EVENT_HDR_SIZE); + hdr = (void *)skb_put(skb, HCI_EVENT_HDR_SIZE); hdr->evt = HCI_EV_STACK_INTERNAL; hdr->plen = sizeof(*ev) + dlen; - ev = (void *) skb_put(skb, sizeof(*ev) + dlen); + ev = (void *)skb_put(skb, sizeof(*ev) + dlen); ev->type = type; memcpy(ev->data, data, dlen); @@ -653,20 +653,20 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, return -EOPNOTSUPP; case HCIGETCONNINFO: - return hci_get_conn_info(hdev, (void __user *) arg); + return hci_get_conn_info(hdev, (void __user *)arg); case HCIGETAUTHINFO: - return hci_get_auth_info(hdev, (void __user *) arg); + return hci_get_auth_info(hdev, (void __user *)arg); case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; - return hci_sock_blacklist_add(hdev, (void __user *) arg); + return hci_sock_blacklist_add(hdev, (void __user *)arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) return -EPERM; - return hci_sock_blacklist_del(hdev, (void __user *) arg); + return hci_sock_blacklist_del(hdev, (void __user *)arg); } return -ENOIOCTLCMD; @@ -675,7 +675,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - void __user *argp = (void __user *) arg; + void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; int err; @@ -926,7 +926,7 @@ done: static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer) { - struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr; + struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr; struct sock *sk = sock->sk; struct hci_dev *hdev; int err = 0; @@ -991,8 +991,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, } } -static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -1211,7 +1211,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } - hci_skb_pkt_type(skb) = *((unsigned char *) skb->data); + hci_skb_pkt_type(skb) = skb->data[0]; skb_pull(skb, 1); if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { -- cgit v1.1 From dd31506d4aece48943802c2bca3f1f7d2e7266b4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:12 +0100 Subject: Bluetooth: Add support for sending system notes to monitor channel The monitor channel can be used to send generic system notes as text strings for debugging purposes. This adds the system note monitor code and uses it for including kernel and subsystem version into traces. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/af_bluetooth.c | 8 +++----- net/bluetooth/hci_sock.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a3bffd1..34c53d5 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -33,8 +33,6 @@ #include "selftest.h" -#define VERSION "2.21" - /* Bluetooth sockets */ #define BT_MAX_PROTO 8 static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; @@ -715,7 +713,7 @@ static int __init bt_init(void) sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", VERSION); + BT_INFO("Core ver %s", BT_SUBSYS_VERSION); err = bt_selftest(); if (err < 0) @@ -789,7 +787,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); -MODULE_VERSION(VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); +MODULE_VERSION(BT_SUBSYS_VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 18a41ea..710265c 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,8 @@ #include #include +#include +#include #include #include @@ -383,6 +385,29 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static void send_monitor_note(struct sock *sk, const char *text) +{ + size_t len = strlen(text); + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len + 1, GFP_ATOMIC); + if (!skb) + return; + + strcpy(skb_put(skb, len + 1), text); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); +} + static void send_monitor_replay(struct sock *sk) { struct hci_dev *hdev; @@ -872,6 +897,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + send_monitor_note(sk, "Linux version " UTS_RELEASE + " (" UTS_MACHINE ")"); + send_monitor_note(sk, "Bluetooth subsystem version " + BT_SUBSYS_VERSION); send_monitor_replay(sk); atomic_inc(&monitor_promisc); -- cgit v1.1 From ac71494934c475e3f51e5e3e64a12f57618d82a4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:13 +0100 Subject: Bluetooth: Add support for controller specific logging To enable controller specific logging, the userspace daemon has to have the ability to log per controller. To facilitate this support, provide a dedicated logging channel. Messages in this channel will be included in the monitor queue and with that also forwarded to monitoring tools along with the actual hardware traces. All messages from the logging channel are timestamped and with that allow an easy correlation between userspace messages and hardware events. This will increase the ability to debug problems faster. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_sock.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 710265c..41f579b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -906,6 +906,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&monitor_promisc); break; + case HCI_CHANNEL_LOGGING: + if (haddr.hci_dev != HCI_DEV_NONE) { + err = -EINVAL; + goto done; + } + + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } + break; + default: if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; @@ -1033,6 +1045,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (flags & MSG_OOB) return -EOPNOTSUPP; + if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING) + return -EOPNOTSUPP; + if (sk->sk_state == BT_CLOSED) return 0; @@ -1179,6 +1194,90 @@ done: return err; } +static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + struct hci_dev *hdev; + u16 index; + int err; + + /* The logging frame consists at minimum of the standard header, + * the priority byte, the ident length byte and at least one string + * terminator NUL byte. Anything shorter are invalid packets. + */ + if (len < sizeof(*hdr) + 3) + return -EINVAL; + + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return err; + + if (memcpy_from_msg(skb_put(skb, len), msg, len)) { + err = -EFAULT; + goto drop; + } + + hdr = (void *)skb->data; + + if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) { + err = -EINVAL; + goto drop; + } + + if (__le16_to_cpu(hdr->opcode) == 0x0000) { + __u8 priority = skb->data[sizeof(*hdr)]; + __u8 ident_len = skb->data[sizeof(*hdr) + 1]; + + /* Only the priorities 0-7 are valid and with that any other + * value results in an invalid packet. + * + * The priority byte is followed by an ident length byte and + * the NUL terminated ident string. Check that the ident + * length is not overflowing the packet and also that the + * ident string itself is NUL terminated. In case the ident + * length is zero, the length value actually doubles as NUL + * terminator identifier. + * + * The message follows the ident string (if present) and + * must be NUL terminated. Otherwise it is not a valid packet. + */ + if (priority > 7 || skb->data[len - 1] != 0x00 || + ident_len > len - sizeof(*hdr) - 3 || + skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) { + err = -EINVAL; + goto drop; + } + } else { + err = -EINVAL; + goto drop; + } + + index = __le16_to_cpu(hdr->index); + + if (index != MGMT_INDEX_NONE) { + hdev = hci_dev_get(index); + if (!hdev) { + err = -ENODEV; + goto drop; + } + } else { + hdev = NULL; + } + + hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); + err = len; + + if (hdev) + hci_dev_put(hdev); + +drop: + kfree_skb(skb); + return err; +} + static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1208,6 +1307,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto done; + case HCI_CHANNEL_LOGGING: + err = hci_logging_frame(sk, msg, len); + goto done; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); -- cgit v1.1 From 030e7f8141a262e32dc064d7cf12377d769d45c2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:53 +0200 Subject: Bluetooth: Remove unnecessary call to hci_update_background_scan The hci_conn_params_clear_all() function is only called from hci_unregister_dev() at which point it's completely futile to try to do any LE scanning updates. Simply remove this unnecessary function call. At the same time we can make the function static since it's only accessed from within the same c-file. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc97fc6..ea648e9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3070,15 +3070,13 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -void hci_conn_params_clear_all(struct hci_dev *hdev) +static void hci_conn_params_clear_all(struct hci_dev *hdev) { struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) hci_conn_params_free(params); - hci_update_background_scan(hdev); - BT_DBG("All LE connection parameters were removed"); } -- cgit v1.1 From be91cd05704d5a547de086d0e61c249ee62d2e13 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:54 +0200 Subject: Bluetooth: Move synchronous request handling into hci_request.c hci_request.c is a more natural place for the synchronous request handling. Furthermore, we will soon need access to some of the previously private-to-hci_core.c functions from hci_request.c. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 193 -------------------------------------------- net/bluetooth/hci_request.c | 184 +++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 11 +++ 3 files changed, 195 insertions(+), 193 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ea648e9..aa18ec7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -56,15 +56,6 @@ DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); -/* ----- HCI requests ----- */ - -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - -#define hci_req_lock(d) mutex_lock(&d->req_lock) -#define hci_req_unlock(d) mutex_unlock(&d->req_lock) - /* ---- HCI debugfs entries ---- */ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, @@ -198,190 +189,6 @@ static void hci_debugfs_create_basic(struct hci_dev *hdev) &vendor_diag_fops); } -/* ---- HCI requests ---- */ - -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb) -{ - BT_DBG("%s result 0x%2.2x", hdev->name, result); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; - if (skb) - hdev->req_skb = skb_get(skb); - wake_up_interruptible(&hdev->req_wait_q); - } -} - -static void hci_req_cancel(struct hci_dev *hdev, int err) -{ - BT_DBG("%s err 0x%2.2x", hdev->name, err); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = err; - hdev->req_status = HCI_REQ_CANCELED; - wake_up_interruptible(&hdev->req_wait_q); - } -} - -struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u8 event, u32 timeout) -{ - DECLARE_WAITQUEUE(wait, current); - struct hci_request req; - struct sk_buff *skb; - int err = 0; - - BT_DBG("%s", hdev->name); - - hci_req_init(&req, hdev); - - hci_req_add_ev(&req, opcode, plen, param, event); - - hdev->req_status = HCI_REQ_PEND; - - add_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - err = hci_req_run_skb(&req, hci_req_sync_complete); - if (err < 0) { - remove_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_RUNNING); - return ERR_PTR(err); - } - - schedule_timeout(timeout); - - remove_wait_queue(&hdev->req_wait_q, &wait); - - if (signal_pending(current)) - return ERR_PTR(-EINTR); - - switch (hdev->req_status) { - case HCI_REQ_DONE: - err = -bt_to_errno(hdev->req_result); - break; - - case HCI_REQ_CANCELED: - err = -hdev->req_result; - break; - - default: - err = -ETIMEDOUT; - break; - } - - hdev->req_status = hdev->req_result = 0; - skb = hdev->req_skb; - hdev->req_skb = NULL; - - BT_DBG("%s end: err %d", hdev->name, err); - - if (err < 0) { - kfree_skb(skb); - return ERR_PTR(err); - } - - if (!skb) - return ERR_PTR(-ENODATA); - - return skb; -} -EXPORT_SYMBOL(__hci_cmd_sync_ev); - -struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param, u32 timeout) -{ - return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); -} -EXPORT_SYMBOL(__hci_cmd_sync); - -/* Execute request and wait for completion. */ -static int __hci_req_sync(struct hci_dev *hdev, - void (*func)(struct hci_request *req, - unsigned long opt), - unsigned long opt, __u32 timeout) -{ - struct hci_request req; - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - BT_DBG("%s start", hdev->name); - - hci_req_init(&req, hdev); - - hdev->req_status = HCI_REQ_PEND; - - func(&req, opt); - - add_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_INTERRUPTIBLE); - - err = hci_req_run_skb(&req, hci_req_sync_complete); - if (err < 0) { - hdev->req_status = 0; - - remove_wait_queue(&hdev->req_wait_q, &wait); - set_current_state(TASK_RUNNING); - - /* ENODATA means the HCI request command queue is empty. - * This can happen when a request with conditionals doesn't - * trigger any commands to be sent. This is normal behavior - * and should not trigger an error return. - */ - if (err == -ENODATA) - return 0; - - return err; - } - - schedule_timeout(timeout); - - remove_wait_queue(&hdev->req_wait_q, &wait); - - if (signal_pending(current)) - return -EINTR; - - switch (hdev->req_status) { - case HCI_REQ_DONE: - err = -bt_to_errno(hdev->req_result); - break; - - case HCI_REQ_CANCELED: - err = -hdev->req_result; - break; - - default: - err = -ETIMEDOUT; - break; - } - - hdev->req_status = hdev->req_result = 0; - - BT_DBG("%s end: err %d", hdev->name, err); - - return err; -} - -static int hci_req_sync(struct hci_dev *hdev, - void (*req)(struct hci_request *req, - unsigned long opt), - unsigned long opt, __u32 timeout) -{ - int ret; - - if (!test_bit(HCI_UP, &hdev->flags)) - return -ENETDOWN; - - /* Serialize all requests */ - hci_req_lock(hdev); - ret = __hci_req_sync(hdev, req, opt, timeout); - hci_req_unlock(hdev); - - return ret; -} - static void hci_reset_req(struct hci_request *req, unsigned long opt) { BT_DBG("%s %ld", req->hdev->name, opt); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 5ba27c3..aa868f6 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -27,6 +27,10 @@ #include "smp.h" #include "hci_request.h" +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 + void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); @@ -82,6 +86,186 @@ int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) return req_run(req, NULL, complete); } +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, + struct sk_buff *skb) +{ + BT_DBG("%s result 0x%2.2x", hdev->name, result); + + if (hdev->req_status == HCI_REQ_PEND) { + hdev->req_result = result; + hdev->req_status = HCI_REQ_DONE; + if (skb) + hdev->req_skb = skb_get(skb); + wake_up_interruptible(&hdev->req_wait_q); + } +} + +void hci_req_cancel(struct hci_dev *hdev, int err) +{ + BT_DBG("%s err 0x%2.2x", hdev->name, err); + + if (hdev->req_status == HCI_REQ_PEND) { + hdev->req_result = err; + hdev->req_status = HCI_REQ_CANCELED; + wake_up_interruptible(&hdev->req_wait_q); + } +} + +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u8 event, u32 timeout) +{ + DECLARE_WAITQUEUE(wait, current); + struct hci_request req; + struct sk_buff *skb; + int err = 0; + + BT_DBG("%s", hdev->name); + + hci_req_init(&req, hdev); + + hci_req_add_ev(&req, opcode, plen, param, event); + + hdev->req_status = HCI_REQ_PEND; + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + err = hci_req_run_skb(&req, hci_req_sync_complete); + if (err < 0) { + remove_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_RUNNING); + return ERR_PTR(err); + } + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return ERR_PTR(-EINTR); + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + skb = hdev->req_skb; + hdev->req_skb = NULL; + + BT_DBG("%s end: err %d", hdev->name, err); + + if (err < 0) { + kfree_skb(skb); + return ERR_PTR(err); + } + + if (!skb) + return ERR_PTR(-ENODATA); + + return skb; +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); +} +EXPORT_SYMBOL(__hci_cmd_sync); + +/* Execute request and wait for completion. */ +int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) +{ + struct hci_request req; + DECLARE_WAITQUEUE(wait, current); + int err = 0; + + BT_DBG("%s start", hdev->name); + + hci_req_init(&req, hdev); + + hdev->req_status = HCI_REQ_PEND; + + func(&req, opt); + + add_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + err = hci_req_run_skb(&req, hci_req_sync_complete); + if (err < 0) { + hdev->req_status = 0; + + remove_wait_queue(&hdev->req_wait_q, &wait); + set_current_state(TASK_RUNNING); + + /* ENODATA means the HCI request command queue is empty. + * This can happen when a request with conditionals doesn't + * trigger any commands to be sent. This is normal behavior + * and should not trigger an error return. + */ + if (err == -ENODATA) + return 0; + + return err; + } + + schedule_timeout(timeout); + + remove_wait_queue(&hdev->req_wait_q, &wait); + + if (signal_pending(current)) + return -EINTR; + + switch (hdev->req_status) { + case HCI_REQ_DONE: + err = -bt_to_errno(hdev->req_result); + break; + + case HCI_REQ_CANCELED: + err = -hdev->req_result; + break; + + default: + err = -ETIMEDOUT; + break; + } + + hdev->req_status = hdev->req_result = 0; + + BT_DBG("%s end: err %d", hdev->name, err); + + return err; +} + +int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout) +{ + int ret; + + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + + /* Serialize all requests */ + hci_req_lock(hdev); + ret = __hci_req_sync(hdev, req, opt, timeout); + hci_req_unlock(hdev); + + return ret; +} + struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 25c7f13..6e6bad4 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,6 +20,9 @@ SOFTWARE IS DISCLAIMED. */ +#define hci_req_lock(d) mutex_lock(&d->req_lock) +#define hci_req_unlock(d) mutex_unlock(&d->req_lock) + struct hci_request { struct hci_dev *hdev; struct sk_buff_head cmd_q; @@ -41,6 +44,14 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb); +int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout); +int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, + unsigned long opt), + unsigned long opt, __u32 timeout); +void hci_req_cancel(struct hci_dev *hdev, int err); + struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); -- cgit v1.1 From b504430c868c2979d2dbee9be051e425fdeb36ac Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:55 +0200 Subject: Bluetooth: Add 'sync' specifier to synchronous request APIs To make it clear which HCI request APIs target specifically synchronous requests, add 'sync' to the API names. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 28 ++++++++++++++-------------- net/bluetooth/hci_request.c | 6 +++--- net/bluetooth/hci_request.h | 6 +++--- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aa18ec7..ec1beba 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -92,14 +92,14 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf, if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE)) return -EALREADY; - hci_req_lock(hdev); + hci_req_sync_lock(hdev); if (enable) skb = __hci_cmd_sync(hdev, HCI_OP_ENABLE_DUT_MODE, 0, NULL, HCI_CMD_TIMEOUT); else skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_CMD_TIMEOUT); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -156,9 +156,9 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, !test_bit(HCI_RUNNING, &hdev->flags)) goto done; - hci_req_lock(hdev); + hci_req_sync_lock(hdev); err = hdev->set_diag(hdev, enable); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); if (err < 0) return err; @@ -1257,7 +1257,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) BT_DBG("%s %p", hdev->name, hdev); - hci_req_lock(hdev); + hci_req_sync_lock(hdev); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { ret = -ENODEV; @@ -1410,7 +1410,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) } done: - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return ret; } @@ -1504,12 +1504,12 @@ int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); - hci_req_cancel(hdev, ENODEV); - hci_req_lock(hdev); + hci_req_sync_cancel(hdev, ENODEV); + hci_req_sync_lock(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return 0; } @@ -1607,7 +1607,7 @@ int hci_dev_do_close(struct hci_dev *hdev) memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); hci_dev_put(hdev); return 0; @@ -1643,7 +1643,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) BT_DBG("%s %p", hdev->name, hdev); - hci_req_lock(hdev); + hci_req_sync_lock(hdev); /* Drop queues */ skb_queue_purge(&hdev->rx_q); @@ -1667,7 +1667,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return ret; } @@ -3537,9 +3537,9 @@ struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); - hci_req_lock(hdev); + hci_req_sync_lock(hdev); skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return skb; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index aa868f6..ae19bce 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -100,7 +100,7 @@ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, } } -void hci_req_cancel(struct hci_dev *hdev, int err) +void hci_req_sync_cancel(struct hci_dev *hdev, int err) { BT_DBG("%s err 0x%2.2x", hdev->name, err); @@ -259,9 +259,9 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, return -ENETDOWN; /* Serialize all requests */ - hci_req_lock(hdev); + hci_req_sync_lock(hdev); ret = __hci_req_sync(hdev, req, opt, timeout); - hci_req_unlock(hdev); + hci_req_sync_unlock(hdev); return ret; } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6e6bad4..5b3240c 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,8 +20,8 @@ SOFTWARE IS DISCLAIMED. */ -#define hci_req_lock(d) mutex_lock(&d->req_lock) -#define hci_req_unlock(d) mutex_unlock(&d->req_lock) +#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) +#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) struct hci_request { struct hci_dev *hdev; @@ -50,7 +50,7 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), unsigned long opt, __u32 timeout); -void hci_req_cancel(struct hci_dev *hdev, int err); +void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); -- cgit v1.1 From 5fc16cc4f3044551587dfee8e12422cbf59303e8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:16 +0200 Subject: Bluetooth: Add stubs for synchronous HCI request functionality Prepare hci_request.c to have code for doing synchronous HCI requests, such as LE scanning or advertising changes. The necessary work callbacks will be set up in hci_request_setup() and cleaned up in hci_request_cancel_all(). The former is used when an HCI device get registered, and the latter each time it is powered off (or unregistered). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 ++++ net/bluetooth/hci_request.c | 8 ++++++++ net/bluetooth/hci_request.h | 3 +++ 3 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ec1beba..965bc01 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1609,6 +1609,8 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_req_sync_unlock(hdev); + hci_request_cancel_all(hdev); + hci_dev_put(hdev); return 0; } @@ -3161,6 +3163,8 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); + hci_request_setup(hdev); + hci_init_sysfs(hdev); discovery_init(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ae19bce..d482062 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -845,3 +845,11 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } + +void hci_request_setup(struct hci_dev *hdev) +{ +} + +void hci_request_cancel_all(struct hci_dev *hdev) +{ +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 5b3240c..9759b71 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -70,3 +70,6 @@ void __hci_update_background_scan(struct hci_request *req); int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); + +void hci_request_setup(struct hci_dev *hdev); +void hci_request_cancel_all(struct hci_dev *hdev); -- cgit v1.1 From 2e93e53b8f86fb38a9a3c3bd08e539c40b3f8d89 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:17 +0200 Subject: Bluetooth: Run all background scan updates through req_workqueue Instead of firing off a simple async request queue all background scan updates through req_workqueue and use hci_req_sync() there to ensure that no two updates overlap with each other. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 39 +++++++++++++++++---------------------- net/bluetooth/hci_request.h | 6 +++++- net/bluetooth/mgmt.c | 2 +- 3 files changed, 23 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index d482062..0adbb59 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -731,28 +731,6 @@ void __hci_update_background_scan(struct hci_request *req) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -void hci_update_background_scan(struct hci_dev *hdev) -{ - int err; - struct hci_request req; - - hci_req_init(&req, hdev); - - __hci_update_background_scan(&req); - - err = hci_req_run(&req, update_background_scan_complete); - if (err && err != -ENODATA) - BT_ERR("Failed to run HCI request: err %d", err); -} - void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -846,10 +824,27 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } +static void update_bg_scan(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + __hci_update_background_scan(req); + hci_dev_unlock(req->hdev); +} + +static void bg_scan_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + bg_scan_update); + + hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT); +} + void hci_request_setup(struct hci_dev *hdev) { + INIT_WORK(&hdev->bg_scan_update, bg_scan_update); } void hci_request_cancel_all(struct hci_dev *hdev) { + cancel_work_sync(&hdev->bg_scan_update); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 9759b71..983e687 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,12 +64,16 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); -void hci_update_background_scan(struct hci_dev *hdev); void __hci_update_background_scan(struct hci_request *req); int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); +static inline void hci_update_background_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->bg_scan_update); +} + void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7f22119..29c9fec 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2510,8 +2510,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); - __hci_update_background_scan(&req); hci_req_run(&req, NULL); + hci_update_background_scan(hdev); } unlock: -- cgit v1.1 From 51d7a94d56f842a6bd752c11de2f80f2cbc4a507 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:18 +0200 Subject: Bluetooth: Don't wait for HCI in Add/Remove Device There's no point in waiting for HCI activity in Add/Remove Device since the effects of these calls are long-lasting and we can anyway not report up to the application all HCI failures. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 161 ++++++++++++++++----------------------------------- 1 file changed, 50 insertions(+), 111 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29c9fec..2750494 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6076,10 +6076,9 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) } /* This function requires the caller holds hdev->lock */ -static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, +static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, u8 auto_connect) { - struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; params = hci_conn_params_add(hdev, addr, addr_type); @@ -6099,26 +6098,17 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, */ if (params->explicit_connect) list_add(¶ms->action, &hdev->pend_le_conns); - - __hci_update_background_scan(req); break; case HCI_AUTO_CONN_REPORT: if (params->explicit_connect) list_add(¶ms->action, &hdev->pend_le_conns); else list_add(¶ms->action, &hdev->pend_le_reports); - __hci_update_background_scan(req); break; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { + if (!is_connected(hdev, addr, addr_type)) list_add(¶ms->action, &hdev->pend_le_conns); - /* If we are in scan phase of connecting, we were - * already added to pend_le_conns and scanning. - */ - if (params->auto_connect != HCI_AUTO_CONN_EXPLICIT) - __hci_update_background_scan(req); - } break; } @@ -6142,25 +6132,6 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } -static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - struct mgmt_pending_cmd *cmd; - - BT_DBG("status 0x%02x", status); - - hci_dev_lock(hdev); - - cmd = pending_find(MGMT_OP_ADD_DEVICE, hdev); - if (!cmd) - goto unlock; - - cmd->cmd_complete(cmd, mgmt_status(status)); - mgmt_pending_remove(cmd); - -unlock: - hci_dev_unlock(hdev); -} - static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -6198,9 +6169,10 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -6229,33 +6201,31 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, * hci_conn_params_lookup. */ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) { - err = cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. */ - if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_FAILED, &cp->addr, + sizeof(cp->addr)); goto unlock; } + hci_update_background_scan(hdev); + added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = hci_req_run(&req, add_device_complete); - if (err < 0) { - /* ENODATA means no HCI commands were needed (e.g. if - * the adapter is powered off). - */ - if (err == -ENODATA) - err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); - mgmt_pending_remove(cmd); - } + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, + MGMT_STATUS_SUCCESS, &cp->addr, + sizeof(cp->addr)); unlock: hci_dev_unlock(hdev); @@ -6273,55 +6243,25 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } -static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - struct mgmt_pending_cmd *cmd; - - BT_DBG("status 0x%02x", status); - - hci_dev_lock(hdev); - - cmd = pending_find(MGMT_OP_REMOVE_DEVICE, hdev); - if (!cmd) - goto unlock; - - cmd->cmd_complete(cmd, mgmt_status(status)); - mgmt_pending_remove(cmd); - -unlock: - hci_dev_unlock(hdev); -} - static int remove_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_remove_device *cp = data; - struct mgmt_pending_cmd *cmd; - struct hci_request req; int err; BT_DBG("%s", hdev->name); - hci_req_init(&req, hdev); - hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_DEVICE, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto unlock; - } - - cmd->cmd_complete = addr_cmd_complete; - if (bacmp(&cp->addr.bdaddr, BDADDR_ANY)) { struct hci_conn_params *params; u8 addr_type; if (!bdaddr_type_is_valid(cp->addr.type)) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -6330,13 +6270,15 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, &cp->addr.bdaddr, cp->addr.type); if (err) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, + sizeof(cp->addr)); goto unlock; } - __hci_update_page_scan(&req); + hci_update_page_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -6351,33 +6293,36 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, * hci_conn_params_lookup. */ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); if (!params) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } if (params->auto_connect == HCI_AUTO_CONN_DISABLED || params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } list_del(¶ms->action); list_del(¶ms->list); kfree(params); - __hci_update_background_scan(&req); + hci_update_background_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); } else { @@ -6385,9 +6330,10 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, struct bdaddr_list *b, *btmp; if (cp->addr.type) { - err = cmd->cmd_complete(cmd, - MGMT_STATUS_INVALID_PARAMS); - mgmt_pending_remove(cmd); + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_INVALID_PARAMS, + &cp->addr, sizeof(cp->addr)); goto unlock; } @@ -6397,7 +6343,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - __hci_update_page_scan(&req); + hci_update_page_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -6414,20 +6360,13 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, BT_DBG("All LE connection parameters were removed"); - __hci_update_background_scan(&req); + hci_update_background_scan(hdev); } complete: - err = hci_req_run(&req, remove_device_complete); - if (err < 0) { - /* ENODATA means no HCI commands were needed (e.g. if - * the adapter is powered off). - */ - if (err == -ENODATA) - err = cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); - mgmt_pending_remove(cmd); - } - + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_DEVICE, + MGMT_STATUS_SUCCESS, &cp->addr, + sizeof(cp->addr)); unlock: hci_dev_unlock(hdev); return err; -- cgit v1.1 From 4ebeee2dff9815619be6ff9a845d33716f48468c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:19 +0200 Subject: Bluetooth: Add HCI status return parameter to hci_req_sync() In some cases it may be important to get the exact HCI status rather than the converted HCI-to-errno value. Add an optional return parameter to the hci_req_sync() API to allow for this. Since there are no good HCI translation candidates for cancelation and timeout, use the "unknown" status code for those cases. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 26 +++++++++++++------------- net/bluetooth/hci_request.c | 12 +++++++++--- net/bluetooth/hci_request.h | 4 ++-- 3 files changed, 24 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 965bc01..029d7798 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -768,14 +768,14 @@ static int __hci_init(struct hci_dev *hdev) { int err; - err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); - err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -786,11 +786,11 @@ static int __hci_init(struct hci_dev *hdev) if (hdev->dev_type != HCI_BREDR) return 0; - err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; - err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -846,7 +846,7 @@ static int __hci_unconf_init(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return 0; - err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -1204,7 +1204,7 @@ int hci_inquiry(void __user *arg) if (do_inquiry) { err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, - timeo); + timeo, NULL); if (err < 0) goto done; @@ -1570,7 +1570,7 @@ int hci_dev_do_close(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); - __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); + __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT, NULL); clear_bit(HCI_INIT, &hdev->flags); } @@ -1667,7 +1667,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; - ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT, NULL); hci_req_sync_unlock(hdev); return ret; @@ -1802,7 +1802,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETENCRYPT: @@ -1814,18 +1814,18 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); if (err) break; } err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETSCAN: err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. @@ -1836,7 +1836,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) case HCISETLINKPOL: err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETLINKMODE: diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0adbb59..b1d4d5b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -186,7 +186,7 @@ EXPORT_SYMBOL(__hci_cmd_sync); /* Execute request and wait for completion. */ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, u32 timeout, u8 *hci_status) { struct hci_request req; DECLARE_WAITQUEUE(wait, current); @@ -231,14 +231,20 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, switch (hdev->req_status) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); + if (hci_status) + *hci_status = hdev->req_result; break; case HCI_REQ_CANCELED: err = -hdev->req_result; + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; break; default: err = -ETIMEDOUT; + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; break; } @@ -251,7 +257,7 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, u32 timeout, u8 *hci_status) { int ret; @@ -260,7 +266,7 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, /* Serialize all requests */ hci_req_sync_lock(hdev); - ret = __hci_req_sync(hdev, req, opt, timeout); + ret = __hci_req_sync(hdev, req, opt, timeout, hci_status); hci_req_sync_unlock(hdev); return ret; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 983e687..8441d12 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -46,10 +46,10 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout); + unsigned long opt, u32 timeout, u8 *hci_status); int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout); + unsigned long opt, u32 timeout, u8 *hci_status); void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, -- cgit v1.1 From 84235d222a297a281dbe984ef4f28519cacc5fe3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:20 +0200 Subject: Bluetooth: Use req_workqueue for explicit connect requests Since explicit connect requests are also a sub-category of passive scan updates, run them through the same workqueue as the other passive scan changes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 39 ++++----------------------------------- net/bluetooth/hci_request.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fd6120a..1ed1e15 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -933,26 +933,6 @@ done: return conn; } -static void hci_connect_le_scan_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - struct hci_conn *conn; - - if (!status) - return; - - BT_ERR("Failed to add device to auto conn whitelist: status 0x%2.2x", - status); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - hci_le_conn_failed(conn, status); - - hci_dev_unlock(hdev); -} - static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) { struct hci_conn *conn; @@ -968,10 +948,9 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) } /* This function requires the caller holds hdev->lock */ -static int hci_explicit_conn_params_set(struct hci_request *req, +static int hci_explicit_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { - struct hci_dev *hdev = req->hdev; struct hci_conn_params *params; if (is_connected(hdev, addr, addr_type)) @@ -999,7 +978,6 @@ static int hci_explicit_conn_params_set(struct hci_request *req, } params->explicit_connect = true; - __hci_update_background_scan(req); BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, params->auto_connect); @@ -1013,8 +991,6 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u16 conn_timeout, u8 role) { struct hci_conn *conn; - struct hci_request req; - int err; /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { @@ -1046,25 +1022,18 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, if (!conn) return ERR_PTR(-ENOMEM); - hci_req_init(&req, hdev); - - if (hci_explicit_conn_params_set(&req, dst, dst_type) < 0) + if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) return ERR_PTR(-EBUSY); conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); - - err = hci_req_run(&req, hci_connect_le_scan_complete); - if (err && err != -ENODATA) { - hci_conn_del(conn); - return ERR_PTR(err); - } - conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; conn->conn_timeout = conn_timeout; + hci_update_background_scan(hdev); + done: hci_conn_hold(conn); return conn; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b1d4d5b..c0ea310 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -841,8 +841,21 @@ static void bg_scan_update(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, bg_scan_update); + struct hci_conn *conn; + u8 status; + int err; + + err = hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT, &status); + if (!err) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + hci_le_conn_failed(conn, status); - hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT); + hci_dev_unlock(hdev); } void hci_request_setup(struct hci_dev *hdev) -- cgit v1.1 From af02dd446999796a742e3940d1a25f2b35b6eeba Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:21 +0200 Subject: Bluetooth: Use req_workqueue for background scanning when powering on We can easily use the new req_workqueue based background scan update for the power on case. This also removes the last external user of __hci_update_background_scan(). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2750494..bb870c3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7465,9 +7465,8 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_request *req) +static void restart_le_actions(struct hci_dev *hdev) { - struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -7488,8 +7487,6 @@ static void restart_le_actions(struct hci_request *req) break; } } - - __hci_update_background_scan(req); } static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) @@ -7505,6 +7502,9 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) * decide if the public address or static address is used. */ smp_register(hdev); + + restart_le_actions(hdev); + hci_update_background_scan(hdev); } hci_dev_lock(hdev); @@ -7583,8 +7583,6 @@ static int powered_update_hci(struct hci_dev *hdev) hdev->cur_adv_instance) schedule_adv_instance(&req, hdev->cur_adv_instance, true); - - restart_le_actions(&req); } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); -- cgit v1.1 From 145a0913ef180af6be7af2c50056ae171c2a2b94 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:22 +0200 Subject: Bluetooth: Make __hci_update_background_scan private to hci_request.c There are no more external users so this API can be made private. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- net/bluetooth/hci_request.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c0ea310..8aa06cc 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -670,7 +670,7 @@ void hci_update_page_scan(struct hci_dev *hdev) * * This function requires the caller holds hdev->lock. */ -void __hci_update_background_scan(struct hci_request *req) +static void __hci_update_background_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 8441d12..1f11946 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,8 +64,6 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); -void __hci_update_background_scan(struct hci_request *req); - int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); -- cgit v1.1 From 7c1fbed23981faff2840ddc8909e7c78d80ade30 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:23 +0200 Subject: Bluetooth: Move LE scan disable/restart behind req_workqueue To avoid any risks of races, place also these LE scan modification work callbacks behind the same work queue as the other LE scan changes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 168 ----------------------------------------- net/bluetooth/hci_request.c | 179 ++++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 4 +- 3 files changed, 181 insertions(+), 170 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 029d7798..0655521 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1527,9 +1527,6 @@ int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); - cancel_delayed_work_sync(&hdev->le_scan_disable); - cancel_delayed_work_sync(&hdev->le_scan_restart); - if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2889,169 +2886,6 @@ static void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - if (status) { - BT_ERR("Failed to start inquiry: status %d", status); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - return; - } -} - -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - int err; - - if (status) { - BT_ERR("Failed to disable LE scanning: status %d", status); - return; - } - - hdev->discovery.scan_start = 0; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - break; - - case DISCOV_TYPE_INTERLEAVED: - hci_dev_lock(hdev); - - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* If we were running LE only scan, change discovery - * state. If we were running both LE and BR/EDR inquiry - * simultaneously, and BR/EDR inquiry is already - * finished, stop discovery, otherwise BR/EDR inquiry - * will stop discovery when finished. If we will resolve - * remote device name, do not change discovery state. - */ - if (!test_bit(HCI_INQUIRY, &hdev->flags) && - hdev->discovery.state != DISCOVERY_RESOLVING) - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } else { - struct hci_request req; - - hci_inquiry_cache_flush(hdev); - - hci_req_init(&req, hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } - } - - hci_dev_unlock(hdev); - break; - } -} - -static void le_scan_disable_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_disable.work); - struct hci_request req; - int err; - - BT_DBG("%s", hdev->name); - - cancel_delayed_work_sync(&hdev->le_scan_restart); - - hci_req_init(&req, hdev); - - hci_req_add_le_scan_disable(&req); - - err = hci_req_run(&req, le_scan_disable_work_complete); - if (err) - BT_ERR("Disable LE scanning request failed: err %d", err); -} - -static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - unsigned long timeout, duration, scan_start, now; - - BT_DBG("%s", hdev->name); - - if (status) { - BT_ERR("Failed to restart LE scan: status %d", status); - return; - } - - if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || - !hdev->discovery.scan_start) - return; - - /* When the scan was started, hdev->le_scan_disable has been queued - * after duration from scan_start. During scan restart this job - * has been canceled, and we need to queue it again after proper - * timeout, to make sure that scan does not run indefinitely. - */ - duration = hdev->discovery.scan_duration; - scan_start = hdev->discovery.scan_start; - now = jiffies; - if (now - scan_start <= duration) { - int elapsed; - - if (now >= scan_start) - elapsed = now - scan_start; - else - elapsed = ULONG_MAX - scan_start + now; - - timeout = duration - elapsed; - } else { - timeout = 0; - } - queue_delayed_work(hdev->workqueue, - &hdev->le_scan_disable, timeout); -} - -static void le_scan_restart_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - struct hci_request req; - struct hci_cp_le_set_scan_enable cp; - int err; - - BT_DBG("%s", hdev->name); - - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - hci_req_init(&req, hdev); - - hci_req_add_le_scan_disable(&req); - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - - err = hci_req_run(&req, le_scan_restart_work_complete); - if (err) - BT_ERR("Restart LE scan request failed: err %d", err); -} - /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. @@ -3151,8 +2985,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); - INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 8aa06cc..4588fe2 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -858,12 +858,191 @@ static void bg_scan_update(struct work_struct *work) hci_dev_unlock(hdev); } +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + if (status) { + BT_ERR("Failed to start inquiry: status %d", status); + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + return; + } +} + +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +{ + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_cp_inquiry cp; + int err; + + if (status) { + BT_ERR("Failed to disable LE scanning: status %d", status); + return; + } + + hdev->discovery.scan_start = 0; + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + break; + + case DISCOV_TYPE_INTERLEAVED: + hci_dev_lock(hdev); + + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* If we were running LE only scan, change discovery + * state. If we were running both LE and BR/EDR inquiry + * simultaneously, and BR/EDR inquiry is already + * finished, stop discovery, otherwise BR/EDR inquiry + * will stop discovery when finished. If we will resolve + * remote device name, do not change discovery state. + */ + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } else { + struct hci_request req; + + hci_inquiry_cache_flush(hdev); + + hci_req_init(&req, hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } + } + + hci_dev_unlock(hdev); + break; + } +} + +static void le_scan_disable(struct hci_request *req, unsigned long opt) +{ + hci_req_add_le_scan_disable(req); +} + +static void le_scan_disable_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_disable.work); + u8 status; + int err; + + BT_DBG("%s", hdev->name); + + cancel_delayed_work(&hdev->le_scan_restart); + + err = hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); + if (err) + return; + + le_scan_disable_work_complete(hdev, status); +} + +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) +{ + unsigned long timeout, duration, scan_start, now; + + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); + return; + } + + hci_dev_lock(hdev); + + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + goto unlock; + + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. + */ + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; + + timeout = duration - elapsed; + } else { + timeout = 0; + } + + queue_delayed_work(hdev->req_workqueue, + &hdev->le_scan_disable, timeout); + +unlock: + hci_dev_unlock(hdev); +} + +static void le_scan_restart(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_enable cp; + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + + hci_req_add_le_scan_disable(req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + u8 status; + int err; + + BT_DBG("%s", hdev->name); + + err = hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); + if (err) + return; + + le_scan_restart_work_complete(hdev, status); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->bg_scan_update, bg_scan_update); + INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); } void hci_request_cancel_all(struct hci_dev *hdev) { cancel_work_sync(&hdev->bg_scan_update); + cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bb870c3..a229cfd 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4367,7 +4367,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, hdev->discovery.scan_duration = timeout; } - queue_delayed_work(hdev->workqueue, + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); } @@ -8389,7 +8389,7 @@ static void restart_le_scan(struct hci_dev *hdev) hdev->discovery.scan_duration)) return; - queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart, DISCOV_LE_RESTART_DELAY); } -- cgit v1.1 From 591752afbcc8179979296698cae698541d2e5431 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:24 +0200 Subject: Bluetooth: Add discovery type validity helper As preparation for moving the discovery HCI commands behind req_workqueue, add a helper and do the validity checks of the given discovery type before proceeding further. This way we don't need to do them again in hci_request.c. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a229cfd..e634b4d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4375,6 +4375,33 @@ unlock: hci_dev_unlock(hdev); } +static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, + uint8_t *mgmt_status) +{ + switch (type) { + case DISCOV_TYPE_LE: + *mgmt_status = mgmt_le_support(hdev); + if (*mgmt_status) + return false; + break; + case DISCOV_TYPE_INTERLEAVED: + *mgmt_status = mgmt_le_support(hdev); + if (*mgmt_status) + return false; + /* Intentional fall-through */ + case DISCOV_TYPE_BREDR: + *mgmt_status = mgmt_bredr_support(hdev); + if (*mgmt_status) + return false; + break; + default: + *mgmt_status = MGMT_STATUS_INVALID_PARAMS; + return false; + } + + return true; +} + static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -4403,6 +4430,12 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + if (!discovery_type_is_valid(hdev, cp->type, &status)) { + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + status, &cp->type, sizeof(cp->type)); + goto failed; + } + cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); if (!cmd) { err = -ENOMEM; @@ -4502,6 +4535,13 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + if (!discovery_type_is_valid(hdev, cp->type, &status)) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + status, &cp->type, sizeof(cp->type)); + goto failed; + } + cmd = mgmt_pending_add(sk, MGMT_OP_START_SERVICE_DISCOVERY, hdev, data, len); if (!cmd) { -- cgit v1.1 From a1d01db1202ee6795c0a665b43896293ad4e2a77 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:25 +0200 Subject: Bluetooth: Add error return value to hci_req_sync callback In some circumstances it may be useful to abort the request through checks done in the request callback. To make the feature possible this patch changes the return value of the request callback from void to int and aborts the request if a non-zero value is returned. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 45 ++++++++++++++++++++++++++++++++------------- net/bluetooth/hci_request.c | 27 ++++++++++++++++++--------- net/bluetooth/hci_request.h | 8 ++++---- 3 files changed, 54 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0655521..fb618d6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -189,13 +189,14 @@ static void hci_debugfs_create_basic(struct hci_dev *hdev) &vendor_diag_fops); } -static void hci_reset_req(struct hci_request *req, unsigned long opt) +static int hci_reset_req(struct hci_request *req, unsigned long opt) { BT_DBG("%s %ld", req->hdev->name, opt); /* Reset device */ set_bit(HCI_RESET, &req->hdev->flags); hci_req_add(req, HCI_OP_RESET, 0, NULL); + return 0; } static void bredr_init(struct hci_request *req) @@ -235,7 +236,7 @@ static void amp_init1(struct hci_request *req) hci_req_add(req, HCI_OP_READ_LOCATION_DATA, 0, NULL); } -static void amp_init2(struct hci_request *req) +static int amp_init2(struct hci_request *req) { /* Read Local Supported Features. Not all AMP controllers * support this so it's placed conditionally in the second @@ -243,9 +244,11 @@ static void amp_init2(struct hci_request *req) */ if (req->hdev->commands[14] & 0x20) hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); + + return 0; } -static void hci_init1_req(struct hci_request *req, unsigned long opt) +static int hci_init1_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -268,6 +271,8 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt) BT_ERR("Unknown device type %d", hdev->dev_type); break; } + + return 0; } static void bredr_setup(struct hci_request *req) @@ -416,7 +421,7 @@ static void hci_setup_event_mask(struct hci_request *req) hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); } -static void hci_init2_req(struct hci_request *req, unsigned long opt) +static int hci_init2_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -496,6 +501,8 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), &enable); } + + return 0; } static void hci_setup_link_policy(struct hci_request *req) @@ -570,7 +577,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); } -static void hci_init3_req(struct hci_request *req, unsigned long opt) +static int hci_init3_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; u8 p; @@ -709,9 +716,11 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), &cp); } + + return 0; } -static void hci_init4_req(struct hci_request *req, unsigned long opt) +static int hci_init4_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -762,6 +771,8 @@ static void hci_init4_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, sizeof(support), &support); } + + return 0; } static int __hci_init(struct hci_dev *hdev) @@ -821,7 +832,7 @@ static int __hci_init(struct hci_dev *hdev) return 0; } -static void hci_init0_req(struct hci_request *req, unsigned long opt) +static int hci_init0_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -837,6 +848,8 @@ static void hci_init0_req(struct hci_request *req, unsigned long opt) /* Read BD Address */ if (hdev->set_bdaddr) hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL); + + return 0; } static int __hci_unconf_init(struct hci_dev *hdev) @@ -856,7 +869,7 @@ static int __hci_unconf_init(struct hci_dev *hdev) return 0; } -static void hci_scan_req(struct hci_request *req, unsigned long opt) +static int hci_scan_req(struct hci_request *req, unsigned long opt) { __u8 scan = opt; @@ -864,9 +877,10 @@ static void hci_scan_req(struct hci_request *req, unsigned long opt) /* Inquiry and Page scans */ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + return 0; } -static void hci_auth_req(struct hci_request *req, unsigned long opt) +static int hci_auth_req(struct hci_request *req, unsigned long opt) { __u8 auth = opt; @@ -874,9 +888,10 @@ static void hci_auth_req(struct hci_request *req, unsigned long opt) /* Authentication */ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); + return 0; } -static void hci_encrypt_req(struct hci_request *req, unsigned long opt) +static int hci_encrypt_req(struct hci_request *req, unsigned long opt) { __u8 encrypt = opt; @@ -884,9 +899,10 @@ static void hci_encrypt_req(struct hci_request *req, unsigned long opt) /* Encryption */ hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); + return 0; } -static void hci_linkpol_req(struct hci_request *req, unsigned long opt) +static int hci_linkpol_req(struct hci_request *req, unsigned long opt) { __le16 policy = cpu_to_le16(opt); @@ -894,6 +910,7 @@ static void hci_linkpol_req(struct hci_request *req, unsigned long opt) /* Default link policy */ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); + return 0; } /* Get HCI device by index. @@ -1138,7 +1155,7 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static void hci_inq_req(struct hci_request *req, unsigned long opt) +static int hci_inq_req(struct hci_request *req, unsigned long opt) { struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; struct hci_dev *hdev = req->hdev; @@ -1147,13 +1164,15 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt) BT_DBG("%s", hdev->name); if (test_bit(HCI_INQUIRY, &hdev->flags)) - return; + return 0; /* Start Inquiry */ memcpy(&cp.lap, &ir->lap, 3); cp.length = ir->length; cp.num_rsp = ir->num_rsp; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return 0; } int hci_inquiry(void __user *arg) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 4588fe2..ecfa410 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -184,8 +184,8 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, EXPORT_SYMBOL(__hci_cmd_sync); /* Execute request and wait for completion. */ -int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, - unsigned long opt), +int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status) { struct hci_request req; @@ -198,7 +198,12 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, hdev->req_status = HCI_REQ_PEND; - func(&req, opt); + err = func(&req, opt); + if (err) { + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; + return err; + } add_wait_queue(&hdev->req_wait_q, &wait); set_current_state(TASK_INTERRUPTIBLE); @@ -255,8 +260,8 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, return err; } -int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, - unsigned long opt), +int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status) { int ret; @@ -830,11 +835,12 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } -static void update_bg_scan(struct hci_request *req, unsigned long opt) +static int update_bg_scan(struct hci_request *req, unsigned long opt) { hci_dev_lock(req->hdev); __hci_update_background_scan(req); hci_dev_unlock(req->hdev); + return 0; } static void bg_scan_update(struct work_struct *work) @@ -932,9 +938,10 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable(struct hci_request *req, unsigned long opt) +static int le_scan_disable(struct hci_request *req, unsigned long opt) { hci_req_add_le_scan_disable(req); + return 0; } static void le_scan_disable_work(struct work_struct *work) @@ -1000,14 +1007,14 @@ unlock: hci_dev_unlock(hdev); } -static void le_scan_restart(struct hci_request *req, unsigned long opt) +static int le_scan_restart(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_scan_enable cp; /* If controller is not scanning we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; + return 0; hci_req_add_le_scan_disable(req); @@ -1015,6 +1022,8 @@ static void le_scan_restart(struct hci_request *req, unsigned long opt) cp.enable = LE_SCAN_ENABLE; cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + return 0; } static void le_scan_restart_work(struct work_struct *work) diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1f11946..1927013 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -44,11 +44,11 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, hci_req_complete_t *req_complete, hci_req_complete_skb_t *req_complete_skb); -int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, - unsigned long opt), +int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status); -int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, - unsigned long opt), +int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, + unsigned long opt), unsigned long opt, u32 timeout, u8 *hci_status); void hci_req_sync_cancel(struct hci_dev *hdev, int err); -- cgit v1.1 From e68f072b7396574df5324e1cf93e4b0c92460735 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:30:30 +0200 Subject: Bluetooth: Move Start Discovery to req_workqueue Since discovery also deals with LE scanning it makes sense to move it behind the same req_workqueue as other LE scanning changes. This also simplifies the logic since we do many of the actions in a synchronous manner. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 202 +++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 238 ++------------------------------------------ 2 files changed, 211 insertions(+), 229 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ecfa410..da1e30b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1042,8 +1042,209 @@ static void le_scan_restart_work(struct work_struct *work) le_scan_restart_work_complete(hdev, status); } +static int bredr_inquiry(struct hci_request *req, unsigned long opt) +{ + struct hci_cp_inquiry cp; + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + + BT_DBG("%s", req->hdev->name); + + hci_dev_lock(req->hdev); + hci_inquiry_cache_flush(req->hdev); + hci_dev_unlock(req->hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_BREDR_INQUIRY_LEN; + + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return 0; +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +static void disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static int active_scan(struct hci_request *req, unsigned long opt) +{ + uint16_t interval = opt; + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + u8 own_addr_type; + int err; + + BT_DBG("%s", hdev->name); + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { + hci_dev_lock(hdev); + + /* Don't let discovery abort an outgoing connection attempt + * that's using directed advertising. + */ + if (hci_lookup_le_connect(hdev)) { + hci_dev_unlock(hdev); + return -EBUSY; + } + + cancel_adv_timeout(hdev); + hci_dev_unlock(hdev); + + disable_advertising(req); + } + + /* If controller is scanning, it means the background scanning is + * running. Thus, we should temporarily stop it in order to set the + * discovery scanning parameters. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); + + /* All active scans will be done with either a resolvable private + * address (when privacy feature has been enabled) or non-resolvable + * private address. + */ + err = hci_update_random_address(req, true, &own_addr_type); + if (err < 0) + own_addr_type = ADDR_LE_DEV_PUBLIC; + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(interval); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); + param_cp.own_address_type = own_addr_type; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); + + return 0; +} + +static int interleaved_discov(struct hci_request *req, unsigned long opt) +{ + int err; + + BT_DBG("%s", req->hdev->name); + + err = active_scan(req, opt); + if (err) + return err; + + return bredr_inquiry(req, opt); +} + +static void start_discovery(struct hci_dev *hdev, u8 *status) +{ + unsigned long timeout; + + BT_DBG("%s type %u", hdev->name, hdev->discovery.type); + + switch (hdev->discovery.type) { + case DISCOV_TYPE_BREDR: + if (!hci_dev_test_flag(hdev, HCI_INQUIRY)) + hci_req_sync(hdev, bredr_inquiry, 0, HCI_CMD_TIMEOUT, + status); + return; + case DISCOV_TYPE_INTERLEAVED: + /* When running simultaneous discovery, the LE scanning time + * should occupy the whole discovery time sine BR/EDR inquiry + * and LE scanning are scheduled by the controller. + * + * For interleaving discovery in comparison, BR/EDR inquiry + * and LE scanning are done sequentially with separate + * timeouts. + */ + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + /* During simultaneous discovery, we double LE scan + * interval. We must leave some time for the controller + * to do BR/EDR inquiry. + */ + hci_req_sync(hdev, interleaved_discov, + DISCOV_LE_SCAN_INT * 2, HCI_CMD_TIMEOUT, + status); + break; + } + + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); + hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + HCI_CMD_TIMEOUT, status); + break; + case DISCOV_TYPE_LE: + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + HCI_CMD_TIMEOUT, status); + break; + default: + *status = HCI_ERROR_UNSPECIFIED; + return; + } + + if (*status) + return; + + BT_DBG("%s timeout %u ms", hdev->name, jiffies_to_msecs(timeout)); + + /* When service discovery is used and the controller has a + * strict duplicate filter, it is important to remember the + * start and duration of the scan. This is required for + * restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + hdev->discovery.result_filtering) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, + timeout); +} + +static void discov_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_update); + u8 status = 0; + + switch (hdev->discovery.state) { + case DISCOVERY_STARTING: + start_discovery(hdev, &status); + mgmt_start_discovery_complete(hdev, status); + if (status) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + break; + case DISCOVERY_STOPPED: + default: + return; + } +} + void hci_request_setup(struct hci_dev *hdev) { + INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); @@ -1051,6 +1252,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { + cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e634b4d..63b0994 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4164,145 +4164,9 @@ done: return err; } -static bool trigger_bredr_inquiry(struct hci_request *req, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_inquiry cp; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - - *status = mgmt_bredr_support(hdev); - if (*status) - return false; - - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) { - *status = MGMT_STATUS_BUSY; - return false; - } - - hci_inquiry_cache_flush(hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_BREDR_INQUIRY_LEN; - - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return true; -} - -static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - u8 own_addr_type; - int err; - - *status = mgmt_le_support(hdev); - if (*status) - return false; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - /* Don't let discovery abort an outgoing connection attempt - * that's using directed advertising. - */ - if (hci_lookup_le_connect(hdev)) { - *status = MGMT_STATUS_REJECTED; - return false; - } - - cancel_adv_timeout(hdev); - disable_advertising(req); - } - - /* If controller is scanning, it means the background scanning is - * running. Thus, we should temporarily stop it in order to set the - * discovery scanning parameters. - */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); - - /* All active scans will be done with either a resolvable private - * address (when privacy feature has been enabled) or non-resolvable - * private address. - */ - err = hci_update_random_address(req, true, &own_addr_type); - if (err < 0) { - *status = MGMT_STATUS_FAILED; - return false; - } - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(interval); - param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); - param_cp.own_address_type = own_addr_type; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); - - return true; -} - -static bool trigger_discovery(struct hci_request *req, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_BREDR: - if (!trigger_bredr_inquiry(req, status)) - return false; - break; - - case DISCOV_TYPE_INTERLEAVED: - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* During simultaneous discovery, we double LE scan - * interval. We must leave some time for the controller - * to do BR/EDR inquiry. - */ - if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT * 2, - status)) - return false; - - if (!trigger_bredr_inquiry(req, status)) - return false; - - return true; - } - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - *status = MGMT_STATUS_NOT_SUPPORTED; - return false; - } - /* fall through */ - - case DISCOV_TYPE_LE: - if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT, status)) - return false; - break; - - default: - *status = MGMT_STATUS_INVALID_PARAMS; - return false; - } - - return true; -} - -static void start_discovery_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - unsigned long timeout; BT_DBG("status %d", status); @@ -4317,61 +4181,6 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, mgmt_pending_remove(cmd); } - if (status) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - goto unlock; - } - - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - - /* If the scan involves LE scan, pick proper timeout to schedule - * hdev->le_scan_disable that will stop it. - */ - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - break; - case DISCOV_TYPE_INTERLEAVED: - /* When running simultaneous discovery, the LE scanning time - * should occupy the whole discovery time sine BR/EDR inquiry - * and LE scanning are scheduled by the controller. - * - * For interleaving discovery in comparison, BR/EDR inquiry - * and LE scanning are done sequentially with separate - * timeouts. - */ - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - else - timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); - break; - case DISCOV_TYPE_BREDR: - timeout = 0; - break; - default: - BT_ERR("Invalid discovery type %d", hdev->discovery.type); - timeout = 0; - break; - } - - if (timeout) { - /* When service discovery is used and the controller has - * a strict duplicate filter, it is important to remember - * the start and duration of the scan. This is required - * for restarting scanning during the discovery phase. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks) && - hdev->discovery.result_filtering) { - hdev->discovery.scan_start = jiffies; - hdev->discovery.scan_duration = timeout; - } - - queue_delayed_work(hdev->req_workqueue, - &hdev->le_scan_disable, timeout); - } - -unlock: hci_dev_unlock(hdev); } @@ -4407,7 +4216,6 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; u8 status; int err; @@ -4436,14 +4244,6 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto failed; - } - - cmd->cmd_complete = generic_cmd_complete; - /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ @@ -4452,22 +4252,17 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; - hci_req_init(&req, hdev); - - if (!trigger_discovery(&req, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); - mgmt_pending_remove(cmd); + cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; } - err = hci_req_run(&req, start_discovery_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - goto failed; - } + cmd->cmd_complete = generic_cmd_complete; hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); @@ -4486,7 +4281,6 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_start_service_discovery *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16); u16 uuid_count, expected_len; u8 status; @@ -4574,23 +4368,9 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, } } - hci_req_init(&req, hdev); - - if (!trigger_discovery(&req, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - status, &cp->type, sizeof(cp->type)); - mgmt_pending_remove(cmd); - goto failed; - } - - err = hci_req_run(&req, start_discovery_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - goto failed; - } - hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.1 From 2154d3f4fb83c812a161c4910948dd876997e111 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:30:45 +0200 Subject: Bluetooth: Move Stop Discovery to req_workqueue Since discovery also deals with LE scanning it makes sense to move it behind the same req_workqueue as other LE scanning changes. This also simplifies the logic since we do many of the actions in a synchronous manner. Part of this refactoring is moving hci_req_stop_discovery() to hci_request.c. At the same time the function receives support for properly handling the STOPPING state since that's the state we'll be in when stopping through the req_workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 62 ++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 3 ++ net/bluetooth/mgmt.c | 72 ++++----------------------------------------- 3 files changed, 70 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index da1e30b..3219ee6 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1221,6 +1221,62 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) timeout); } +bool hci_req_stop_discovery(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct discovery_state *d = &hdev->discovery; + struct hci_cp_remote_name_req_cancel cp; + struct inquiry_entry *e; + bool ret = false; + + BT_DBG("%s state %u", hdev->name, hdev->discovery.state); + + if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { + if (test_bit(HCI_INQUIRY, &hdev->flags)) + hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + cancel_delayed_work(&hdev->le_scan_disable); + hci_req_add_le_scan_disable(req); + } + + ret = true; + } else { + /* Passive scanning */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + hci_req_add_le_scan_disable(req); + ret = true; + } + } + + /* No further actions needed for LE-only discovery */ + if (d->type == DISCOV_TYPE_LE) + return ret; + + if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) { + e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, + NAME_PENDING); + if (!e) + return ret; + + bacpy(&cp.bdaddr, &e->data.bdaddr); + hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); + ret = true; + } + + return ret; +} + +static int stop_discovery(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + hci_req_stop_discovery(req); + hci_dev_unlock(req->hdev); + + return 0; +} + static void discov_update(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1236,6 +1292,12 @@ static void discov_update(struct work_struct *work) else hci_discovery_set_state(hdev, DISCOVERY_FINDING); break; + case DISCOVERY_STOPPING: + hci_req_sync(hdev, stop_discovery, 0, HCI_CMD_TIMEOUT, &status); + mgmt_stop_discovery_complete(hdev, status); + if (!status) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + break; case DISCOVERY_STOPPED: default: return; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1927013..6b9e59f 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -58,6 +58,9 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +/* Returns true if HCI commands were queued */ +bool hci_req_stop_discovery(struct hci_request *req); + void hci_update_page_scan(struct hci_dev *hdev); void __hci_update_page_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 63b0994..8cdacef 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1416,49 +1416,6 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static bool hci_stop_discovery(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_remote_name_req_cancel cp; - struct inquiry_entry *e; - - switch (hdev->discovery.state) { - case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) - hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - cancel_delayed_work(&hdev->le_scan_disable); - hci_req_add_le_scan_disable(req); - } - - return true; - - case DISCOVERY_RESOLVING: - e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, - NAME_PENDING); - if (!e) - break; - - bacpy(&cp.bdaddr, &e->data.bdaddr); - hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), - &cp); - - return true; - - default: - /* Passive scanning */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(req); - return true; - } - - break; - } - - return false; -} - static void advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { @@ -1636,7 +1593,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); - discov_stopped = hci_stop_discovery(&req); + discov_stopped = hci_req_stop_discovery(&req); list_for_each_entry(conn, &hdev->conn_hash.list, list) { /* 0x15 == Terminated due to Power Off */ @@ -4377,7 +4334,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) +void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; @@ -4391,9 +4348,6 @@ static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) mgmt_pending_remove(cmd); } - if (!status) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); } @@ -4402,7 +4356,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_stop_discovery *mgmt_cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; int err; BT_DBG("%s", hdev->name); @@ -4431,24 +4384,9 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = generic_cmd_complete; - hci_req_init(&req, hdev); - - hci_stop_discovery(&req); - - err = hci_req_run(&req, stop_discovery_complete); - if (!err) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPING); - goto unlock; - } - - mgmt_pending_remove(cmd); - - /* If no HCI commands were sent we're done */ - if (err == -ENODATA) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, - &mgmt_cp->type, sizeof(mgmt_cp->type)); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - } + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; unlock: hci_dev_unlock(hdev); -- cgit v1.1 From 2f27498107c298b9cdd93c77d9e3ad409949b36b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 10:36:15 +0200 Subject: Bluetooth: Fix BR/EDR Page Scan update with Add Device The recent changes to remove dependency on HCI in Add Device missed out relevant changes for BR/EDR. This patch removes the left-overs and ensures the right HCI command gets queued for BR/EDR. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8cdacef..e4ad045 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5894,8 +5894,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; - struct mgmt_pending_cmd *cmd; - struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5912,18 +5910,8 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); - hci_req_init(&req, hdev); - hci_dev_lock(hdev); - cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto unlock; - } - - cmd->cmd_complete = addr_cmd_complete; - if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { @@ -5939,7 +5927,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - __hci_update_page_scan(&req); + hci_update_page_scan(hdev); goto added; } -- cgit v1.1 From 7df26b56297456a133b8cc2efb069065d6b9a555 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 12:24:21 +0200 Subject: Bluetooth: Pass inquiry length to bredr_inquiry() Passing the needed inquiry length to bredr_inquiry() makes it possible to also use this helper for interleaved discovery where the controller doesn't support simultaneous Inquiry & LE scan. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3219ee6..98827e7 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1044,6 +1044,7 @@ static void le_scan_restart_work(struct work_struct *work) static int bredr_inquiry(struct hci_request *req, unsigned long opt) { + u8 length = opt; struct hci_cp_inquiry cp; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; @@ -1056,7 +1057,7 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) memset(&cp, 0, sizeof(cp)); memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_BREDR_INQUIRY_LEN; + cp.length = length; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); @@ -1150,7 +1151,7 @@ static int interleaved_discov(struct hci_request *req, unsigned long opt) if (err) return err; - return bredr_inquiry(req, opt); + return bredr_inquiry(req, DISCOV_BREDR_INQUIRY_LEN); } static void start_discovery(struct hci_dev *hdev, u8 *status) @@ -1162,7 +1163,8 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: if (!hci_dev_test_flag(hdev, HCI_INQUIRY)) - hci_req_sync(hdev, bredr_inquiry, 0, HCI_CMD_TIMEOUT, + hci_req_sync(hdev, bredr_inquiry, + DISCOV_BREDR_INQUIRY_LEN, HCI_CMD_TIMEOUT, status); return; case DISCOV_TYPE_INTERLEAVED: -- cgit v1.1 From f4a2cb4d8f792350ec38b35b94026fc2c4be8d0f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 12:24:22 +0200 Subject: Bluetooth: Simplify le_scan_disable_work() Merge le_scan_disable_work_complete into the main le_scan_disable_work function and take advantage of the updated bredr_inquiry() to run the Inquiry through hci_req_sync(). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 148 +++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 91 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 98827e7..04c3357 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -864,83 +864,31 @@ static void bg_scan_update(struct work_struct *work) hci_dev_unlock(hdev); } -static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) +static int le_scan_disable(struct hci_request *req, unsigned long opt) { - if (status) { - BT_ERR("Failed to start inquiry: status %d", status); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - return; - } + hci_req_add_le_scan_disable(req); + return 0; } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static int bredr_inquiry(struct hci_request *req, unsigned long opt) { + u8 length = opt; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; struct hci_cp_inquiry cp; - int err; - - if (status) { - BT_ERR("Failed to disable LE scanning: status %d", status); - return; - } - - hdev->discovery.scan_start = 0; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - break; - case DISCOV_TYPE_INTERLEAVED: - hci_dev_lock(hdev); - - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* If we were running LE only scan, change discovery - * state. If we were running both LE and BR/EDR inquiry - * simultaneously, and BR/EDR inquiry is already - * finished, stop discovery, otherwise BR/EDR inquiry - * will stop discovery when finished. If we will resolve - * remote device name, do not change discovery state. - */ - if (!test_bit(HCI_INQUIRY, &hdev->flags) && - hdev->discovery.state != DISCOVERY_RESOLVING) - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } else { - struct hci_request req; - - hci_inquiry_cache_flush(hdev); - - hci_req_init(&req, hdev); + BT_DBG("%s", req->hdev->name); - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + hci_dev_lock(req->hdev); + hci_inquiry_cache_flush(req->hdev); + hci_dev_unlock(req->hdev); - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } - } + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = length; - hci_dev_unlock(hdev); - break; - } -} + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); -static int le_scan_disable(struct hci_request *req, unsigned long opt) -{ - hci_req_add_le_scan_disable(req); return 0; } @@ -949,17 +897,57 @@ static void le_scan_disable_work(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); u8 status; - int err; BT_DBG("%s", hdev->name); + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + cancel_delayed_work(&hdev->le_scan_restart); - err = hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); - if (err) + hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); + if (status) { + BT_ERR("Failed to disable LE scan: status 0x%02x", status); + return; + } + + hdev->discovery.scan_start = 0; + + /* If we were running LE only scan, change discovery state. If + * we were running both LE and BR/EDR inquiry simultaneously, + * and BR/EDR inquiry is already finished, stop discovery, + * otherwise BR/EDR inquiry will stop discovery when finished. + * If we will resolve remote device name, do not change + * discovery state. + */ + + if (hdev->discovery.type == DISCOV_TYPE_LE) + goto discov_stopped; + + if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) + return; + + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) + goto discov_stopped; + return; + } + + hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN, + HCI_CMD_TIMEOUT, &status); + if (status) { + BT_ERR("Inquiry failed: status 0x%02x", status); + goto discov_stopped; + } + + return; - le_scan_disable_work_complete(hdev, status); +discov_stopped: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); } static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) @@ -1042,28 +1030,6 @@ static void le_scan_restart_work(struct work_struct *work) le_scan_restart_work_complete(hdev, status); } -static int bredr_inquiry(struct hci_request *req, unsigned long opt) -{ - u8 length = opt; - struct hci_cp_inquiry cp; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - - BT_DBG("%s", req->hdev->name); - - hci_dev_lock(req->hdev); - hci_inquiry_cache_flush(req->hdev); - hci_dev_unlock(req->hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = length; - - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return 0; -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { -- cgit v1.1 From 3dfe5905a7505bc0cbf5f63405631d8e188d9235 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 12:24:23 +0200 Subject: Bluetooth: Remove unnecessary le_scan_restart_work_complete() function The only user of this, le_scan_restart_work(), is so short and simple that it makes sense to just merge the code there. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 60 ++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 04c3357..e8345d8 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -950,12 +950,35 @@ discov_stopped: hci_dev_unlock(hdev); } -static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) +static int le_scan_restart(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_enable cp; + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return 0; + + hci_req_add_le_scan_disable(req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + return 0; +} + +static void le_scan_restart_work(struct work_struct *work) { + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); unsigned long timeout, duration, scan_start, now; + u8 status; BT_DBG("%s", hdev->name); + hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); if (status) { BT_ERR("Failed to restart LE scan: status %d", status); return; @@ -995,41 +1018,6 @@ unlock: hci_dev_unlock(hdev); } -static int le_scan_restart(struct hci_request *req, unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_enable cp; - - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return 0; - - hci_req_add_le_scan_disable(req); - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - - return 0; -} - -static void le_scan_restart_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - u8 status; - int err; - - BT_DBG("%s", hdev->name); - - err = hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); - if (err) - return; - - le_scan_restart_work_complete(hdev, status); -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { -- cgit v1.1 From 0ad06aa6a7682319bb1adcc187a1fa8db6b2da2c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:57 +0200 Subject: Bluetooth: Fix specifying role for LE connections The hci_connect_le_scan() is (as the name implies) a master/central role API, so it makes no sense in passing a role parameter to it. At the same time this patch also fixes the direct advertising support for LE L2CAP sockets where we now call the more appropriate hci_le_connect() API if slave/peripheral role is desired. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/l2cap_core.c | 15 +++++++-------- net/bluetooth/mgmt.c | 3 +-- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1ed1e15..673c225 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -988,7 +988,7 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev, /* This function requires the caller holds hdev->lock */ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout, u8 role) + u16 conn_timeout) { struct hci_conn *conn; @@ -1018,7 +1018,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, BT_DBG("requesting refresh of dst_addr"); - conn = hci_conn_add(hdev, LE_LINK, dst, role); + conn = hci_conn_add(hdev, LE_LINK, dst, HCI_ROLE_MASTER); if (!conn) return ERR_PTR(-ENOMEM); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 66e8b6e..139da81 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7113,8 +7113,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan->dcid = cid; if (bdaddr_type_is_le(dst_type)) { - u8 role; - /* Convert from L2CAP channel address type to HCI address type */ if (dst_type == BDADDR_LE_PUBLIC) @@ -7123,14 +7121,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, dst_type = ADDR_LE_DEV_RANDOM; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - role = HCI_ROLE_SLAVE; + hcon = hci_connect_le(hdev, dst, dst_type, + chan->sec_level, + HCI_LE_CONN_TIMEOUT, + HCI_ROLE_SLAVE); else - role = HCI_ROLE_MASTER; + hcon = hci_connect_le_scan(hdev, dst, dst_type, + chan->sec_level, + HCI_LE_CONN_TIMEOUT); - hcon = hci_connect_le_scan(hdev, dst, dst_type, - chan->sec_level, - HCI_LE_CONN_TIMEOUT, - role); } else { u8 auth_type = l2cap_get_auth_type(chan); hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e4ad045..eca203e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3518,8 +3518,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, sec_level, - HCI_LE_CONN_TIMEOUT, - HCI_ROLE_MASTER); + HCI_LE_CONN_TIMEOUT); } if (IS_ERR(conn)) { -- cgit v1.1 From 658aead94bb65c0141391f20f8c24f51e971b6ea Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:58 +0200 Subject: Bluetooth: Move check for ongoing connect earlier in hci_connect_le() This helps simplify the logic in further patches (less cleanups to do in this failure branch). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 673c225..08a291d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -798,6 +798,12 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EOPNOTSUPP); } + /* Since the controller supports only one LE connection attempt at a + * time, we return -EBUSY if there is any connection attempt running. + */ + if (hci_lookup_le_connect(hdev)) + return ERR_PTR(-EBUSY); + /* Some devices send ATT messages as soon as the physical link is * established. To be able to handle these ATT messages, the user- * space first establishes the connection and then starts the pairing @@ -821,12 +827,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, } } - /* Since the controller supports only one LE connection attempt at a - * time, we return -EBUSY if there is any connection attempt running. - */ - if (hci_lookup_le_connect(hdev)) - return ERR_PTR(-EBUSY); - /* When given an identity address with existing identity * resolving key, the connection needs to be established * to a resolvable random address. -- cgit v1.1 From e2caced40734731e2a17b501840809e30a08141a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:59 +0200 Subject: Bluetooth: Remove conn_unfinished variable from hci_connect_le() The conn_unfinished variable makes the entire logic of hci_connect_le() rather confusing. By restructuring and clarifying the logic we can actually remove the conn_unfinished variable and still keep the same behavior. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 08a291d..2d334e0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -785,7 +785,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 role) { struct hci_conn_params *params; - struct hci_conn *conn, *conn_unfinished; + struct hci_conn *conn; struct smp_irk *irk; struct hci_request req; int err; @@ -804,27 +804,14 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, if (hci_lookup_le_connect(hdev)) return ERR_PTR(-EBUSY); - /* Some devices send ATT messages as soon as the physical link is - * established. To be able to handle these ATT messages, the user- - * space first establishes the connection and then starts the pairing - * process. - * - * So if a hci_conn object already exists for the following connection - * attempt, we simply update pending_sec_level and auth_type fields - * and return the object found. + /* If there's already a connection object but it's not in + * scanning state it means it must already be established, in + * which case we can't do anything else except report a failure + * to connect. */ conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); - conn_unfinished = NULL; - if (conn) { - if (conn->state == BT_CONNECT && - test_bit(HCI_CONN_SCANNING, &conn->flags)) { - BT_DBG("will continue unfinished conn %pMR", dst); - conn_unfinished = conn; - } else { - if (conn->pending_sec_level < sec_level) - conn->pending_sec_level = sec_level; - goto done; - } + if (conn && !test_bit(HCI_CONN_SCANNING, &conn->flags)) { + return ERR_PTR(-EBUSY); } /* When given an identity address with existing identity @@ -842,23 +829,20 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, dst_type = ADDR_LE_DEV_RANDOM; } - if (conn_unfinished) { - conn = conn_unfinished; + if (conn) { bacpy(&conn->dst, dst); } else { conn = hci_conn_add(hdev, LE_LINK, dst, role); + if (!conn) + return ERR_PTR(-ENOMEM); + hci_conn_hold(conn); + conn->pending_sec_level = sec_level; } - if (!conn) - return ERR_PTR(-ENOMEM); - conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; - if (!conn_unfinished) - conn->pending_sec_level = sec_level; - hci_req_init(&req, hdev); /* Disable advertising if we're active. For master role @@ -922,14 +906,6 @@ create_conn: return ERR_PTR(err); } -done: - /* If this is continuation of connect started by hci_connect_le_scan, - * it already called hci_conn_hold and calling it again would mess the - * counter. - */ - if (!conn_unfinished) - hci_conn_hold(conn); - return conn; } -- cgit v1.1 From 7df0f73ece45c2e499b416cbc90949e0226eb134 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 12 Nov 2015 15:15:00 +0200 Subject: Bluetooth: Simplify request cleanup code The hci_req_sync_cancel() is just as much related to the request cleanup as hci_request_cancel_all() is. Just move the former into the latter and do the cleanup from a single place in hci_dev_do_close(). The important thing is to avoid deadlocks by holding the req_sync lock: previously hci_request_cancel_all was done right after releasing the lock and with this patch it's right before taking it. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 4 +--- net/bluetooth/hci_request.c | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fb618d6..63fd31d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1523,7 +1523,7 @@ int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); - hci_req_sync_cancel(hdev, ENODEV); + hci_request_cancel_all(hdev); hci_req_sync_lock(hdev); if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { @@ -1625,8 +1625,6 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_req_sync_unlock(hdev); - hci_request_cancel_all(hdev); - hci_dev_put(hdev); return 0; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e8345d8..76bd912 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1270,6 +1270,8 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { + hci_req_sync_cancel(hdev, ENODEV); + cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); -- cgit v1.1 From 56f9ebe641d613916d3dce710004d48ab66660fa Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 14 Nov 2015 20:22:41 +0100 Subject: mac802154: Delete an unnecessary check before the function call "kfree_skb" The kfree_skb() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Marcel Holtmann --- net/mac802154/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 42e9672..446e130 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -217,8 +217,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, break; } - if (skb) - kfree_skb(skb); + kfree_skb(skb); } static void -- cgit v1.1 From 06fbb3d5c7ff366fe7ab7b4157bdb3096fca6d09 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 14 Nov 2015 22:00:27 +0100 Subject: Bluetooth: Delete an unnecessary check before the function call "kfree_skb" The kfree_skb() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Marcel Holtmann --- net/bluetooth/cmtp/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 298ed37..9e59b66 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -178,8 +178,7 @@ static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff * cmtp_add_msgpart(session, id, skb->data + hdrlen, len); break; default: - if (session->reassembly[id] != NULL) - kfree_skb(session->reassembly[id]); + kfree_skb(session->reassembly[id]); session->reassembly[id] = NULL; break; } -- cgit v1.1 From f37590bd772243db8ce47071a56c3a2b84cb282b Mon Sep 17 00:00:00 2001 From: Prasanna Karthik Date: Tue, 17 Nov 2015 11:06:53 +0000 Subject: Bluetooth: clean up af_bluetooth code Fix error reported by checkpatch. ERROR:"foo* bar" should be "foo *bar" Signed-off-by: Prasanna Karthik Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 34c53d5..a83c6a7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -669,7 +669,7 @@ static const struct file_operations bt_fops = { }; int bt_procfs_init(struct net *net, const char *name, - struct bt_sock_list* sk_list, + struct bt_sock_list *sk_list, int (* seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; @@ -685,7 +685,7 @@ void bt_procfs_cleanup(struct net *net, const char *name) } #else int bt_procfs_init(struct net *net, const char *name, - struct bt_sock_list* sk_list, + struct bt_sock_list *sk_list, int (* seq_show)(struct seq_file *, void *)) { return 0; -- cgit v1.1 From 74b93e9f4ee0ae9292730de1a1e7d919c59c8ad2 Mon Sep 17 00:00:00 2001 From: Prasanna Karthik Date: Wed, 18 Nov 2015 12:38:41 +0000 Subject: Bluetooth: Clean up hci_core code Fix errors reported by checkpatch. - ERROR: spaces required around that ':' (ctx:VxW) - ERROR: open brace '{' following function declarations go on the next line Signed-off-by: Prasanna Karthik Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 63fd31d..89af7e4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -64,7 +64,7 @@ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_DUT_MODE) ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -124,7 +124,7 @@ static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, struct hci_dev *hdev = file->private_data; char buf[3]; - buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y': 'N'; + buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y' : 'N'; buf[1] = '\n'; buf[2] = '\0'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -2600,7 +2600,8 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) } /* This function requires the caller holds hdev->lock */ -struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { +struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) +{ struct adv_info *cur_instance; cur_instance = hci_find_adv_instance(hdev, instance); -- cgit v1.1 From 9a54421018d76c50c2fa82f88dffbfa6af0383d6 Mon Sep 17 00:00:00 2001 From: Prasanna Karthik Date: Thu, 19 Nov 2015 12:05:35 +0000 Subject: Bluetooth: remove unneeded variable in l2cap_stream_rx Remove unneeded variable used to store return value. Error reported by coccicheck. Signed-off-by: Prasanna Karthik Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 139da81..39a5149 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6538,8 +6538,6 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { - int err = 0; - BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); @@ -6570,7 +6568,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, chan->last_acked_seq = control->txseq; chan->expected_tx_seq = __next_seq(chan, control->txseq); - return err; + return 0; } static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) -- cgit v1.1 From c7cad0d6f70cd4ce8644ffe528a4df1cdc2e77f5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:40 -0500 Subject: tipc: move linearization of buffers to generic code In commit 5cbb28a4bf65c7e4 ("tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers") we added linearization of NAME_DISTRIBUTOR, LINK_PROTOCOL/RESET and LINK_PROTOCOL/ACTIVATE to the function tipc_udp_recv(). The location of the change was selected in order to make the commit easily appliable to 'net' and 'stable'. We now move this linearization to where it should be done, in the functions tipc_named_rcv() and tipc_link_proto_rcv() respectively. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 ++ net/tipc/name_distr.c | 1 + net/tipc/udp_media.c | 5 ----- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 9efbdbd..fa452fb 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1260,6 +1260,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* fall thru' */ case ACTIVATE_MSG: + skb_linearize(skb); + hdr = buf_msg(skb); /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c07612b..f51c8bd 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -397,6 +397,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) spin_lock_bh(&tn->nametbl_lock); for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + skb_linearize(skb); msg = buf_msg(skb); mtype = msg_type(msg); item = (struct distr_item *)msg_data(msg); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ad2719a..816914e 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,7 +48,6 @@ #include #include "core.h" #include "bearer.h" -#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -221,10 +220,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; - int usr = msg_user(buf_msg(skb)); - - if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) - skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { -- cgit v1.1 From 5c10e9794013143eec80d494603d46dcb219970a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:41 -0500 Subject: tipc: small cleanup of function tipc_node_check_state() The function tipc_node_check_state() contains the core logics for handling link synchronization and failover. For this reason, it is important to keep it as comprehensible as possible. In this commit, we make three small cleanups. 1) If the node is in state SELF_DOWN_PEER_LEAVING and the received packet confirms that the peer has lost contact, there will be no further action in this function. To make this clearer, we return from the function directly after the state change. 2) Since commit 0f8b8e28fb3241f9fd ("tipc: eliminate risk of stalled link synchronization") only the logically first TUNNEL_PROTO/SYNCH packet can alter the link state and set the synch point, independently of arrival order. Hence, there is not any longer any need to adjust the synch value in case such packets arrive in disorder. We remove this adjustment. 3) It is the intention that any message arriving on any of the links may trig a check for and possible termination of a node SYNCH state. A redundant and unnoticed check for tipc_link_is_synching() obviously beats this purpose, with the effect that only packets arriving on the synching link may currently end the synch state. We remove this check. This change will further shorten the synchronization period between parallel links. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 20cddec..7756804 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1187,6 +1187,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if (msg_peer_node_is_up(hdr)) return false; tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; } /* Ignore duplicate packets */ @@ -1232,12 +1233,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); } - if (less(syncpt, n->sync_point)) - n->sync_point = syncpt; } /* Open tunnel link when parallel link reaches synch point */ - if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) { + if (n->state == NODE_SYNCHING) { if (tipc_link_is_synching(l)) { tnl = l; } else { -- cgit v1.1 From 1d7e1c2595bd20c5274a8e49d89cf0cf483759de Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:42 -0500 Subject: tipc: reduce code dependency between binding table and node layer The file name_distr.c currently contains three functions, named_cluster_distribute(), tipc_publ_subcscribe() and tipc_publ_unsubscribe() that all directly access fields in struct tipc_node. We want to eliminate such dependencies, so we move those functions to the file node.c and rename them to tipc_node_broadcast(), tipc_node_subscribe() and tipc_node_unsubscribe() respectively. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/core.h | 5 ++++ net/tipc/name_distr.c | 67 +++------------------------------------------------ net/tipc/name_distr.h | 1 - net/tipc/name_table.c | 5 ++-- net/tipc/node.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ net/tipc/node.h | 3 +++ 6 files changed, 74 insertions(+), 67 deletions(-) (limited to 'net') diff --git a/net/tipc/core.h b/net/tipc/core.h index 18e95a8..5504d63 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -118,6 +118,11 @@ static inline int tipc_netid(struct net *net) return tipc_net(net)->net_id; } +static inline struct list_head *tipc_nodes(struct net *net) +{ + return &tipc_net(net)->node_list; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index f51c8bd..ebe9d0f 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -84,31 +84,6 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, return buf; } -void named_cluster_distribute(struct net *net, struct sk_buff *skb) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *oskb; - struct tipc_node *node; - u32 dnode; - - rcu_read_lock(); - list_for_each_entry_rcu(node, &tn->node_list, list) { - dnode = node->addr; - if (in_own_node(net, dnode)) - continue; - if (!tipc_node_is_up(node)) - continue; - oskb = pskb_copy(skb, GFP_ATOMIC); - if (!oskb) - break; - msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, 0); - } - rcu_read_unlock(); - - kfree_skb(skb); -} - /** * tipc_named_publish - tell other nodes about a new publication by this node */ @@ -226,42 +201,6 @@ void tipc_named_node_up(struct net *net, u32 dnode) tipc_node_xmit(net, &head, dnode, 0); } -static void tipc_publ_subscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - if (in_own_node(net, addr)) - return; - - node = tipc_node_find(net, addr); - if (!node) { - pr_warn("Node subscription rejected, unknown node 0x%x\n", - addr); - return; - } - - tipc_node_lock(node); - list_add_tail(&publ->nodesub_list, &node->publ_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - -static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, - u32 addr) -{ - struct tipc_node *node; - - node = tipc_node_find(net, addr); - if (!node) - return; - - tipc_node_lock(node); - list_del_init(&publ->nodesub_list); - tipc_node_unlock(node); - tipc_node_put(node); -} - /** * tipc_publ_purge - remove publication associated with a failed node * @@ -277,7 +216,7 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) - tipc_publ_unsubscribe(net, p, addr); + tipc_node_unsubscribe(net, &p->nodesub_list, addr); spin_unlock_bh(&tn->nametbl_lock); if (p != publ) { @@ -317,7 +256,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, TIPC_CLUSTER_SCOPE, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_subscribe(net, publ, node); + tipc_node_subscribe(net, &publ->nodesub_list, node); return true; } } else if (dtype == WITHDRAWAL) { @@ -326,7 +265,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, node, ntohl(i->ref), ntohl(i->key)); if (publ) { - tipc_publ_unsubscribe(net, publ, node); + tipc_node_unsubscribe(net, &publ->nodesub_list, node); kfree_rcu(publ, rcu); return true; } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index dd2d9fd..1264ba0 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -69,7 +69,6 @@ struct distr_item { struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); -void named_cluster_distribute(struct net *net, struct sk_buff *buf); void tipc_named_node_up(struct net *net, u32 dnode); void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); void tipc_named_reinit(struct net *net); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 0f47f08..91fce70 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -42,6 +42,7 @@ #include "subscr.h" #include "bcast.h" #include "addr.h" +#include "node.h" #include #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -677,7 +678,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, spin_unlock_bh(&tn->nametbl_lock); if (buf) - named_cluster_distribute(net, buf); + tipc_node_broadcast(net, buf); return publ; } @@ -709,7 +710,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, spin_unlock_bh(&tn->nametbl_lock); if (skb) { - named_cluster_distribute(net, skb); + tipc_node_broadcast(net, skb); return 1; } return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index 7756804..9321952 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -234,6 +234,42 @@ void tipc_node_stop(struct net *net) spin_unlock_bh(&tn->node_list_lock); } +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_lock(n); + list_add_tail(subscr, &n->publ_list); + tipc_node_unlock(n); + tipc_node_put(n); +} + +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) +{ + struct tipc_node *n; + + if (in_own_node(net, addr)) + return; + + n = tipc_node_find(net, addr); + if (!n) { + pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); + return; + } + tipc_node_lock(n); + list_del_init(subscr); + tipc_node_unlock(n); + tipc_node_put(n); +} + int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; @@ -1075,6 +1111,30 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +{ + struct sk_buff *txskb; + struct tipc_node *n; + u32 dst; + + rcu_read_lock(); + list_for_each_entry_rcu(n, tipc_nodes(net), list) { + dst = n->addr; + if (in_own_node(net, dst)) + continue; + if (!tipc_node_is_up(n)) + continue; + txskb = pskb_copy(skb, GFP_ATOMIC); + if (!txskb) + break; + msg_set_destnode(buf_msg(txskb), dst); + tipc_node_xmit_skb(net, txskb, dst, 0); + } + rcu_read_unlock(); + + kfree_skb(skb); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace diff --git a/net/tipc/node.h b/net/tipc/node.h index 6734562..dd79e97 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -149,6 +149,9 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.1 From 2312bf61ae365fdd6b9bfb24558a417859759447 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:43 -0500 Subject: tipc: introduce per-link spinlock As a preparation to allow parallel links to work more independently from each other we introduce a per-link spinlock, to be stored in the struct nodes's link entry area. Since the node lock still is a regular spinlock there is no increase in parallellism at this stage. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 9 +++++---- net/tipc/node.c | 39 ++++++++++++++++++--------------------- net/tipc/node.h | 3 ++- 3 files changed, 25 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index fa452fb..b5e895c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1995,6 +1995,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -2020,17 +2021,17 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; - + le = &node->links[bearer_id]; tipc_node_lock(node); - - link = node->links[bearer_id].link; + spin_lock_bh(&le->lock); + link = le->link; if (!link) { tipc_node_unlock(node); return -EINVAL; } link_reset_statistics(link); - + spin_unlock_bh(&le->lock); tipc_node_unlock(node); return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index 9321952..572063a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -339,11 +339,13 @@ static void tipc_node_timeout(unsigned long data) for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { tipc_node_lock(n); le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); if (le->link) { /* Link tolerance may change asynchronously: */ tipc_node_calculate_timer(n, le->link); rc = tipc_link_timeout(le->link, &xmitq); } + spin_unlock_bh(&le->lock); tipc_node_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) @@ -654,6 +656,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; + spin_lock_init(&le->lock); n->link_cnt++; tipc_node_calculate_timer(n, l); if (n->link_cnt == 1) @@ -1033,20 +1036,6 @@ msg_full: return -EMSGSIZE; } -static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, - int *bearer_id, - struct tipc_media_addr **maddr) -{ - int id = n->active_links[sel & 1]; - - if (unlikely(id < 0)) - return NULL; - - *bearer_id = id; - *maddr = &n->links[id].maddr; - return n->links[id].link; -} - /** * tipc_node_xmit() is the general link level function for message sending * @net: the applicable net namespace @@ -1059,26 +1048,32 @@ static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel, int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { - struct tipc_link *l = NULL; + struct tipc_link_entry *le; struct tipc_node *n; struct sk_buff_head xmitq; - struct tipc_media_addr *maddr; - int bearer_id; + struct tipc_media_addr *maddr = NULL; + int bearer_id = -1; int rc = -EHOSTUNREACH; __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); if (likely(n)) { tipc_node_lock(n); - l = tipc_node_select_link(n, selector, &bearer_id, &maddr); - if (likely(l)) - rc = tipc_link_xmit(l, list, &xmitq); + bearer_id = n->active_links[selector & 1]; + if (bearer_id >= 0) { + le = &n->links[bearer_id]; + maddr = &le->maddr; + spin_lock_bh(&le->lock); + if (likely(le->link)) + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + } tipc_node_unlock(n); if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); } - if (likely(!rc)) { + if (likely(!skb_queue_empty(&xmitq))) { tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); return 0; } @@ -1374,7 +1369,9 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check and if necessary update node state */ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { + spin_lock_bh(&le->lock); rc = tipc_link_rcv(le->link, skb, &xmitq); + spin_unlock_bh(&le->lock); skb = NULL; } unlock: diff --git a/net/tipc/node.h b/net/tipc/node.h index dd79e97..8784907 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -69,6 +69,7 @@ enum { struct tipc_link_entry { struct tipc_link *link; + spinlock_t lock; /* per-link */ u32 mtu; struct sk_buff_head inputq; struct tipc_media_addr maddr; @@ -86,7 +87,7 @@ struct tipc_bclink_entry { * struct tipc_node - TIPC node structure * @addr: network address of node * @ref: reference counter to node object - * @lock: spinlock governing access to structure + * @lock: rwlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain * @inputq: pointer to input queue containing messages for msg event -- cgit v1.1 From 5405ff6e15f40f2f53e37d2dcd7de521e2b7a96f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:44 -0500 Subject: tipc: convert node lock to rwlock According to the node FSM a node in state SELF_UP_PEER_UP cannot change state inside a lock context, except when a TUNNEL_PROTOCOL (SYNCH or FAILOVER) packet arrives. However, the node's individual links may still change state. Since each link now is protected by its own spinlock, we finally have the conditions in place to convert the node spinlock to an rwlock_t. If the node state and arriving packet type are rigth, we can let the link directly receive the packet under protection of its own spinlock and the node lock in read mode. In all other cases we use the node lock in write mode. This enables full concurrent execution between parallel links during steady-state traffic situations, i.e., 99+ % of the time. This commit implements this change. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 32 ++++---- net/tipc/node.c | 227 +++++++++++++++++++++++++++++--------------------------- net/tipc/node.h | 10 +-- 3 files changed, 136 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b5e895c..1dda46e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1547,7 +1547,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, *bearer_id = 0; rcu_read_lock(); list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_lock(n_ptr); + tipc_node_read_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { l_ptr = n_ptr->links[i].link; if (l_ptr && !strcmp(l_ptr->name, link_name)) { @@ -1556,7 +1556,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net, break; } } - tipc_node_unlock(n_ptr); + tipc_node_read_unlock(n_ptr); if (found_node) break; } @@ -1658,7 +1658,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) if (!node) return -EINVAL; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { @@ -1699,7 +1699,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) } out: - tipc_node_unlock(node); + tipc_node_read_unlock(node); return res; } @@ -1898,10 +1898,10 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_continue_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); - tipc_node_unlock(node); + tipc_node_read_unlock(node); if (err) goto out; @@ -1913,10 +1913,10 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out; list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); - tipc_node_unlock(node); + tipc_node_read_unlock(node); if (err) goto out; @@ -1967,16 +1967,16 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) if (!node) return -EINVAL; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { - tipc_node_unlock(node); + tipc_node_read_unlock(node); nlmsg_free(msg.skb); return -EINVAL; } err = __tipc_nl_add_link(net, &msg, link, 0); - tipc_node_unlock(node); + tipc_node_read_unlock(node); if (err) { nlmsg_free(msg.skb); return err; @@ -2021,18 +2021,18 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) node = tipc_link_find_owner(net, link_name, &bearer_id); if (!node) return -EINVAL; + le = &node->links[bearer_id]; - tipc_node_lock(node); + tipc_node_read_lock(node); spin_lock_bh(&le->lock); link = le->link; if (!link) { - tipc_node_unlock(node); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); return -EINVAL; } - link_reset_statistics(link); spin_unlock_bh(&le->lock); - tipc_node_unlock(node); - + tipc_node_read_unlock(node); return 0; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 572063a..47d5f84 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -141,10 +141,63 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } +void tipc_node_read_lock(struct tipc_node *n) +{ + read_lock_bh(&n->lock); +} + +void tipc_node_read_unlock(struct tipc_node *n) +{ + read_unlock_bh(&n->lock); +} + +static void tipc_node_write_lock(struct tipc_node *n) +{ + write_lock_bh(&n->lock); +} + +static void tipc_node_write_unlock(struct tipc_node *n) +{ + struct net *net = n->net; + u32 addr = 0; + u32 flags = n->action_flags; + u32 link_id = 0; + struct list_head *publ_list; + + if (likely(!flags)) { + write_unlock_bh(&n->lock); + return; + } + + addr = n->addr; + link_id = n->link_id; + publ_list = &n->publ_list; + + n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); + + write_unlock_bh(&n->lock); + + if (flags & TIPC_NOTIFY_NODE_DOWN) + tipc_publ_notify(net, publ_list, addr); + + if (flags & TIPC_NOTIFY_NODE_UP) + tipc_named_node_up(net, addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, + link_id, addr); +} + struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n_ptr, *temp_node; + int i; spin_lock_bh(&tn->node_list_lock); n_ptr = tipc_node_find(net, addr); @@ -159,7 +212,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->net = net; n_ptr->capabilities = capabilities; kref_init(&n_ptr->kref); - spin_lock_init(&n_ptr->lock); + rwlock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); @@ -168,6 +221,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) skb_queue_head_init(&n_ptr->bc_entry.inputq1); __skb_queue_head_init(&n_ptr->bc_entry.arrvq); skb_queue_head_init(&n_ptr->bc_entry.inputq2); + for (i = 0; i < MAX_BEARERS; i++) + spin_lock_init(&n_ptr->links[i].lock); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -246,9 +301,9 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); return; } - tipc_node_lock(n); + tipc_node_write_lock(n); list_add_tail(subscr, &n->publ_list); - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_node_put(n); } @@ -264,9 +319,9 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); return; } - tipc_node_lock(n); + tipc_node_write_lock(n); list_del_init(subscr); - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_node_put(n); } @@ -293,9 +348,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) conn->port = port; conn->peer_port = peer_port; - tipc_node_lock(node); + tipc_node_write_lock(node); list_add_tail(&conn->list, &node->conn_sks); - tipc_node_unlock(node); + tipc_node_write_unlock(node); exit: tipc_node_put(node); return err; @@ -313,14 +368,14 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) if (!node) return; - tipc_node_lock(node); + tipc_node_write_lock(node); list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { if (port != conn->port) continue; list_del(&conn->list); kfree(conn); } - tipc_node_unlock(node); + tipc_node_write_unlock(node); tipc_node_put(node); } @@ -337,7 +392,7 @@ static void tipc_node_timeout(unsigned long data) __skb_queue_head_init(&xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { - tipc_node_lock(n); + tipc_node_read_lock(n); le = &n->links[bearer_id]; spin_lock_bh(&le->lock); if (le->link) { @@ -346,7 +401,7 @@ static void tipc_node_timeout(unsigned long data) rc = tipc_link_timeout(le->link, &xmitq); } spin_unlock_bh(&le->lock); - tipc_node_unlock(n); + tipc_node_read_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); @@ -425,9 +480,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { - tipc_node_lock(n); + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); - tipc_node_unlock(n); + tipc_node_write_unlock(n); } /** @@ -516,7 +571,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) __skb_queue_head_init(&xmitq); - tipc_node_lock(n); + tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); if (delete) { @@ -528,7 +583,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) /* Defuse pending tipc_node_link_up() */ tipc_link_fsm_evt(l, LINK_RESET_EVT); } - tipc_node_unlock(n); + tipc_node_write_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } @@ -561,7 +616,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (!n) return; - tipc_node_lock(n); + tipc_node_write_lock(n); le = &n->links[b->identity]; @@ -656,7 +711,6 @@ void tipc_node_check_dest(struct net *net, u32 onode, if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; - spin_lock_init(&le->lock); n->link_cnt++; tipc_node_calculate_timer(n, l); if (n->link_cnt == 1) @@ -665,7 +719,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: - tipc_node_unlock(n); + tipc_node_write_unlock(n); if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); @@ -873,24 +927,6 @@ illegal_evt: pr_err("Illegal node fsm evt %x in state %x\n", evt, state); } -bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) -{ - int state = n->state; - - if (likely(state == SELF_UP_PEER_UP)) - return true; - - if (state == SELF_LEAVING_PEER_DOWN) - return false; - - if (state == SELF_DOWN_PEER_LEAVING) { - if (msg_peer_node_is_up(hdr)) - return false; - } - - return true; -} - static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { @@ -952,56 +988,18 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, if (bearer_id >= MAX_BEARERS) goto exit; - tipc_node_lock(node); + tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { strncpy(linkname, link->name, len); err = 0; } exit: - tipc_node_unlock(node); + tipc_node_read_unlock(node); tipc_node_put(node); return err; } -void tipc_node_unlock(struct tipc_node *node) -{ - struct net *net = node->net; - u32 addr = 0; - u32 flags = node->action_flags; - u32 link_id = 0; - struct list_head *publ_list; - - if (likely(!flags)) { - spin_unlock_bh(&node->lock); - return; - } - - addr = node->addr; - link_id = node->link_id; - publ_list = &node->publ_list; - - node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); - - spin_unlock_bh(&node->lock); - - if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); - - if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); - - if (flags & TIPC_NOTIFY_LINK_UP) - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, addr); - - if (flags & TIPC_NOTIFY_LINK_DOWN) - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - link_id, addr); - -} - /* Caller should hold node lock for the passed node */ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { @@ -1048,40 +1046,38 @@ msg_full: int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { - struct tipc_link_entry *le; + struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; - struct tipc_media_addr *maddr = NULL; int bearer_id = -1; int rc = -EHOSTUNREACH; __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); if (likely(n)) { - tipc_node_lock(n); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (bearer_id >= 0) { le = &n->links[bearer_id]; - maddr = &le->maddr; spin_lock_bh(&le->lock); - if (likely(le->link)) - rc = tipc_link_xmit(le->link, list, &xmitq); + rc = tipc_link_xmit(le->link, list, &xmitq); spin_unlock_bh(&le->lock); } - tipc_node_unlock(n); + tipc_node_read_unlock(n); + if (likely(!skb_queue_empty(&xmitq))) { + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + return 0; + } if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); + return rc; } - if (likely(!skb_queue_empty(&xmitq))) { - tipc_bearer_xmit(net, bearer_id, &xmitq, maddr); - return 0; - } - if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } - return rc; + + if (unlikely(!in_own_node(net, dnode))) + return rc; + tipc_sk_rcv(net, list); + return 0; } /* tipc_node_xmit_skb(): send single buffer to destination @@ -1171,9 +1167,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { - tipc_node_lock(n); + tipc_node_read_lock(n); tipc_link_build_ack_msg(le->link, &xmitq); - tipc_node_unlock(n); + tipc_node_read_unlock(n); } if (!skb_queue_empty(&xmitq)) @@ -1229,7 +1225,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, } } - /* Update node accesibility if applicable */ + /* Check and update node accesibility if applicable */ if (state == SELF_UP_PEER_COMING) { if (!tipc_link_is_up(l)) return true; @@ -1245,6 +1241,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } + if (state == SELF_LEAVING_PEER_DOWN) + return false; + /* Ignore duplicate packets */ if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; @@ -1361,21 +1360,29 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) else if (unlikely(n->bc_entry.link->acked != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); - tipc_node_lock(n); - - /* Is reception permitted at the moment ? */ - if (!tipc_node_filter_pkt(n, hdr)) - goto unlock; - - /* Check and if necessary update node state */ - if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { + /* Receive packet directly if conditions permit */ + tipc_node_read_lock(n); + if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { spin_lock_bh(&le->lock); - rc = tipc_link_rcv(le->link, skb, &xmitq); + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } spin_unlock_bh(&le->lock); - skb = NULL; } -unlock: - tipc_node_unlock(n); + tipc_node_read_unlock(n); + + /* Check/update node state before receiving */ + if (unlikely(skb)) { + tipc_node_write_lock(n); + if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + } + tipc_node_write_unlock(n); + } if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, bearer_id, &xmitq); @@ -1440,15 +1447,15 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - tipc_node_lock(node); + tipc_node_read_lock(node); err = __tipc_nl_add_node(&msg, node); if (err) { last_addr = node->addr; - tipc_node_unlock(node); + tipc_node_read_unlock(node); goto out; } - tipc_node_unlock(node); + tipc_node_read_unlock(node); } done = 1; out: diff --git a/net/tipc/node.h b/net/tipc/node.h index 8784907..651a1581 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -109,7 +109,7 @@ struct tipc_bclink_entry { struct tipc_node { u32 addr; struct kref kref; - spinlock_t lock; + rwlock_t lock; struct net *net; struct hlist_node hash; int active_links[2]; @@ -145,7 +145,8 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -void tipc_node_unlock(struct tipc_node *node); +void tipc_node_read_lock(struct tipc_node *n); +void tipc_node_read_unlock(struct tipc_node *node); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, @@ -157,11 +158,6 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); -static inline void tipc_node_lock(struct tipc_node *node) -{ - spin_lock_bh(&node->lock); -} - static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) { int bearer_id = n->active_links[sel & 1]; -- cgit v1.1 From 5be9c086715c10fb9ae3ffc0ef580dc3a165f98a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:45 -0500 Subject: tipc: narrow down exposure of struct tipc_node In our effort to have less code and include dependencies between entities such as node, link and bearer, we try to narrow down the exposed interface towards the node as much as possible. In this commit, we move the definition of struct tipc_node, along with many of its associated function declarations, from node.h to node.c. We also move some function definitions from link.c and name_distr.c to node.c, since they access fields in struct tipc_node that should not be externally visible. The moved functions are renamed according to new location, and made static whenever possible. There are no functional changes in this commit. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.h | 1 + net/tipc/link.c | 337 +--------------------------------- net/tipc/link.h | 9 +- net/tipc/netlink.c | 6 +- net/tipc/netlink_compat.c | 4 +- net/tipc/node.c | 449 +++++++++++++++++++++++++++++++++++++++++++++- net/tipc/node.h | 117 +----------- 7 files changed, 462 insertions(+), 461 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2855b93..1944c6c 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -43,6 +43,7 @@ struct tipc_node; struct tipc_msg; struct tipc_nl_msg; struct tipc_node_map; +extern const char tipc_bclink_name[]; int tipc_bcast_init(struct net *net); void tipc_bcast_reinit(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1dda46e..c513a80 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -50,23 +50,6 @@ */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; -static const char tipc_bclink_name[] = "broadcast-link"; - -static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { - [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { - .type = NLA_STRING, - .len = TIPC_MAX_LINK_NAME - }, - [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, - [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, - [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, - [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } -}; /* Properties valid for media, bearar and link */ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { @@ -117,7 +100,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); -static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static void tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); @@ -1527,49 +1509,11 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } -/* tipc_link_find_owner - locate owner node of link by link's name - * @net: the applicable net namespace - * @name: pointer to link name string - * @bearer_id: pointer to index in 'node->links' array where the link was found. - * - * Returns pointer to node owning the link, or 0 if no matching link is found. - */ -static struct tipc_node *tipc_link_find_owner(struct net *net, - const char *link_name, - unsigned int *bearer_id) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - struct tipc_node *found_node = NULL; - int i; - - *bearer_id = 0; - rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_read_lock(n_ptr); - for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i].link; - if (l_ptr && !strcmp(l_ptr->name, link_name)) { - *bearer_id = i; - found_node = n_ptr; - break; - } - } - tipc_node_read_unlock(n_ptr); - if (found_node) - break; - } - rcu_read_unlock(); - - return found_node; -} - /** * link_reset_statistics - reset link statistics * @l_ptr: pointer to link */ -static void link_reset_statistics(struct tipc_link *l_ptr) +void link_reset_statistics(struct tipc_link *l_ptr) { memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); l_ptr->stats.sent_info = l_ptr->snd_nxt; @@ -1626,84 +1570,6 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) return 0; } -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) -{ - int err; - int res = 0; - int bearer_id; - char *name; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); - if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - - if (strcmp(name, tipc_bclink_name) == 0) - return tipc_nl_bc_link_set(net, attrs); - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - - tipc_node_read_lock(node); - - link = node->links[bearer_id].link; - if (!link) { - res = -EINVAL; - goto out; - } - - if (attrs[TIPC_NLA_LINK_PROP]) { - struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; - - err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], - props); - if (err) { - res = err; - goto out; - } - - if (props[TIPC_NLA_PROP_TOL]) { - u32 tol; - - tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); - } - if (props[TIPC_NLA_PROP_PRIO]) { - u32 prio; - - prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); - } - if (props[TIPC_NLA_PROP_WIN]) { - u32 win; - - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); - } - } - -out: - tipc_node_read_unlock(node); - - return res; -} - static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; @@ -1770,8 +1636,8 @@ msg_full: } /* Caller should hold appropriate locks to protect the link */ -static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, - struct tipc_link *link, int nlflags) +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) { int err; void *hdr; @@ -1839,200 +1705,3 @@ msg_full: return -EMSGSIZE; } - -/* Caller should hold node lock */ -static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) -{ - u32 i; - int err; - - for (i = *prev_link; i < MAX_BEARERS; i++) { - *prev_link = i; - - if (!node->links[i].link) - continue; - - err = __tipc_nl_add_link(net, msg, - node->links[i].link, NLM_F_MULTI); - if (err) - return err; - } - *prev_link = 0; - - return 0; -} - -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node; - struct tipc_nl_msg msg; - u32 prev_node = cb->args[0]; - u32 prev_link = cb->args[1]; - int done = cb->args[2]; - int err; - - if (done) - return 0; - - msg.skb = skb; - msg.portid = NETLINK_CB(cb->skb).portid; - msg.seq = cb->nlh->nlmsg_seq; - - rcu_read_lock(); - if (prev_node) { - node = tipc_node_find(net, prev_node); - if (!node) { - /* We never set seq or call nl_dump_check_consistent() - * this means that setting prev_seq here will cause the - * consistence check to fail in the netlink callback - * handler. Resulting in the last NLMSG_DONE message - * having the NLM_F_DUMP_INTR flag set. - */ - cb->prev_seq = 1; - goto out; - } - tipc_node_put(node); - - list_for_each_entry_continue_rcu(node, &tn->node_list, - list) { - tipc_node_read_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_read_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } else { - err = tipc_nl_add_bc_link(net, &msg); - if (err) - goto out; - - list_for_each_entry_rcu(node, &tn->node_list, list) { - tipc_node_read_lock(node); - err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); - tipc_node_read_unlock(node); - if (err) - goto out; - - prev_node = node->addr; - } - } - done = 1; -out: - rcu_read_unlock(); - - cb->args[0] = prev_node; - cb->args[1] = prev_link; - cb->args[2] = done; - - return skb->len; -} - -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) -{ - struct net *net = genl_info_net(info); - struct tipc_nl_msg msg; - char *name; - int err; - - msg.portid = info->snd_portid; - msg.seq = info->snd_seq; - - if (!info->attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - - msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!msg.skb) - return -ENOMEM; - - if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } else { - int bearer_id; - struct tipc_node *node; - struct tipc_link *link; - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - - tipc_node_read_lock(node); - link = node->links[bearer_id].link; - if (!link) { - tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; - } - - err = __tipc_nl_add_link(net, &msg, link, 0); - tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } - } - - return genlmsg_reply(msg.skb, info); -} - -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) -{ - int err; - char *link_name; - unsigned int bearer_id; - struct tipc_link *link; - struct tipc_node *node; - struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; - struct net *net = sock_net(skb->sk); - struct tipc_link_entry *le; - - if (!info->attrs[TIPC_NLA_LINK]) - return -EINVAL; - - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); - if (err) - return err; - - if (!attrs[TIPC_NLA_LINK_NAME]) - return -EINVAL; - - link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); - if (err) - return err; - return 0; - } - - node = tipc_link_find_owner(net, link_name, &bearer_id); - if (!node) - return -EINVAL; - - le = &node->links[bearer_id]; - tipc_node_read_lock(node); - spin_lock_bh(&le->lock); - link = le->link; - if (!link) { - spin_unlock_bh(&le->lock); - tipc_node_read_unlock(node); - return -EINVAL; - } - link_reset_statistics(link); - spin_unlock_bh(&le->lock); - tipc_node_read_unlock(node); - return 0; -} diff --git a/net/tipc/link.h b/net/tipc/link.h index 66d859b..a7ee806 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -249,14 +249,15 @@ bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); +void link_reset_statistics(struct tipc_link *l); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); +void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, + u32 gap, u32 tolerance, u32 priority); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); - +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags); int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); -int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info); -int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 7f6475e..29dfcc9 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -101,18 +101,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_LINK_GET, - .doit = tipc_nl_link_get, + .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_link_dump, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, - .doit = tipc_nl_link_set, + .doit = tipc_nl_node_set_link, .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, - .doit = tipc_nl_link_reset_stats, + .doit = tipc_nl_node_reset_link_stats, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1eadc95..acda1ce 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1036,12 +1036,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) case TIPC_CMD_SET_LINK_PRI: case TIPC_CMD_SET_LINK_WINDOW: msg->req_type = TIPC_TLV_LINK_CONFIG; - doit.doit = tipc_nl_link_set; + doit.doit = tipc_nl_node_set_link; doit.transcode = tipc_nl_compat_link_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_RESET_LINK_STATS: msg->req_type = TIPC_TLV_LINK_NAME; - doit.doit = tipc_nl_link_reset_stats; + doit.doit = tipc_nl_node_reset_link_stats; doit.transcode = tipc_nl_compat_link_reset_stats; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SHOW_NAME_TABLE: diff --git a/net/tipc/node.c b/net/tipc/node.c index 47d5f84..e110ba6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,87 @@ #include "bcast.h" #include "discover.h" +/* Out-of-range value for node signature */ +#define INVALID_NODE_SIG 0x10000 + +#define INVALID_BEARER_ID -1 + +/* Flags used to take different actions according to flag type + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type + */ +enum { + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7) +}; + +struct tipc_link_entry { + struct tipc_link *link; + spinlock_t lock; /* per link */ + u32 mtu; + struct sk_buff_head inputq; + struct tipc_media_addr maddr; +}; + +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @ref: reference counter to node object + * @lock: rwlock governing access to structure + * @net: the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @active_links: bearer ids of active links, used as index into links[] array + * @links: array containing references to all links to node + * @action_flags: bit mask of different types of node actions + * @state: connectivity state vs peer node + * @sync_point: sequence number where synch/failover is finished + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @publ_list: list of publications + * @rcu: rcu struct for tipc_node + */ +struct tipc_node { + u32 addr; + struct kref kref; + rwlock_t lock; + struct net *net; + struct hlist_node hash; + int active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; + int action_flags; + struct list_head list; + int state; + u16 sync_point; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + struct list_head publ_list; + struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; + struct rcu_head rcu; +}; + /* Node FSM states and events: */ enum { @@ -75,6 +156,9 @@ static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); +static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static void tipc_node_put(struct tipc_node *node); +static bool tipc_node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -83,12 +167,54 @@ struct tipc_sock_conn { struct list_head list; }; +static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { + .type = NLA_STRING, + .len = TIPC_MAX_LINK_NAME + }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } }; +static struct tipc_link *node_active_link(struct tipc_node *n, int sel) +{ + int bearer_id = n->active_links[sel & 1]; + + if (unlikely(bearer_id == INVALID_BEARER_ID)) + return NULL; + + return n->links[bearer_id].link; +} + +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) +{ + struct tipc_node *n; + int bearer_id; + unsigned int mtu = MAX_MSG_SIZE; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return mtu; + + bearer_id = n->active_links[sel & 1]; + if (likely(bearer_id != INVALID_BEARER_ID)) + mtu = n->links[bearer_id].mtu; + tipc_node_put(n); + return mtu; +} /* * A trivial power-of-two bitmask technique is used for speed, since this * operation is done for every incoming TIPC packet. The number of hash table @@ -107,7 +233,7 @@ static void tipc_node_kref_release(struct kref *kref) tipc_node_delete(node); } -void tipc_node_put(struct tipc_node *node) +static void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } @@ -120,7 +246,7 @@ static void tipc_node_get(struct tipc_node *node) /* * tipc_node_find - locate specified node object, if it exists */ -struct tipc_node *tipc_node_find(struct net *net, u32 addr) +static struct tipc_node *tipc_node_find(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; @@ -141,12 +267,12 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) return NULL; } -void tipc_node_read_lock(struct tipc_node *n) +static void tipc_node_read_lock(struct tipc_node *n) { read_lock_bh(&n->lock); } -void tipc_node_read_unlock(struct tipc_node *n) +static void tipc_node_read_unlock(struct tipc_node *n) { read_unlock_bh(&n->lock); } @@ -588,7 +714,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_sk_rcv(n->net, &le->inputq); } -bool tipc_node_is_up(struct tipc_node *n) +static bool tipc_node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } @@ -1465,3 +1591,316 @@ out: return skb->len; } + +/* tipc_link_find_owner - locate owner node of link by link's name + * @net: the applicable net namespace + * @name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * + * Returns pointer to node owning the link, or 0 if no matching link is found. + */ +static struct tipc_node *tipc_link_find_owner(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l_ptr; + struct tipc_node *n_ptr; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { + tipc_node_read_lock(n_ptr); + for (i = 0; i < MAX_BEARERS; i++) { + l_ptr = n_ptr->links[i].link; + if (l_ptr && !strcmp(l_ptr->name, link_name)) { + *bearer_id = i; + found_node = n_ptr; + break; + } + } + tipc_node_read_unlock(n_ptr); + if (found_node) + break; + } + rcu_read_unlock(); + + return found_node; +} + +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + + link = node->links[bearer_id].link; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + link->tolerance = tol; + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + link->priority = prio; + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_read_unlock(node); + + return res; +} + +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct tipc_nl_msg msg; + char *name; + int err; + + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + if (!info->attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); + nlmsg_free(msg.skb); + return -EINVAL; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(net); + if (err) + return err; + return 0; + } + + node = tipc_link_find_owner(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + le = &node->links[bearer_id]; + tipc_node_read_lock(node); + spin_lock_bh(&le->lock); + link = node->links[bearer_id].link; + if (!link) { + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return -EINVAL; + } + link_reset_statistics(link); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return 0; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if (!node->links[i].link) + continue; + + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 651a1581..1fbed29 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,23 +42,8 @@ #include "bearer.h" #include "msg.h" -/* Out-of-range value for node signature */ -#define INVALID_NODE_SIG 0x10000 - #define INVALID_BEARER_ID -1 -/* Flags used to take different actions according to flag type - * TIPC_NOTIFY_NODE_DOWN: notify node is down - * TIPC_NOTIFY_NODE_UP: notify node is up - * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type - */ -enum { - TIPC_NOTIFY_NODE_DOWN = (1 << 3), - TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7) -}; - /* Optional capabilities supported by this code version */ enum { @@ -67,72 +52,6 @@ enum { #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH -struct tipc_link_entry { - struct tipc_link *link; - spinlock_t lock; /* per-link */ - u32 mtu; - struct sk_buff_head inputq; - struct tipc_media_addr maddr; -}; - -struct tipc_bclink_entry { - struct tipc_link *link; - struct sk_buff_head inputq1; - struct sk_buff_head arrvq; - struct sk_buff_head inputq2; - struct sk_buff_head namedq; -}; - -/** - * struct tipc_node - TIPC node structure - * @addr: network address of node - * @ref: reference counter to node object - * @lock: rwlock governing access to structure - * @net: the applicable net namespace - * @hash: links to adjacent nodes in unsorted hash chain - * @inputq: pointer to input queue containing messages for msg event - * @namedq: pointer to name table input queue with name table messages - * @active_links: bearer ids of active links, used as index into links[] array - * @links: array containing references to all links to node - * @action_flags: bit mask of different types of node actions - * @state: connectivity state vs peer node - * @sync_point: sequence number where synch/failover is finished - * @list: links to adjacent nodes in sorted list of cluster's nodes - * @working_links: number of working links to node (both active and standby) - * @link_cnt: number of links to node - * @capabilities: bitmap, indicating peer node's functional capabilities - * @signature: node instance identifier - * @link_id: local and remote bearer ids of changing link, if any - * @publ_list: list of publications - * @rcu: rcu struct for tipc_node - */ -struct tipc_node { - u32 addr; - struct kref kref; - rwlock_t lock; - struct net *net; - struct hlist_node hash; - int active_links[2]; - struct tipc_link_entry links[MAX_BEARERS]; - struct tipc_bclink_entry bc_entry; - int action_flags; - struct list_head list; - int state; - u16 sync_point; - int link_cnt; - u16 working_links; - u16 capabilities; - u32 signature; - u32 link_id; - struct list_head publ_list; - struct list_head conn_sks; - unsigned long keepalive_intv; - struct timer_list timer; - struct rcu_head rcu; -}; - -struct tipc_node *tipc_node_find(struct net *net, u32 addr); -void tipc_node_put(struct tipc_node *node); void tipc_node_stop(struct net *net); void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *bearer, @@ -140,13 +59,8 @@ void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr); void tipc_node_delete_links(struct net *net, int bearer_id); -void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); -bool tipc_node_is_up(struct tipc_node *n); int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); -void tipc_node_read_lock(struct tipc_node *n); -void tipc_node_read_unlock(struct tipc_node *node); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, @@ -156,33 +70,10 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); - -static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel) -{ - int bearer_id = n->active_links[sel & 1]; - - if (unlikely(bearer_id == INVALID_BEARER_ID)) - return NULL; - - return n->links[bearer_id].link; -} - -static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) -{ - struct tipc_node *n; - int bearer_id; - unsigned int mtu = MAX_MSG_SIZE; - - n = tipc_node_find(net, addr); - if (unlikely(!n)) - return mtu; - - bearer_id = n->active_links[sel & 1]; - if (likely(bearer_id != INVALID_BEARER_ID)) - mtu = n->links[bearer_id].mtu; - tipc_node_put(n); - return mtu; -} +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); #endif -- cgit v1.1 From 38206d5939068415c413ac253be6f364d06e672f Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:46 -0500 Subject: tipc: narrow down interface towards struct tipc_link We move the definition of struct tipc_link from link.h to link.c in order to minimize its exposure to the rest of the code. When needed, we define new functions to make it possible for external entities to access and set data in the link. Apart from the above, there are no functional changes. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 126 +---------------- net/tipc/link.c | 346 ++++++++++++++++++++++++++++++++++++++++++++-- net/tipc/link.h | 170 ++--------------------- net/tipc/netlink.c | 2 +- net/tipc/netlink_compat.c | 4 +- net/tipc/node.c | 108 +++++++-------- net/tipc/node.h | 4 +- 7 files changed, 415 insertions(+), 345 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9dc239d..e401108 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -332,131 +332,15 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, - struct tipc_stats *stats) -{ - int i; - struct nlattr *nest; - - struct nla_map { - __u32 key; - __u32 val; - }; - - struct nla_map map[] = { - {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, - {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, - {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, - {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, - {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, - {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, - {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, - {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, - {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, - {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, - {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, - {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, - {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, - {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, - {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, - {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, - {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, - {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, - {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? - (stats->accu_queue_sz / stats->queue_sz_counts) : 0} - }; - - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); - if (!nest) - return -EMSGSIZE; - - for (i = 0; i < ARRAY_SIZE(map); i++) - if (nla_put_u32(skb, map[i].key, map[i].val)) - goto msg_full; - - nla_nest_end(skb, nest); - - return 0; -msg_full: - nla_nest_cancel(skb, nest); - - return -EMSGSIZE; -} - -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) -{ - int err; - void *hdr; - struct nlattr *attrs; - struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bcast_lock(net); - - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, - NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) - return -EMSGSIZE; - - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); - if (!attrs) - goto msg_full; - - /* The broadcast link is always up */ - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) - goto attr_msg_full; - - if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) - goto attr_msg_full; - if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) - goto attr_msg_full; - - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); - if (!prop) - goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) - goto prop_msg_full; - nla_nest_end(msg->skb, prop); - - err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); - if (err) - goto attr_msg_full; - - tipc_bcast_unlock(net); - nla_nest_end(msg->skb, attrs); - genlmsg_end(msg->skb, hdr); - - return 0; - -prop_msg_full: - nla_nest_cancel(msg->skb, prop); -attr_msg_full: - nla_nest_cancel(msg->skb, attrs); -msg_full: - tipc_bcast_unlock(net); - genlmsg_cancel(msg->skb, hdr); - - return -EMSGSIZE; -} - int tipc_bclink_reset_stats(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; tipc_bcast_lock(net); - memset(&bcl->stats, 0, sizeof(bcl->stats)); + tipc_link_reset_stats(l); tipc_bcast_unlock(net); return 0; } @@ -530,9 +414,7 @@ enomem: void tipc_bcast_reinit(struct net *net) { - struct tipc_bc_base *b = tipc_bc_base(net); - - msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); + tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net)); } void tipc_bcast_stop(struct net *net) diff --git a/net/tipc/link.c b/net/tipc/link.c index c513a80..4380eb1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -45,6 +45,151 @@ #include +struct tipc_stats { + u32 sent_info; /* used in counting # sent packets */ + u32 recv_info; /* used in counting # recv'd packets */ + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. length profiling */ +}; + +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @media_addr: media address to use when sending messages over link + * @timer: link timer + * @net: pointer to namespace struct + * @refcnt: reference counter for permanent references (owner node & timer) + * @peer_session: link session # being used by peer end of link + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @keepalive_intv: link keepalive timer interval + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node + * @silent_intv_cnt: # of timer intervals without any reception from peer + * @proto_msg: template for control messages generated by link + * @pmsg: convenience pointer to "proto_msg" field + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) + * @exp_msg_count: # of tunnelled messages expected during link changeover + * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent + * @snt_nxt: next sequence number to use for outbound messages + * @last_retransmitted: sequence number of most recently retransmitted message + * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. + * @rcv_nxt: next sequence number to expect for inbound messages + * @deferred_queue: deferred queue saved OOS b'cast message received from node + * @unacked_window: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @next_out: ptr to first unsent outbound message in queue + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @long_msg_seq_no: next identifier to use for outbound fragmented messages + * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link + * @stats: collects statistics regarding link activity + */ +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct tipc_media_addr *media_addr; + struct net *net; + + /* Management and link supervision data */ + u32 peer_session; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + unsigned long keepalive_intv; + u32 abort_limit; + u32 state; + u16 peer_caps; + bool active; + u32 silent_intv_cnt; + struct { + unchar hdr[INT_H_SIZE]; + unchar body[TIPC_MAX_IF_NAME]; + } proto_msg; + struct tipc_msg *pmsg; + u32 priority; + char net_plane; + + /* Failover/synch */ + u16 drop_point; + struct sk_buff *failover_reasm_skb; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; + u16 snd_nxt; + u16 last_retransm; + u16 window; + u32 stale_count; + + /* Reception */ + u16 rcv_nxt; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + int nack_state; + bool bc_peer_is_up; + + /* Statistics */ + struct tipc_stats stats; +}; + /* * Error message prefixes */ @@ -165,6 +310,36 @@ void tipc_link_set_active(struct tipc_link *l, bool active) l->active = active; } +u32 tipc_link_id(struct tipc_link *l) +{ + return l->peer_bearer_id << 16 | l->bearer_id; +} + +int tipc_link_window(struct tipc_link *l) +{ + return l->window; +} + +int tipc_link_prio(struct tipc_link *l) +{ + return l->priority; +} + +unsigned long tipc_link_tolerance(struct tipc_link *l) +{ + return l->tolerance; +} + +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) +{ + return l->inputq; +} + +char tipc_link_plane(struct tipc_link *l) +{ + return l->net_plane; +} + void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq) @@ -207,11 +382,31 @@ int tipc_link_mtu(struct tipc_link *l) return l->mtu; } +u16 tipc_link_rcv_nxt(struct tipc_link *l) +{ + return l->rcv_nxt; +} + +u16 tipc_link_acked(struct tipc_link *l) +{ + return l->acked; +} + +char *tipc_link_name(struct tipc_link *l) +{ + return l->name; +} + static u32 link_own_addr(struct tipc_link *l) { return msg_prevnode(l->pmsg); } +void tipc_link_reinit(struct tipc_link *l, u32 addr) +{ + msg_set_prevnode(l->pmsg, addr); +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -674,7 +869,7 @@ void tipc_link_reset(struct tipc_link *l) l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; - link_reset_statistics(l); + tipc_link_reset_stats(l); } /** @@ -1067,8 +1262,9 @@ drop: /* * Send protocol message to the other endpoint. */ -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority) +static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, + int probe_msg, u32 gap, u32 tolerance, + u32 priority) { struct sk_buff *skb = NULL; struct sk_buff_head xmitq; @@ -1510,14 +1706,16 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) } /** - * link_reset_statistics - reset link statistics + * link_reset_stats - reset link statistics * @l_ptr: pointer to link */ -void link_reset_statistics(struct tipc_link *l_ptr) +void tipc_link_reset_stats(struct tipc_link *l) { - memset(&l_ptr->stats, 0, sizeof(l_ptr->stats)); - l_ptr->stats.sent_info = l_ptr->snd_nxt; - l_ptr->stats.recv_info = l_ptr->rcv_nxt; + memset(&l->stats, 0, sizeof(l->stats)); + if (!link_is_bc_sndlink(l)) { + l->stats.sent_info = l->snd_nxt; + l->stats.recv_info = l->rcv_nxt; + } } static void link_print(struct tipc_link *l, const char *str) @@ -1705,3 +1903,135 @@ msg_full: return -EMSGSIZE; } + +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) +{ + int i; + struct nlattr *nest; + + struct nla_map { + __u32 key; + __u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_info}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_info}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? + (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; + + nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, nest); + + return 0; +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + + if (!bcl) + return 0; + + tipc_bcast_lock(net); + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, bcl->rcv_nxt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, bcl->snd_nxt)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); + if (err) + goto attr_msg_full; + + tipc_bcast_unlock(net); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bcast_unlock(net); + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) +{ + l->tolerance = tol; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); +} + +void tipc_link_set_prio(struct tipc_link *l, u32 prio) +{ + l->priority = prio; + tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); +} + +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) +{ + l->abort_limit = limit; +} diff --git a/net/tipc/link.h b/net/tipc/link.h index a7ee806..616fc80 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -45,10 +45,6 @@ */ #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ -/* Out-of-range value for link sequence numbers - */ -#define INVALID_LINK_SEQ 0x10000 - /* Link FSM events: */ enum { @@ -75,151 +71,6 @@ enum { */ #define MAX_PKT_DEFAULT 1500 -struct tipc_stats { - u32 sent_info; /* used in counting # sent packets */ - u32 recv_info; /* used in counting # recv'd packets */ - u32 sent_states; - u32 recv_states; - u32 sent_probes; - u32 recv_probes; - u32 sent_nacks; - u32 recv_nacks; - u32 sent_acks; - u32 sent_bundled; - u32 sent_bundles; - u32 recv_bundled; - u32 recv_bundles; - u32 retransmitted; - u32 sent_fragmented; - u32 sent_fragments; - u32 recv_fragmented; - u32 recv_fragments; - u32 link_congs; /* # port sends blocked by congestion */ - u32 deferred_recv; - u32 duplicates; - u32 max_queue_sz; /* send queue size high water mark */ - u32 accu_queue_sz; /* used for send queue size profiling */ - u32 queue_sz_counts; /* used for send queue size profiling */ - u32 msg_length_counts; /* used for message length profiling */ - u32 msg_lengths_total; /* used for message length profiling */ - u32 msg_length_profile[7]; /* used for msg. length profiling */ -}; - -/** - * struct tipc_link - TIPC link data structure - * @addr: network address of link's peer node - * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer - * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) - * @peer_session: link session # being used by peer end of link - * @peer_bearer_id: bearer id used by link's peer endpoint - * @bearer_id: local bearer id used by link - * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @keepalive_intv: link keepalive timer interval - * @abort_limit: # of unacknowledged continuity probes needed to reset link - * @state: current state of link FSM - * @peer_caps: bitmap describing capabilities of peer node - * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field - * @priority: current link priority - * @net_plane: current link network plane ('A' through 'H') - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset - * @mtu: current maximum packet size for this link - * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages - * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages - * @last_retransmitted: sequence number of most recently retransmitted message - * @stale_count: # of identical retransmit requests made by peer - * @ackers: # of peers that needs to ack each packet before it can be released - * @acked: # last packet acked by a certain peer. Used for broadcast. - * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer - * @inputq: buffer queue for messages to be delivered upwards - * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue - * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages - * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link - * @stats: collects statistics regarding link activity - */ -struct tipc_link { - u32 addr; - char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr *media_addr; - struct net *net; - - /* Management and link supervision data */ - u32 peer_session; - u32 peer_bearer_id; - u32 bearer_id; - u32 tolerance; - unsigned long keepalive_intv; - u32 abort_limit; - u32 state; - u16 peer_caps; - bool active; - u32 silent_intv_cnt; - struct { - unchar hdr[INT_H_SIZE]; - unchar body[TIPC_MAX_IF_NAME]; - } proto_msg; - struct tipc_msg *pmsg; - u32 priority; - char net_plane; - - /* Failover/synch */ - u16 drop_point; - struct sk_buff *failover_reasm_skb; - - /* Max packet negotiation */ - u16 mtu; - u16 advertised_mtu; - - /* Sending */ - struct sk_buff_head transmq; - struct sk_buff_head backlogq; - struct { - u16 len; - u16 limit; - } backlog[5]; - u16 snd_nxt; - u16 last_retransm; - u16 window; - u32 stale_count; - - /* Reception */ - u16 rcv_nxt; - u32 rcv_unacked; - struct sk_buff_head deferdq; - struct sk_buff_head *inputq; - struct sk_buff_head *namedq; - - /* Congestion handling */ - struct sk_buff_head wakeupq; - - /* Fragmentation/reassembly */ - struct sk_buff *reasm_buf; - - /* Broadcast */ - u16 ackers; - u16 acked; - struct tipc_link *bc_rcvlink; - struct tipc_link *bc_sndlink; - int nack_state; - bool bc_peer_is_up; - - /* Statistics */ - struct tipc_stats stats; -}; - bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, int window, u32 session, u32 ownnode, u32 peer, @@ -235,11 +86,11 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, struct tipc_link **link); +void tipc_link_reinit(struct tipc_link *l, u32 addr); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); -void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); bool tipc_link_peer_is_down(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); @@ -249,15 +100,24 @@ bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); -void link_reset_statistics(struct tipc_link *l); -int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, +void tipc_link_reset_stats(struct tipc_link *l); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); -void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l); +u16 tipc_link_rcv_nxt(struct tipc_link *l); +u16 tipc_link_acked(struct tipc_link *l); +u32 tipc_link_id(struct tipc_link *l); +char *tipc_link_name(struct tipc_link *l); +char tipc_link_plane(struct tipc_link *l); +int tipc_link_prio(struct tipc_link *l); +int tipc_link_window(struct tipc_link *l); +unsigned long tipc_link_tolerance(struct tipc_link *l); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol); +void tipc_link_set_prio(struct tipc_link *l, u32 prio); +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags); -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 29dfcc9..8975b01 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -102,7 +102,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_LINK_GET, .doit = tipc_nl_node_get_link, - .dumpit = tipc_nl_link_dump, + .dumpit = tipc_nl_node_dump_link, .policy = tipc_nl_policy, }, { diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index acda1ce..2c016fd 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1023,13 +1023,13 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) msg->req_type = TIPC_TLV_LINK_NAME; msg->rep_size = ULTRA_STRING_MAX_LEN; msg->rep_type = TIPC_TLV_ULTRA_STRING; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_stat_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_GET_LINKS: msg->req_type = TIPC_TLV_NET_ADDR; msg->rep_size = ULTRA_STRING_MAX_LEN; - dump.dumpit = tipc_nl_link_dump; + dump.dumpit = tipc_nl_node_dump_link; dump.format = tipc_nl_compat_link_dump; return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_SET_LINK_TOL: diff --git a/net/tipc/node.c b/net/tipc/node.c index e110ba6..82c05e9 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,11 +42,8 @@ #include "bcast.h" #include "discover.h" -/* Out-of-range value for node signature */ #define INVALID_NODE_SIG 0x10000 -#define INVALID_BEARER_ID -1 - /* Flags used to take different actions according to flag type * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up @@ -360,7 +357,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, - U16_MAX, tipc_bc_sndlink(net)->window, + U16_MAX, + tipc_link_window(tipc_bc_sndlink(net)), n_ptr->capabilities, &n_ptr->bc_entry.inputq1, &n_ptr->bc_entry.namedq, @@ -381,7 +379,7 @@ exit: static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { - unsigned long tol = l->tolerance; + unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; unsigned long keepalive_intv = msecs_to_jiffies(intv); @@ -390,7 +388,7 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) n->keepalive_intv = keepalive_intv; /* Ensure link's abort limit corresponds to current interval */ - l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv); + tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv)); } static void tipc_node_delete(struct tipc_node *node) @@ -559,16 +557,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; - n->link_id = nl->peer_bearer_id << 16 | bearer_id; + n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ - n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; + n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", - nl->name, nl->net_plane); + tipc_link_name(nl), tipc_link_plane(nl)); /* First link? => give it both slots */ if (!ol) { @@ -581,17 +579,17 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, } /* Second link => redistribute slots */ - if (nl->priority > ol->priority) { - pr_debug("Old link <%s> becomes standby\n", ol->name); + if (tipc_link_prio(nl) > tipc_link_prio(ol)) { + pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol)); *slot0 = bearer_id; *slot1 = bearer_id; tipc_link_set_active(nl, true); tipc_link_set_active(ol, false); - } else if (nl->priority == ol->priority) { + } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) { tipc_link_set_active(nl, true); *slot1 = bearer_id; } else { - pr_debug("New link <%s> is standby\n", nl->name); + pr_debug("New link <%s> is standby\n", tipc_link_name(nl)); } /* Prepare synchronization with first link */ @@ -621,7 +619,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct tipc_link_entry *le = &n->links[*bearer_id]; int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; - int i, highest = 0; + int i, highest = 0, prio; struct tipc_link *l, *_l, *tnl; l = n->links[*bearer_id].link; @@ -630,12 +628,12 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; - n->link_id = l->peer_bearer_id << 16 | *bearer_id; + n->link_id = tipc_link_id(l); tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); pr_debug("Lost link <%s> on network plane %c\n", - l->name, l->net_plane); + tipc_link_name(l), tipc_link_plane(l)); /* Select new active link if any available */ *slot0 = INVALID_BEARER_ID; @@ -646,10 +644,11 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, continue; if (_l == l) continue; - if (_l->priority < highest) + prio = tipc_link_prio(_l); + if (prio < highest) continue; - if (_l->priority > highest) { - highest = _l->priority; + if (prio > highest) { + highest = prio; *slot0 = i; *slot1 = i; continue; @@ -672,17 +671,17 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ - tnl = node_active_link(n, 0); + *bearer_id = n->active_links[0]; + tnl = n->links[*bearer_id].link; tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); - n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); - *maddr = &n->links[tnl->bearer_id].maddr; - *bearer_id = tnl->bearer_id; + *maddr = &n->links[*bearer_id].maddr; } static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) @@ -1117,7 +1116,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { - strncpy(linkname, link->name, len); + strncpy(linkname, tipc_link_name(link), len); err = 0; } exit: @@ -1328,25 +1327,25 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, u16 oseqno = msg_seqno(hdr); u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); u16 exp_pkts = msg_msgcnt(hdr); - u16 rcv_nxt, syncpt, dlv_nxt; + u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; - int i, pb_id; + int pb_id; l = n->links[bearer_id].link; if (!l) return false; - rcv_nxt = l->rcv_nxt; + rcv_nxt = tipc_link_rcv_nxt(l); if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) return true; /* Find parallel link, if any */ - for (i = 0; i < MAX_BEARERS; i++) { - if ((i != bearer_id) && n->links[i].link) { - pl = n->links[i].link; + for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { + if ((pb_id != bearer_id) && n->links[pb_id].link) { + pl = n->links[pb_id].link; break; } } @@ -1378,9 +1377,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; if (pl && tipc_link_is_up(pl)) { - pb_id = pl->bearer_id; __tipc_node_link_down(n, &pb_id, xmitq, &maddr); - tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq); + tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), + tipc_link_inputq(l)); } /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) @@ -1423,7 +1422,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tnl = pl; pl = l; } - dlv_nxt = pl->rcv_nxt - mod(skb_queue_len(pl->inputq)); + inputq_len = skb_queue_len(tipc_link_inputq(pl)); + dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; if (more(dlv_nxt, n->sync_point)) { tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); @@ -1483,7 +1483,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); - else if (unlikely(n->bc_entry.link->acked != bc_ack)) + else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); /* Receive packet directly if conditions permit */ @@ -1592,36 +1592,36 @@ out: return skb->len; } -/* tipc_link_find_owner - locate owner node of link by link's name +/* tipc_node_find_by_name - locate owner node of link by link's name * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. * * Returns pointer to node owning the link, or 0 if no matching link is found. */ -static struct tipc_node *tipc_link_find_owner(struct net *net, - const char *link_name, - unsigned int *bearer_id) +static struct tipc_node *tipc_node_find_by_name(struct net *net, + const char *link_name, + unsigned int *bearer_id) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; + struct tipc_link *l; + struct tipc_node *n; struct tipc_node *found_node = NULL; int i; *bearer_id = 0; rcu_read_lock(); - list_for_each_entry_rcu(n_ptr, &tn->node_list, list) { - tipc_node_read_lock(n_ptr); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_read_lock(n); for (i = 0; i < MAX_BEARERS; i++) { - l_ptr = n_ptr->links[i].link; - if (l_ptr && !strcmp(l_ptr->name, link_name)) { + l = n->links[i].link; + if (l && !strcmp(tipc_link_name(l), link_name)) { *bearer_id = i; - found_node = n_ptr; + found_node = n; break; } } - tipc_node_read_unlock(n_ptr); + tipc_node_read_unlock(n); if (found_node) break; } @@ -1658,7 +1658,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) return tipc_nl_bc_link_set(net, attrs); - node = tipc_link_find_owner(net, name, &bearer_id); + node = tipc_node_find_by_name(net, name, &bearer_id); if (!node) return -EINVAL; @@ -1684,15 +1684,13 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - link->tolerance = tol; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); + tipc_link_set_tolerance(link, tol); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); + tipc_link_set_prio(link, prio); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -1737,7 +1735,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct tipc_link *link; - node = tipc_link_find_owner(net, name, &bearer_id); + node = tipc_node_find_by_name(net, name, &bearer_id); if (!node) return -EINVAL; @@ -1792,7 +1790,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) return 0; } - node = tipc_link_find_owner(net, link_name, &bearer_id); + node = tipc_node_find_by_name(net, link_name, &bearer_id); if (!node) return -EINVAL; @@ -1805,7 +1803,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) tipc_node_read_unlock(node); return -EINVAL; } - link_reset_statistics(link); + tipc_link_reset_stats(link); spin_unlock_bh(&le->lock); tipc_node_read_unlock(node); return 0; @@ -1834,7 +1832,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, return 0; } -int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/node.h b/net/tipc/node.h index 1fbed29..f39d9d0 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -42,8 +42,6 @@ #include "bearer.h" #include "msg.h" -#define INVALID_BEARER_ID -1 - /* Optional capabilities supported by this code version */ enum { @@ -51,6 +49,7 @@ enum { }; #define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH +#define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); void tipc_node_check_dest(struct net *net, u32 onode, @@ -72,6 +71,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); -- cgit v1.1 From 1a90632da8c17a27e0c93538ee987764adee43a5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 19 Nov 2015 14:30:47 -0500 Subject: tipc: eliminate remnants of hungarian notation The number of variables with Hungarian notation (l_ptr, n_ptr etc.) has been significantly reduced over the last couple of years. We now root out the last traces of this practice. There are no functional changes in this commit. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 140 ++++++++++++++++++++++++++-------------------------- net/tipc/bearer.h | 8 +-- net/tipc/discover.c | 38 +++++++------- net/tipc/link.c | 4 +- net/tipc/link.h | 2 +- net/tipc/node.c | 74 +++++++++++++-------------- 6 files changed, 133 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 648f2a6..802ffad 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -71,7 +71,7 @@ static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } }; -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr); +static void bearer_disable(struct net *net, struct tipc_bearer *b); /** * tipc_media_find - locates specified media object by name @@ -107,13 +107,13 @@ static struct tipc_media *media_find_id(u8 type) void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) { char addr_str[MAX_ADDR_STR]; - struct tipc_media *m_ptr; + struct tipc_media *m; int ret; - m_ptr = media_find_id(a->media_id); + m = media_find_id(a->media_id); - if (m_ptr && !m_ptr->addr2str(a, addr_str, sizeof(addr_str))) - ret = scnprintf(buf, len, "%s(%s)", m_ptr->name, addr_str); + if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); else { u32 i; @@ -175,13 +175,13 @@ static int bearer_name_validate(const char *name, struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr && (!strcmp(b_ptr->name, name))) - return b_ptr; + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (!strcmp(b->name, name))) + return b; } return NULL; } @@ -189,24 +189,24 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_add_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_add_dest(b->link_req); rcu_read_unlock(); } void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) - tipc_disc_remove_dest(b_ptr->link_req); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_remove_dest(b->link_req); rcu_read_unlock(); } @@ -218,8 +218,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; - struct tipc_media *m_ptr; + struct tipc_bearer *b; + struct tipc_media *m; struct tipc_bearer_names b_names; char addr_string[16]; u32 bearer_id; @@ -255,31 +255,31 @@ static int tipc_enable_bearer(struct net *net, const char *name, return -EINVAL; } - m_ptr = tipc_media_find(b_names.media_name); - if (!m_ptr) { + m = tipc_media_find(b_names.media_name); + if (!m) { pr_warn("Bearer <%s> rejected, media <%s> not registered\n", name, b_names.media_name); return -EINVAL; } if (priority == TIPC_MEDIA_LINK_PRI) - priority = m_ptr->priority; + priority = m->priority; restart: bearer_id = MAX_BEARERS; with_this_prio = 1; for (i = MAX_BEARERS; i-- != 0; ) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (!b_ptr) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { bearer_id = i; continue; } - if (!strcmp(name, b_ptr->name)) { + if (!strcmp(name, b->name)) { pr_warn("Bearer <%s> rejected, already enabled\n", name); return -EINVAL; } - if ((b_ptr->priority == priority) && + if ((b->priority == priority) && (++with_this_prio > 2)) { if (priority-- == 0) { pr_warn("Bearer <%s> rejected, duplicate priority\n", @@ -297,35 +297,35 @@ restart: return -EINVAL; } - b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC); - if (!b_ptr) + b = kzalloc(sizeof(*b), GFP_ATOMIC); + if (!b) return -ENOMEM; - strcpy(b_ptr->name, name); - b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr, attr); + strcpy(b->name, name); + b->media = m; + res = m->enable_media(net, b, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); return -EINVAL; } - b_ptr->identity = bearer_id; - b_ptr->tolerance = m_ptr->tolerance; - b_ptr->window = m_ptr->window; - b_ptr->domain = disc_domain; - b_ptr->net_plane = bearer_id + 'A'; - b_ptr->priority = priority; + b->identity = bearer_id; + b->tolerance = m->tolerance; + b->window = m->window; + b->domain = disc_domain; + b->net_plane = bearer_id + 'A'; + b->priority = priority; - res = tipc_disc_create(net, b_ptr, &b_ptr->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr); if (res) { - bearer_disable(net, b_ptr); + bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", name); return -EINVAL; } - rcu_assign_pointer(tn->bearer_list[bearer_id], b_ptr); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -336,11 +336,11 @@ restart: /** * tipc_reset_bearer - Reset all links established over this bearer */ -static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) { - pr_info("Resetting bearer <%s>\n", b_ptr->name); - tipc_node_delete_links(net, b_ptr->identity); - tipc_disc_reset(net, b_ptr); + pr_info("Resetting bearer <%s>\n", b->name); + tipc_node_delete_links(net, b->identity); + tipc_disc_reset(net, b); return 0; } @@ -349,26 +349,26 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr) * * Note: This routine assumes caller holds RTNL lock. */ -static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) +static void bearer_disable(struct net *net, struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); u32 i; - pr_info("Disabling bearer <%s>\n", b_ptr->name); - b_ptr->media->disable_media(b_ptr); + pr_info("Disabling bearer <%s>\n", b->name); + b->media->disable_media(b); - tipc_node_delete_links(net, b_ptr->identity); - RCU_INIT_POINTER(b_ptr->media_ptr, NULL); - if (b_ptr->link_req) - tipc_disc_delete(b_ptr->link_req); + tipc_node_delete_links(net, b->identity); + RCU_INIT_POINTER(b->media_ptr, NULL); + if (b->link_req) + tipc_disc_delete(b->link_req); for (i = 0; i < MAX_BEARERS; i++) { - if (b_ptr == rtnl_dereference(tn->bearer_list[i])) { + if (b == rtnl_dereference(tn->bearer_list[i])) { RCU_INIT_POINTER(tn->bearer_list[i], NULL); break; } } - kfree_rcu(b_ptr, rcu); + kfree_rcu(b, rcu); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, @@ -411,7 +411,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface * @buf: the packet to be sent - * @b_ptr: the bearer through which the packet is to be sent + * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, @@ -532,14 +532,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b_ptr)) { + b = rcu_dereference_rtnl(dev->tipc_ptr); + if (likely(b)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b_ptr); + tipc_rcv(dev_net(dev), buf, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -564,13 +564,13 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; - b_ptr = rtnl_dereference(dev->tipc_ptr); - if (!b_ptr) + b = rtnl_dereference(dev->tipc_ptr); + if (!b) return NOTIFY_DONE; - b_ptr->mtu = dev->mtu; + b->mtu = dev->mtu; switch (evt) { case NETDEV_CHANGE: @@ -578,16 +578,16 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_GOING_DOWN: case NETDEV_CHANGEMTU: - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: - b_ptr->media->raw2addr(b_ptr, &b_ptr->addr, + b->media->raw2addr(b, &b->addr, (char *)dev->dev_addr); - tipc_reset_bearer(net, b_ptr); + tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(dev_net(dev), b_ptr); + bearer_disable(dev_net(dev), b); break; } return NOTIFY_OK; @@ -623,13 +623,13 @@ void tipc_bearer_cleanup(void) void tipc_bearer_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_bearer *b; u32 i; for (i = 0; i < MAX_BEARERS; i++) { - b_ptr = rtnl_dereference(tn->bearer_list[i]); - if (b_ptr) { - bearer_disable(net, b_ptr); + b = rtnl_dereference(tn->bearer_list[i]); + if (b) { + bearer_disable(net, b); tn->bearer_list[i] = NULL; } } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 552185b..e318205 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -103,11 +103,11 @@ struct tipc_bearer; */ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, - struct tipc_bearer *b_ptr, + struct tipc_bearer *b, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + int (*enable_media)(struct net *net, struct tipc_bearer *b, struct nlattr *attr[]); - void (*disable_media)(struct tipc_bearer *b_ptr); + void (*disable_media)(struct tipc_bearer *b); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, int bufsz); @@ -176,7 +176,7 @@ struct tipc_bearer_names { * TIPC routines available to supported media types */ -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b); /* * Routines made available to TIPC by supported media types diff --git a/net/tipc/discover.c b/net/tipc/discover.c index afe8c47..f1e738e 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -75,14 +75,14 @@ struct tipc_link_req { * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace * @type: message type (request or response) - * @b_ptr: ptr to bearer issuing message + * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, - struct tipc_bearer *b_ptr) + struct tipc_bearer *b) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg; - u32 dest_domain = b_ptr->domain; + u32 dest_domain = b->domain; msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, @@ -92,16 +92,16 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); - b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); + b->media->addr2msg(msg_media_addr(msg), &b->addr); } /** * disc_dupl_alert - issue node address duplication alert - * @b_ptr: pointer to bearer detecting duplication + * @b: pointer to bearer detecting duplication * @node_addr: duplicated node address * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, +static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, struct tipc_media_addr *media_addr) { char node_addr_str[16]; @@ -111,7 +111,7 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), media_addr); pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str, - media_addr_str, b_ptr->name); + media_addr_str, b->name); } /** @@ -261,13 +261,13 @@ exit: /** * tipc_disc_create - create object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain to which links can be established * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, +int tipc_disc_create(struct net *net, struct tipc_bearer *b, struct tipc_media_addr *dest) { struct tipc_link_req *req; @@ -282,17 +282,17 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, return -ENOMEM; } - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); memcpy(&req->dest, dest, sizeof(*dest)); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); - b_ptr->link_req = req; + b->link_req = req; skb = skb_clone(req->buf, GFP_ATOMIC); if (skb) tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); @@ -313,19 +313,19 @@ void tipc_disc_delete(struct tipc_link_req *req) /** * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace - * @b_ptr: ptr to bearer issuing requests + * @b: ptr to bearer issuing requests * @dest_domain: network domain to which links can be established */ -void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) +void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { - struct tipc_link_req *req = b_ptr->link_req; + struct tipc_link_req *req = b->link_req; struct sk_buff *skb; spin_lock_bh(&req->lock); - tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); + tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b); req->net = net; - req->bearer_id = b_ptr->identity; - req->domain = b_ptr->domain; + req->bearer_id = b->identity; + req->domain = b->domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); diff --git a/net/tipc/link.c b/net/tipc/link.c index 4380eb1..b11afe7 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -245,7 +245,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); -static void link_print(struct tipc_link *l_ptr, const char *str); +static void link_print(struct tipc_link *l, const char *str); static void tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, @@ -1707,7 +1707,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) /** * link_reset_stats - reset link statistics - * @l_ptr: pointer to link + * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) { diff --git a/net/tipc/link.h b/net/tipc/link.h index 616fc80..b2ae0f4 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -99,7 +99,7 @@ bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); void tipc_link_set_active(struct tipc_link *l, bool active); -void tipc_link_reset(struct tipc_link *l_ptr); +void tipc_link_reset(struct tipc_link *l); void tipc_link_reset_stats(struct tipc_link *l); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); diff --git a/net/tipc/node.c b/net/tipc/node.c index 82c05e9..3f7a4ed 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -319,62 +319,62 @@ static void tipc_node_write_unlock(struct tipc_node *n) struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *n_ptr, *temp_node; + struct tipc_node *n, *temp_node; int i; spin_lock_bh(&tn->node_list_lock); - n_ptr = tipc_node_find(net, addr); - if (n_ptr) + n = tipc_node_find(net, addr); + if (n) goto exit; - n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC); - if (!n_ptr) { + n = kzalloc(sizeof(*n), GFP_ATOMIC); + if (!n) { pr_warn("Node creation failed, no memory\n"); goto exit; } - n_ptr->addr = addr; - n_ptr->net = net; - n_ptr->capabilities = capabilities; - kref_init(&n_ptr->kref); - rwlock_init(&n_ptr->lock); - INIT_HLIST_NODE(&n_ptr->hash); - INIT_LIST_HEAD(&n_ptr->list); - INIT_LIST_HEAD(&n_ptr->publ_list); - INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bc_entry.namedq); - skb_queue_head_init(&n_ptr->bc_entry.inputq1); - __skb_queue_head_init(&n_ptr->bc_entry.arrvq); - skb_queue_head_init(&n_ptr->bc_entry.inputq2); + n->addr = addr; + n->net = net; + n->capabilities = capabilities; + kref_init(&n->kref); + rwlock_init(&n->lock); + INIT_HLIST_NODE(&n->hash); + INIT_LIST_HEAD(&n->list); + INIT_LIST_HEAD(&n->publ_list); + INIT_LIST_HEAD(&n->conn_sks); + skb_queue_head_init(&n->bc_entry.namedq); + skb_queue_head_init(&n->bc_entry.inputq1); + __skb_queue_head_init(&n->bc_entry.arrvq); + skb_queue_head_init(&n->bc_entry.inputq2); for (i = 0; i < MAX_BEARERS; i++) - spin_lock_init(&n_ptr->links[i].lock); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + spin_lock_init(&n->links[i].lock); + hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n_ptr->addr < temp_node->addr) + if (n->addr < temp_node->addr) break; } - list_add_tail_rcu(&n_ptr->list, &temp_node->list); - n_ptr->state = SELF_DOWN_PEER_LEAVING; - n_ptr->signature = INVALID_NODE_SIG; - n_ptr->active_links[0] = INVALID_BEARER_ID; - n_ptr->active_links[1] = INVALID_BEARER_ID; - if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, + list_add_tail_rcu(&n->list, &temp_node->list); + n->state = SELF_DOWN_PEER_LEAVING; + n->signature = INVALID_NODE_SIG; + n->active_links[0] = INVALID_BEARER_ID; + n->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr, U16_MAX, tipc_link_window(tipc_bc_sndlink(net)), - n_ptr->capabilities, - &n_ptr->bc_entry.inputq1, - &n_ptr->bc_entry.namedq, + n->capabilities, + &n->bc_entry.inputq1, + &n->bc_entry.namedq, tipc_bc_sndlink(net), - &n_ptr->bc_entry.link)) { + &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n_ptr); - n_ptr = NULL; + kfree(n); + n = NULL; goto exit; } - tipc_node_get(n_ptr); - setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); - n_ptr->keepalive_intv = U32_MAX; + tipc_node_get(n); + setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + n->keepalive_intv = U32_MAX; exit: spin_unlock_bh(&tn->node_list_lock); - return n_ptr; + return n; } static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) -- cgit v1.1 From e2f9dc3bd213792ac006e83f50a5453f23b8c354 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Nov 2015 12:11:23 -0800 Subject: net: avoid NULL deref in napi_get_frags() napi_alloc_skb() can return NULL. We should not crash should this happen. Fixes: 93f93a440415 ("net: move skb_mark_napi_id() into core networking stack") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 41cef3e..5df6cbc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4390,8 +4390,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); - napi->skb = skb; - skb_mark_napi_id(skb, napi); + if (skb) { + napi->skb = skb; + skb_mark_napi_id(skb, napi); + } } return skb; } -- cgit v1.1 From ceff86af56e09469d21732c16fd27a7337983c48 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:41 +0100 Subject: Bluetooth: Add instance range check for Add Advertising command The instance range check for Add Advertising command is missing. If the provided instance is out of range an Invalid Parameters error should be returned. At the moment, the generic Failed error is returned. This extra check ensures that clear error messages are returned. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index eca203e..2c6533a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6839,6 +6839,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); + if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + flags = __le32_to_cpu(cp->flags); timeout = __le16_to_cpu(cp->timeout); duration = __le16_to_cpu(cp->duration); -- cgit v1.1 From 31a3248dd97be9268859abed9a30c1040b2f4090 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:42 +0100 Subject: Bluetooth: Simplify if statements in tlv_data_is_valid function The if statements for checking the flags parameter could be written a bit easier to read. This changes this. No functional behavior has been changed. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2c6533a..05370e7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6700,17 +6700,19 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, int i, cur_len; bool flags_managed = false; bool tx_power_managed = false; - u32 flags_params = MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS; - if (is_adv_data && (adv_flags & flags_params)) { - flags_managed = true; - max_len -= 3; - } + if (is_adv_data) { + if (adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS)) { + flags_managed = true; + max_len -= 3; + } - if (is_adv_data && (adv_flags & MGMT_ADV_FLAG_TX_POWER)) { - tx_power_managed = true; - max_len -= 3; + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) { + tx_power_managed = true; + max_len -= 3; + } } if (len > max_len) -- cgit v1.1 From 40b25fe5dc57a6557b96241b75ae63dce716a487 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:43 +0100 Subject: Bluetooth: Add support for Get Advertising Size Information command The Get Advertising Size Information command allows to retrieve size information for advertising data and scan response data fields depending on the selected flags. This is useful if applications want to know the available size ahead of time. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 05370e7..dc8e428 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -102,6 +102,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_ADV_FEATURES, MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, + MGMT_OP_GET_ADV_SIZE_INFO, }; static const u16 mgmt_events[] = { @@ -7059,6 +7060,62 @@ unlock: return err; } +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) +{ + u8 max_len = HCI_MAX_AD_LENGTH; + + if (is_adv_data) { + if (adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS)) + max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) + max_len -= 3; + } + + return max_len; +} + +static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_get_adv_size_info *cp = data; + struct mgmt_rp_get_adv_size_info rp; + u32 flags, supported_flags; + int err; + + BT_DBG("%s", hdev->name); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_REJECTED); + + if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_INVALID_PARAMS); + + flags = __le32_to_cpu(cp->flags); + + /* The current implementation only supports a subset of the specified + * flags. + */ + supported_flags = get_supported_adv_flags(hdev); + if (flags & ~supported_flags) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_INVALID_PARAMS); + + rp.instance = cp->instance; + rp.flags = cp->flags; + rp.max_adv_data_len = tlv_data_max_len(flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(flags, false); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + + return err; +} + static const struct hci_mgmt_handler mgmt_handlers[] = { { NULL }, /* 0x0000 (no command) */ { read_version, MGMT_READ_VERSION_SIZE, @@ -7146,6 +7203,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_advertising, MGMT_ADD_ADVERTISING_SIZE, HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, + { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit v1.1 From b811580d91e9c0945b0a923dcec3e10cce04ac30 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Nov 2015 12:24:22 -0800 Subject: net: IPv6 fib lookup tracepoint Add tracepoint to show fib6 table lookups and result. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/net-traces.c | 4 ++++ net/ipv6/route.c | 10 ++++++++++ 2 files changed, 14 insertions(+) (limited to 'net') diff --git a/net/core/net-traces.c b/net/core/net-traces.c index adef015..92da5e4 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -32,6 +32,10 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f01fe1..89758be 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -62,6 +62,7 @@ #include #include #include +#include #include @@ -865,6 +866,9 @@ restart: } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + return rt; } @@ -1078,6 +1082,8 @@ redo_rt6_select: read_unlock_bh(&table->tb6_lock); rt6_dst_from_metrics_check(rt); + + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { @@ -1101,6 +1107,8 @@ redo_rt6_select: uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); + + trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); return uncached_rt; } else { @@ -1125,6 +1133,7 @@ redo_rt6_select: dst_release(&rt->dst); } + trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); return pcpu_rt; } @@ -1474,6 +1483,7 @@ out: read_unlock_bh(&table->tb6_lock); + trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); return rt; }; -- cgit v1.1 From 568f44f63621e00af9895f09c70aa38025be8813 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 14:40:47 +0200 Subject: Bluetooth: Fix returning proper HCI status from __hci_req_sync There were a couple of code paths missed by the previous patch that added a HCI status return parameter to __hci_req_sync. This patch adds the missing assignments for them. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 76bd912..e639671 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -220,8 +220,14 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, * trigger any commands to be sent. This is normal behavior * and should not trigger an error return. */ - if (err == -ENODATA) + if (err == -ENODATA) { + if (hci_status) + *hci_status = 0; return 0; + } + + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; return err; } -- cgit v1.1 From e59a554235b960b3b251772ac1bb743e49d09cee Mon Sep 17 00:00:00 2001 From: Andrzej Kaczmarek Date: Sun, 22 Nov 2015 21:42:21 +0100 Subject: Bluetooth: Fix powering on with privacy and advertising In order to enable advertising with privacy enabled, SMP has to be registered in order to generate new RPA. During power on, it will be registered at the very end which is the reason why advertising is not enabled and it's not possible to enable it anymore due to mismatch between hci_dev settings and actual controller state. This fixes this problem by moving SMP registration earlier, just after controller is powered (which is ok, because LE SMP will be already able to decide on identity address to be used), but before advertising is enabled. Signed-off-by: Andrzej Kaczmarek Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index dc8e428..e8a2f8b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7305,13 +7305,6 @@ static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) BT_DBG("status 0x%02x", status); if (!status) { - /* Register the available SMP channels (BR/EDR and LE) only - * when successfully powering on the controller. This late - * registration is required so that LE SMP can clearly - * decide if the public address or static address is used. - */ - smp_register(hdev); - restart_le_actions(hdev); hci_update_background_scan(hdev); } @@ -7423,6 +7416,13 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) return 0; if (powered) { + /* Register the available SMP channels (BR/EDR and LE) only + * when successfully powering on the controller. This late + * registration is required so that LE SMP can clearly + * decide if the public address or static address is used. + */ + smp_register(hdev); + if (powered_update_hci(hdev) == 0) return 0; -- cgit v1.1 From dc4270c0cd880f1b28dd48f2a31d869d22da941e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 15:07:51 +0200 Subject: Bluetooth: Increment management interface revision This patch increments the management interface revision due to introduction of a new Get Advertising Size Information command and various other fixes & improvements. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e8a2f8b..3d9d2e4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 10 +#define MGMT_REVISION 11 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, -- cgit v1.1 From cc30c16344fc3a25153175c7eb9037b2136cd466 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 20 Nov 2015 03:56:23 +0100 Subject: net: dsa: Add support for a switch reset gpio Some boards have a gpio line tied to the switch reset pin. Allow this gpio to be retrieved from the device tree, and take the switch out of reset before performing the probe. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1eba07f..0b5565f 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "dsa_priv.h" @@ -688,6 +689,9 @@ static int dsa_of_probe(struct device *dev) const char *port_name; int chip_index, port_index; const unsigned int *sw_addr, *port_reg; + int gpio; + enum of_gpio_flags of_flags; + unsigned long flags; u32 eeprom_len; int ret; @@ -766,6 +770,19 @@ static int dsa_of_probe(struct device *dev) put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } + gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, + &of_flags); + if (gpio_is_valid(gpio)) { + flags = (of_flags == OF_GPIO_ACTIVE_LOW ? + GPIOF_ACTIVE_LOW : 0); + ret = devm_gpio_request_one(dev, gpio, flags, + "switch_reset"); + if (ret) + goto out_free_chip; + + cd->reset = gpio_to_desc(gpio); + gpiod_direction_output(cd->reset, 0); + } for_each_available_child_of_node(child, port) { port_reg = of_get_property(port, "reg", NULL); -- cgit v1.1 From 052a4bc49de9f959682140a200e7bcff98ca2cdf Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:40 +0000 Subject: netfilter-bridge: Cleanse indentation Fixes a bunch of issues detected by checkpatch with regards to code indentation. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_stp.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 44 +++++++++++++++++------------------ 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 0c40570..6b731e1 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -41,7 +41,7 @@ struct stp_config_pdu { #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) static bool ebt_filter_config(const struct ebt_stp_info *info, - const struct stp_config_pdu *stpc) + const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; uint16_t v16; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 32eccd1..593a1bdc 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -12,7 +12,7 @@ #include #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ - (1 << NF_BR_LOCAL_OUT)) + (1 << NF_BR_LOCAL_OUT)) static struct ebt_entries initial_chains[] = { { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index ec55358..eb33919 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -12,7 +12,7 @@ #include #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ - (1 << NF_BR_POST_ROUTING)) + (1 << NF_BR_POST_ROUTING)) static struct ebt_entries initial_chains[] = { { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f46ca41..2a0b2f6 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -46,7 +46,7 @@ #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter))) #define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ - COUNTER_OFFSET(n) * cpu)) + COUNTER_OFFSET(n) * cpu)) @@ -126,7 +126,7 @@ ebt_dev_check(const char *entry, const struct net_device *device) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out) + const struct net_device *in, const struct net_device *out) { const struct ethhdr *h = eth_hdr(skb); const struct net_bridge_port *p; @@ -323,7 +323,7 @@ letscontinue: /* If it succeeds, returns element and locks mutex */ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, - struct mutex *mutex) + struct mutex *mutex) { struct { struct list_head list; @@ -342,7 +342,7 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, - int *error, struct mutex *mutex) + int *error, struct mutex *mutex) { return try_then_request_module( find_inlist_lock_noload(head, name, error, mutex), @@ -493,9 +493,9 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, */ static inline int ebt_check_entry_size_and_hooks(const struct ebt_entry *e, - const struct ebt_table_info *newinfo, - unsigned int *n, unsigned int *cnt, - unsigned int *totalcnt, unsigned int *udc_cnt) + const struct ebt_table_info *newinfo, + unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt) { int i; @@ -562,7 +562,7 @@ struct ebt_cl_stack */ static inline int ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, - unsigned int *n, struct ebt_cl_stack *udc) + unsigned int *n, struct ebt_cl_stack *udc) { int i; @@ -649,9 +649,9 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) static inline int ebt_check_entry(struct ebt_entry *e, struct net *net, - const struct ebt_table_info *newinfo, - const char *name, unsigned int *cnt, - struct ebt_cl_stack *cl_s, unsigned int udc_cnt) + const struct ebt_table_info *newinfo, + const char *name, unsigned int *cnt, + struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; struct xt_target *target; @@ -764,7 +764,7 @@ cleanup_matches: * accessed. This mask is a parameter to the check() functions of the extensions */ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, - unsigned int udc_cnt, unsigned int hooknr, char *base) + unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; const struct ebt_entry *e = (struct ebt_entry *)chain->data; @@ -955,7 +955,7 @@ static int translate_table(struct net *net, const char *name, /* called under write_lock */ static void get_counters(const struct ebt_counter *oldcounters, - struct ebt_counter *counters, unsigned int nentries) + struct ebt_counter *counters, unsigned int nentries) { int i, cpu; struct ebt_counter *counter_base; @@ -1342,7 +1342,7 @@ static int update_counters(struct net *net, const void __user *user, } static inline int ebt_make_matchname(const struct ebt_entry_match *m, - const char *base, char __user *ubase) + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)m - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; @@ -1356,7 +1356,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, } static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, - const char *base, char __user *ubase) + const char *base, char __user *ubase) { char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; @@ -1367,8 +1367,8 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, return 0; } -static inline int -ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) +static inline int ebt_make_names(struct ebt_entry *e, const char *base, + char __user *ubase) { int ret; char __user *hlp; @@ -1394,9 +1394,9 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) } static int copy_counters_to_user(struct ebt_table *t, - const struct ebt_counter *oldcounters, - void __user *user, unsigned int num_counters, - unsigned int nentries) + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) { struct ebt_counter *counterstmp; int ret = 0; @@ -1427,7 +1427,7 @@ static int copy_counters_to_user(struct ebt_table *t, /* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, - const int *len, int cmd) + const int *len, int cmd) { struct ebt_replace tmp; const struct ebt_counter *oldcounters; @@ -2305,7 +2305,7 @@ static int compat_do_ebt_set_ctl(struct sock *sk, break; default: ret = -EINVAL; - } + } return ret; } -- cgit v1.1 From 7f495ad946a6be7bd78df752fad3a084d2710ee2 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:41 +0000 Subject: netfilter-bridge: use netdev style comments Changes comments to use netdev style. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 3 +- net/bridge/netfilter/ebt_vlan.c | 15 +++++--- net/bridge/netfilter/ebtables.c | 84 +++++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0ad639a..f22284d 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -152,7 +152,8 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, - * then log the ARP payload */ + * then log the ARP payload + */ if (ah->ar_hrd == htons(1) && ah->ar_hln == ETH_ALEN && ah->ar_pln == sizeof(__be32)) { diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 6185688..98c221d 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -66,7 +66,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) * - Canonical Format Indicator (CFI). The Canonical Format Indicator * (CFI) is a single bit flag value. Currently ignored. * - VLAN Identifier (VID). The VID is encoded as - * an unsigned binary number. */ + * an unsigned binary number. + */ id = TCI & VLAN_VID_MASK; prio = (TCI >> 13) & 0x7; @@ -98,7 +99,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) } /* Check for bitmask range - * True if even one bit is out of mask */ + * True if even one bit is out of mask + */ if (info->bitmask & ~EBT_VLAN_MASK) { pr_debug("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); @@ -117,7 +119,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) * 0 - The null VLAN ID. * 1 - The default Port VID (PVID) * 0x0FFF - Reserved for implementation use. - * if_vlan.h: VLAN_N_VID 4096. */ + * if_vlan.h: VLAN_N_VID 4096. + */ if (GET_BITMASK(EBT_VLAN_ID)) { if (!!info->id) { /* if id!=0 => check vid range */ if (info->id > VLAN_N_VID) { @@ -128,7 +131,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, * but should be ignored according to 802.1Q Std. - * So we just drop the prio flag. */ + * So we just drop the prio flag. + */ info->bitmask &= ~EBT_VLAN_PRIO; } /* Else, id=0 (null VLAN ID) => user_priority range (any?) */ @@ -143,7 +147,8 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) } /* Check for encapsulated proto range - it is possible to be * any value for u_short range. - * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */ + * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS + */ if (GET_BITMASK(EBT_VLAN_ENCAP)) { if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { pr_debug("encap frame length %d is less than " diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 2a0b2f6..62090e2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -35,8 +35,7 @@ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -/* - * Each cpu has its own set of counters, so there is no need for write_lock in +/* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to * get a write_lock @@ -237,7 +236,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us - what to do with the packet */ + * what to do with the packet + */ EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = (struct ebt_entry_target *) @@ -451,7 +451,8 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { if (e->bitmask != 0) { /* we make userspace set this right, - so there is no misunderstanding */ + * so there is no misunderstanding + */ BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " "in distinguisher\n"); return -EINVAL; @@ -487,8 +488,7 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, return 0; } -/* - * this one is very careful, as it is the first function +/* this one is very careful, as it is the first function * to parse the userspace data */ static inline int @@ -504,10 +504,12 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, break; } /* beginning of a new chain - if i == NF_BR_NUMHOOKS it must be a user defined chain */ + * if i == NF_BR_NUMHOOKS it must be a user defined chain + */ if (i != NF_BR_NUMHOOKS || !e->bitmask) { /* this checks if the previous chain has as many entries - as it said it has */ + * as it said it has + */ if (*n != *cnt) { BUGPRINT("nentries does not equal the nr of entries " "in the chain\n"); @@ -556,8 +558,7 @@ struct ebt_cl_stack unsigned int hookmask; }; -/* - * we need these positions to check that the jumps to a different part of the +/* We need these positions to check that the jumps to a different part of the * entries is a jump to the beginning of a new chain. */ static inline int @@ -687,7 +688,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, break; } /* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on - a base chain */ + * a base chain + */ if (i < NF_BR_NUMHOOKS) hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); else { @@ -758,8 +760,7 @@ cleanup_matches: return ret; } -/* - * checks for loops and sets the hook mask for udc +/* checks for loops and sets the hook mask for udc * the hook mask for udc tells us from which base chains the udc can be * accessed. This mask is a parameter to the check() functions of the extensions */ @@ -853,7 +854,8 @@ static int translate_table(struct net *net, const char *name, return -EINVAL; } /* make sure chains are ordered after each other in same order - as their corresponding hooks */ + * as their corresponding hooks + */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; @@ -868,7 +870,8 @@ static int translate_table(struct net *net, const char *name, i = 0; /* holds the expected nr. of entries for the chain */ j = 0; /* holds the up to now counted entries for the chain */ k = 0; /* holds the total nr. of entries, should equal - newinfo->nentries afterwards */ + * newinfo->nentries afterwards + */ udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */ ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_check_entry_size_and_hooks, newinfo, @@ -888,10 +891,12 @@ static int translate_table(struct net *net, const char *name, } /* get the location of the udc, put them in an array - while we're at it, allocate the chainstack */ + * while we're at it, allocate the chainstack + */ if (udc_cnt) { /* this will get free'd in do_replace()/ebt_register_table() - if an error occurs */ + * if an error occurs + */ newinfo->chainstack = vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack))); if (!newinfo->chainstack) @@ -932,14 +937,15 @@ static int translate_table(struct net *net, const char *name, } /* we now know the following (along with E=mc²): - - the nr of entries in each chain is right - - the size of the allocated space is right - - all valid hooks have a corresponding chain - - there are no loops - - wrong data can still be on the level of a single entry - - could be there are jumps to places that are not the - beginning of a chain. This can only occur in chains that - are not accessible from any base chains, so we don't care. */ + * - the nr of entries in each chain is right + * - the size of the allocated space is right + * - all valid hooks have a corresponding chain + * - there are no loops + * - wrong data can still be on the level of a single entry + * - could be there are jumps to places that are not the + * beginning of a chain. This can only occur in chains that + * are not accessible from any base chains, so we don't care. + */ /* used to know what we need to clean up if something goes wrong */ i = 0; @@ -986,7 +992,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, struct ebt_table *t; /* the user wants counters back - the check on the size is done later, when we have the lock */ + * the check on the size is done later, when we have the lock + */ if (repl->num_counters) { unsigned long size = repl->num_counters * sizeof(*counterstmp); counterstmp = vmalloc(size); @@ -1038,9 +1045,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, write_unlock_bh(&t->lock); mutex_unlock(&ebt_mutex); /* so, a user can change the chains while having messed up her counter - allocation. Only reason why this is done is because this way the lock - is held only once, while this doesn't bring the kernel into a - dangerous state. */ + * allocation. Only reason why this is done is because this way the lock + * is held only once, while this doesn't bring the kernel into a + * dangerous state. + */ if (repl->num_counters && copy_to_user(repl->counters, counterstmp, repl->num_counters * sizeof(struct ebt_counter))) { @@ -1348,7 +1356,8 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, char name[EBT_FUNCTION_MAXNAMELEN] = {}; /* ebtables expects 32 bytes long names but xt_match names are 29 bytes - long. Copy 29 bytes and fill remaining bytes with zeroes. */ + * long. Copy 29 bytes and fill remaining bytes with zeroes. + */ strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; @@ -1595,8 +1604,7 @@ static int ebt_compat_entry_padsize(void) static int ebt_compat_match_offset(const struct xt_match *match, unsigned int userlen) { - /* - * ebt_among needs special handling. The kernel .matchsize is + /* ebt_among needs special handling. The kernel .matchsize is * set to -1 at registration time; at runtime an EBT_ALIGN()ed * value is expected. * Example: userspace sends 4500, ebt_among.c wants 4504. @@ -1966,8 +1974,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, return off + match_size; } -/* - * return size of all matches, watchers or target, including necessary +/* return size of all matches, watchers or target, including necessary * alignment and padding. */ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, @@ -2070,8 +2077,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, if (ret < 0) return ret; buf_start = (char *) entry; - /* - * 0: matches offset, always follows ebt_entry. + /* 0: matches offset, always follows ebt_entry. * 1: watchers offset, from ebt_entry structure * 2: target offset, from ebt_entry structure * 3: next ebt_entry offset, from ebt_entry structure @@ -2115,8 +2121,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return 0; } -/* - * repl->entries_size is the size of the ebt_entry blob in userspace. +/* repl->entries_size is the size of the ebt_entry blob in userspace. * It might need more memory when copied to a 64 bit kernel in case * userspace is 32-bit. So, first task: find out how much memory is needed. * @@ -2360,8 +2365,7 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, break; case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: - /* - * try real handler first in case of userland-side padding. + /* try real handler first in case of userland-side padding. * in case we are dealing with an 'ordinary' 32 bit binary * without 64bit compatibility padding, this will fail right * after copy_from_user when the *len argument is validated. -- cgit v1.1 From abcdd9a6239d42851faac86ba32158fbfee71b22 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:42 +0000 Subject: netfilter-bridge: brace placement Change brace placement to eliminate checkpatch error. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 6 ++---- net/bridge/netfilter/ebtables.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index f22284d..152300d 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -36,14 +36,12 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par) return 0; } -struct tcpudphdr -{ +struct tcpudphdr { __be16 src; __be16 dst; }; -struct arppayload -{ +struct arppayload { unsigned char mac_src[ETH_ALEN]; unsigned char ip_src[4]; unsigned char mac_dst[ETH_ALEN]; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 62090e2..b13ea69 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -551,8 +551,7 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, return 0; } -struct ebt_cl_stack -{ +struct ebt_cl_stack { struct ebt_chainstack cs; int from; unsigned int hookmask; -- cgit v1.1 From c1bc1d257bd06943413f9b0e943028c028eb34a6 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Mon, 26 Oct 2015 09:10:43 +0000 Subject: netfilter-bridge: layout of if statements Eliminate some checkpatch issues by improved layout of if statements. No changes detected by objdiff. Signed-off-by: Ian Morris Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_ip6.c | 4 ++-- net/bridge/netfilter/ebtables.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 17fd5f2..98de6e7 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -65,8 +65,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) return false; - if (!(info->bitmask & ( EBT_IP6_DPORT | - EBT_IP6_SPORT | EBT_IP6_ICMP6))) + if (!(info->bitmask & (EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) return true; /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index b13ea69..67b2e27 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -161,7 +161,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, for (i = 0; i < 6; i++) verdict |= (h->h_source[i] ^ e->sourcemac[i]) & e->sourcemsk[i]; - if (FWINV2(verdict != 0, EBT_ISOURCE) ) + if (FWINV2(verdict != 0, EBT_ISOURCE)) return 1; } if (e->bitmask & EBT_DESTMAC) { @@ -169,7 +169,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, for (i = 0; i < 6; i++) verdict |= (h->h_dest[i] ^ e->destmac[i]) & e->destmsk[i]; - if (FWINV2(verdict != 0, EBT_IDEST) ) + if (FWINV2(verdict != 0, EBT_IDEST)) return 1; } return 0; @@ -673,7 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, BUGPRINT("Unknown flag for inv bitmask\n"); return -EINVAL; } - if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) { + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) { BUGPRINT("NOPROTO & 802_3 not allowed\n"); return -EINVAL; } @@ -1370,7 +1370,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, char name[EBT_FUNCTION_MAXNAMELEN] = {}; strlcpy(name, w->u.watcher->name, sizeof(name)); - if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) + if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -- cgit v1.1 From a18fd970ce99eee5105a511621d7064812b8cc8c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 17 Nov 2015 13:45:53 -0800 Subject: netfilter: remove duplicate include Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 1 - net/ipv6/netfilter/nf_reject_ipv6.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index c747b2d..b6ea57e 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -14,7 +14,6 @@ #include #include #include -#include const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *_oth, int hook) diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index e0f922b..4709f65 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -14,7 +14,6 @@ #include #include #include -#include const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, struct tcphdr *otcph, -- cgit v1.1 From 029f7f3b8701cc7aca8bdb31f0c7edd6a479e357 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:39 +0100 Subject: netfilter: ipv6: nf_defrag: avoid/free clone operations commit 6aafeef03b9d9ecf ("netfilter: push reasm skb through instead of original frag skbs") changed ipv6 defrag to not use the original skbs anymore. So rather than keeping the original skbs around just to discard them afterwards just use the original skbs directly for the fraglist of the newly assembled skb and remove the extra clone/free operations. The skb that completes the fragment queue is morphed into a the reassembled one instead, just like ipv4 defrag. openvswitch doesn't need any additional skb_morph magic anymore to deal with this situation so just remove that. A followup patch can then also remove the NF_HOOK (re)invocation in the ipv6 netfilter defrag hook. Cc: Joe Stringer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 105 ++++++++++++------------------ net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 -- net/openvswitch/conntrack.c | 14 ---- 3 files changed, 40 insertions(+), 85 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d5efeb8..1a86a08 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -56,7 +56,6 @@ struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; int offset; - struct sk_buff *orig; }; #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) @@ -170,12 +169,6 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_skb_free(struct sk_buff *skb) -{ - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} - static void nf_ct_frag6_expire(unsigned long data) { struct frag_queue *fq; @@ -376,9 +369,9 @@ err: * the last and the first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->q.fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; u8 ecn; @@ -429,10 +422,38 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->csum = 0; clone->ip_summed = head->ip_summed; - NFCT_FRAG6_CB(clone)->orig = NULL; add_frag_mem_limit(fq->q.net, clone->truesize); } + /* morph head into last received skb: prev. + * + * This allows callers of ipv6 conntrack defrag to continue + * to use the last skb(frag) passed into the reasm engine. + * The last skb frag 'silently' turns into the full reassembled skb. + * + * Since prev is also part of q->fragments we have to clone it first. + */ + if (head != prev) { + struct sk_buff *iter; + + fp = skb_clone(prev, GFP_ATOMIC); + if (!fp) + goto out_oom; + + fp->next = prev->next; + skb_queue_walk(head, iter) { + if (iter->next != prev) + continue; + iter->next = fp; + break; + } + + skb_morph(prev, head); + prev->next = head->next; + consume_skb(head); + head = prev; + } + /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; @@ -473,21 +494,6 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ - fp = skb_shinfo(head)->frag_list; - if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) - /* at above code, head skb is divided into two skbs. */ - fp = fp->next; - - op = NFCT_FRAG6_CB(head)->orig; - for (; fp; fp = fp->next) { - struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; - - op->next = orig; - op = orig; - NFCT_FRAG6_CB(fp)->orig = NULL; - } - return head; out_oversize: @@ -565,7 +571,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { - struct sk_buff *clone; struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -583,42 +588,30 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return skb; - clone = skb_clone(skb, GFP_ATOMIC); - if (clone == NULL) { - pr_debug("Can't clone skb\n"); + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return skb; - } - NFCT_FRAG6_CB(clone)->orig = skb; - - if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - pr_debug("message is too short.\n"); - goto ret_orig; - } - - skb_set_transport_header(clone, fhoff); - hdr = ipv6_hdr(clone); - fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_set_transport_header(skb, fhoff); + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq == NULL) { - pr_debug("Can't find and can't create new queue\n"); - goto ret_orig; - } + if (fq == NULL) + return skb; spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); inet_frag_put(&fq->q, &nf_frags); - goto ret_orig; + return skb; } if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, dev); + ret_skb = nf_ct_frag6_reasm(fq, skb, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } @@ -626,26 +619,9 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use inet_frag_put(&fq->q, &nf_frags); return ret_skb; - -ret_orig: - kfree_skb(clone); - return skb; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); -void nf_ct_frag6_consume_orig(struct sk_buff *skb) -{ - struct sk_buff *s, *s2; - - for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - s2 = s->next; - s->next = NULL; - consume_skb(s); - s = s2; - } -} -EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); - static int nf_ct_net_init(struct net *net) { int res; @@ -680,7 +656,6 @@ int nf_ct_frag6_init(void) nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; - nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 4fdbed5e..fb96b10 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -69,12 +69,6 @@ static unsigned int ipv6_defrag(void *priv, if (reasm == NULL) return NF_STOLEN; - /* error occurred or not fragmented */ - if (reasm == skb) - return NF_ACCEPT; - - nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c2cc111..cac2169 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -321,21 +321,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) { - kfree_skb(skb); - return -EINVAL; - } - - /* Don't free 'skb' even though it is one of the original - * fragments, as we're going to morph it into the head. - */ - skb_get(skb); - nf_ct_frag6_consume_orig(reasm); - key->ip.proto = ipv6_hdr(reasm)->nexthdr; - skb_morph(skb, reasm); - skb->next = reasm->next; - consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.1 From daaa7d647f81f3f1494d9a9029d611b666d63181 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:40 +0100 Subject: netfilter: ipv6: avoid nf_iterate recursion The previous patch changed nf_ct_frag6_gather() to morph reassembled skb with the previous one. This means that the return value is always NULL or the skb argument. So change it to an err value. Instead of invoking NF_HOOK recursively with threshold to skip already-called hooks we can now just return NF_ACCEPT to move on to the next hook except for -EINPROGRESS (which means skb has been queued for reassembly), in which case we return NF_STOLEN. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 71 ++++++++++++++----------------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 14 +++--- net/openvswitch/conntrack.c | 11 +++-- 3 files changed, 41 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a86a08..912bc3a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -361,14 +361,15 @@ err: /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. + * + * returns true if *prev skb has been transformed into the reassembled + * skb, false otherwise. */ -static struct sk_buff * +static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; @@ -382,22 +383,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - goto out_fail; + return false; /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - pr_debug("payload len is too large.\n"); - goto out_oversize; + net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", + payload_len); + return false; } /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) { - pr_debug("skb is cloned but can't expand head"); - goto out_oom; - } + if (skb_unclone(head, GFP_ATOMIC)) + return false; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part @@ -408,7 +408,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic clone = alloc_skb(0, GFP_ATOMIC); if (clone == NULL) - goto out_oom; + return false; clone->next = head->next; head->next = clone; @@ -438,7 +438,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fp = skb_clone(prev, GFP_ATOMIC); if (!fp) - goto out_oom; + return false; fp->next = prev->next; skb_queue_walk(head, iter) { @@ -494,16 +494,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - return head; - -out_oversize: - net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", - payload_len); - goto out_fail; -out_oom: - net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); -out_fail: - return NULL; + return true; } /* @@ -569,27 +560,26 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev; + int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; - int fhoff, nhoff; u8 prevhdr; - struct sk_buff *ret_skb = NULL; /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); - return skb; + return -EINVAL; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) - return skb; + return -EINVAL; if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) - return skb; + return -ENOMEM; skb_set_transport_header(skb, fhoff); hdr = ipv6_hdr(skb); @@ -598,27 +588,28 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) - return skb; + return -ENOMEM; spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { - spin_unlock_bh(&fq->q.lock); - pr_debug("Can't insert skb to queue\n"); - inet_frag_put(&fq->q, &nf_frags); - return skb; + ret = -EINVAL; + goto out_unlock; } + /* after queue has assumed skb ownership, only 0 or -EINPROGRESS + * must be returned. + */ + ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, skb, dev); - if (ret_skb == NULL) - pr_debug("Can't reassemble fragmented packets\n"); - } - spin_unlock_bh(&fq->q.lock); + fq->q.meat == fq->q.len && + nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; +out_unlock: + spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q, &nf_frags); - return ret_skb; + return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index fb96b10..f7aab5a 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct sk_buff *reasm; + int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ @@ -63,17 +63,13 @@ static unsigned int ipv6_defrag(void *priv, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(state->net, skb, - nf_ct6_defrag_user(state->hook, skb)); + err = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ - if (reasm == NULL) + if (err == -EINPROGRESS) return NF_STOLEN; - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, - state->in, state->out, - state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - - return NF_STOLEN; + return NF_ACCEPT; } static struct nf_hook_ops ipv6_defrag_ops[] = { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cac2169..0c68c8e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -300,10 +300,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + int err; if (key->eth.type == htons(ETH_P_IP)) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); err = ip_defrag(net, skb, user); @@ -314,14 +314,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(net, skb, user); - if (!reasm) - return -EINPROGRESS; + err = nf_ct_frag6_gather(net, skb, user); + if (err) + return err; - key->ip.proto = ipv6_hdr(reasm)->nexthdr; + key->ip.proto = ipv6_hdr(skb)->nexthdr; ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.1 From 1113ebbcf9e43c80fe5ef05c48b4cd1c25b306b2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:24 +0100 Subject: net: ipmr: move the tbl id check in ipmr_new_table Move the table id check in ipmr_new_table and make it return error pointer. We need this change for the upcoming netlink table manipulation support in order to avoid code duplication and a race condition. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b7..5271e2e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -252,8 +252,8 @@ static int __net_init ipmr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv4.mr_tables); mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - if (!mrt) { - err = -ENOMEM; + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); goto err1; } @@ -301,8 +301,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, static int __net_init ipmr_rules_init(struct net *net) { - net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); - return net->ipv4.mrt ? 0 : -ENOMEM; + struct mr_table *mrt; + + mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) + return PTR_ERR(mrt); + net->ipv4.mrt = mrt; + return 0; } static void __net_exit ipmr_rules_exit(struct net *net) @@ -319,13 +324,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) struct mr_table *mrt; unsigned int i; + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (id != RT_TABLE_DEFAULT && id >= 1000000000) + return ERR_PTR(-EINVAL); + mrt = ipmr_get_table(net, id); if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); if (!mrt) - return NULL; + return ERR_PTR(-ENOMEM); write_pnet(&mrt->net, net); mrt->id = id; @@ -1407,17 +1416,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (get_user(v, (u32 __user *)optval)) return -EFAULT; - /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ - if (v != RT_TABLE_DEFAULT && v >= 1000000000) - return -EINVAL; - rtnl_lock(); ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - if (!ipmr_new_table(net, v)) - ret = -ENOMEM; + mrt = ipmr_new_table(net, v); + if (IS_ERR(mrt)) + ret = PTR_ERR(mrt); else raw_sk(sk)->ipmr_table = v; } -- cgit v1.1 From f3d431810e85bad13635669402ca1153bb7e398c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:25 +0100 Subject: net: ipmr: always define mroute_reg_vif_num Before mroute_reg_vif_num was defined only if any of the CONFIG_PIMSM_ options were set, but that's not really necessary as the size of the struct is the same in both cases (checked with pahole, both cases size is 3256 bytes) and we can remove some unnecessary ifdefs to simplify the code. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5271e2e..dd2462f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -84,9 +84,7 @@ struct mr_table { atomic_t cache_resolve_queue_len; bool mroute_do_assert; bool mroute_do_pim; -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) int mroute_reg_vif_num; -#endif }; struct ipmr_rule { @@ -347,9 +345,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, (unsigned long)mrt); -#ifdef CONFIG_IP_PIMSM mrt->mroute_reg_vif_num = -1; -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); #endif @@ -584,10 +580,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, return -EADDRNOTAVAIL; } -#ifdef CONFIG_IP_PIMSM if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; -#endif if (vifi + 1 == mrt->maxvif) { int tmp; @@ -824,10 +818,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); v->dev = dev; -#ifdef CONFIG_IP_PIMSM if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; -#endif if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); -- cgit v1.1 From c316c629f12e01e5d7710e456248a1ebef8426ef Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:26 +0100 Subject: net: ipmr: remove some pimsm ifdefs and simplify Add the helper pimsm_enabled() which replaces the old CONFIG_IP_PIMSM define and is used to check if any version of PIM-SM has been enabled. Use a single if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) for the pim-sm shared code. This is okay w.r.t IGMPMSG_WHOLEPKT because only a VIFF_REGISTER device can send such packet, and it can't be created if pimsm_enabled() is false. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 180 ++++++++++++++++++++++++++------------------------------ 1 file changed, 84 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index dd2462f..e153ab7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,10 +67,6 @@ #include #include -#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) -#define CONFIG_IP_PIMSM 1 -#endif - struct mr_table { struct list_head list; possible_net_t net; @@ -95,6 +91,11 @@ struct ipmr_result { struct mr_table *mrt; }; +static inline bool pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ @@ -454,8 +455,7 @@ failure: return NULL; } -#ifdef CONFIG_IP_PIMSM - +#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); @@ -552,6 +552,51 @@ failure: unregister_netdevice(dev); return NULL; } + +/* called with rcu_read_lock() */ +static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, + unsigned int pimlen) +{ + struct net_device *reg_dev = NULL; + struct iphdr *encap; + + encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); + /* + * Check that: + * a. packet is really sent to a multicast group + * b. packet is not a NULL-REGISTER + * c. packet is not truncated + */ + if (!ipv4_is_multicast(encap->daddr) || + encap->tot_len == 0 || + ntohs(encap->tot_len) + pimlen > skb->len) + return 1; + + read_lock(&mrt_lock); + if (mrt->mroute_reg_vif_num >= 0) + reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + read_unlock(&mrt_lock); + + if (!reg_dev) + return 1; + + skb->mac_header = skb->network_header; + skb_pull(skb, (u8 *)encap - skb->data); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + skb->ip_summed = CHECKSUM_NONE; + + skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); + + netif_rx(skb); + + return NET_RX_SUCCESS; +} +#else +static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) +{ + return NULL; +} #endif /** @@ -734,10 +779,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRINUSE; switch (vifc->vifc_flags) { -#ifdef CONFIG_IP_PIMSM case VIFF_REGISTER: - /* - * Special Purpose VIF in PIM + if (!pimsm_enabled()) + return -EINVAL; + /* Special Purpose VIF in PIM * All the packets will be sent to the daemon */ if (mrt->mroute_reg_vif_num >= 0) @@ -752,7 +797,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; -#endif case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); if (!dev) @@ -942,34 +986,29 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* - * Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted + * expects the following bizarre scheme. * - * Called under mrt_lock. + * Called under mrt_lock. */ - static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { - struct sk_buff *skb; const int ihl = ip_hdrlen(pkt); + struct sock *mroute_sk; struct igmphdr *igmp; struct igmpmsg *msg; - struct sock *mroute_sk; + struct sk_buff *skb; int ret; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else -#endif skb = alloc_skb(128, GFP_ATOMIC); if (!skb) return -ENOBUFS; -#ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. @@ -987,28 +1026,23 @@ static int ipmr_cache_report(struct mr_table *mrt, ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); - } else -#endif - { - - /* Copy the IP header */ - - skb_set_network_header(skb, skb->len); - skb_put(skb, ihl); - skb_copy_to_linear_data(skb, pkt->data, ihl); - ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ - msg = (struct igmpmsg *)skb_network_header(skb); - msg->im_vif = vifi; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); - - /* Add our header */ - - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); - igmp->type = - msg->im_msgtype = assert; - igmp->code = 0; - ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ - skb->transport_header = skb->network_header; + } else { + /* Copy the IP header */ + skb_set_network_header(skb, skb->len); + skb_put(skb, ihl); + skb_copy_to_linear_data(skb, pkt->data, ihl); + /* Flag to the kernel this is a route add */ + ip_hdr(skb)->protocol = 0; + msg = (struct igmpmsg *)skb_network_header(skb); + msg->im_vif = vifi; + skb_dst_set(skb, dst_clone(skb_dst(pkt))); + /* Add our header */ + igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp->type = assert; + msg->im_msgtype = assert; + igmp->code = 0; + ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ + skb->transport_header = skb->network_header; } rcu_read_lock(); @@ -1020,7 +1054,6 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Deliver to mrouted */ - ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); if (ret < 0) { @@ -1377,11 +1410,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi mrt->mroute_do_assert = v; return 0; } -#ifdef CONFIG_IP_PIMSM case MRT_PIM: { int v; + if (!pimsm_enabled()) + return -ENOPROTOOPT; if (optlen != sizeof(v)) return -EINVAL; if (get_user(v, (int __user *)optval)) @@ -1397,7 +1431,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } -#endif #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES case MRT_TABLE: { @@ -1452,9 +1485,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int return -ENOENT; if (optname != MRT_VERSION && -#ifdef CONFIG_IP_PIMSM optname != MRT_PIM && -#endif optname != MRT_ASSERT) return -ENOPROTOOPT; @@ -1467,14 +1498,15 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) + if (optname == MRT_VERSION) { val = 0x0305; -#ifdef CONFIG_IP_PIMSM - else if (optname == MRT_PIM) + } else if (optname == MRT_PIM) { + if (!pimsm_enabled()) + return -ENOPROTOOPT; val = mrt->mroute_do_pim; -#endif - else + } else { val = mrt->mroute_do_assert; + } if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; @@ -1707,7 +1739,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (!vif->dev) goto out_free; -#ifdef CONFIG_IP_PIMSM if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; @@ -1716,7 +1747,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } -#endif if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, @@ -2047,48 +2077,6 @@ dont_forward: return 0; } -#ifdef CONFIG_IP_PIMSM -/* called with rcu_read_lock() */ -static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, - unsigned int pimlen) -{ - struct net_device *reg_dev = NULL; - struct iphdr *encap; - - encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: - * a. packet is really sent to a multicast group - * b. packet is not a NULL-REGISTER - * c. packet is not truncated - */ - if (!ipv4_is_multicast(encap->daddr) || - encap->tot_len == 0 || - ntohs(encap->tot_len) + pimlen > skb->len) - return 1; - - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; - read_unlock(&mrt_lock); - - if (!reg_dev) - return 1; - - skb->mac_header = skb->network_header; - skb_pull(skb, (u8 *)encap - skb->data); - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->ip_summed = CHECKSUM_NONE; - - skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev)); - - netif_rx(skb); - - return NET_RX_SUCCESS; -} -#endif - #ifdef CONFIG_IP_PIMSM_V1 /* * Handle IGMP messages of PIMv1 -- cgit v1.1 From 7ef8f65df976369588fa1b6466668b1b6a26eb3c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:27 +0100 Subject: net: ipmr: fix code and comment style Trivial code and comment style fixes, also removed some extra newlines, spaces and tabs. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 142 +++++++++++++++----------------------------------------- 1 file changed, 37 insertions(+), 105 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e153ab7..286ede3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -102,9 +102,7 @@ static inline bool pimsm_enabled(void) static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ +/* Multicast router control variables */ #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -393,8 +391,7 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } -static -struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) +static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -561,8 +558,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, struct iphdr *encap; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); - /* - * Check that: + /* Check that: * a. packet is really sent to a multicast group * b. packet is not a NULL-REGISTER * c. packet is not truncated @@ -603,7 +599,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call */ - static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { @@ -673,7 +668,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c) /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. */ - static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) { struct net *net = read_pnet(&mrt->net); @@ -701,9 +695,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) ipmr_cache_free(c); } - /* Timer process for the unresolved queue. */ - static void ipmr_expire_process(unsigned long arg) { struct mr_table *mrt = (struct mr_table *)arg; @@ -743,7 +735,6 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ - static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, unsigned char *ttls) { @@ -808,7 +799,6 @@ static int vif_add(struct net *net, struct mr_table *mrt, return err; } break; - case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) { @@ -928,9 +918,7 @@ skip: return ipmr_cache_find_any_parent(mrt, vifi); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); @@ -951,10 +939,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) return c; } -/* - * A cache entry has gone into a resolved state from queued - */ - +/* A cache entry has gone into a resolved state from queued */ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c) { @@ -962,7 +947,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); @@ -1064,12 +1048,9 @@ static int ipmr_cache_report(struct mr_table *mrt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) +/* Queue a packet for resolution. It gets locked cache entry! */ +static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, + struct sk_buff *skb) { bool found = false; int err; @@ -1087,7 +1068,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres()) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -1097,13 +1077,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* Fill in the new cache entry */ - c->mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ - err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); if (err < 0) { /* If the report failed throw the cache entry @@ -1125,7 +1103,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; @@ -1138,9 +1115,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) return err; } -/* - * MFC cache manipulation by user space mroute daemon - */ +/* MFC cache manipulation by user space mroute daemon */ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { @@ -1211,9 +1186,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); @@ -1238,10 +1212,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } -/* - * Close the multicast socket, and clear the vif tables etc - */ - +/* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt) { int i; @@ -1249,7 +1220,6 @@ static void mroute_clean_tables(struct mr_table *mrt) struct mfc_cache *c, *next; /* Shut down all active vif entries */ - for (i = 0; i < mrt->maxvif; i++) { if (!(mrt->vif_table[i].flags & VIFF_STATIC)) vif_delete(mrt, i, 0, &list); @@ -1257,7 +1227,6 @@ static void mroute_clean_tables(struct mr_table *mrt) unregister_netdevice_many(&list); /* Wipe the cache */ - for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { if (c->mfc_flags & MFC_STATIC) @@ -1301,11 +1270,10 @@ static void mrtsock_destruct(struct sock *sk) rtnl_unlock(); } -/* - * Socket options and virtual interface manipulation. The whole - * virtual interface system is a complete heap, but unfortunately - * that's how BSD mrouted happens to think. Maybe one day with a proper - * MOSPF/PIM router set up we can clean this up. +/* Socket options and virtual interface manipulation. The whole + * virtual interface system is a complete heap, but unfortunately + * that's how BSD mrouted happens to think. Maybe one day with a proper + * MOSPF/PIM router set up we can clean this up. */ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) @@ -1373,10 +1341,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; - /* - * Manipulate the forwarding caches. These live - * in a sort of kernel/user symbiosis. - */ + /* Manipulate the forwarding caches. These live + * in a sort of kernel/user symbiosis. + */ case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; @@ -1397,9 +1364,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi parent); rtnl_unlock(); return ret; - /* - * Control PIM assert. - */ + /* Control PIM assert. */ case MRT_ASSERT: { int v; @@ -1456,19 +1421,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return ret; } #endif - /* - * Spurious command, or MRT_VERSION which you cannot - * set. - */ + /* Spurious command, or MRT_VERSION which you cannot set. */ default: return -ENOPROTOOPT; } } -/* - * Getsock opt support for the multicast routing system. - */ - +/* Getsock opt support for the multicast routing system. */ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int olr; @@ -1512,10 +1471,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int return 0; } -/* - * The IP multicast ioctl support routines. - */ - +/* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req sr; @@ -1648,7 +1604,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif - static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -1670,17 +1625,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v return NOTIFY_DONE; } - static struct notifier_block ip_mr_notifier = { .notifier_call = ipmr_device_event, }; -/* - * Encapsulate a packet by attaching a valid IPIP header to it. - * This avoids tunnel drivers and other mess and gives us the speed so - * important for multicast video. +/* Encapsulate a packet by attaching a valid IPIP header to it. + * This avoids tunnel drivers and other mess and gives us the speed so + * important for multicast video. */ - static void ip_encap(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { @@ -1722,9 +1674,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -/* - * Processing handlers for ipmr_forward - */ +/* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, int vifi) @@ -1773,7 +1723,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * allow to send ICMP, so that packets will disappear * to blackhole. */ - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; @@ -1805,8 +1754,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPCB(skb)->flags |= IPSKB_FORWARDED; - /* - * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally + /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output * interfaces. It is clear, if mrouter runs a multicasting * program, it should receive packets not depending to what interface @@ -1837,7 +1785,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ - static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local) @@ -1862,9 +1809,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto forward; } - /* - * Wrong interface: drop packet and (maybe) send PIM assert. - */ + /* Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. @@ -1903,9 +1848,7 @@ forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; - /* - * Forward the frame - */ + /* Forward the frame */ if (cache->mfc_origin == htonl(INADDR_ANY) && cache->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && @@ -1979,11 +1922,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) return mrt; } -/* - * Multicast packets for forwarding arrive here - * Called with rcu_read_lock(); +/* Multicast packets for forwarding arrive here + * Called with rcu_read_lock(); */ - int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; @@ -2034,9 +1975,7 @@ int ip_mr_input(struct sk_buff *skb) vif); } - /* - * No usable cache entry - */ + /* No usable cache entry */ if (!cache) { int vif; @@ -2078,10 +2017,7 @@ dont_forward: } #ifdef CONFIG_IP_PIMSM_V1 -/* - * Handle IGMP messages of PIMv1 - */ - +/* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) { struct igmphdr *pim; @@ -2406,9 +2342,8 @@ done: } #ifdef CONFIG_PROC_FS -/* - * The /proc interfaces to multicast routing : - * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif +/* The /proc interfaces to multicast routing : + * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ struct ipmr_vif_iter { struct seq_net_private p; @@ -2692,10 +2627,7 @@ static const struct net_protocol pim_protocol = { }; #endif - -/* - * Setup for IP multicast routing - */ +/* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; -- cgit v1.1 From fe9ef3ce395d06e4f17e5995ab8455b9627f3306 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:28 +0100 Subject: net: ipmr: make ip_mroute_getsockopt more understandable Use a switch to determine if optname is correct and set val accordingly. This produces a much more straight-forward and readable code. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 286ede3..694fecf 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1443,29 +1443,29 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int if (!mrt) return -ENOENT; - if (optname != MRT_VERSION && - optname != MRT_PIM && - optname != MRT_ASSERT) + switch (optname) { + case MRT_VERSION: + val = 0x0305; + break; + case MRT_PIM: + if (!pimsm_enabled()) + return -ENOPROTOOPT; + val = mrt->mroute_do_pim; + break; + case MRT_ASSERT: + val = mrt->mroute_do_assert; + break; + default: return -ENOPROTOOPT; + } if (get_user(olr, optlen)) return -EFAULT; - olr = min_t(unsigned int, olr, sizeof(int)); if (olr < 0) return -EINVAL; - if (put_user(olr, optlen)) return -EFAULT; - if (optname == MRT_VERSION) { - val = 0x0305; - } else if (optname == MRT_PIM) { - if (!pimsm_enabled()) - return -ENOPROTOOPT; - val = mrt->mroute_do_pim; - } else { - val = mrt->mroute_do_assert; - } if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; -- cgit v1.1 From 29c3f19739421cf749991cb8c693093b4ac58ad1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:29 +0100 Subject: net: ipmr: drop an instance of CONFIG_IP_MROUTE_MULTIPLE_TABLES Trivial replace of ifdef with IS_BUILTIN(). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 694fecf..a006d96 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1396,11 +1396,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES case MRT_TABLE: { u32 v; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) + return -ENOPROTOOPT; if (optlen != sizeof(u32)) return -EINVAL; if (get_user(v, (u32 __user *)optval)) @@ -1420,7 +1421,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; } -#endif /* Spurious command, or MRT_VERSION which you cannot set. */ default: return -ENOPROTOOPT; -- cgit v1.1 From af623236a9f3a20aa2f15f03cf9fe7bfd13b8889 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:30 +0100 Subject: net: ipmr: drop ip_mr_init() mrt_cachep null check as we'll panic if it fails It's not necessary to check for null as SLAB_PANIC is used and we'll panic if the alloc fails, so just drop it. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a006d96..50aec31 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2677,8 +2677,6 @@ int __init ip_mr_init(void) sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - if (!mrt_cachep) - return -ENOMEM; err = register_pernet_subsys(&ipmr_net_ops); if (err) -- cgit v1.1 From 29e97d214509ef4977838e073d30f6b16f75c6d5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:31 +0100 Subject: net: ipmr: rearrange and cleanup setsockopt Take rtnl in the beginning unconditionally as most options already need it (one exception - MRT_DONE, see the comment inside), make the lock/unlock places central and move out the switch() local variables. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 191 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 107 insertions(+), 84 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 50aec31..e384f39 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1276,38 +1276,45 @@ static void mrtsock_destruct(struct sock *sk) * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) { - int ret, parent = 0; - struct vifctl vif; - struct mfcctl mfc; struct net *net = sock_net(sk); + int val, ret = 0, parent = 0; struct mr_table *mrt; + struct vifctl vif; + struct mfcctl mfc; + u32 uval; + /* There's one exception to the lock - MRT_DONE which needs to unlock */ + rtnl_lock(); if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_IGMP) - return -EOPNOTSUPP; + inet_sk(sk)->inet_num != IPPROTO_IGMP) { + ret = -EOPNOTSUPP; + goto out_unlock; + } mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); - if (!mrt) - return -ENOENT; - + if (!mrt) { + ret = -ENOENT; + goto out_unlock; + } if (optname != MRT_INIT) { if (sk != rcu_access_pointer(mrt->mroute_sk) && - !ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EACCES; + !ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EACCES; + goto out_unlock; + } } switch (optname) { case MRT_INIT: if (optlen != sizeof(int)) - return -EINVAL; - - rtnl_lock(); - if (rtnl_dereference(mrt->mroute_sk)) { - rtnl_unlock(); - return -EADDRINUSE; - } + ret = -EINVAL; + if (rtnl_dereference(mrt->mroute_sk)) + ret = -EADDRINUSE; + if (ret) + break; ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { @@ -1317,30 +1324,41 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } - rtnl_unlock(); - return ret; + break; case MRT_DONE: - if (sk != rcu_access_pointer(mrt->mroute_sk)) - return -EACCES; - return ip_ra_control(sk, 0, NULL); + if (sk != rcu_access_pointer(mrt->mroute_sk)) { + ret = -EACCES; + } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); + ret = ip_ra_control(sk, 0, NULL); + goto out; + } + break; case MRT_ADD_VIF: case MRT_DEL_VIF: - if (optlen != sizeof(vif)) - return -EINVAL; - if (copy_from_user(&vif, optval, sizeof(vif))) - return -EFAULT; - if (vif.vifc_vifi >= MAXVIFS) - return -ENFILE; - rtnl_lock(); + if (optlen != sizeof(vif)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&vif, optval, sizeof(vif))) { + ret = -EFAULT; + break; + } + if (vif.vifc_vifi >= MAXVIFS) { + ret = -ENFILE; + break; + } if (optname == MRT_ADD_VIF) { ret = vif_add(net, mrt, &vif, sk == rtnl_dereference(mrt->mroute_sk)); } else { ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); } - rtnl_unlock(); - return ret; - + break; /* Manipulate the forwarding caches. These live * in a sort of kernel/user symbiosis. */ @@ -1349,82 +1367,87 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi parent = -1; case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: - if (optlen != sizeof(mfc)) - return -EINVAL; - if (copy_from_user(&mfc, optval, sizeof(mfc))) - return -EFAULT; + if (optlen != sizeof(mfc)) { + ret = -EINVAL; + break; + } + if (copy_from_user(&mfc, optval, sizeof(mfc))) { + ret = -EFAULT; + break; + } if (parent == 0) parent = mfc.mfcc_parent; - rtnl_lock(); if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); - rtnl_unlock(); - return ret; + break; /* Control PIM assert. */ case MRT_ASSERT: - { - int v; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - mrt->mroute_do_assert = v; - return 0; - } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } + mrt->mroute_do_assert = val; + break; case MRT_PIM: - { - int v; - - if (!pimsm_enabled()) - return -ENOPROTOOPT; - if (optlen != sizeof(v)) - return -EINVAL; - if (get_user(v, (int __user *)optval)) - return -EFAULT; - v = !!v; + if (!pimsm_enabled()) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(val)) { + ret = -EINVAL; + break; + } + if (get_user(val, (int __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; - if (v != mrt->mroute_do_pim) { - mrt->mroute_do_pim = v; - mrt->mroute_do_assert = v; + val = !!val; + if (val != mrt->mroute_do_pim) { + mrt->mroute_do_pim = val; + mrt->mroute_do_assert = val; } - rtnl_unlock(); - return ret; - } + break; case MRT_TABLE: - { - u32 v; - - if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) - return -ENOPROTOOPT; - if (optlen != sizeof(u32)) - return -EINVAL; - if (get_user(v, (u32 __user *)optval)) - return -EFAULT; + if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) { + ret = -ENOPROTOOPT; + break; + } + if (optlen != sizeof(uval)) { + ret = -EINVAL; + break; + } + if (get_user(uval, (u32 __user *)optval)) { + ret = -EFAULT; + break; + } - rtnl_lock(); - ret = 0; if (sk == rtnl_dereference(mrt->mroute_sk)) { ret = -EBUSY; } else { - mrt = ipmr_new_table(net, v); + mrt = ipmr_new_table(net, uval); if (IS_ERR(mrt)) ret = PTR_ERR(mrt); else - raw_sk(sk)->ipmr_table = v; + raw_sk(sk)->ipmr_table = uval; } - rtnl_unlock(); - return ret; - } + break; /* Spurious command, or MRT_VERSION which you cannot set. */ default: - return -ENOPROTOOPT; + ret = -ENOPROTOOPT; } +out_unlock: + rtnl_unlock(); +out: + return ret; } /* Getsock opt support for the multicast routing system. */ -- cgit v1.1 From a0b477366a9550ae46f78caa9e55de34fac4ba9c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 15:57:32 +0100 Subject: net: ipmr: factor out common vif init code Factor out common vif init code used in both tunnel and pimreg initialization and create ipmr_init_vif_indev() function. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e384f39..a2d248d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -391,6 +391,23 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } } +/* Initialize ipmr pimreg/tunnel in_device */ +static bool ipmr_init_vif_indev(const struct net_device *dev) +{ + struct in_device *in_dev; + + ASSERT_RTNL(); + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return false; + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); + IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; + + return true; +} + static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; @@ -402,7 +419,6 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) int err; struct ifreq ifr; struct ip_tunnel_parm p; - struct in_device *in_dev; memset(&p, 0, sizeof(p)); p.iph.daddr = v->vifc_rmt_addr.s_addr; @@ -427,15 +443,8 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) if (err == 0 && (dev = __dev_get_by_name(net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) + if (!ipmr_init_vif_indev(dev)) goto failure; - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - if (dev_open(dev)) goto failure; dev_hold(dev); @@ -502,7 +511,6 @@ static void reg_vif_setup(struct net_device *dev) static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; - struct in_device *in_dev; char name[IFNAMSIZ]; if (mrt->id == RT_TABLE_DEFAULT) @@ -522,18 +530,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return NULL; } - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (!in_dev) { - rcu_read_unlock(); + if (!ipmr_init_vif_indev(dev)) goto failure; - } - - ipv4_devconf_setall(in_dev); - neigh_parms_data_state_setall(in_dev->arp_parms); - IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0; - rcu_read_unlock(); - if (dev_open(dev)) goto failure; -- cgit v1.1 From 4dd191bb6195641edbc527a8495b7b1b816a41e6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 Nov 2015 18:28:05 +0100 Subject: net: atm: constify in_cache_ops and eg_cache_ops structures The in_cache_ops and eg_cache_ops structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/atm/mpc.h | 4 ++-- net/atm/mpoa_caches.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 0919a88b..cfc7b74 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -21,11 +21,11 @@ struct mpoa_client { uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */ rwlock_t ingress_lock; - struct in_cache_ops *in_ops; /* ingress cache operations */ + const struct in_cache_ops *in_ops; /* ingress cache operations */ in_cache_entry *in_cache; /* the ingress cache of this MPC */ rwlock_t egress_lock; - struct eg_cache_ops *eg_ops; /* egress cache operations */ + const struct eg_cache_ops *eg_ops; /* egress cache operations */ eg_cache_entry *eg_cache; /* the egress cache of this MPC */ uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index d1b2d9a..9e60e74 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -534,7 +534,7 @@ static void eg_destroy_cache(struct mpoa_client *mpc) } -static struct in_cache_ops ingress_ops = { +static const struct in_cache_ops ingress_ops = { in_cache_add_entry, /* add_entry */ in_cache_get, /* get */ in_cache_get_with_mask, /* get_with_mask */ @@ -548,7 +548,7 @@ static struct in_cache_ops ingress_ops = { in_destroy_cache /* destroy_cache */ }; -static struct eg_cache_ops egress_ops = { +static const struct eg_cache_ops egress_ops = { eg_cache_add_entry, /* add_entry */ eg_cache_get_by_cache_id, /* get_by_cache_id */ eg_cache_get_by_tag, /* get_by_tag */ -- cgit v1.1 From 3b22dae38db1cea9ead3229f08cfb0b69aca5706 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 Nov 2015 18:39:17 +0100 Subject: VSOCK: constify vmci_transport_notify_ops structures The vmci_transport_notify_ops structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.h | 2 +- net/vmw_vsock/vmci_transport_notify.c | 2 +- net/vmw_vsock/vmci_transport_notify.h | 5 +++-- net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 2ad46f3..1820e74 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -121,7 +121,7 @@ struct vmci_transport { u64 queue_pair_max_size; u32 detach_sub_id; union vmci_transport_notify notify; - struct vmci_transport_notify_ops *notify_ops; + const struct vmci_transport_notify_ops *notify_ops; struct list_head elem; struct sock *sk; spinlock_t lock; /* protects sk. */ diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 9b7f207..fd8cf02 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -661,7 +661,7 @@ static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) } /* Socket control packet based operations. */ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h index 7df7932..3c464d3 100644 --- a/net/vmw_vsock/vmci_transport_notify.h +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -77,7 +77,8 @@ struct vmci_transport_notify_ops { void (*process_negotiate) (struct sock *sk); }; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; -extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; +extern const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern const +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; #endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index dc9c792..21e591d 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -419,7 +419,7 @@ vmci_transport_notify_pkt_send_pre_enqueue( } /* Socket always on control packet based operations. */ -struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { +const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { vmci_transport_notify_pkt_socket_init, vmci_transport_notify_pkt_socket_destruct, vmci_transport_notify_pkt_poll_in, -- cgit v1.1 From 85beabfeca5343b86057c0d588e33f7975684d37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 24 Nov 2015 12:34:49 +0100 Subject: net: dsa: include gpio consumer header file After the introduction of the switch gpio reset API, I'm getting build errors in configurations that disable CONFIG_GPIOLIB: net/dsa/dsa.c:783:16: error: implicit declaration of function 'gpio_to_desc' [-Werror=implicit-function-declaration] The reason is that linux/gpio/consumer.h is not automatically included without gpiolib support. This adds an explicit #include statement to make it compile in all configurations. The reset functionality will not work without gpiolib, which is what you get when disabling the feature. As far as I can tell, gpiolib is supported on all architectures on which you can have DSA at the moment. Signed-off-by: Arnd Bergmann Fixes: cc30c16344fc ("net: dsa: Add support for a switch reset gpio") Acked-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0b5565f..b7448c8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; -- cgit v1.1 From 6c1c36b02c325ecebce6e3b34bce7f1dfe012cf9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Nov 2015 14:08:23 +0100 Subject: net/ipv4/ipconfig: Rejoin broken lines in console output Commit 09605cc12c078306 ("net ipv4: use preferred log methods") replaced a few calls of pr_cont() after a console print without a trailing newline by pr_info(), causing lines to be split during IP autoconfiguration, like: . , OK IP-Config: Got DHCP answer from 192.168.97.254, my address is 192.168.97.44 Convert these back to using pr_cont(), so it prints again: ., OK IP-Config: Got DHCP answer from 192.168.97.254, my address is 192.168.97.44 Absorb the printing of "my address ..." into the previous call to pr_info(), as there's no reason to use a continuation there. Convert one more pr_info() to print nameservers while we're at it. Fixes: 09605cc12c078306 ("net ipv4: use preferred log methods") Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e86e8a9..67f7c9d 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1239,13 +1239,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - pr_notice(","); + pr_cont(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - pr_notice(" OK\n"); + pr_cont(" OK\n"); break; } @@ -1253,7 +1253,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - pr_notice(" timed out!\n"); + pr_cont(" timed out!\n"); break; } @@ -1263,7 +1263,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - pr_notice("."); + pr_cont("."); } #ifdef IPCONFIG_BOOTP @@ -1280,11 +1280,10 @@ static int __init ic_dynamic(void) return -1; } - pr_info("IP-Config: Got %s answer from %pI4, ", + pr_info("IP-Config: Got %s answer from %pI4, my address is %pI4\n", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_addrservaddr); - pr_info("my address is %pI4\n", &ic_myaddr); + &ic_addrservaddr, &ic_myaddr); return 0; } @@ -1527,14 +1526,14 @@ static int __init ip_auto_config(void) pr_cont(", mtu=%d", ic_dev_mtu); for (i = 0; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) { - pr_info(" nameserver%u=%pI4", + pr_cont(" nameserver%u=%pI4", i, &ic_nameservers[i]); break; } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_info(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_info("\n"); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; -- cgit v1.1 From f13f2aeed154da8e48f90b85e720f8ba39b1e881 Mon Sep 17 00:00:00 2001 From: Philip Whineray Date: Sun, 22 Nov 2015 11:35:07 +0000 Subject: netfilter: Set /proc/net entries owner to root in namespace Various files are owned by root with 0440 permission. Reading them is impossible in an unprivileged user namespace, interfering with firewall tools. For instance, iptables-save relies on /proc/net/ip_tables_names contents to dump only loaded tables. This patch assigned ownership of the following files to root in the current namespace: - /proc/net/*_tables_names - /proc/net/*_tables_matches - /proc/net/*_tables_targets - /proc/net/nf_conntrack - /proc/net/nf_conntrack_expect - /proc/net/netfilter/nfnetlink_log A mapping for root must be available, so this order should be followed: unshare(CLONE_NEWUSER); /* Setup the mapping */ unshare(CLONE_NEWNET); Signed-off-by: Philip Whineray Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 7 +++++++ net/netfilter/nf_conntrack_standalone.c | 7 +++++++ net/netfilter/nfnetlink_log.c | 15 +++++++++++++-- net/netfilter/x_tables.c | 12 ++++++++++++ 4 files changed, 39 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index acf5c7b..278927a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, &exp_file_ops); if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ return 0; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 1fb3cac..0f1a45b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = { static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; + kuid_t root_uid; + kgid_t root_gid; pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(pde, root_uid, root_gid); + pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 740cce4..dea4676 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1064,15 +1064,26 @@ static int __net_init nfnl_log_net_init(struct net *net) { unsigned int i; struct nfnl_log_net *log = nfnl_log_pernet(net); +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; +#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&log->instance_table[i]); spin_lock_init(&log->instances_lock); #ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - net->nf.proc_netfilter, &nful_file_ops)) + proc = proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops); + if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d4aaad7..c8a0b7d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1226,6 +1227,8 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; #endif if (af >= ARRAY_SIZE(xt_prefix)) @@ -1233,12 +1236,17 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops, (void *)(unsigned long)af); if (!proc) goto out; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); @@ -1246,6 +1254,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_tables; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); @@ -1253,6 +1263,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_matches; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; -- cgit v1.1 From 7ec3f7b47b8d9ad7ba425726f2c58f9ddce040df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 24 Nov 2015 10:00:22 +0000 Subject: netfilter: nft_payload: add packet mangling support Add support for mangling packet payload. Checksum for the specified base header is updated automatically if requested, however no updates for any kind of pseudo headers are supported, meaning no stateless NAT is supported. For checksum updates different checksumming methods can be specified. The currently supported methods are NONE for no checksum updates, and INET for internet type checksums. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 135 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 09b4b07..12cd4bf 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,10 +107,13 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const u32 *src = ®s->data[priv->sreg]; + int offset, csum_offset; + __wsum fsum, tsum; + __sum16 sum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + + csum_offset = offset + priv->csum_offset; + offset += priv->offset; + + if (priv->csum_type == NFT_PAYLOAD_CSUM_INET && + (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || + skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + + fsum = skb_checksum(skb, offset, priv->len, 0); + tsum = csum_partial(src, priv->len, 0); + sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), + tsum)); + if (sum == 0) + sum = CSUM_MANGLED_0; + + if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || + skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + } + + if (!skb_make_writable(skb, max(offset + priv->len, 0)) || + skb_store_bits(skb, offset, src, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload_set *priv = nft_expr_priv(expr); + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) + priv->csum_type = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) + priv->csum_offset = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + + switch (priv->csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; + default: + return -EOPNOTSUPP; + } + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, + htonl(priv->csum_offset))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_payload_set_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), + .eval = nft_payload_set_eval, + .init = nft_payload_set_init, + .dump = nft_payload_set_dump, +}; + static const struct nft_expr_ops * nft_payload_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, enum nft_payload_bases base; unsigned int offset, len; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || + if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || tb[NFTA_PAYLOAD_LEN] == NULL) return ERR_PTR(-EINVAL); @@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EOPNOTSUPP); } + if (tb[NFTA_PAYLOAD_SREG] != NULL) { + if (tb[NFTA_PAYLOAD_DREG] != NULL) + return ERR_PTR(-EINVAL); + return &nft_payload_set_ops; + } + + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); -- cgit v1.1 From 1ce0bf50ae2233c7115a18c0c623662d177b434c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 26 Nov 2015 13:55:39 +0800 Subject: net: Generalise wq_has_sleeper helper The memory barrier in the helper wq_has_sleeper is needed by just about every user of waitqueue_active. This patch generalises it by making it take a wait_queue_head_t directly. The existing helper is renamed to skwq_has_sleeper. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/atm/common.c | 4 ++-- net/core/sock.c | 8 ++++---- net/core/stream.c | 2 +- net/dccp/output.c | 2 +- net/iucv/af_iucv.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 2 +- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 49a872d..6dc1230 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -96,7 +96,7 @@ static void vcc_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up(&wq->wait); rcu_read_unlock(); } @@ -117,7 +117,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54..2769bd3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2283,7 +2283,7 @@ static void sock_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } @@ -2294,7 +2294,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); @@ -2306,7 +2306,7 @@ static void sock_def_readable(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -2324,7 +2324,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0..8ff9d63 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) diff --git a/net/dccp/output.c b/net/dccp/output.c index 4ce912e..b66c84d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -201,7 +201,7 @@ void dccp_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752..4f0aa91 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -303,7 +303,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1f8a144..7e2d105 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -67,7 +67,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c..ec10b66 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6978,7 +6978,7 @@ void sctp_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 552dbab..525acf6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1492,7 +1492,7 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); rcu_read_unlock(); @@ -1509,7 +1509,7 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); rcu_read_unlock(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec15..efb706e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -339,7 +339,7 @@ static void unix_write_space(struct sock *sk) rcu_read_lock(); if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); -- cgit v1.1 From dfc3b0e89188e0dfe6eb12f9bb29c9dfc27bbda1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:44 +0100 Subject: net: remove unnecessary mroute.h includes It looks like many files are including mroute.h unnecessarily, so remove the include. Most importantly remove it from ipv6. CC: Hideaki YOSHIFUJI CC: Steffen Klassert CC: Herbert Xu Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 1 - net/ipv4/ip_output.c | 1 - net/ipv4/ip_tunnel.c | 1 - net/ipv4/ip_tunnel_core.c | 1 - net/ipv4/ip_vti.c | 1 - net/ipv4/ipip.c | 1 - net/ipv6/ip6_gre.c | 1 - 7 files changed, 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6145214..04a48c0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4233cbe..e0b94cd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -76,7 +76,6 @@ #include #include #include -#include #include #include diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cbb51f3..0f6e9ee 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6cb9009..1db8418 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 4d8f0b6..02d9c21 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index f34c31d..1f06729 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -103,7 +103,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c7b931..938d03ce5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.1 From 06bd6c0370bb88a2256c6763a32bc4e4ade06521 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:45 +0100 Subject: net: ipmr: remove unused MFC_NOTIFY flag and make the flags enum MFC_NOTIFY was introduced in kernel 2.1.68 but afaik it hasn't been used and I couldn't find any users currently so just remove it. Only MFC_STATIC is left, so move it into an enum, add a description and use BIT(). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a2d248d..a74e618 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2199,8 +2199,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } read_lock(&mrt_lock); - if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY)) - cache->mfc_flags |= MFC_NOTIFY; err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); rcu_read_unlock(); -- cgit v1.1 From 5ea1f13299d8b8edcb2969eda4c81f8e3264b706 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:47 +0100 Subject: net: ipmr: move struct mr_table and VIF_EXISTS to mroute.h Move the definitions of VIF_EXISTS() and struct mr_table to mroute.h Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a74e618..ff3dbbb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,22 +67,6 @@ #include #include -struct mr_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock __rcu *mroute_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc_unres_queue; - struct list_head mfc_cache_array[MFC_LINES]; - struct vif_device vif_table[MAXVIFS]; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; - int mroute_reg_vif_num; -}; - struct ipmr_rule { struct fib_rule common; }; @@ -104,8 +88,6 @@ static DEFINE_RWLOCK(mrt_lock); /* Multicast router control variables */ -#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) - /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); -- cgit v1.1 From 1973a4ea6ceaa47671227c3077f90508ea30897b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:48 +0100 Subject: net: ipmr: move pimsm_enabled to pim.h and rename Move the inline pimsm_enabled() to pim.h and rename it to ipmr_pimsm_enabled to show it's for the ipv4 ipmr code since pim.h is used by IPv6 too. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ff3dbbb..322fdc6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -75,11 +75,6 @@ struct ipmr_result { struct mr_table *mrt; }; -static inline bool pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} - /* Big lock, protecting vif table, mrt cache and mroute socket state. * Note that the changes are semaphored via rtnl_lock. */ @@ -751,7 +746,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, switch (vifc->vifc_flags) { case VIFF_REGISTER: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon @@ -1377,7 +1372,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, mrt->mroute_do_assert = val; break; case MRT_PIM: - if (!pimsm_enabled()) { + if (!ipmr_pimsm_enabled()) { ret = -ENOPROTOOPT; break; } @@ -1451,7 +1446,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; break; case MRT_PIM: - if (!pimsm_enabled()) + if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; val = mrt->mroute_do_pim; break; -- cgit v1.1 From 42e6b89ce4e8a4f02a1e906694d81acf60db6f4d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:49 +0100 Subject: net: ipmr: fix setsockopt error return We can have both errors and we'll return the second one, fix it to return an error at a time as it's normal. I've overlooked this in my previous set. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 322fdc6..6c24a16 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1284,12 +1284,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, switch (optname) { case MRT_INIT: - if (optlen != sizeof(int)) + if (optlen != sizeof(int)) { ret = -EINVAL; - if (rtnl_dereference(mrt->mroute_sk)) + break; + } + if (rtnl_dereference(mrt->mroute_sk)) { ret = -EADDRINUSE; - if (ret) break; + } ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { -- cgit v1.1 From ccbb0aa62da7f4b765b3e311caf25ea43cc3d0ad Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 26 Nov 2015 15:23:50 +0100 Subject: net: ipmr: add mfc newroute/delroute netlink support This patch adds support to add and remove MFC entries. It uses the same attributes like the already present dump support in order to be consistent. There's one new entry - RTA_PREFSRC, it's used to denote an MFC_PROXY entry (see MRT_ADD_MFC vs MRT_ADD_MFC_PROXY). The already existing infrastructure is used to create and delete the entries, the netlink message gets converted internally to a struct mfcctl which is used with ipmr_mfc_add/delete. The other used attributes are: RTA_IIF - used for mfcc_parent (when adding it's required to be valid) RTA_SRC - used for mfcc_origin RTA_DST - used for mfcc_mcastgrp RTA_TABLE - the MRT table id RTA_MULTIPATH - the "oifs" ttl array Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6c24a16..4c10ee7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -66,6 +66,7 @@ #include #include #include +#include struct ipmr_rule { struct fib_rule common; @@ -2339,6 +2340,130 @@ done: return skb->len; } +static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_DST] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, +}; + +static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol) +{ + switch (rtm_protocol) { + case RTPROT_STATIC: + case RTPROT_MROUTED: + return true; + } + return false; +} + +static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) +{ + struct rtnexthop *rtnh = nla_data(nla); + int remaining = nla_len(nla), vifi = 0; + + while (rtnh_ok(rtnh, remaining)) { + mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops; + if (++vifi == MAXVIFS) + break; + rtnh = rtnh_next(rtnh, &remaining); + } + + return remaining > 0 ? -EINVAL : vifi; +} + +/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ +static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, + struct mfcctl *mfcc, int *mrtsock, + struct mr_table **mrtret) +{ + struct net_device *dev = NULL; + u32 tblid = RT_TABLE_DEFAULT; + struct mr_table *mrt; + struct nlattr *attr; + struct rtmsg *rtm; + int ret, rem; + + ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy); + if (ret < 0) + goto out; + rtm = nlmsg_data(nlh); + + ret = -EINVAL; + if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 || + rtm->rtm_type != RTN_MULTICAST || + rtm->rtm_scope != RT_SCOPE_UNIVERSE || + !ipmr_rtm_validate_proto(rtm->rtm_protocol)) + goto out; + + memset(mfcc, 0, sizeof(*mfcc)); + mfcc->mfcc_parent = -1; + ret = 0; + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) { + switch (nla_type(attr)) { + case RTA_SRC: + mfcc->mfcc_origin.s_addr = nla_get_be32(attr); + break; + case RTA_DST: + mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); + break; + case RTA_IIF: + dev = __dev_get_by_index(net, nla_get_u32(attr)); + if (!dev) { + ret = -ENODEV; + goto out; + } + break; + case RTA_MULTIPATH: + if (ipmr_nla_get_ttls(attr, mfcc) < 0) { + ret = -EINVAL; + goto out; + } + break; + case RTA_PREFSRC: + ret = 1; + break; + case RTA_TABLE: + tblid = nla_get_u32(attr); + break; + } + } + mrt = ipmr_get_table(net, tblid); + if (!mrt) { + ret = -ENOENT; + goto out; + } + *mrtret = mrt; + *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; + if (dev) + mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); + +out: + return ret; +} + +/* takes care of both newroute and delroute */ +static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + int ret, mrtsock, parent; + struct mr_table *tbl; + struct mfcctl mfcc; + + mrtsock = 0; + tbl = NULL; + ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl); + if (ret < 0) + return ret; + + parent = ret ? mfcc.mfcc_parent : -1; + if (nlh->nlmsg_type == RTM_NEWROUTE) + return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); + else + return ipmr_mfc_delete(tbl, &mfcc, parent); +} + #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif @@ -2692,6 +2817,10 @@ int __init ip_mr_init(void) #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute, NULL); + rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, + ipmr_rtm_route, NULL, NULL); + rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, + ipmr_rtm_route, NULL, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 -- cgit v1.1 From 77b75f4d8cf105b599beef38724f8171e557919d Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Thu, 26 Nov 2015 19:23:15 +0000 Subject: unix: use wq_has_sleeper in unix_dgram_recvmsg The current unix_dgram_recvmsg does a wake up for every received datagram. This seems wasteful as only SOCK_DGRAM client sockets in an n:1 association with a server socket will ever wait because of the associated condition. The patch below changes the function such that the wake up only happens if wq_has_sleeper indicates that someone actually wants to be notified. Testing with SOCK_SEQPACKET and SOCK_DGRAM socket seems to confirm that this is an improvment. Signed-Off-By: Rainer Weikusat Signed-off-by: David S. Miller --- net/unix/af_unix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index efb706e..ac011b9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1914,8 +1914,10 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, goto out_unlock; } - wake_up_interruptible_sync_poll(&u->peer_wait, - POLLOUT | POLLWRNORM | POLLWRBAND); + if (wq_has_sleeper(&u->peer_wait)) + wake_up_interruptible_sync_poll(&u->peer_wait, + POLLOUT | POLLWRNORM | + POLLWRBAND); if (msg->msg_name) unix_copy_addr(msg, skb->sk); -- cgit v1.1 From 7450aaf61f0ae2ee6cc6491138d11df2c25e7609 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Nov 2015 08:57:28 -0800 Subject: tcp: suppress too verbose messages in tcp_send_ack() If tcp_send_ack() can not allocate skb, we properly handle this and setup a timer to try later. Use __GFP_NOWARN to avoid polluting syslog in the case host is under memory pressure, so that pertinent messages are not lost under a flood of useless information. sk_gfp_atomic() can use its gfp_mask argument (all callers currently were using GFP_ATOMIC before this patch) We rename sk_gfp_atomic() to sk_gfp_mask() to clearly express this function now takes into account its second argument (gfp_mask) Note that when tcp_transmit_skb() is called with clone_it set to false, we do not attempt memory allocations, so can pass a 0 gfp_mask, which most compilers can emit faster than a non zero or constant value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 14 ++++++++------ net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cb7ca56..a800cee 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2296,7 +2296,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, return; if (tcp_write_xmit(sk, cur_mss, nonagle, 0, - sk_gfp_atomic(sk, GFP_ATOMIC))) + sk_gfp_mask(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -3352,8 +3352,9 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (!buff) { + buff = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); + if (unlikely(!buff)) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -3375,7 +3376,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); - tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); + tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); } EXPORT_SYMBOL_GPL(tcp_send_ack); @@ -3396,7 +3397,8 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); if (!skb) return -1; @@ -3409,7 +3411,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); NET_INC_STATS(sock_net(sk), mib); - return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); + return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); } void tcp_send_window_probe(struct sock *sk) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c5429a6..41bcd59 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1130,7 +1130,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1146,7 +1146,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) @@ -1212,7 +1212,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); + opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; -- cgit v1.1 From b03804e7c3ad41c265c0ca21ddb306b252b4f99f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Dec 2015 12:12:03 +0100 Subject: net: Check CHANGEUPPER notifier return value switchdev drivers reflect the newly requested topology to hardware when CHANGEUPPER is received, after software links were already formed. However, the operation can fail and user will not be notified, as the return value of the notifier is not checked. Add this check and rollback software links if necessary. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5df6cbc..939cd1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5490,8 +5490,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, - &changeupper_info.info); + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + goto rollback_lower_mesh; + return 0; rollback_lower_mesh: -- cgit v1.1 From 6dffb0447c25476f499d205dfceb1972e8dae919 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:10 +0100 Subject: net: propagate upper priv via netdev_master_upper_dev_link Eliminate netdev_master_upper_dev_link_private and pass priv directly as a parameter of netdev_master_upper_dev_link. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/batman-adv/hard-interface.c | 3 ++- net/bridge/br_if.c | 2 +- net/core/dev.c | 18 ++++++------------ net/openvswitch/vport-netdev.c | 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f11345e..a7f4f10 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -464,7 +464,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); - ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, + soft_iface, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ec02f586..781abc34 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev); + err = netdev_master_upper_dev_link(dev, br->dev, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 939cd1b..27d052b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *private) + void *upper_priv) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5452,7 +5452,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv, master); if (ret) return ret; @@ -5557,6 +5557,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * netdev_master_upper_dev_link - Add a master link to the upper device * @dev: device * @upper_dev: new upper device + * @upper_priv: upper device private * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5565,20 +5566,13 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * counts are adjusted and the function returns zero. */ int netdev_master_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + void *upper_priv) { - return __netdev_upper_dev_link(dev, upper_dev, true, NULL); + return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); } EXPORT_SYMBOL(netdev_master_upper_dev_link); -int netdev_master_upper_dev_link_private(struct net_device *dev, - struct net_device *upper_dev, - void *private) -{ - return __netdev_upper_dev_link(dev, upper_dev, true, private); -} -EXPORT_SYMBOL(netdev_master_upper_dev_link_private); - /** * netdev_upper_dev_unlink - Removes a link to upper device * @dev: device diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b327368..3ee3df1 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp)); + get_dpdev(vport->dp), NULL); if (err) goto error_unlock; -- cgit v1.1 From 29bf24afb29042f568fa67b1b0eee46796725ed2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:11 +0100 Subject: net: add possibility to pass information about upper device via notifier Sometimes the drivers and other code would find it handy to know some internal information about upper device being changed. So allow upper-code to pass information down to notifier listeners during linking. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 11 +++++++---- net/openvswitch/vport-netdev.c | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a7f4f10..aa8867e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -465,7 +465,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL); + soft_iface, NULL, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 781abc34..8d1d4a2 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -493,7 +493,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index 27d052b..8ed8866 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5421,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv) + void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; @@ -5445,6 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.upper_dev = upper_dev; changeupper_info.master = master; changeupper_info.linking = true; + changeupper_info.upper_info = upper_info; ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, &changeupper_info.info); @@ -5549,7 +5550,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -5558,6 +5559,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @dev: device * @upper_dev: new upper device * @upper_priv: upper device private + * @upper_info: upper info to be passed down via notifier * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices @@ -5567,9 +5569,10 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv) + void *upper_priv, void *upper_info) { - return __netdev_upper_dev_link(dev, upper_dev, true, upper_priv); + return __netdev_upper_dev_link(dev, upper_dev, true, + upper_priv, upper_info); } EXPORT_SYMBOL(netdev_master_upper_dev_link); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 3ee3df1..8f4dd4c 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -105,7 +105,7 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL); + get_dpdev(vport->dp), NULL, NULL); if (err) goto error_unlock; -- cgit v1.1 From 04d482660a07039fc4e9a42bb3517db236d98f96 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:15 +0100 Subject: net: introduce change lower state notifier When lower device like bonding slave, team/bridge port, etc changes its state, it is useful for others to notice this change. Currently this is implemented specificly for bonding as NETDEV_BONDING_INFO notifier. This patch aims to replace this specific usage and make this more generic to be used for all upper-lower devices. Introduce NETDEV_CHANGELOWERSTATE netdev notifier type and netdev_lower_state_changed() helper. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8ed8866..d1706e8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5756,6 +5756,26 @@ int dev_get_nest_level(struct net_device *dev, } EXPORT_SYMBOL(dev_get_nest_level); +/** + * netdev_lower_change - Dispatch event about lower device state change + * @lower_dev: device + * @lower_state_info: state to dispatch + * + * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info. + * The caller must hold the RTNL lock. + */ +void netdev_lower_state_changed(struct net_device *lower_dev, + void *lower_state_info) +{ + struct netdev_notifier_changelowerstate_info changelowerstate_info; + + ASSERT_RTNL(); + changelowerstate_info.lower_state_info = lower_state_info; + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + &changelowerstate_info.info); +} +EXPORT_SYMBOL(netdev_lower_state_changed); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; -- cgit v1.1 From d6df198d924775e4751561cf60ef0294e95f74df Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 1 Dec 2015 22:45:15 +0100 Subject: net: ipv6: restrict hop_limit sysctl setting to range [1; 255] Setting a value bigger than 255 resulted in using only the lower eight bits of that value as it is assigned to the u8 header field. To avoid this unexpected result, reject such values. Setting a value of zero is technically possible, but hosts receiving such a packet have to treat it like hop_limit was set to one, according to RFC2460. Therefore I don't see a use-case for that. Setting a route's hop_limit to zero in iproute2 means to use the sysctl default, which is not the case here: Setting e.g. net.conf.eth0.hop_limit=0 will not make the kernel use net.conf.all.hop_limit for outgoing packets on eth0. To avoid these kinds of confusion, reject zero. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d84742f..a5de1a6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5200,6 +5200,20 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, } static +int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table lctl; + int min_hl = 1, max_hl = 255; + + lctl = *ctl; + lctl.extra1 = &min_hl; + lctl.extra2 = &max_hl; + + return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); +} + +static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -5454,7 +5468,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_hop_limit, }, { .procname = "mtu", -- cgit v1.1 From c89359a42e2a49656451569c382eed63e781153c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Dec 2015 22:18:11 -0800 Subject: mpls: support for dead routes Adds support for RTNH_F_DEAD and RTNH_F_LINKDOWN flags on mpls routes due to link events. Also adds code to ignore dead routes during route selection. Unlike ip routes, mpls routes are not deleted when the route goes dead. This is current mpls behaviour and this patch does not change that. With this patch however, routes will be marked dead. dead routes are not notified to userspace (this is consistent with ipv4 routes). dead routes: ----------- $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 nexthop as to 700 via inet 10.1.1.6 dev swp2 $ip link set dev swp1 down $ip link show dev swp1 4: swp1: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000 link/ether 00:02:00:00:00:01 brd ff:ff:ff:ff:ff:ff $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 dead linkdown nexthop as to 700 via inet 10.1.1.6 dev swp2 linkdown routes: ---------------- $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 nexthop as to 700 via inet 10.1.1.6 dev swp2 $ip link show dev swp1 4: swp1: mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000 link/ether 00:02:00:00:00:01 brd ff:ff:ff:ff:ff:ff /* carrier goes down */ $ip link show dev swp1 4: swp1: mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000 link/ether 00:02:00:00:00:01 brd ff:ff:ff:ff:ff:ff $ip -f mpls route show 100 nexthop as to 200 via inet 10.1.1.2 dev swp1 linkdown nexthop as to 700 via inet 10.1.1.6 dev swp2 Signed-off-by: Roopa Prabhu Acked-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-------- net/mpls/internal.h | 2 + 2 files changed, 159 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750..4b3b9b3 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -96,22 +96,15 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, - struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, + struct sk_buff *skb, bool bos) { struct mpls_entry_decoded dec; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; - int nh_index = 0; u32 hash = 0; - /* No need to look further into packet if there's only - * one path - */ - if (rt->rt_nhn == 1) - goto out; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; label_index++) { if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) @@ -165,7 +158,38 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } } - nh_index = hash % rt->rt_nhn; + return hash; +} + +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + int alive = ACCESS_ONCE(rt->rt_nhn_alive); + u32 hash = 0; + int nh_index = 0; + int n = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + if (alive <= 0) + return NULL; + + hash = mpls_multipath_hash(rt, skb, bos); + nh_index = hash % alive; + if (alive == rt->rt_nhn) + goto out; + for_nexthops(rt) { + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + continue; + if (n == nh_index) + return nh; + n++; + } endfor_nexthops(rt); + out: return &rt->rt_nh[nh_index]; } @@ -365,6 +389,7 @@ static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) GFP_KERNEL); if (rt) { rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; rt->rt_max_alen = max_alen_aligned; } @@ -536,6 +561,16 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, RCU_INIT_POINTER(nh->nh_dev, dev); + if (!(dev->flags & IFF_UP)) { + nh->nh_flags |= RTNH_F_DEAD; + } else { + unsigned int flags; + + flags = dev_get_flags(dev); + if (!(flags & (IFF_RUNNING | IFF_LOWER_UP))) + nh->nh_flags |= RTNH_F_LINKDOWN; + } + return 0; errout: @@ -570,6 +605,9 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, if (err) goto errout; + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + return 0; errout: @@ -577,8 +615,8 @@ errout: } static int mpls_nh_build(struct net *net, struct mpls_route *rt, - struct mpls_nh *nh, int oif, - struct nlattr *via, struct nlattr *newdst) + struct mpls_nh *nh, int oif, struct nlattr *via, + struct nlattr *newdst) { int err = -ENOMEM; @@ -681,11 +719,13 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, goto errout; err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, - nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst); if (err) goto errout; + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + rtnh = rtnh_next(rtnh, &remaining); nhs++; } endfor_nexthops(rt); @@ -875,34 +915,74 @@ free: return ERR_PTR(err); } -static void mpls_ifdown(struct net_device *dev) +static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + if (!rt) continue; - for_nexthops(rt) { + + change_nexthops(rt) { if (rtnl_dereference(nh->nh_dev) != dev) continue; - nh->nh_dev = NULL; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + nh->nh_flags |= RTNH_F_LINKDOWN; + ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + break; + } + if (event == NETDEV_UNREGISTER) + RCU_INIT_POINTER(nh->nh_dev, NULL); } endfor_nexthops(rt); } - mdev = mpls_dev_get(dev); - if (!mdev) - return; - mpls_dev_sysctl_unregister(mdev); + return; +} + +static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +{ + struct mpls_route __rcu **platform_label; + struct net *net = dev_net(dev); + unsigned index; + int alive; + + platform_label = rtnl_dereference(net->mpls.platform_label); + for (index = 0; index < net->mpls.platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + + if (!rt) + continue; + + alive = 0; + change_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!(nh->nh_flags & nh_flags)) { + alive++; + continue; + } + if (nh_dev != dev) + continue; + alive++; + nh->nh_flags &= ~nh_flags; + } endfor_nexthops(rt); - RCU_INIT_POINTER(dev->mpls_ptr, NULL); + ACCESS_ONCE(rt->rt_nhn_alive) = alive; + } - kfree_rcu(mdev, rcu); + return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, @@ -910,9 +990,9 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpls_dev *mdev; + unsigned int flags; - switch(event) { - case NETDEV_REGISTER: + if (event == NETDEV_REGISTER) { /* For now just support ethernet devices */ if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) { @@ -920,10 +1000,39 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); } - break; + return NOTIFY_OK; + } + mdev = mpls_dev_get(dev); + if (!mdev) + return NOTIFY_OK; + + switch (event) { + case NETDEV_DOWN: + mpls_ifdown(dev, event); + break; + case NETDEV_UP: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifup(dev, RTNH_F_DEAD); + break; + case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); + else + mpls_ifdown(dev, event); + break; case NETDEV_UNREGISTER: - mpls_ifdown(dev); + mpls_ifdown(dev, event); + mdev = mpls_dev_get(dev); + if (mdev) { + mpls_dev_sysctl_unregister(mdev); + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + kfree_rcu(mdev, rcu); + } break; case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); @@ -1237,9 +1346,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; + if (nh->nh_flags & RTNH_F_LINKDOWN) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (nh->nh_flags & RTNH_F_DEAD) + rtm->rtm_flags |= RTNH_F_DEAD; } else { struct rtnexthop *rtnh; struct nlattr *mp; + int dead = 0; + int linkdown = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) @@ -1253,6 +1368,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, dev = rtnl_dereference(nh->nh_dev); if (dev) rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + rtnh->rtnh_flags |= RTNH_F_LINKDOWN; + linkdown++; + } + if (nh->nh_flags & RTNH_F_DEAD) { + rtnh->rtnh_flags |= RTNH_F_DEAD; + dead++; + } + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, nh->nh_label)) @@ -1266,6 +1390,11 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; } endfor_nexthops(rt); + if (linkdown == rt->rt_nhn) + rtm->rtm_flags |= RTNH_F_LINKDOWN; + if (dead == rt->rt_nhn) + rtm->rtm_flags |= RTNH_F_DEAD; + nla_nest_end(skb, mp); } diff --git a/net/mpls/internal.h b/net/mpls/internal.h index bde52ce..732a5c1 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -41,6 +41,7 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + unsigned int nh_flags; u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; @@ -74,6 +75,7 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_payload_type; u8 rt_max_alen; unsigned int rt_nhn; + unsigned int rt_nhn_alive; struct mpls_nh rt_nh[0]; }; -- cgit v1.1 From 357ab2234d57f6c74386f64ded42dff8e3c0500b Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:43:59 +0800 Subject: VSOCK: Introduce vsock_find_unbound_socket and vsock_bind_dgram_generic Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fd1220..77247a2 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,6 +223,17 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } +static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) + if (addr->svm_port == vsk->local_addr.svm_port) + return sk_vsock(vsk); + + return NULL; +} + static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -298,6 +309,21 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); +struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_unbound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); + struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -532,6 +558,50 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } +int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_unbound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_unbound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); + static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { -- cgit v1.1 From 80a19e338d458abb5a700df3fd00795c51361f06 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:00 +0800 Subject: VSOCK: Introduce virtio-vsock-common.ko This module contains the common code and header files for the following virtio-vsock and virtio-vhost kernel modules. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 1272 +++++++++++++++++++++++++++++++ 1 file changed, 1272 insertions(+) create mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c new file mode 100644 index 0000000..28f790d --- /dev/null +++ b/net/vmw_vsock/virtio_transport_common.c @@ -0,0 +1,1272 @@ +/* + * common code for virtio vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define COOKIEBITS 24 +#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) +#define VSOCK_TIMEOUT_INIT 4 + +#define SHA_MESSAGE_WORDS 16 +#define SHA_VSOCK_WORDS 5 + +static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + + SHA_DIGEST_WORDS]; + +static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + + SHA_WORKSPACE_WORDS], vsock_cookie_scratch); + +static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, + u32 count, int c) +{ + __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); + + memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], + sizeof(vsockcookie_secret[c])); + tmp[0] = saddr; + tmp[1] = daddr; + tmp[2] = sport; + tmp[3] = dport; + tmp[4] = count; + sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, + tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); + + return tmp[17]; +} + +static u32 +virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, + u32 count) +{ + u32 h1, h2; + + h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); + h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); + + return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); +} + +static u32 +virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, + u32 count, u32 cookie, u32 maxdiff) +{ + u32 diff; + u32 ret; + + cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); + + diff = (count - (cookie >> COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); + pr_debug("%s: diff=%x\n", __func__, diff); + if (diff >= maxdiff) + return (u32)-1; + + ret = (cookie - + cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) + & COOKIEMASK; + pr_debug("%s: ret=%x\n", __func__, diff); + + return ret; +} + +void virtio_vsock_dumppkt(const char *func, const struct virtio_vsock_pkt *pkt) +{ + pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", + func, pkt, + le16_to_cpu(pkt->hdr.op), + le32_to_cpu(pkt->hdr.len), + le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port), + pkt->len); +} +EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); + +struct virtio_vsock_pkt * +virtio_transport_alloc_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info, + size_t len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt *pkt; + int err; + + BUG_ON(!trans); + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return NULL; + + pkt->hdr.type = cpu_to_le16(info->type); + pkt->hdr.op = cpu_to_le16(info->op); + pkt->hdr.src_cid = cpu_to_le32(src_cid); + pkt->hdr.src_port = cpu_to_le32(src_port); + pkt->hdr.dst_cid = cpu_to_le32(dst_cid); + pkt->hdr.dst_port = cpu_to_le32(dst_port); + pkt->hdr.flags = cpu_to_le32(info->flags); + pkt->len = len; + pkt->trans = trans; + if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) + pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); + else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) + pkt->hdr.len = cpu_to_le32(len); + + if (info->msg && len > 0) { + pkt->buf = kmalloc(len, GFP_KERNEL); + if (!pkt->buf) + goto out_pkt; + err = memcpy_from_msg(pkt->buf, info->msg, len); + if (err) + goto out; + } + + return pkt; + +out: + kfree(pkt->buf); +out_pkt: + kfree(pkt); + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); + +struct sock * +virtio_transport_get_pending(struct sock *listener, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sockaddr_vm src; + struct sockaddr_vm dst; + struct sock *pending; + + vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); + + vlistener = vsock_sk(listener); + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + return pending; + } + } + + return NULL; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_pending); + +static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) +{ + pkt->trans->rx_bytes += pkt->len; +} + +static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) +{ + pkt->trans->rx_bytes -= pkt->len; + pkt->trans->fwd_cnt += pkt->len; +} + +void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) +{ + mutex_lock(&pkt->trans->tx_lock); + pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); + pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); + mutex_unlock(&pkt->trans->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); + +void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) +{ +} +EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); + +u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) +{ + u32 ret; + + mutex_lock(&trans->tx_lock); + ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); + if (ret > credit) + ret = credit; + trans->tx_cnt += ret; + mutex_unlock(&trans->tx_lock); + + pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," + "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, + ret, trans->buf_alloc, trans->peer_buf_alloc, + trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); + + return ret; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_credit); + +void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) +{ + mutex_lock(&trans->tx_lock); + trans->tx_cnt -= credit; + mutex_unlock(&trans->tx_lock); +} +EXPORT_SYMBOL_GPL(virtio_transport_put_credit); + +static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, + .type = type, + }; + + if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { + info.remote_cid = le32_to_cpu(hdr->src_cid); + info.remote_port = le32_to_cpu(hdr->src_port); + } + + pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, + .type = type, + }; + + pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static ssize_t +virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt *pkt; + size_t bytes, total = 0; + int err = -EFAULT; + + mutex_lock(&trans->rx_lock); + while (total < len && trans->rx_bytes > 0 && + !list_empty(&trans->rx_queue)) { + pkt = list_first_entry(&trans->rx_queue, + struct virtio_vsock_pkt, list); + + bytes = len - total; + if (bytes > pkt->len - pkt->off) + bytes = pkt->len - pkt->off; + + err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); + if (err) + goto out; + total += bytes; + pkt->off += bytes; + if (pkt->off == pkt->len) { + virtio_transport_dec_rx_pkt(pkt); + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + } + mutex_unlock(&trans->rx_lock); + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, + NULL); + + return total; + +out: + mutex_unlock(&trans->rx_lock); + if (total) + err = total; + return err; +} + +ssize_t +virtio_transport_stream_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + if (flags & MSG_PEEK) + return -EOPNOTSUPP; + + return virtio_transport_stream_do_dequeue(vsk, msg, len); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); + +struct dgram_skb { + struct list_head list; + struct sk_buff *skb; + u16 id; +}; + +static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, + u16 id) +{ + struct dgram_skb *dgram_skb; + + list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { + if (dgram_skb->id == id) + return dgram_skb; + } + + return NULL; +} + +static void +virtio_transport_recv_dgram(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct sk_buff *skb = NULL; + struct vsock_sock *vsk; + struct virtio_transport *trans; + size_t size; + u16 dgram_id, pkt_off, dgram_len, pkt_len; + u32 flags, len; + struct dgram_skb *dgram_skb; + + vsk = vsock_sk(sk); + trans = vsk->trans; + + /* len: dgram_len | pkt_len */ + len = le32_to_cpu(pkt->hdr.len); + dgram_len = len >> 16; + pkt_len = len & 0xFFFF; + + /* flags: dgram_id | pkt_off */ + flags = le32_to_cpu(pkt->hdr.flags); + dgram_id = flags >> 16; + pkt_off = flags & 0xFFFF; + + pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", __func__, + dgram_len, pkt_len, dgram_id, pkt_off); + + dgram_skb = dgram_id_to_skb(trans, dgram_id); + if (dgram_skb) { + /* This pkt is for a existing dgram */ + skb = dgram_skb->skb; + pr_debug("%s:found skb\n", __func__); + } + + /* Packet payload must be within datagram bounds */ + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + goto drop; + if (pkt_len > dgram_len) + goto drop; + if (pkt_off > dgram_len) + goto drop; + if (dgram_len - pkt_off < pkt_len) + goto drop; + + if (!skb) { + /* This pkt is for a new dgram */ + pr_debug("%s:create skb\n", __func__); + + size = sizeof(pkt->hdr) + dgram_len; + /* Attach the packet to the socket's receive queue as an sk_buff. */ + dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); + if (!dgram_skb) + goto drop; + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + kfree(dgram_skb); + dgram_skb = NULL; + goto drop; + } + dgram_skb->id = dgram_id; + dgram_skb->skb = skb; + list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); + } + + memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); + + pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, + pkt_off, pkt_len, dgram_len); + + /* We are done with this dgram */ + if (pkt_off + pkt_len == dgram_len) { + pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); + list_del(&dgram_skb->list); + kfree(dgram_skb); + sk_receive_skb(sk, skb, 0); + } + virtio_transport_free_pkt(pkt); + return; + +drop: + if (dgram_skb) { + list_del(&dgram_skb->list); + kfree(dgram_skb); + kfree_skb(skb); + sock_put(sk); + } + virtio_transport_free_pkt(pkt); +} + +int +virtio_transport_dgram_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len, int flags) +{ + struct virtio_vsock_hdr *hdr; + struct sk_buff *skb; + int noblock; + int err; + int dgram_len; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + if (!skb) + return -EAGAIN; + + hdr = (struct virtio_vsock_hdr *)skb->data; + if (!hdr) + goto out; + + dgram_len = le32_to_cpu(hdr->len) >> 16; + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); + if (err) + goto out; + + if (msg->msg_name) { + /* Provide the address of the sender. */ + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); + vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); + msg->msg_namelen = sizeof(*vm_addr); + } + err = dgram_len; + + /* Send a credit pkt to peer */ + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); + + pr_debug("%s:done, recved =%d\n", __func__, dgram_len); +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); + +s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + mutex_lock(&trans->rx_lock); + bytes = trans->rx_bytes; + mutex_unlock(&trans->rx_lock); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); + +static s64 virtio_transport_has_space(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); + if (bytes < 0) + bytes = 0; + + return bytes; +} + +s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + s64 bytes; + + mutex_lock(&trans->tx_lock); + bytes = virtio_transport_has_space(vsk); + mutex_unlock(&trans->tx_lock); + + pr_debug("%s: bytes=%lld\n", __func__, bytes); + + return bytes; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); + +int virtio_transport_do_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + struct virtio_transport *trans; + + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) + return -ENOMEM; + + vsk->trans = trans; + trans->vsk = vsk; + if (psk) { + struct virtio_transport *ptrans = psk->trans; + trans->buf_size = ptrans->buf_size; + trans->buf_size_min = ptrans->buf_size_min; + trans->buf_size_max = ptrans->buf_size_max; + trans->peer_buf_alloc = ptrans->peer_buf_alloc; + } else { + trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; + trans->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; + trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; + } + + trans->buf_alloc = trans->buf_size; + + pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); + + mutex_init(&trans->rx_lock); + mutex_init(&trans->tx_lock); + INIT_LIST_HEAD(&trans->rx_queue); + INIT_LIST_HEAD(&trans->incomplete_dgrams); + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); + +u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); + +u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size_min; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); + +u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size_max; +} +EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); + +void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < trans->buf_size_min) + trans->buf_size_min = val; + if (val > trans->buf_size_max) + trans->buf_size_max = val; + trans->buf_size = val; + trans->buf_alloc = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); + +void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val > trans->buf_size) + trans->buf_size = val; + trans->buf_size_min = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); + +void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) +{ + struct virtio_transport *trans = vsk->trans; + + if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) + val = VIRTIO_VSOCK_MAX_BUF_SIZE; + if (val < trans->buf_size) + trans->buf_size = val; + trans->buf_size_max = val; +} +EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); + +int +virtio_transport_notify_poll_in(struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + if (vsock_stream_has_data(vsk)) + *data_ready_now = true; + else + *data_ready_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); + +int +virtio_transport_notify_poll_out(struct vsock_sock *vsk, + size_t target, + bool *space_avail_now) +{ + s64 free_space; + + free_space = vsock_stream_has_space(vsk); + if (free_space > 0) + *space_avail_now = true; + else if (free_space == 0) + *space_avail_now = false; + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); + +int virtio_transport_notify_recv_init(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); + +int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); + +int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, + size_t target, struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); + +int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, + size_t target, ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); + +int virtio_transport_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); + +int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); + +int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); + +int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, + ssize_t written, struct vsock_transport_send_notify_data *data) +{ + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); + +u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + return trans->buf_size; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); + +bool virtio_transport_stream_is_active(struct vsock_sock *vsk) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); + +bool virtio_transport_stream_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); + +int virtio_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return vsock_bind_dgram_generic(vsk, addr); +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); + +bool virtio_transport_dgram_allow(u32 cid, u32 port) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); + +int virtio_transport_connect(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_REQUEST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + }; + + pr_debug("%s: vsk=%p send_request\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_connect); + +int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_SHUTDOWN, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = (mode & RCV_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | + (mode & SEND_SHUTDOWN ? + VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + }; + + pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_shutdown); + +void virtio_transport_release(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + struct sock *sk = &vsk->sk; + struct dgram_skb *dgram_skb; + struct dgram_skb *dgram_skb_tmp; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + + /* Tell other side to terminate connection */ + if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { + virtio_transport_shutdown(vsk, SHUTDOWN_MASK); + } + + /* Free incomplete dgrams */ + lock_sock(sk); + list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, + &trans->incomplete_dgrams, list) { + list_del(&dgram_skb->list); + kfree_skb(dgram_skb->skb); + kfree(dgram_skb); + sock_put(sk); /* held in virtio_transport_recv_dgram() */ + } + release_sock(sk); +} +EXPORT_SYMBOL_GPL(virtio_transport_release); + +int +virtio_transport_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct msghdr *msg, + size_t dgram_len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_DGRAM, + .msg = msg, + }; + size_t total_written = 0, pkt_off = 0, written; + u16 dgram_id; + + /* The max size of a single dgram we support is 64KB */ + if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) + return -EMSGSIZE; + + info.dgram_len = dgram_len; + vsk->remote_addr = *remote_addr; + + dgram_id = trans->dgram_id++; + + /* TODO: To optimize, if we have enough credit to send the pkt already, + * do not ask the peer to send credit to use */ + virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); + + while (total_written < dgram_len) { + info.pkt_len = dgram_len - total_written; + info.flags = dgram_id << 16 | pkt_off; + written = trans->ops->send_pkt(vsk, &info); + if (written < 0) + return -ENOMEM; + if (written == 0) { + /* TODO: if written = 0, we need a sleep & wakeup + * instead of sleep */ + pr_debug("%s: SHOULD WAIT written==0", __func__); + msleep(10); + } + total_written += written; + pkt_off += written; + pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", + __func__, dgram_id, dgram_len, pkt_off, total_written, written); + } + + return dgram_len; +} +EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); + +ssize_t +virtio_transport_stream_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RW, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .msg = msg, + .pkt_len = len, + }; + + return trans->ops->send_pkt(vsk, &info); +} +EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); + +void virtio_transport_destruct(struct vsock_sock *vsk) +{ + struct virtio_transport *trans = vsk->trans; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + kfree(trans); +} +EXPORT_SYMBOL_GPL(virtio_transport_destruct); + +static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_ACK, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .flags = cpu_to_le32(cookie), + }; + + pr_debug("%s: sk=%p send_offer\n", __func__, vsk); + return trans->ops->send_pkt(vsk, &info); +} + +static int virtio_transport_send_reset(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RST, + .type = VIRTIO_VSOCK_TYPE_STREAM, + }; + + pr_debug("%s\n", __func__); + + /* Send RST only if the original pkt is not a RST pkt */ + if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) + return 0; + + return trans->ops->send_pkt(vsk, &info); +} + +static int +virtio_transport_recv_connecting(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + int err; + int skerr; + u32 cookie; + + pr_debug("%s: vsk=%p\n", __func__, vsk); + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RESPONSE: + cookie = le32_to_cpu(pkt->hdr.flags); + pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); + err = virtio_transport_send_ack(vsk, cookie); + if (err < 0) { + skerr = -err; + goto destroy; + } + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_INVALID: + pr_debug("%s: got invalid\n", __func__); + break; + case VIRTIO_VSOCK_OP_RST: + pr_debug("%s: got rst\n", __func__); + skerr = ECONNRESET; + err = 0; + goto destroy; + default: + pr_debug("%s: got def\n", __func__); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + return 0; + +destroy: + virtio_transport_send_reset(vsk, pkt); + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int +virtio_transport_recv_connected(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_transport *trans = vsk->trans; + int err = 0; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_RW: + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + pkt->trans = trans; + + mutex_lock(&trans->rx_lock); + virtio_transport_inc_rx_pkt(pkt); + list_add_tail(&pkt->list, &trans->rx_queue); + mutex_unlock(&trans->rx_lock); + + sk->sk_data_ready(sk); + return err; + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + sk->sk_write_space(sk); + break; + case VIRTIO_VSOCK_OP_SHUTDOWN: + pr_debug("%s: got shutdown\n", __func__); + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) + vsk->peer_shutdown |= RCV_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) + vsk->peer_shutdown |= SEND_SHUTDOWN; + if (le32_to_cpu(pkt->hdr.flags)) + sk->sk_state_change(sk); + break; + case VIRTIO_VSOCK_OP_RST: + pr_debug("%s: got rst\n", __func__); + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + sk->sk_state_change(sk); + break; + default: + err = -EINVAL; + break; + } + + virtio_transport_free_pkt(pkt); + return err; +} + +static int +virtio_transport_send_response(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = VIRTIO_VSOCK_OP_RESPONSE, + .type = VIRTIO_VSOCK_TYPE_STREAM, + .remote_cid = le32_to_cpu(pkt->hdr.src_cid), + .remote_port = le32_to_cpu(pkt->hdr.src_port), + }; + u32 cookie; + + cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_port), + jiffies / (HZ * 60)); + info.flags = cpu_to_le32(cookie); + + pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); + + return trans->ops->send_pkt(vsk, &info); +} + +/* Handle server socket */ +static int +virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_sock *vpending; + struct sock *pending; + int err; + u32 cookie; + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_REQUEST: + err = virtio_transport_send_response(vsk, pkt); + if (err < 0) { + // FIXME vsk should be vpending + virtio_transport_send_reset(vsk, pkt); + return err; + } + break; + case VIRTIO_VSOCK_OP_ACK: + cookie = le32_to_cpu(pkt->hdr.flags); + err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.src_port), + le32_to_cpu(pkt->hdr.dst_port), + jiffies / (HZ * 60), + le32_to_cpu(pkt->hdr.flags), + VSOCK_TIMEOUT_INIT); + pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); + if (err) + return err; + + /* So no pending socket are responsible for this pkt, create one */ + pr_debug("%s: create pending\n", __func__); + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type, 0); + if (!pending) { + virtio_transport_send_reset(vsk, pkt); + return -ENOMEM; + } + sk->sk_ack_backlog++; + pending->sk_state = SS_CONNECTING; + + vpending = vsock_sk(pending); + vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port)); + vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + vsock_add_pending(sk, pending); + + pr_debug("%s: get pending\n", __func__); + pending = virtio_transport_get_pending(sk, pkt); + vpending = vsock_sk(pending); + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { + pr_debug("%s: op=%d != OP_ACK\n", __func__, + le16_to_cpu(pkt->hdr.op)); + virtio_transport_send_reset(vpending, pkt); + pending->sk_err = EPROTO; + pending->sk_state = SS_UNCONNECTED; + sock_put(pending); + } else { + pending->sk_state = SS_CONNECTED; + vsock_insert_connected(vpending); + + vsock_remove_pending(sk, pending); + vsock_enqueue_accept(sk, pending); + + sk->sk_data_ready(sk); + } + err = 0; + break; + default: + pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, + sk->sk_ack_backlog); + virtio_transport_send_reset(vpending, pkt); + err = -EINVAL; + break; + } + if (err < 0) + vsock_remove_pending(sk, pending); + release_sock(pending); + + /* Release refcnt obtained in virtio_transport_get_pending */ + sock_put(pending); + break; + default: + break; + } + + return 0; +} + +static void virtio_transport_space_update(struct sock *sk, + struct virtio_vsock_pkt *pkt) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct virtio_transport *trans = vsk->trans; + bool space_available; + + /* buf_alloc and fwd_cnt is always included in the hdr */ + mutex_lock(&trans->tx_lock); + trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); + trans->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); + space_available = virtio_transport_has_space(vsk); + mutex_unlock(&trans->tx_lock); + + if (space_available) + sk->sk_write_space(sk); +} + +/* We are under the virtio-vsock's vsock->rx_lock or + * vhost-vsock's vq->mutex lock */ +void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) +{ + struct virtio_transport *trans; + struct sockaddr_vm src, dst; + struct vsock_sock *vsk; + struct sock *sk; + + vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); + vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); + + virtio_vsock_dumppkt(__func__, pkt); + + if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { + sk = vsock_find_unbound_socket(&dst); + if (!sk) + goto free_pkt; + + vsk = vsock_sk(sk); + trans = vsk->trans; + BUG_ON(!trans); + + virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + virtio_transport_free_pkt(pkt); + break; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, + &pkt->hdr); + virtio_transport_free_pkt(pkt); + break; + case VIRTIO_VSOCK_OP_RW: + virtio_transport_recv_dgram(sk, pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of + * the unbound list. + */ + sock_put(sk); + return; + } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { + /* The socket must be in connected or bound table + * otherwise send reset back + */ + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + pr_debug("%s: can not find bound_socket\n", __func__); + virtio_vsock_dumppkt(__func__, pkt); + /* Ignore this pkt instead of sending reset back */ + /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ + goto free_pkt; + } + } + + vsk = vsock_sk(sk); + trans = vsk->trans; + BUG_ON(!trans); + + virtio_transport_space_update(sk, pkt); + + lock_sock(sk); + switch (sk->sk_state) { + case VSOCK_SS_LISTEN: + virtio_transport_recv_listen(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTING: + virtio_transport_recv_connecting(sk, pkt); + virtio_transport_free_pkt(pkt); + break; + case SS_CONNECTED: + virtio_transport_recv_connected(sk, pkt); + break; + default: + virtio_transport_free_pkt(pkt); + break; + } + release_sock(sk); + + /* Release refcnt obtained when we fetched this socket out of the + * bound or connected list. + */ + sock_put(sk); + } + return; + +free_pkt: + virtio_transport_free_pkt(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); + +void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) +{ + kfree(pkt->buf); + kfree(pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); + +static int __init virtio_vsock_common_init(void) +{ + get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); + return 0; +} + +static void __exit virtio_vsock_common_exit(void) +{ +} + +module_init(virtio_vsock_common_init); +module_exit(virtio_vsock_common_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.1 From 32e61b06b6946ba137723c5b1de2a1fdb2e0e0a5 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:01 +0800 Subject: VSOCK: Introduce virtio-vsock.ko VM sockets virtio transport implementation. This module runs in guest kernel. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 466 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 net/vmw_vsock/virtio_transport.c (limited to 'net') diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c new file mode 100644 index 0000000..df65dca --- /dev/null +++ b/net/vmw_vsock/virtio_transport.c @@ -0,0 +1,466 @@ +/* + * virtio transport for vsock + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * Author: Asias He + * Stefan Hajnoczi + * + * Some of the code is take from Gerd Hoffmann 's + * early virtio-vsock proof-of-concept bits. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *virtio_vsock_workqueue; +static struct virtio_vsock *the_virtio_vsock; +static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); + +struct virtio_vsock { + /* Virtio device */ + struct virtio_device *vdev; + /* Virtio virtqueue */ + struct virtqueue *vqs[VSOCK_VQ_MAX]; + /* Wait queue for send pkt */ + wait_queue_head_t queue_wait; + /* Work item to send pkt */ + struct work_struct tx_work; + /* Work item to recv pkt */ + struct work_struct rx_work; + /* Mutex to protect send pkt*/ + struct mutex tx_lock; + /* Mutex to protect recv pkt*/ + struct mutex rx_lock; + /* Number of recv buffers */ + int rx_buf_nr; + /* Number of max recv buffers */ + int rx_buf_max_nr; + /* Used for global tx buf limitation */ + u32 total_tx_buf; + /* Guest context id, just like guest ip address */ + u32 guest_cid; +}; + +static struct virtio_vsock *virtio_vsock_get(void) +{ + return the_virtio_vsock; +} + +static u32 virtio_transport_get_local_cid(void) +{ + struct virtio_vsock *vsock = virtio_vsock_get(); + + return vsock->guest_cid; +} + +static int +virtio_transport_send_pkt(struct vsock_sock *vsk, + struct virtio_vsock_pkt_info *info) +{ + u32 src_cid, src_port, dst_cid, dst_port; + int ret, in_sg = 0, out_sg = 0; + struct virtio_transport *trans; + struct virtio_vsock_pkt *pkt; + struct virtio_vsock *vsock; + struct scatterlist hdr, buf, *sgs[2]; + struct virtqueue *vq; + u32 pkt_len = info->pkt_len; + DEFINE_WAIT(wait); + + vsock = virtio_vsock_get(); + if (!vsock) + return -ENODEV; + + src_cid = virtio_transport_get_local_cid(); + src_port = vsk->local_addr.svm_port; + if (!info->remote_cid) { + dst_cid = vsk->remote_addr.svm_cid; + dst_port = vsk->remote_addr.svm_port; + } else { + dst_cid = info->remote_cid; + dst_port = info->remote_port; + } + + trans = vsk->trans; + vq = vsock->vqs[VSOCK_VQ_TX]; + + if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + pkt_len = virtio_transport_get_credit(trans, pkt_len); + /* Do not send zero length OP_RW pkt*/ + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) + return pkt_len; + + /* Respect global tx buf limitation */ + mutex_lock(&vsock->tx_lock); + while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { + prepare_to_wait_exclusive(&vsock->queue_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&vsock->tx_lock); + schedule(); + mutex_lock(&vsock->tx_lock); + finish_wait(&vsock->queue_wait, &wait); + } + vsock->total_tx_buf += pkt_len; + mutex_unlock(&vsock->tx_lock); + + pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, + src_cid, src_port, + dst_cid, dst_port); + if (!pkt) { + mutex_lock(&vsock->tx_lock); + vsock->total_tx_buf -= pkt_len; + mutex_unlock(&vsock->tx_lock); + virtio_transport_put_credit(trans, pkt_len); + return -ENOMEM; + } + + pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); + + /* Will be released in virtio_transport_send_pkt_work */ + sock_hold(&trans->vsk->sk); + virtio_transport_inc_tx_pkt(pkt); + + /* Put pkt in the virtqueue */ + sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); + sgs[out_sg++] = &hdr; + if (info->msg && info->pkt_len > 0) { + sg_init_one(&buf, pkt->buf, pkt->len); + sgs[out_sg++] = &buf; + } + + mutex_lock(&vsock->tx_lock); + while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, + GFP_KERNEL)) < 0) { + prepare_to_wait_exclusive(&vsock->queue_wait, &wait, + TASK_UNINTERRUPTIBLE); + mutex_unlock(&vsock->tx_lock); + schedule(); + mutex_lock(&vsock->tx_lock); + finish_wait(&vsock->queue_wait, &wait); + } + virtqueue_kick(vq); + mutex_unlock(&vsock->tx_lock); + + return pkt_len; +} + +static struct virtio_transport_pkt_ops virtio_ops = { + .send_pkt = virtio_transport_send_pkt, +}; + +static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) +{ + int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + struct virtio_vsock_pkt *pkt; + struct scatterlist hdr, buf, *sgs[2]; + struct virtqueue *vq; + int ret; + + vq = vsock->vqs[VSOCK_VQ_RX]; + + do { + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) { + pr_debug("%s: fail to allocate pkt\n", __func__); + goto out; + } + + /* TODO: use mergeable rx buffer */ + pkt->buf = kmalloc(buf_len, GFP_KERNEL); + if (!pkt->buf) { + pr_debug("%s: fail to allocate pkt->buf\n", __func__); + goto err; + } + + sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); + sgs[0] = &hdr; + + sg_init_one(&buf, pkt->buf, buf_len); + sgs[1] = &buf; + ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); + if (ret) + goto err; + vsock->rx_buf_nr++; + } while (vq->num_free); + if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) + vsock->rx_buf_max_nr = vsock->rx_buf_nr; +out: + virtqueue_kick(vq); + return; +err: + virtqueue_kick(vq); + virtio_transport_free_pkt(pkt); + return; +} + +static void virtio_transport_send_pkt_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, tx_work); + struct virtio_vsock_pkt *pkt; + bool added = false; + struct virtqueue *vq; + unsigned int len; + struct sock *sk; + + vq = vsock->vqs[VSOCK_VQ_TX]; + mutex_lock(&vsock->tx_lock); + do { + virtqueue_disable_cb(vq); + while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { + sk = &pkt->trans->vsk->sk; + virtio_transport_dec_tx_pkt(pkt); + /* Release refcnt taken in virtio_transport_send_pkt */ + sock_put(sk); + vsock->total_tx_buf -= pkt->len; + virtio_transport_free_pkt(pkt); + added = true; + } + } while (!virtqueue_enable_cb(vq)); + mutex_unlock(&vsock->tx_lock); + + if (added) + wake_up(&vsock->queue_wait); +} + +static void virtio_transport_recv_pkt_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, rx_work); + struct virtio_vsock_pkt *pkt; + struct virtqueue *vq; + unsigned int len; + + vq = vsock->vqs[VSOCK_VQ_RX]; + mutex_lock(&vsock->rx_lock); + do { + virtqueue_disable_cb(vq); + while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { + pkt->len = len; + virtio_transport_recv_pkt(pkt); + vsock->rx_buf_nr--; + } + } while (!virtqueue_enable_cb(vq)); + + if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); +} + +static void virtio_vsock_ctrl_done(struct virtqueue *vq) +{ +} + +static void virtio_vsock_tx_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->tx_work); +} + +static void virtio_vsock_rx_done(struct virtqueue *vq) +{ + struct virtio_vsock *vsock = vq->vdev->priv; + + if (!vsock) + return; + queue_work(virtio_vsock_workqueue, &vsock->rx_work); +} + +static int +virtio_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) +{ + struct virtio_transport *trans; + int ret; + + ret = virtio_transport_do_socket_init(vsk, psk); + if (ret) + return ret; + + trans = vsk->trans; + trans->ops = &virtio_ops; + return ret; +} + +static struct vsock_transport virtio_transport = { + .get_local_cid = virtio_transport_get_local_cid, + + .init = virtio_transport_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + + .dgram_bind = virtio_transport_dgram_bind, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, +}; + +static int virtio_vsock_probe(struct virtio_device *vdev) +{ + vq_callback_t *callbacks[] = { + virtio_vsock_ctrl_done, + virtio_vsock_rx_done, + virtio_vsock_tx_done, + }; + const char *names[] = { + "ctrl", + "rx", + "tx", + }; + struct virtio_vsock *vsock = NULL; + u32 guest_cid; + int ret; + + ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); + if (ret) + return ret; + + /* Only one virtio-vsock device per guest is supported */ + if (the_virtio_vsock) { + ret = -EBUSY; + goto out; + } + + vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); + if (!vsock) { + ret = -ENOMEM; + goto out; + } + + vsock->vdev = vdev; + + ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, + vsock->vqs, callbacks, names); + if (ret < 0) + goto out; + + vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), + &guest_cid, sizeof(guest_cid)); + vsock->guest_cid = le32_to_cpu(guest_cid); + pr_debug("%s:guest_cid=%d\n", __func__, vsock->guest_cid); + + ret = vsock_core_init(&virtio_transport); + if (ret < 0) + goto out_vqs; + + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + + vdev->priv = the_virtio_vsock = vsock; + init_waitqueue_head(&vsock->queue_wait); + mutex_init(&vsock->tx_lock); + mutex_init(&vsock->rx_lock); + INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work); + INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work); + + mutex_lock(&vsock->rx_lock); + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); + + mutex_unlock(&the_virtio_vsock_mutex); + return 0; + +out_vqs: + vsock->vdev->config->del_vqs(vsock->vdev); +out: + kfree(vsock); + mutex_unlock(&the_virtio_vsock_mutex); + return ret; +} + +static void virtio_vsock_remove(struct virtio_device *vdev) +{ + struct virtio_vsock *vsock = vdev->priv; + + mutex_lock(&the_virtio_vsock_mutex); + the_virtio_vsock = NULL; + vsock_core_exit(); + mutex_unlock(&the_virtio_vsock_mutex); + + kfree(vsock); +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { +}; + +static struct virtio_driver virtio_vsock_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_vsock_probe, + .remove = virtio_vsock_remove, +}; + +static int __init virtio_vsock_init(void) +{ + int ret; + + virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); + if (!virtio_vsock_workqueue) + return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); + if (ret) + destroy_workqueue(virtio_vsock_workqueue); + return ret; +} + +static void __exit virtio_vsock_exit(void) +{ + unregister_virtio_driver(&virtio_vsock_driver); + destroy_workqueue(virtio_vsock_workqueue); +} + +module_init(virtio_vsock_init); +module_exit(virtio_vsock_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Asias He"); +MODULE_DESCRIPTION("virtio transport for vsock"); +MODULE_DEVICE_TABLE(virtio, id_table); -- cgit v1.1 From 8a2a2029893b4c35d1aba2932111a1a164b9c948 Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:44:03 +0800 Subject: VSOCK: Add Makefile and Kconfig Enable virtio-vsock and vhost-vsock. Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/Kconfig | 18 ++++++++++++++++++ net/vmw_vsock/Makefile | 2 ++ 2 files changed, 20 insertions(+) (limited to 'net') diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 14810ab..74e0bc8 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,3 +26,21 @@ config VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. + +config VIRTIO_VSOCKETS + tristate "virtio transport for Virtual Sockets" + depends on VSOCKETS && VIRTIO + select VIRTIO_VSOCKETS_COMMON + help + This module implements a virtio transport for Virtual Sockets. + + Enable this transport if your Virtual Machine runs on Qemu/KVM. + + To compile this driver as a module, choose M here: the module + will be called virtio_vsock_transport. If unsure, say N. + +config VIRTIO_VSOCKETS_COMMON + tristate + ---help--- + This option is selected by any driver which needs to access + the virtio_vsock. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index 2ce52d7..cf4c294 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o +obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o +obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o -- cgit v1.1 From 681b4d88ad8e5b67c34f4d0a40448efb94e2b227 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 2 Dec 2015 16:27:39 +0100 Subject: pppox: use standard module auto-loading feature * Register PF_PPPOX with pppox module rather than with pppoe, so that pppoe doesn't get loaded for any PF_PPPOX socket. * Register PX_PROTO_* with standard MODULE_ALIAS_NET_PF_PROTO() instead of using pppox's own naming scheme. * While there, add auto-loading feature for pptp. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 1ad18c5..d93f113 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1862,5 +1862,5 @@ MODULE_AUTHOR("James Chapman "); MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); -MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); +MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); MODULE_ALIAS_L2TP_PWTYPE(11); -- cgit v1.1 From dc8d1eb305984b1182f5e85de3c3a1f8592b83af Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 2 Dec 2015 15:19:37 -0500 Subject: tipc: fix node reference count bug Commit 5405ff6e15f40f2f ("tipc: convert node lock to rwlock") introduced a bug to the node reference counter handling. When a message is successfully sent in the function tipc_node_xmit(), we return directly after releasing the node lock, instead of continuing and decrementing the node reference counter as we should do. This commit fixes this bug. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 3f7a4ed..fa97d96 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1189,20 +1189,19 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, spin_unlock_bh(&le->lock); } tipc_node_read_unlock(n); - if (likely(!skb_queue_empty(&xmitq))) { + if (likely(!rc)) tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); - return 0; - } - if (unlikely(rc == -ENOBUFS)) + else if (rc == -ENOBUFS) tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); return rc; } - if (unlikely(!in_own_node(net, dnode))) - return rc; - tipc_sk_rcv(net, list); - return 0; + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } + return rc; } /* tipc_node_xmit_skb(): send single buffer to destination -- cgit v1.1 From 3110489117581a980537b6d999a3724214ba772c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Oct 2015 17:35:19 +0200 Subject: mac80211: allow driver to prevent two stations w/ same address Some devices or drivers cannot deal with having the same station address for different virtual interfaces, say as a client to two virtual AP interfaces. Rather than requiring each driver with a limitation like that to enforce it, add a hardware flag for it. Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 1 + net/mac80211/sta_info.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 4d2aaeb..abbdff0 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -125,6 +125,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { FLAG(TDLS_WIDER_BW), FLAG(SUPPORTS_AMSDU_IN_AMPDU), FLAG(BEACON_TX_STATUS), + FLAG(NEEDS_UNIQUE_STA_ADDR), /* keep last for the build bug below */ (void *)0x1 diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f91d187..8f630f5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -435,6 +435,19 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; + /* Strictly speaking this isn't necessary as we hold the mutex, but + * the rhashtable code can't really deal with that distinction. We + * do require the mutex for correctness though. + */ + rcu_read_lock(); + lockdep_assert_held(&sdata->local->sta_mtx); + if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && + ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { + rcu_read_unlock(); + return -ENOTUNIQ; + } + rcu_read_unlock(); + return 0; } @@ -554,14 +567,15 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) might_sleep(); + mutex_lock(&local->sta_mtx); + err = sta_info_insert_check(sta); if (err) { + mutex_unlock(&local->sta_mtx); rcu_read_lock(); goto out_free; } - mutex_lock(&local->sta_mtx); - err = sta_info_insert_finish(sta); if (err) goto out_free; -- cgit v1.1 From a1056b1baaa887de52a76a5fcf5aeb4327c96c8a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 22 Oct 2015 22:27:46 +0300 Subject: cfg80211: Add missing tracing to cfg80211 Add missing tracing for: 1. start_radar_detection() 2. set_mcast_rates() 3. set_coalesce() Signed-off-by: Ilan Peer Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 ++++---- net/wireless/rdev-ops.h | 43 ++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c71e274..41e57d0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6507,8 +6507,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; - err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef, - cac_time_ms); + err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms); if (!err) { wdev->chandef = chandef; wdev->cac_started = true; @@ -7571,7 +7570,7 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate)) return -EINVAL; - err = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + err = rdev_set_mcast_rate(rdev, dev, mcast_rate); return err; } @@ -9716,7 +9715,7 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) { cfg80211_rdev_free_coalesce(rdev); - rdev->ops->set_coalesce(&rdev->wiphy, NULL); + rdev_set_coalesce(rdev, NULL); return 0; } @@ -9744,7 +9743,7 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) i++; } - err = rdev->ops->set_coalesce(&rdev->wiphy, &new_coalesce); + err = rdev_set_coalesce(rdev, &new_coalesce); if (err) goto error; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index c23516d..b8cc594 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1020,4 +1020,47 @@ rdev_tdls_cancel_channel_switch(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int +rdev_start_radar_detection(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_chan_def *chandef, + u32 cac_time_ms) +{ + int ret = -ENOTSUPP; + + trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef, + cac_time_ms); + if (rdev->ops->start_radar_detection) + ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev, + chandef, cac_time_ms); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int +rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, + struct net_device *dev, + int mcast_rate[IEEE80211_NUM_BANDS]) +{ + int ret = -ENOTSUPP; + + trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + if (rdev->ops->set_mcast_rate) + ret = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int +rdev_set_coalesce(struct cfg80211_registered_device *rdev, + struct cfg80211_coalesce *coalesce) +{ + int ret = -ENOTSUPP; + + trace_rdev_set_coalesce(&rdev->wiphy, coalesce); + if (rdev->ops->set_coalesce) + ret = rdev->ops->set_coalesce(&rdev->wiphy, coalesce); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0c392d3..62d9b96 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2818,6 +2818,67 @@ TRACE_EVENT(cfg80211_stop_iface, WIPHY_PR_ARG, WDEV_PR_ARG) ); +TRACE_EVENT(rdev_start_radar_detection, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_chan_def *chandef, + u32 cac_time_ms), + TP_ARGS(wiphy, netdev, chandef, cac_time_ms), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + CHAN_DEF_ENTRY + __field(u32, cac_time_ms) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + __entry->cac_time_ms = cac_time_ms; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT + ", cac_time_ms=%u", + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, + __entry->cac_time_ms) +); + +TRACE_EVENT(rdev_set_mcast_rate, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + int mcast_rate[IEEE80211_NUM_BANDS]), + TP_ARGS(wiphy, netdev, mcast_rate), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __array(int, mcast_rate, IEEE80211_NUM_BANDS) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + memcpy(__entry->mcast_rate, mcast_rate, + sizeof(int) * IEEE80211_NUM_BANDS); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " + "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->mcast_rate[IEEE80211_BAND_2GHZ], + __entry->mcast_rate[IEEE80211_BAND_5GHZ], + __entry->mcast_rate[IEEE80211_BAND_60GHZ]) +); + +TRACE_EVENT(rdev_set_coalesce, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce), + TP_ARGS(wiphy, coalesce), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(int, n_rules) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->n_rules = coalesce ? coalesce->n_rules : 0; + ), + TP_printk(WIPHY_PR_FMT ", n_rules=%d", + WIPHY_PR_ARG, __entry->n_rules) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.1 From 6e045905d1786f62cb3f7ddc6c987f7dc3ad8ed6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Oct 2015 22:27:47 +0300 Subject: cfg80211: add complete data to station add/change tracing Complete the tracepoint with the missing data - it's not printed by default (a lot of it is dynamic arrays) but will be recorded and be available during post-processing. Signed-off-by: Johannes Berg --- net/wireless/trace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 62d9b96..5b9139e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -623,12 +623,24 @@ DECLARE_EVENT_CLASS(station_add_change, __field(u32, sta_flags_set) __field(u32, sta_modify_mask) __field(int, listen_interval) + __field(u16, capability) __field(u16, aid) __field(u8, plink_action) __field(u8, plink_state) __field(u8, uapsd_queues) + __field(u8, max_sp) + __field(u8, opmode_notif) + __field(bool, opmode_notif_used) __array(u8, ht_capa, (int)sizeof(struct ieee80211_ht_cap)) + __array(u8, vht_capa, (int)sizeof(struct ieee80211_vht_cap)) __array(char, vlan, IFNAMSIZ) + __dynamic_array(u8, supported_rates, + params->supported_rates_len) + __dynamic_array(u8, ext_capab, params->ext_capab_len) + __dynamic_array(u8, supported_channels, + params->supported_channels_len) + __dynamic_array(u8, supported_oper_classes, + params->supported_oper_classes_len) ), TP_fast_assign( WIPHY_ASSIGN; @@ -646,9 +658,35 @@ DECLARE_EVENT_CLASS(station_add_change, if (params->ht_capa) memcpy(__entry->ht_capa, params->ht_capa, sizeof(struct ieee80211_ht_cap)); + memset(__entry->vht_capa, 0, sizeof(struct ieee80211_vht_cap)); + if (params->vht_capa) + memcpy(__entry->vht_capa, params->vht_capa, + sizeof(struct ieee80211_vht_cap)); memset(__entry->vlan, 0, sizeof(__entry->vlan)); if (params->vlan) memcpy(__entry->vlan, params->vlan->name, IFNAMSIZ); + if (params->supported_rates && params->supported_rates_len) + memcpy(__get_dynamic_array(supported_rates), + params->supported_rates, + params->supported_rates_len); + if (params->ext_capab && params->ext_capab_len) + memcpy(__get_dynamic_array(ext_capab), + params->ext_capab, + params->ext_capab_len); + if (params->supported_channels && + params->supported_channels_len) + memcpy(__get_dynamic_array(supported_channels), + params->supported_channels, + params->supported_channels_len); + if (params->supported_oper_classes && + params->supported_oper_classes_len) + memcpy(__get_dynamic_array(supported_oper_classes), + params->supported_oper_classes, + params->supported_oper_classes_len); + __entry->max_sp = params->max_sp; + __entry->capability = params->capability; + __entry->opmode_notif = params->opmode_notif; + __entry->opmode_notif_used = params->opmode_notif_used; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT ", station flags mask: %u, station flags set: %u, " -- cgit v1.1 From 0483eeac59876ac37d4edbabd48727a468416d5b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Oct 2015 09:50:03 +0200 Subject: cfg80211: replace ieee80211_ie_split() with an inline The function is a very simple wrapper around another one, just adds a few default parameters, so replace it with a static inline instead of using EXPORT_SYMBOL, reducing the module size slightly. Signed-off-by: Johannes Berg --- net/wireless/util.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index baf7218..010a3c7 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1325,13 +1325,6 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, } EXPORT_SYMBOL(ieee80211_ie_split_ric); -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset) -{ - return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); -} -EXPORT_SYMBOL(ieee80211_ie_split); - bool ieee80211_operating_class_to_band(u8 operating_class, enum ieee80211_band *band) { -- cgit v1.1 From d671b2a077a92ff71ad76fba0e8bfd1b7c5ca820 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2015 11:30:46 +0100 Subject: mac80211: mesh: print MAC address instead of pointer There's no point in printing the mpath pointer since it can't be used for anything - print the MAC address instead (like in the forwarding case.) Signed-off-by: Johannes Berg Acked-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index b3b44a5..dadf8dc 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -968,8 +968,8 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) copy = true; } else { mpath_dbg(sdata, - "Not forwarding %p (flags %#x)\n", - gate->mpath, gate->mpath->flags); + "Not forwarding to %pM (flags %#x)\n", + gate->mpath->dst, gate->mpath->flags); } } -- cgit v1.1 From 996bf99c71944590e4f56504d5ec99ddd0d85e9c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2015 12:02:31 +0100 Subject: lib80211: ratelimit key index mismatch This indicates a driver key selection issue, but even then there's no point in printing it all the time, so ratelimit it. Also remove the priv pointer from it -- people debugging will only have a single device anyway and it's useless as anything but a cookie. Signed-off-by: Johannes Berg --- net/wireless/lib80211_crypt_ccmp.c | 4 ++-- net/wireless/lib80211_crypt_tkip.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index dc0e59e..6beab0c 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -311,8 +311,8 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } keyidx >>= 6; if (key->key_idx != keyidx) { - printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame " - "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv); + net_dbg_ratelimited("CCMP: RX tkey->key_idx=%d frame keyidx=%d\n", + key->key_idx, keyidx); return -6; } if (!key->key_set) { diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 8c90ba7..3cd8195 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -434,8 +434,8 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) } keyidx >>= 6; if (tkey->key_idx != keyidx) { - printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame " - "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv); + net_dbg_ratelimited("TKIP: RX tkey->key_idx=%d frame keyidx=%d\n", + tkey->key_idx, keyidx); return -6; } if (!tkey->key_set) { -- cgit v1.1 From 441275e1038a803d61df41eae9a44d26486d8301 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Nov 2015 12:34:24 +0100 Subject: mac80211: remove string from unaligned packet warning This really should never happen except very early in the process of bringing up a new driver, at which point you'll have to add more debugging in the driver and this string isn't useful. Remove it and save some size (when it's even compiled in.) Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8bae5de..1f82753 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -661,8 +661,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - WARN_ONCE((unsigned long)rx->skb->data & 1, - "unaligned packet at 0x%p\n", rx->skb->data); + WARN_ON_ONCE((unsigned long)rx->skb->data & 1); #endif } -- cgit v1.1 From 1b9df2d20eee9f3a675d1a3a7aa3640e6d8d7e94 Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Mon, 9 Nov 2015 22:02:09 +0100 Subject: cfg80211: ocb: Fix null pointer deref if join_ocb is unimplemented Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg --- net/wireless/ocb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c index c00d4a7..e64dbf1 100644 --- a/net/wireless/ocb.c +++ b/net/wireless/ocb.c @@ -29,6 +29,9 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) return -EOPNOTSUPP; + if (!rdev->ops->join_ocb) + return -EOPNOTSUPP; + if (WARN_ON(!setup->chandef.chan)) return -EINVAL; -- cgit v1.1 From 0ead2510f8cec11ce96308d79a1b4ee272fb5238 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Nov 2015 10:24:36 +0200 Subject: mac80211: allow the driver to send EOSP when needed This can happen when the driver needs to send less frames than expected and then needs to close the SP. Mac80211 still needs to set the more_data properly based on its buffer state (ps_tx_buffer and buffered frames on other TIDs). To that end, refactor the code that delivers frames upon uAPSD trigger frames to be able to get only the more_data bit without actually delivering those frames in case the driver is just asking to set a NDP with EOSP and MORE_DATA bit properly set. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 144 +++++++++++++++++++++++++++++++++--------------- net/mac80211/trace.h | 25 +++++++++ 2 files changed, 126 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8f630f5..723fa30 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1244,11 +1245,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) ieee80211_check_fast_xmit(sta); } -static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, int tid, +static void ieee80211_send_null_response(struct sta_info *sta, int tid, enum ieee80211_frame_release_type reason, - bool call_driver) + bool call_driver, bool more_data) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; @@ -1288,9 +1289,13 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, if (qos) { nullfunc->qos_ctrl = cpu_to_le16(tid); - if (reason == IEEE80211_FRAME_RELEASE_UAPSD) + if (reason == IEEE80211_FRAME_RELEASE_UAPSD) { nullfunc->qos_ctrl |= cpu_to_le16(IEEE80211_QOS_CTL_EOSP); + if (more_data) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + } } info = IEEE80211_SKB_CB(skb); @@ -1337,22 +1342,48 @@ static int find_highest_prio_tid(unsigned long tids) return fls(tids) - 1; } +/* Indicates if the MORE_DATA bit should be set in the last + * frame obtained by ieee80211_sta_ps_get_frames. + * Note that driver_release_tids is relevant only if + * reason = IEEE80211_FRAME_RELEASE_PSPOLL + */ +static bool +ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, + enum ieee80211_frame_release_type reason, + unsigned long driver_release_tids) +{ + int ac; + + /* If the driver has data on more than one TID then + * certainly there's more data if we release just a + * single frame now (from a single TID). This will + * only happen for PS-Poll. + */ + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && + hweight16(driver_release_tids) > 1) + return true; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + if (ignored_acs & BIT(ac)) + continue; + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + return true; + } + + return false; +} + static void -ieee80211_sta_ps_deliver_response(struct sta_info *sta, - int n_frames, u8 ignored_acs, - enum ieee80211_frame_release_type reason) +ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason, + struct sk_buff_head *frames, + unsigned long *driver_release_tids) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - bool more_data = false; int ac; - unsigned long driver_release_tids = 0; - struct sk_buff_head frames; - - /* Service or PS-Poll period starts */ - set_sta_flag(sta, WLAN_STA_SP); - - __skb_queue_head_init(&frames); /* Get response frame(s) and more data bit for the last one. */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -1366,26 +1397,13 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* if we already have frames from software, then we can't also * release from hardware queues */ - if (skb_queue_empty(&frames)) { - driver_release_tids |= sta->driver_buffered_tids & tids; - driver_release_tids |= sta->txq_buffered_tids & tids; + if (skb_queue_empty(frames)) { + *driver_release_tids |= + sta->driver_buffered_tids & tids; + *driver_release_tids |= sta->txq_buffered_tids & tids; } - if (driver_release_tids) { - /* If the driver has data on more than one TID then - * certainly there's more data if we release just a - * single frame now (from a single TID). This will - * only happen for PS-Poll. - */ - if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && - hweight16(driver_release_tids) > 1) { - more_data = true; - driver_release_tids = - BIT(find_highest_prio_tid( - driver_release_tids)); - break; - } - } else { + if (!*driver_release_tids) { struct sk_buff *skb; while (n_frames > 0) { @@ -1399,20 +1417,44 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, if (!skb) break; n_frames--; - __skb_queue_tail(&frames, skb); + __skb_queue_tail(frames, skb); } } - /* If we have more frames buffered on this AC, then set the - * more-data bit and abort the loop since we can't send more - * data from other ACs before the buffered frames from this. + /* If we have more frames buffered on this AC, then abort the + * loop since we can't send more data from other ACs before + * the buffered frames from this. */ if (!skb_queue_empty(&sta->tx_filtered[ac]) || - !skb_queue_empty(&sta->ps_tx_buf[ac])) { - more_data = true; + !skb_queue_empty(&sta->ps_tx_buf[ac])) break; - } } +} + +static void +ieee80211_sta_ps_deliver_response(struct sta_info *sta, + int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + unsigned long driver_release_tids = 0; + struct sk_buff_head frames; + bool more_data; + + /* Service or PS-Poll period starts */ + set_sta_flag(sta, WLAN_STA_SP); + + __skb_queue_head_init(&frames); + + ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason, + &frames, &driver_release_tids); + + more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); + + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL) + driver_release_tids = + BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { int tid; @@ -1435,7 +1477,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* This will evaluate to 1, 3, 5 or 7. */ tid = 7 - ((ffs(~ignored_acs) - 1) << 1); - ieee80211_send_null_response(sdata, sta, tid, reason, true); + ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; @@ -1535,8 +1577,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, if (need_null) ieee80211_send_null_response( - sdata, sta, find_highest_prio_tid(tids), - reason, false); + sta, find_highest_prio_tid(tids), + reason, false, false); sta_info_recalc_tim(sta); } else { @@ -1674,6 +1716,22 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) } EXPORT_SYMBOL(ieee80211_sta_eosp); +void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + enum ieee80211_frame_release_type reason; + bool more_data; + + trace_api_send_eosp_nullfunc(sta->local, pubsta, tid); + + reason = IEEE80211_FRAME_RELEASE_UAPSD; + more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues, + reason, 0); + + ieee80211_send_null_response(sta, tid, reason, false, more_data); +} +EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc); + void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 56c6d6c..a6b4442 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2027,6 +2027,31 @@ TRACE_EVENT(api_eosp, ) ); +TRACE_EVENT(api_send_eosp_nullfunc, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u8 tid), + + TP_ARGS(local, sta, tid), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, tid) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->tid = tid; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " tid:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->tid + ) +); + TRACE_EVENT(api_sta_set_buffered, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, -- cgit v1.1 From ef044763a3ca6b9e0bb65a9ce0cb38c0eca62756 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 17 Nov 2015 10:24:37 +0200 Subject: mac80211: add atomic uploaded keys iterator add ieee80211_iter_keys_rcu() to iterate over uploaded keys in atomic context (when rcu is locked) The station removal code removes the keys only after calling synchronize_net(), so it's not safe to iterate the keys at this point (and postponing the actual key deletion with call_rcu() might result in some badly-ordered ops calls). Add a flag to indicate a station is being removed, and skip the configured keys if it's set. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/key.c | 56 +++++++++++++++++++++++++++++++++++++++++++++---- net/mac80211/sta_info.c | 1 + net/mac80211/sta_info.h | 2 ++ 3 files changed, 55 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 44388d6..5e5bc59 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -320,7 +321,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, return; if (new) - list_add_tail(&new->list, &sdata->key_list); + list_add_tail_rcu(&new->list, &sdata->key_list); WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); @@ -368,7 +369,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } if (old) - list_del(&old->list); + list_del_rcu(&old->list); } struct ieee80211_key * @@ -592,8 +593,8 @@ static void ieee80211_key_destroy(struct ieee80211_key *key, return; /* - * Synchronize so the TX path can no longer be using - * this key before we free/remove it. + * Synchronize so the TX path and rcu key iterators + * can no longer be using this key before we free/remove it. */ synchronize_net(); @@ -744,6 +745,53 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); +static void +_ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_sub_if_data *sdata, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_key *key; + + list_for_each_entry_rcu(key, &sdata->key_list, list) { + /* skip keys of station in removal process */ + if (key->sta && key->sta->removed) + continue; + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + continue; + + iter(hw, &sdata->vif, + key->sta ? &key->sta->sta : NULL, + &key->conf, iter_data); + } +} + +void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + + if (vif) { + sdata = vif_to_sdata(vif); + _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); + } else { + list_for_each_entry_rcu(sdata, &local->interfaces, list) + _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); + } +} +EXPORT_SYMBOL(ieee80211_iter_keys_rcu); + static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, struct list_head *keys) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 723fa30..4402ad5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -883,6 +883,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) } list_del_rcu(&sta->list); + sta->removed = true; drv_sta_pre_rcu_remove(local, sta->sdata, sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2cafb21..d605162 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -367,6 +367,7 @@ DECLARE_EWMA(signal, 1024, 8) * @mesh: mesh STA information * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked + * @removed: set to true when sta is being removed from sta_list * @uploaded: set to true when sta is uploaded to the driver * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) @@ -412,6 +413,7 @@ struct sta_info { u16 listen_interval; bool dead; + bool removed; bool uploaded; -- cgit v1.1 From 491728746b500b22f384cb1d0aba76f7c55a9269 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Mon, 23 Nov 2015 19:27:14 +0100 Subject: cfg80211: reg: Remove unused function parameter Signed-off-by: Michal Sojka Signed-off-by: Johannes Berg --- net/wireless/reg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2e8d6f3..43b3e57 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1052,7 +1052,7 @@ static u32 map_regdom_flags(u32 rd_flags) } static const struct ieee80211_reg_rule * -freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, +freq_reg_info_regd(u32 center_freq, const struct ieee80211_regdomain *regd, u32 bw) { int i; @@ -1097,7 +1097,7 @@ __freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw) u32 bw; for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { - reg_rule = freq_reg_info_regd(wiphy, center_freq, regd, bw); + reg_rule = freq_reg_info_regd(center_freq, regd, bw); if (!IS_ERR(reg_rule)) return reg_rule; } @@ -1765,8 +1765,7 @@ static void handle_channel_custom(struct wiphy *wiphy, u32 bw; for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { - reg_rule = freq_reg_info_regd(wiphy, - MHZ_TO_KHZ(chan->center_freq), + reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), regd, bw); if (!IS_ERR(reg_rule)) break; -- cgit v1.1 From c781944b71f87aa4d30eaaafb4e7573ce94bdcfd Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Mon, 23 Nov 2015 19:27:15 +0100 Subject: cfg80211: Remove unused cfg80211_can_use_iftype_chan() Last caller of this function was removed in 3.17 in commit 97dc94f1d933c9df2c0b327066ea130c0e92083f. Signed-off-by: Michal Sojka Signed-off-by: Johannes Berg --- net/wireless/core.h | 7 ---- net/wireless/util.c | 114 ---------------------------------------------------- 2 files changed, 121 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index a618b4b..022ccad 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -416,13 +416,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); void cfg80211_process_wdev_events(struct wireless_dev *wdev); -int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode, - u8 radar_detect); - /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/util.c b/net/wireless/util.c index 010a3c7..9277042 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1613,120 +1613,6 @@ int cfg80211_check_combinations(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_check_combinations); -int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_iftype iftype, - struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode, - u8 radar_detect) -{ - struct wireless_dev *wdev_iter; - int num[NUM_NL80211_IFTYPES]; - struct ieee80211_channel - *used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS]; - struct ieee80211_channel *ch; - enum cfg80211_chan_mode chmode; - int num_different_channels = 0; - int total = 1; - int i; - - ASSERT_RTNL(); - - if (WARN_ON(hweight32(radar_detect) > 1)) - return -EINVAL; - - if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) - return -EINVAL; - - /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) { - if (radar_detect) - return -EINVAL; - return 0; - } - - memset(num, 0, sizeof(num)); - memset(used_channels, 0, sizeof(used_channels)); - - num[iftype] = 1; - - /* TODO: We'll probably not need this anymore, since this - * should only be called with CHAN_MODE_UNDEFINED. There are - * still a couple of pending calls where other chanmodes are - * used, but we should get rid of them. - */ - switch (chanmode) { - case CHAN_MODE_UNDEFINED: - break; - case CHAN_MODE_SHARED: - WARN_ON(!chan); - used_channels[0] = chan; - num_different_channels++; - break; - case CHAN_MODE_EXCLUSIVE: - num_different_channels++; - break; - } - - list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { - if (wdev_iter == wdev) - continue; - if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) { - if (!wdev_iter->p2p_started) - continue; - } else if (wdev_iter->netdev) { - if (!netif_running(wdev_iter->netdev)) - continue; - } else { - WARN_ON(1); - } - - if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype)) - continue; - - /* - * We may be holding the "wdev" mutex, but now need to lock - * wdev_iter. This is OK because once we get here wdev_iter - * is not wdev (tested above), but we need to use the nested - * locking for lockdep. - */ - mutex_lock_nested(&wdev_iter->mtx, 1); - __acquire(wdev_iter->mtx); - cfg80211_get_chan_state(wdev_iter, &ch, &chmode, &radar_detect); - wdev_unlock(wdev_iter); - - switch (chmode) { - case CHAN_MODE_UNDEFINED: - break; - case CHAN_MODE_SHARED: - for (i = 0; i < CFG80211_MAX_NUM_DIFFERENT_CHANNELS; i++) - if (!used_channels[i] || used_channels[i] == ch) - break; - - if (i == CFG80211_MAX_NUM_DIFFERENT_CHANNELS) - return -EBUSY; - - if (used_channels[i] == NULL) { - used_channels[i] = ch; - num_different_channels++; - } - break; - case CHAN_MODE_EXCLUSIVE: - num_different_channels++; - break; - } - - num[wdev_iter->iftype]++; - total++; - } - - if (total == 1 && !radar_detect) - return 0; - - return cfg80211_check_combinations(&rdev->wiphy, num_different_channels, - radar_detect, num); -} - int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, u32 *mask) -- cgit v1.1 From b115b972997428b9134aba377721fea6486adbd0 Mon Sep 17 00:00:00 2001 From: "Janusz.Dziedzic@tieto.com" Date: Tue, 27 Oct 2015 08:38:40 +0100 Subject: mac80211: add new IEEE80211_VIF_GET_NOA_UPDATE flag Add new VIF flag, that will allow get NOA update notification when driver will request this, even this is not pure P2P vif (eg. STA vif). Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b140cc6..123b26d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1930,7 +1930,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE; - if (sdata->vif.p2p) { + if (sdata->vif.p2p || + sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) { const struct cfg80211_bss_ies *ies; rcu_read_lock(); @@ -3458,7 +3459,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } } - if (sdata->vif.p2p) { + if (sdata->vif.p2p || + sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) { struct ieee80211_p2p_noa_attr noa = {}; int ret; -- cgit v1.1 From 91d3ab46730379e89e1e908c6f62fbcadb3d8f08 Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 30 Oct 2015 19:14:49 +0530 Subject: cfg80211: Add support for aborting an ongoing scan Implement new functionality for aborting an ongoing scan. Add NL80211_CMD_ABORT_SCAN to the nl80211 interface. After aborting the scan, driver shall provide the scan status by calling cfg80211_scan_done(). Reviewed-by: Jouni Malinen Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Sunil Dutt [change command to take wdev instead of netdev so that it can be used on p2p-device scans] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 26 ++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 8 ++++++++ net/wireless/trace.h | 4 ++++ 3 files changed, 38 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 41e57d0..67e7b53 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5997,6 +5997,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_abort_scan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (!rdev->ops->abort_scan) + return -EOPNOTSUPP; + + if (rdev->scan_msg) + return 0; + + if (!rdev->scan_req) + return -ENOENT; + + rdev_abort_scan(rdev, wdev); + return 0; +} + static int nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, struct cfg80211_sched_scan_request *request, @@ -10945,6 +10963,14 @@ static const struct genl_ops nl80211_ops[] = { NL80211_FLAG_NEED_RTNL, }, { + .cmd = NL80211_CMD_ABORT_SCAN, + .doit = nl80211_abort_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { .cmd = NL80211_CMD_GET_SCAN, .policy = nl80211_policy, .dumpit = nl80211_dump_scan, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index b8cc594..8ae0c04 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -427,6 +427,14 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev, return ret; } +static inline void rdev_abort_scan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_abort_scan(&rdev->wiphy, wdev); + rdev->ops->abort_scan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_auth_request *req) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5b9139e..09b242b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2917,6 +2917,10 @@ TRACE_EVENT(rdev_set_coalesce, WIPHY_PR_ARG, __entry->n_rules) ); +DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.1 From 91f123f20d64c99db0ce8d2bbc5bb82012d3cc1a Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 30 Oct 2015 19:14:50 +0530 Subject: mac80211: Add support for aborting an ongoing scan This commit adds implementation for abort scan in mac80211. Reviewed-by: Jouni Malinen Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Sunil Dutt [adjust to wdev change in previous patch and clean up code a bit] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index da471ee..763f2eb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1994,6 +1994,11 @@ static int ieee80211_scan(struct wiphy *wiphy, return ieee80211_request_scan(sdata, req); } +static void ieee80211_abort_scan(struct wiphy *wiphy, struct wireless_dev *wdev) +{ + ieee80211_scan_cancel(wiphy_priv(wiphy)); +} + static int ieee80211_sched_scan_start(struct wiphy *wiphy, struct net_device *dev, @@ -3842,6 +3847,7 @@ const struct cfg80211_ops mac80211_config_ops = { .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, + .abort_scan = ieee80211_abort_scan, .sched_scan_start = ieee80211_sched_scan_start, .sched_scan_stop = ieee80211_sched_scan_stop, .auth = ieee80211_auth, -- cgit v1.1 From a9bc31e418733e4c476f4322c90b7c09aab31002 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Thu, 26 Nov 2015 16:26:12 +0100 Subject: cfg80211: use NL80211_ATTR_STA_AID in nl82011_set_station Fix nl80211_set_station() to use the value of NL80211_ATTR_STA_AID attribute instead of NL80211_ATTR_PEER_AID attribute. Signed-off-by: Ayala Beker Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67e7b53..f4afa99 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4256,8 +4256,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) * station. Include these parameters here and will check them in * cfg80211_check_station_change(). */ - if (info->attrs[NL80211_ATTR_PEER_AID]) - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); + if (info->attrs[NL80211_ATTR_STA_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) params.listen_interval = -- cgit v1.1 From bda95eb1d1581cfd79e9717ebda4b7ccd2265351 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Nov 2015 16:26:13 +0100 Subject: cfg80211: handle add_station auth/assoc flag quirks When a new station is added to AP/GO interfaces the default behaviour is for it to be added authenticated and associated, due to backwards compatibility. To prevent that, the driver must be able to do that (setting the NL80211_FEATURE_FULL_AP_CLIENT_STATE feature flag) and userspace must set the flag mask to auth|assoc and clear the set. Handle this quirk in the API entirely in nl80211, and always push the full flags to the drivers. NL80211_FEATURE_FULL_AP_CLIENT_STATE is still required for userspace to be allowed to set the mask including those bits, but after checking that add both flags to the mask and set in case userspace didn't set them otherwise. This obsoletes the mac80211 code handling this difference, no other driver is currently using these flags. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 ---------- net/wireless/nl80211.c | 23 +++++++++++++++++++---- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 763f2eb..1df92fe 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1216,16 +1216,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; - /* - * defaults -- if userspace wants something else we'll - * change it accordingly in sta_apply_parameters() - */ - if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) && - !(params->sta_flags_set & (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED)))) { - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); - } if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f4afa99..72de698 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4359,6 +4359,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct station_parameters params; u8 *mac_addr = NULL; + u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); memset(¶ms, 0, sizeof(params)); @@ -4470,10 +4472,23 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* allow authenticated/associated only if driver handles it */ if (!(rdev->wiphy.features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && - params.sta_flags_mask & - (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED))) - return -EINVAL; + params.sta_flags_mask & auth_assoc) + return -EINVAL; + + /* Older userspace, or userspace wanting to be compatible with + * !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth + * and assoc flags in the mask, but assumes the station will be + * added as associated anyway since this was the required driver + * behaviour before NL80211_FEATURE_FULL_AP_CLIENT_STATE was + * introduced. + * In order to not bother drivers with this quirk in the API + * set the flags in both the mask and set for new stations in + * this case. + */ + if (!(params.sta_flags_mask & auth_assoc)) { + params.sta_flags_mask |= auth_assoc; + params.sta_flags_set |= auth_assoc; + } /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); -- cgit v1.1 From 90f9ba9b89d88072324251da011b2a59992ad3e1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Nov 2015 16:26:14 +0100 Subject: Revert "mac80211: don't advertise NL80211_FEATURE_FULL_AP_CLIENT_STATE" This reverts commit 45bb780a2147b9995f3d288c44ecb87ca8a330e2, the previous two patches fixed the functionality. Signed-off-by: Johannes Berg --- net/mac80211/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 175ffcf..858f6b1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_VIF_TXPOWER | NL80211_FEATURE_MAC_ON_CREATE | - NL80211_FEATURE_USERSPACE_MPM; + NL80211_FEATURE_USERSPACE_MPM | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | -- cgit v1.1 From 86c7ec9eb154020797c39e1cc7dafa92da02f603 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 15:38:43 +0100 Subject: mac80211: properly free skb when r-o-c for TX fails When freeing the TX skb for an off-channel TX, use the correct API to also free the ACK skb that might have been allocated. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1df92fe..6bcdbab 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3474,7 +3474,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, params->wait, cookie, skb, IEEE80211_ROC_TYPE_MGMT_TX); if (ret) - kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); out_unlock: mutex_unlock(&local->mtx); return ret; -- cgit v1.1 From 63b4d8b3736b83126ea531c536eff9f76e4cd739 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 15:41:50 +0100 Subject: mac80211: properly free TX skbs when monitor TX fails We need to free all skbs here, not just the one we peeked from the list. Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bdc224d..3311ce0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1431,7 +1431,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) { - dev_kfree_skb(skb); + ieee80211_purge_tx_queue(&local->hw, skbs); return true; } else vif = NULL; -- cgit v1.1 From 856142cdaa483099f50cac70a16898ead8e4094d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 15:29:53 +0100 Subject: mac80211: catch queue stop underflow If some code stops the queues more times than having started (for when refcounting is used), warn on and reset the counter to 0 to avoid blocking forever. Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 7405802..08af2b3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -288,10 +288,13 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, if (!test_bit(reason, &local->queue_stop_reasons[queue])) return; - if (!refcounted) + if (!refcounted) { local->q_stop_reasons[queue][reason] = 0; - else + } else { local->q_stop_reasons[queue][reason]--; + if (WARN_ON(local->q_stop_reasons[queue][reason] < 0)) + local->q_stop_reasons[queue][reason] = 0; + } if (local->q_stop_reasons[queue][reason] == 0) __clear_bit(reason, &local->queue_stop_reasons[queue]); -- cgit v1.1 From e673a65952b4ab045a3e3eb200fdf408004fb4fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 20:28:27 +0100 Subject: mac80211: fix mgmt-tx abort cookie and leak If a mgmt-tx operation is aborted before it runs, the wrong cookie is reported back to userspace, and the ack_skb gets leaked since the frame is freed directly instead of freeing it using ieee80211_free_txskb(). Fix that. Fixes: 3b79af973cf4 ("mac80211: stop using pointers as userspace cookies") Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 0440103..0fe9f74 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -308,11 +308,10 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) /* was never transmitted */ if (roc->frame) { - cfg80211_mgmt_tx_status(&roc->sdata->wdev, - (unsigned long)roc->frame, + cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie, roc->frame->data, roc->frame->len, false, GFP_KERNEL); - kfree_skb(roc->frame); + ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame); } if (!roc->mgmt_tx_cookie) -- cgit v1.1 From a2fcfccbad43e413de7e7ac39879ba91548f06c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Nov 2015 17:18:35 +0100 Subject: mac80211: move off-channel/mgmt-tx code to offchannel.c This is quite a bit of code that logically depends here since it has to deal with all the remain-on-channel logic. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 504 ++------------------------------------------- net/mac80211/ieee80211_i.h | 19 +- net/mac80211/offchannel.c | 474 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 502 insertions(+), 495 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6bcdbab..b8ef33e6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2504,294 +2504,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } -static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, - struct ieee80211_roc_work *new_roc, - struct ieee80211_roc_work *cur_roc) -{ - unsigned long now = jiffies; - unsigned long remaining = cur_roc->hw_start_time + - msecs_to_jiffies(cur_roc->duration) - - now; - - if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) - return false; - - /* if it doesn't fit entirely, schedule a new one */ - if (new_roc->duration > jiffies_to_msecs(remaining)) - return false; - - ieee80211_handle_roc_started(new_roc); - - /* add to dependents so we send the expired event properly */ - list_add_tail(&new_roc->list, &cur_roc->dependents); - return true; -} - -static u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) -{ - lockdep_assert_held(&local->mtx); - - local->roc_cookie_counter++; - - /* wow, you wrapped 64 bits ... more likely a bug */ - if (WARN_ON(local->roc_cookie_counter == 0)) - local->roc_cookie_counter++; - - return local->roc_cookie_counter; -} - -static int ieee80211_start_roc_work(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *channel, - unsigned int duration, u64 *cookie, - struct sk_buff *txskb, - enum ieee80211_roc_type type) -{ - struct ieee80211_roc_work *roc, *tmp; - bool queued = false; - int ret; - - lockdep_assert_held(&local->mtx); - - if (local->use_chanctx && !local->ops->remain_on_channel) - return -EOPNOTSUPP; - - roc = kzalloc(sizeof(*roc), GFP_KERNEL); - if (!roc) - return -ENOMEM; - - /* - * If the duration is zero, then the driver - * wouldn't actually do anything. Set it to - * 10 for now. - * - * TODO: cancel the off-channel operation - * when we get the SKB's TX status and - * the wait time was zero before. - */ - if (!duration) - duration = 10; - - roc->chan = channel; - roc->duration = duration; - roc->req_duration = duration; - roc->frame = txskb; - roc->type = type; - roc->sdata = sdata; - INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); - INIT_LIST_HEAD(&roc->dependents); - - /* - * cookie is either the roc cookie (for normal roc) - * or the SKB (for mgmt TX) - */ - if (!txskb) { - roc->cookie = ieee80211_mgmt_tx_cookie(local); - *cookie = roc->cookie; - } else { - roc->mgmt_tx_cookie = *cookie; - } - - /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || - local->scanning || ieee80211_is_radar_required(local)) - goto out_check_combine; - - /* if not HW assist, just queue & schedule work */ - if (!local->ops->remain_on_channel) { - ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); - goto out_queue; - } - - /* otherwise actually kick it off here (for error handling) */ - - ret = drv_remain_on_channel(local, sdata, channel, duration, type); - if (ret) { - kfree(roc); - return ret; - } - - roc->started = true; - goto out_queue; - - out_check_combine: - list_for_each_entry(tmp, &local->roc_list, list) { - if (tmp->chan != channel || tmp->sdata != sdata) - continue; - - /* - * Extend this ROC if possible: - * - * If it hasn't started yet, just increase the duration - * and add the new one to the list of dependents. - * If the type of the new ROC has higher priority, modify the - * type of the previous one to match that of the new one. - */ - if (!tmp->started) { - list_add_tail(&roc->list, &tmp->dependents); - tmp->duration = max(tmp->duration, roc->duration); - tmp->type = max(tmp->type, roc->type); - queued = true; - break; - } - - /* If it has already started, it's more difficult ... */ - if (local->ops->remain_on_channel) { - /* - * In the offloaded ROC case, if it hasn't begun, add - * this new one to the dependent list to be handled - * when the master one begins. If it has begun, - * check if it fits entirely within the existing one, - * in which case it will just be dependent as well. - * Otherwise, schedule it by itself. - */ - if (!tmp->hw_begun) { - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - break; - } - - if (ieee80211_coalesce_started_roc(local, roc, tmp)) - queued = true; - } else if (del_timer_sync(&tmp->work.timer)) { - unsigned long new_end; - - /* - * In the software ROC case, cancel the timer, if - * that fails then the finish work is already - * queued/pending and thus we queue the new ROC - * normally, if that succeeds then we can extend - * the timer duration and TX the frame (if any.) - */ - - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - - new_end = jiffies + msecs_to_jiffies(roc->duration); - - /* ok, it was started & we canceled timer */ - if (time_after(new_end, tmp->work.timer.expires)) - mod_timer(&tmp->work.timer, new_end); - else - add_timer(&tmp->work.timer); - - ieee80211_handle_roc_started(roc); - } - break; - } - - out_queue: - if (!queued) - list_add_tail(&roc->list, &local->roc_list); - - return 0; -} - -static int ieee80211_remain_on_channel(struct wiphy *wiphy, - struct wireless_dev *wdev, - struct ieee80211_channel *chan, - unsigned int duration, - u64 *cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - struct ieee80211_local *local = sdata->local; - int ret; - - mutex_lock(&local->mtx); - ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL, - IEEE80211_ROC_TYPE_NORMAL); - mutex_unlock(&local->mtx); - - return ret; -} - -static int ieee80211_cancel_roc(struct ieee80211_local *local, - u64 cookie, bool mgmt_tx) -{ - struct ieee80211_roc_work *roc, *tmp, *found = NULL; - int ret; - - mutex_lock(&local->mtx); - list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { - struct ieee80211_roc_work *dep, *tmp2; - - list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) { - if (!mgmt_tx && dep->cookie != cookie) - continue; - else if (mgmt_tx && dep->mgmt_tx_cookie != cookie) - continue; - /* found dependent item -- just remove it */ - list_del(&dep->list); - mutex_unlock(&local->mtx); - - ieee80211_roc_notify_destroy(dep, true); - return 0; - } - - if (!mgmt_tx && roc->cookie != cookie) - continue; - else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) - continue; - - found = roc; - break; - } - - if (!found) { - mutex_unlock(&local->mtx); - return -ENOENT; - } - - /* - * We found the item to cancel, so do that. Note that it - * may have dependents, which we also cancel (and send - * the expired signal for.) Not doing so would be quite - * tricky here, but we may need to fix it later. - */ - - if (local->ops->remain_on_channel) { - if (found->started) { - ret = drv_cancel_remain_on_channel(local); - if (WARN_ON_ONCE(ret)) { - mutex_unlock(&local->mtx); - return ret; - } - } - - list_del(&found->list); - - if (found->started) - ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); - - ieee80211_roc_notify_destroy(found, true); - } else { - /* work may be pending so use it all the time */ - found->abort = true; - ieee80211_queue_delayed_work(&local->hw, &found->work, 0); - - mutex_unlock(&local->mtx); - - /* work will clean up etc */ - flush_delayed_work(&found->work); - WARN_ON(!found->to_be_freed); - kfree(found); - } - - return 0; -} - -static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, - struct wireless_dev *wdev, - u64 cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - struct ieee80211_local *local = sdata->local; - - return ieee80211_cancel_roc(local, cookie, false); -} - static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, @@ -3262,9 +2974,22 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return err; } -static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, - struct sk_buff *skb, u64 *cookie, - gfp_t gfp) +u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) +{ + lockdep_assert_held(&local->mtx); + + local->roc_cookie_counter++; + + /* wow, you wrapped 64 bits ... more likely a bug */ + if (WARN_ON(local->roc_cookie_counter == 0)) + local->roc_cookie_counter++; + + return local->roc_cookie_counter; +} + +struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, + struct sk_buff *skb, u64 *cookie, + gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; @@ -3292,203 +3017,6 @@ static struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, return ack_skb; } -static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, - struct cfg80211_mgmt_tx_params *params, - u64 *cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb, *ack_skb; - struct sta_info *sta; - const struct ieee80211_mgmt *mgmt = (void *)params->buf; - bool need_offchan = false; - u32 flags; - int ret; - u8 *data; - - if (params->dont_wait_for_ack) - flags = IEEE80211_TX_CTL_NO_ACK; - else - flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_CTL_REQ_TX_STATUS; - - if (params->no_cck) - flags |= IEEE80211_TX_CTL_NO_CCK_RATE; - - switch (sdata->vif.type) { - case NL80211_IFTYPE_ADHOC: - if (!sdata->vif.bss_conf.ibss_joined) - need_offchan = true; - /* fall through */ -#ifdef CONFIG_MAC80211_MESH - case NL80211_IFTYPE_MESH_POINT: - if (ieee80211_vif_is_mesh(&sdata->vif) && - !sdata->u.mesh.mesh_id_len) - need_offchan = true; - /* fall through */ -#endif - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_GO: - if (sdata->vif.type != NL80211_IFTYPE_ADHOC && - !ieee80211_vif_is_mesh(&sdata->vif) && - !rcu_access_pointer(sdata->bss->beacon)) - need_offchan = true; - if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || - mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED || - mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) - break; - rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->da); - rcu_read_unlock(); - if (!sta) - return -ENOLINK; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - sdata_lock(sdata); - if (!sdata->u.mgd.associated || - (params->offchan && params->wait && - local->ops->remain_on_channel && - memcmp(sdata->u.mgd.associated->bssid, - mgmt->bssid, ETH_ALEN))) - need_offchan = true; - sdata_unlock(sdata); - break; - case NL80211_IFTYPE_P2P_DEVICE: - need_offchan = true; - break; - default: - return -EOPNOTSUPP; - } - - /* configurations requiring offchan cannot work if no channel has been - * specified - */ - if (need_offchan && !params->chan) - return -EINVAL; - - mutex_lock(&local->mtx); - - /* Check if the operating channel is the requested channel */ - if (!need_offchan) { - struct ieee80211_chanctx_conf *chanctx_conf; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - - if (chanctx_conf) { - need_offchan = params->chan && - (params->chan != - chanctx_conf->def.chan); - } else if (!params->chan) { - ret = -EINVAL; - rcu_read_unlock(); - goto out_unlock; - } else { - need_offchan = true; - } - rcu_read_unlock(); - } - - if (need_offchan && !params->offchan) { - ret = -EBUSY; - goto out_unlock; - } - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len); - if (!skb) { - ret = -ENOMEM; - goto out_unlock; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - data = skb_put(skb, params->len); - memcpy(data, params->buf, params->len); - - /* Update CSA counters */ - if (sdata->vif.csa_active && - (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_ADHOC) && - params->n_csa_offsets) { - int i; - struct beacon_data *beacon = NULL; - - rcu_read_lock(); - - if (sdata->vif.type == NL80211_IFTYPE_AP) - beacon = rcu_dereference(sdata->u.ap.beacon); - else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - beacon = rcu_dereference(sdata->u.ibss.presp); - else if (ieee80211_vif_is_mesh(&sdata->vif)) - beacon = rcu_dereference(sdata->u.mesh.beacon); - - if (beacon) - for (i = 0; i < params->n_csa_offsets; i++) - data[params->csa_offsets[i]] = - beacon->csa_current_counter; - - rcu_read_unlock(); - } - - IEEE80211_SKB_CB(skb)->flags = flags; - - skb->dev = sdata->dev; - - if (!params->dont_wait_for_ack) { - /* make a copy to preserve the frame contents - * in case of encryption. - */ - ack_skb = ieee80211_make_ack_skb(local, skb, cookie, - GFP_KERNEL); - if (IS_ERR(ack_skb)) { - ret = PTR_ERR(ack_skb); - kfree_skb(skb); - goto out_unlock; - } - } else { - /* Assign a dummy non-zero cookie, it's not sent to - * userspace in this case but we rely on its value - * internally in the need_offchan case to distinguish - * mgmt-tx from remain-on-channel. - */ - *cookie = 0xffffffff; - } - - if (!need_offchan) { - ieee80211_tx_skb(sdata, skb); - ret = 0; - goto out_unlock; - } - - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | - IEEE80211_TX_INTFL_OFFCHAN_TX_OK; - if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) - IEEE80211_SKB_CB(skb)->hw_queue = - local->hw.offchannel_tx_hw_queue; - - /* This will handle all kinds of coalescing and immediate TX */ - ret = ieee80211_start_roc_work(local, sdata, params->chan, - params->wait, cookie, skb, - IEEE80211_ROC_TYPE_MGMT_TX); - if (ret) - ieee80211_free_txskb(&local->hw, skb); - out_unlock: - mutex_unlock(&local->mtx); - return ret; -} - -static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, - struct wireless_dev *wdev, - u64 cookie) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - return ieee80211_cancel_roc(local, cookie, true); -} - static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, struct wireless_dev *wdev, u16 frame_type, bool reg) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d832bd5..b03d5410 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1483,6 +1483,11 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); +u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); +struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, + struct sk_buff *skb, u64 *cookie, + gfp_t gfp); + /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, @@ -1577,16 +1582,22 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local); void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct work_struct *work); -/* off-channel helpers */ +/* off-channel/mgmt-tx */ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local); void ieee80211_roc_setup(struct ieee80211_local *local); void ieee80211_start_next_roc(struct ieee80211_local *local); void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free); -void ieee80211_sw_roc_work(struct work_struct *work); -void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); +int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + struct ieee80211_channel *chan, + unsigned int duration, u64 *cookie); +int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie); +int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_mgmt_tx_params *params, u64 *cookie); +int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie); /* channel switch handling */ void ieee80211_csa_finalize_work(struct work_struct *work); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 0fe9f74..b737437 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -187,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) false); } -void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) +static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) { if (roc->notified) return; @@ -299,7 +299,8 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) } } -void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) +static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, + bool free) { struct ieee80211_roc_work *dep, *tmp; @@ -328,7 +329,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free) roc->to_be_freed = true; } -void ieee80211_sw_roc_work(struct work_struct *work) +static void ieee80211_sw_roc_work(struct work_struct *work) { struct ieee80211_roc_work *roc = container_of(work, struct ieee80211_roc_work, work.work); @@ -455,6 +456,473 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); +static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, + struct ieee80211_roc_work *new_roc, + struct ieee80211_roc_work *cur_roc) +{ + unsigned long now = jiffies; + unsigned long remaining = cur_roc->hw_start_time + + msecs_to_jiffies(cur_roc->duration) - + now; + + if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) + return false; + + /* if it doesn't fit entirely, schedule a new one */ + if (new_roc->duration > jiffies_to_msecs(remaining)) + return false; + + ieee80211_handle_roc_started(new_roc); + + /* add to dependents so we send the expired event properly */ + list_add_tail(&new_roc->list, &cur_roc->dependents); + return true; +} + +static int ieee80211_start_roc_work(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + unsigned int duration, u64 *cookie, + struct sk_buff *txskb, + enum ieee80211_roc_type type) +{ + struct ieee80211_roc_work *roc, *tmp; + bool queued = false; + int ret; + + lockdep_assert_held(&local->mtx); + + if (local->use_chanctx && !local->ops->remain_on_channel) + return -EOPNOTSUPP; + + roc = kzalloc(sizeof(*roc), GFP_KERNEL); + if (!roc) + return -ENOMEM; + + /* + * If the duration is zero, then the driver + * wouldn't actually do anything. Set it to + * 10 for now. + * + * TODO: cancel the off-channel operation + * when we get the SKB's TX status and + * the wait time was zero before. + */ + if (!duration) + duration = 10; + + roc->chan = channel; + roc->duration = duration; + roc->req_duration = duration; + roc->frame = txskb; + roc->type = type; + roc->sdata = sdata; + INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); + INIT_LIST_HEAD(&roc->dependents); + + /* + * cookie is either the roc cookie (for normal roc) + * or the SKB (for mgmt TX) + */ + if (!txskb) { + roc->cookie = ieee80211_mgmt_tx_cookie(local); + *cookie = roc->cookie; + } else { + roc->mgmt_tx_cookie = *cookie; + } + + /* if there's one pending or we're scanning, queue this one */ + if (!list_empty(&local->roc_list) || + local->scanning || ieee80211_is_radar_required(local)) + goto out_check_combine; + + /* if not HW assist, just queue & schedule work */ + if (!local->ops->remain_on_channel) { + ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); + goto out_queue; + } + + /* otherwise actually kick it off here (for error handling) */ + + ret = drv_remain_on_channel(local, sdata, channel, duration, type); + if (ret) { + kfree(roc); + return ret; + } + + roc->started = true; + goto out_queue; + + out_check_combine: + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->chan != channel || tmp->sdata != sdata) + continue; + + /* + * Extend this ROC if possible: + * + * If it hasn't started yet, just increase the duration + * and add the new one to the list of dependents. + * If the type of the new ROC has higher priority, modify the + * type of the previous one to match that of the new one. + */ + if (!tmp->started) { + list_add_tail(&roc->list, &tmp->dependents); + tmp->duration = max(tmp->duration, roc->duration); + tmp->type = max(tmp->type, roc->type); + queued = true; + break; + } + + /* If it has already started, it's more difficult ... */ + if (local->ops->remain_on_channel) { + /* + * In the offloaded ROC case, if it hasn't begun, add + * this new one to the dependent list to be handled + * when the master one begins. If it has begun, + * check if it fits entirely within the existing one, + * in which case it will just be dependent as well. + * Otherwise, schedule it by itself. + */ + if (!tmp->hw_begun) { + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + break; + } + + if (ieee80211_coalesce_started_roc(local, roc, tmp)) + queued = true; + } else if (del_timer_sync(&tmp->work.timer)) { + unsigned long new_end; + + /* + * In the software ROC case, cancel the timer, if + * that fails then the finish work is already + * queued/pending and thus we queue the new ROC + * normally, if that succeeds then we can extend + * the timer duration and TX the frame (if any.) + */ + + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + + new_end = jiffies + msecs_to_jiffies(roc->duration); + + /* ok, it was started & we canceled timer */ + if (time_after(new_end, tmp->work.timer.expires)) + mod_timer(&tmp->work.timer, new_end); + else + add_timer(&tmp->work.timer); + + ieee80211_handle_roc_started(roc); + } + break; + } + + out_queue: + if (!queued) + list_add_tail(&roc->list, &local->roc_list); + + return 0; +} + +int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + struct ieee80211_channel *chan, + unsigned int duration, u64 *cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = sdata->local; + int ret; + + mutex_lock(&local->mtx); + ret = ieee80211_start_roc_work(local, sdata, chan, + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); + mutex_unlock(&local->mtx); + + return ret; +} + +static int ieee80211_cancel_roc(struct ieee80211_local *local, + u64 cookie, bool mgmt_tx) +{ + struct ieee80211_roc_work *roc, *tmp, *found = NULL; + int ret; + + mutex_lock(&local->mtx); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + struct ieee80211_roc_work *dep, *tmp2; + + list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) { + if (!mgmt_tx && dep->cookie != cookie) + continue; + else if (mgmt_tx && dep->mgmt_tx_cookie != cookie) + continue; + /* found dependent item -- just remove it */ + list_del(&dep->list); + mutex_unlock(&local->mtx); + + ieee80211_roc_notify_destroy(dep, true); + return 0; + } + + if (!mgmt_tx && roc->cookie != cookie) + continue; + else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) + continue; + + found = roc; + break; + } + + if (!found) { + mutex_unlock(&local->mtx); + return -ENOENT; + } + + /* + * We found the item to cancel, so do that. Note that it + * may have dependents, which we also cancel (and send + * the expired signal for.) Not doing so would be quite + * tricky here, but we may need to fix it later. + */ + + if (local->ops->remain_on_channel) { + if (found->started) { + ret = drv_cancel_remain_on_channel(local); + if (WARN_ON_ONCE(ret)) { + mutex_unlock(&local->mtx); + return ret; + } + } + + list_del(&found->list); + + if (found->started) + ieee80211_start_next_roc(local); + mutex_unlock(&local->mtx); + + ieee80211_roc_notify_destroy(found, true); + } else { + /* work may be pending so use it all the time */ + found->abort = true; + ieee80211_queue_delayed_work(&local->hw, &found->work, 0); + + mutex_unlock(&local->mtx); + + /* work will clean up etc */ + flush_delayed_work(&found->work); + WARN_ON(!found->to_be_freed); + kfree(found); + } + + return 0; +} + +int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = sdata->local; + + return ieee80211_cancel_roc(local, cookie, false); +} + +int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_mgmt_tx_params *params, u64 *cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb, *ack_skb; + struct sta_info *sta; + const struct ieee80211_mgmt *mgmt = (void *)params->buf; + bool need_offchan = false; + u32 flags; + int ret; + u8 *data; + + if (params->dont_wait_for_ack) + flags = IEEE80211_TX_CTL_NO_ACK; + else + flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + if (params->no_cck) + flags |= IEEE80211_TX_CTL_NO_CCK_RATE; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + if (!sdata->vif.bss_conf.ibss_joined) + need_offchan = true; + /* fall through */ +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif) && + !sdata->u.mesh.mesh_id_len) + need_offchan = true; + /* fall through */ +#endif + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + !ieee80211_vif_is_mesh(&sdata->vif) && + !rcu_access_pointer(sdata->bss->beacon)) + need_offchan = true; + if (!ieee80211_is_action(mgmt->frame_control) || + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || + mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED || + mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) + break; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->da); + rcu_read_unlock(); + if (!sta) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + sdata_lock(sdata); + if (!sdata->u.mgd.associated || + (params->offchan && params->wait && + local->ops->remain_on_channel && + memcmp(sdata->u.mgd.associated->bssid, + mgmt->bssid, ETH_ALEN))) + need_offchan = true; + sdata_unlock(sdata); + break; + case NL80211_IFTYPE_P2P_DEVICE: + need_offchan = true; + break; + default: + return -EOPNOTSUPP; + } + + /* configurations requiring offchan cannot work if no channel has been + * specified + */ + if (need_offchan && !params->chan) + return -EINVAL; + + mutex_lock(&local->mtx); + + /* Check if the operating channel is the requested channel */ + if (!need_offchan) { + struct ieee80211_chanctx_conf *chanctx_conf; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + + if (chanctx_conf) { + need_offchan = params->chan && + (params->chan != + chanctx_conf->def.chan); + } else if (!params->chan) { + ret = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } else { + need_offchan = true; + } + rcu_read_unlock(); + } + + if (need_offchan && !params->offchan) { + ret = -EBUSY; + goto out_unlock; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + params->len); + if (!skb) { + ret = -ENOMEM; + goto out_unlock; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + data = skb_put(skb, params->len); + memcpy(data, params->buf, params->len); + + /* Update CSA counters */ + if (sdata->vif.csa_active && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_ADHOC) && + params->n_csa_offsets) { + int i; + struct beacon_data *beacon = NULL; + + rcu_read_lock(); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + beacon = rcu_dereference(sdata->u.ap.beacon); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + beacon = rcu_dereference(sdata->u.ibss.presp); + else if (ieee80211_vif_is_mesh(&sdata->vif)) + beacon = rcu_dereference(sdata->u.mesh.beacon); + + if (beacon) + for (i = 0; i < params->n_csa_offsets; i++) + data[params->csa_offsets[i]] = + beacon->csa_current_counter; + + rcu_read_unlock(); + } + + IEEE80211_SKB_CB(skb)->flags = flags; + + skb->dev = sdata->dev; + + if (!params->dont_wait_for_ack) { + /* make a copy to preserve the frame contents + * in case of encryption. + */ + ack_skb = ieee80211_make_ack_skb(local, skb, cookie, + GFP_KERNEL); + if (IS_ERR(ack_skb)) { + ret = PTR_ERR(ack_skb); + kfree_skb(skb); + goto out_unlock; + } + } else { + /* Assign a dummy non-zero cookie, it's not sent to + * userspace in this case but we rely on its value + * internally in the need_offchan case to distinguish + * mgmt-tx from remain-on-channel. + */ + *cookie = 0xffffffff; + } + + if (!need_offchan) { + ieee80211_tx_skb(sdata, skb); + ret = 0; + goto out_unlock; + } + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN | + IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) + IEEE80211_SKB_CB(skb)->hw_queue = + local->hw.offchannel_tx_hw_queue; + + /* This will handle all kinds of coalescing and immediate TX */ + ret = ieee80211_start_roc_work(local, sdata, params->chan, + params->wait, cookie, skb, + IEEE80211_ROC_TYPE_MGMT_TX); + if (ret) + ieee80211_free_txskb(&local->hw, skb); + out_unlock: + mutex_unlock(&local->mtx); + return ret; +} + +int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return ieee80211_cancel_roc(local, cookie, true); +} + void ieee80211_roc_setup(struct ieee80211_local *local) { INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); -- cgit v1.1 From 5ee00dbd52c57f37d74306ce6e8db26171f599b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Nov 2015 14:25:49 +0100 Subject: mac80211: simplify ack_skb handling Since the cookie is assigned inside ieee80211_make_ack_skb() now, we no longer need to return the ack_skb as the cookie and can simplify the function's return and the callers. Also rename it to ieee80211_attach_ack_skb() to more accurately reflect its purpose. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 18 ++++++++---------- net/mac80211/ieee80211_i.h | 5 ++--- net/mac80211/offchannel.c | 8 +++----- 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b8ef33e6..2d1c4c3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2987,9 +2987,8 @@ u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) return local->roc_cookie_counter; } -struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, - struct sk_buff *skb, u64 *cookie, - gfp_t gfp) +int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, + u64 *cookie, gfp_t gfp) { unsigned long spin_flags; struct sk_buff *ack_skb; @@ -2997,7 +2996,7 @@ struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, ack_skb = skb_copy(skb, gfp); if (!ack_skb) - return ERR_PTR(-ENOMEM); + return -ENOMEM; spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, @@ -3006,7 +3005,7 @@ struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, if (id < 0) { kfree_skb(ack_skb); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } IEEE80211_SKB_CB(skb)->ack_frame_id = id; @@ -3014,7 +3013,7 @@ struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; - return ack_skb; + return 0; } static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, @@ -3092,7 +3091,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; - struct sk_buff *skb, *ack_skb; + struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos; @@ -3160,10 +3159,9 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, if (qos) nullfunc->qos_ctrl = cpu_to_le16(7); - ack_skb = ieee80211_make_ack_skb(local, skb, cookie, GFP_ATOMIC); - if (IS_ERR(ack_skb)) { + ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_ATOMIC); + if (ret) { kfree_skb(skb); - ret = PTR_ERR(ack_skb); goto unlock; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b03d5410..0c50031 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1484,9 +1484,8 @@ void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); -struct sk_buff *ieee80211_make_ack_skb(struct ieee80211_local *local, - struct sk_buff *skb, u64 *cookie, - gfp_t gfp); +int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, + u64 *cookie, gfp_t gfp); /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b737437..6a8178f 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -733,7 +733,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; - struct sk_buff *skb, *ack_skb; + struct sk_buff *skb; struct sta_info *sta; const struct ieee80211_mgmt *mgmt = (void *)params->buf; bool need_offchan = false; @@ -876,10 +876,8 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* make a copy to preserve the frame contents * in case of encryption. */ - ack_skb = ieee80211_make_ack_skb(local, skb, cookie, - GFP_KERNEL); - if (IS_ERR(ack_skb)) { - ret = PTR_ERR(ack_skb); + ret = ieee80211_attach_ack_skb(local, skb, cookie, GFP_KERNEL); + if (ret) { kfree_skb(skb); goto out_unlock; } -- cgit v1.1 From aaa016ccd5df89d73483d0d51ee1f692978ccc35 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Nov 2015 23:53:51 +0100 Subject: mac80211: rewrite remain-on-channel logic Jouni found a bug in the remain-on-channel logic: when a short item is queued, a long item is combined with it extending the original one, and then the long item is deleted, the timeout doesn't go back to the short one, and the short item ends up taking a long time. In this case, this showed as blocking scan when running two test cases back to back - the scan from the second was delayed even though all the remain-on-channel items should long have been gone. Fixing this with the current data structures turns out to be a bit complicated, we just remove the long item from the dependents list right now and don't recalculate the timeouts. There's a somewhat similar bug where we delete the short item and all the dependents go with it; to fix this we'd have to move them from the dependents to the real list. Instead of trying to do that, rewrite the code to not have all this complexity in the data structures: use a single list and allow more than one entry in it being marked as started. This makes the code a bit more complex, the worker needs to understand that it might need to just remove one of the started items, while keeping the device off-channel, but that's not more complicated than the nested data structures. This then fixes both issues described, and makes it easier to also limit the overall off-channel time when combining. TODO: as before, with hardware remain-on-channel, deleting an item after combining results in cancelling them all - we can keep track of the time elapsed and only cancel after that to fix this. Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 7 +- net/mac80211/main.c | 1 + net/mac80211/offchannel.c | 599 +++++++++++++++++++++++---------------------- 3 files changed, 316 insertions(+), 291 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0c50031..c30b684 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -325,19 +325,15 @@ struct mesh_preq_queue { struct ieee80211_roc_work { struct list_head list; - struct list_head dependents; - - struct delayed_work work; struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *chan; bool started, abort, hw_begun, notified; - bool to_be_freed; bool on_channel; - unsigned long hw_start_time; + unsigned long start_time; u32 duration, req_duration; struct sk_buff *frame; @@ -1335,6 +1331,7 @@ struct ieee80211_local { /* * Remain-on-channel support */ + struct delayed_work roc_work; struct list_head roc_list; struct work_struct hw_roc_start, hw_roc_done; unsigned long hw_roc_start_time; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 858f6b1..6bcf0fa 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1149,6 +1149,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); + cancel_delayed_work_sync(&local->roc_work); cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); cancel_work_sync(&local->tdls_chsw_work); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6a8178f..cfd3356 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -187,11 +187,76 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) false); } -static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) +static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) { - if (roc->notified) + /* was never transmitted */ + if (roc->frame) { + cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie, + roc->frame->data, roc->frame->len, + false, GFP_KERNEL); + ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame); + } + + if (!roc->mgmt_tx_cookie) + cfg80211_remain_on_channel_expired(&roc->sdata->wdev, + roc->cookie, roc->chan, + GFP_KERNEL); + + list_del(&roc->list); + kfree(roc); +} + +static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local, + unsigned long now) +{ + struct ieee80211_roc_work *roc, *tmp; + long remaining_dur_min = LONG_MAX; + + lockdep_assert_held(&local->mtx); + + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + long remaining; + + if (!roc->started) + break; + + remaining = roc->start_time + + msecs_to_jiffies(roc->duration) - + now; + + if (roc->abort || remaining <= 0) + ieee80211_roc_notify_destroy(roc); + else + remaining_dur_min = min(remaining_dur_min, remaining); + } + + return remaining_dur_min; +} + +static bool ieee80211_recalc_sw_work(struct ieee80211_local *local, + unsigned long now) +{ + long dur = ieee80211_end_finished_rocs(local, now); + + if (dur == LONG_MAX) + return false; + + mod_delayed_work(local->workqueue, &local->roc_work, dur); + return true; +} + +static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc, + unsigned long start_time) +{ + struct ieee80211_local *local = roc->sdata->local; + + if (WARN_ON(roc->notified)) return; + roc->start_time = start_time; + roc->started = true; + roc->hw_begun = true; + if (roc->mgmt_tx_cookie) { if (!WARN_ON(!roc->frame)) { ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7, @@ -205,40 +270,26 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) } roc->notified = true; + + if (!local->ops->remain_on_channel) + ieee80211_recalc_sw_work(local, start_time); } static void ieee80211_hw_roc_start(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_start); - struct ieee80211_roc_work *roc, *dep, *tmp; + struct ieee80211_roc_work *roc; mutex_lock(&local->mtx); - if (list_empty(&local->roc_list)) - goto out_unlock; - - roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, - list); - - if (!roc->started) - goto out_unlock; - - roc->hw_begun = true; - roc->hw_start_time = local->hw_roc_start_time; - - ieee80211_handle_roc_started(roc); - list_for_each_entry_safe(dep, tmp, &roc->dependents, list) { - ieee80211_handle_roc_started(dep); + list_for_each_entry(roc, &local->roc_list, list) { + if (!roc->started) + break; - if (dep->duration > roc->duration) { - u32 dur = dep->duration; - dep->duration = dur - roc->duration; - roc->duration = dur; - list_move(&dep->list, &roc->list); - } + ieee80211_handle_roc_started(roc, local->hw_roc_start_time); } - out_unlock: + mutex_unlock(&local->mtx); } @@ -254,34 +305,40 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); -void ieee80211_start_next_roc(struct ieee80211_local *local) +static void _ieee80211_start_next_roc(struct ieee80211_local *local) { - struct ieee80211_roc_work *roc; + struct ieee80211_roc_work *roc, *tmp; + enum ieee80211_roc_type type; + u32 min_dur, max_dur; lockdep_assert_held(&local->mtx); - if (list_empty(&local->roc_list)) { - ieee80211_run_deferred_scan(local); + if (WARN_ON(list_empty(&local->roc_list))) return; - } roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, list); - if (WARN_ON_ONCE(roc->started)) + if (WARN_ON(roc->started)) return; - if (local->ops->remain_on_channel) { - int ret, duration = roc->duration; - - /* XXX: duplicated, see ieee80211_start_roc_work() */ - if (!duration) - duration = 10; + min_dur = roc->duration; + max_dur = roc->duration; + type = roc->type; - ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - duration, roc->type); + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp == roc) + continue; + if (tmp->sdata != roc->sdata || tmp->chan != roc->chan) + break; + max_dur = max(tmp->duration, max_dur); + min_dur = min(tmp->duration, min_dur); + type = max(tmp->type, type); + } - roc->started = true; + if (local->ops->remain_on_channel) { + int ret = drv_remain_on_channel(local, roc->sdata, roc->chan, + max_dur, type); if (ret) { wiphy_warn(local->hw.wiphy, @@ -290,74 +347,24 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) * queue the work struct again to avoid recursion * when multiple failures occur */ - ieee80211_remain_on_channel_expired(&local->hw); + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->sdata != roc->sdata || + tmp->chan != roc->chan) + break; + tmp->started = true; + tmp->abort = true; + } + ieee80211_queue_work(&local->hw, &local->hw_roc_done); + return; } - } else { - /* delay it a bit */ - ieee80211_queue_delayed_work(&local->hw, &roc->work, - round_jiffies_relative(HZ/2)); - } -} - -static void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, - bool free) -{ - struct ieee80211_roc_work *dep, *tmp; - - if (WARN_ON(roc->to_be_freed)) - return; - - /* was never transmitted */ - if (roc->frame) { - cfg80211_mgmt_tx_status(&roc->sdata->wdev, roc->mgmt_tx_cookie, - roc->frame->data, roc->frame->len, - false, GFP_KERNEL); - ieee80211_free_txskb(&roc->sdata->local->hw, roc->frame); - } - - if (!roc->mgmt_tx_cookie) - cfg80211_remain_on_channel_expired(&roc->sdata->wdev, - roc->cookie, roc->chan, - GFP_KERNEL); - - list_for_each_entry_safe(dep, tmp, &roc->dependents, list) - ieee80211_roc_notify_destroy(dep, true); - - if (free) - kfree(roc); - else - roc->to_be_freed = true; -} - -static void ieee80211_sw_roc_work(struct work_struct *work) -{ - struct ieee80211_roc_work *roc = - container_of(work, struct ieee80211_roc_work, work.work); - struct ieee80211_sub_if_data *sdata = roc->sdata; - struct ieee80211_local *local = sdata->local; - bool started, on_channel; - - mutex_lock(&local->mtx); - - if (roc->to_be_freed) - goto out_unlock; - - if (roc->abort) - goto finish; - - if (WARN_ON(list_empty(&local->roc_list))) - goto out_unlock; - - if (WARN_ON(roc != list_first_entry(&local->roc_list, - struct ieee80211_roc_work, - list))) - goto out_unlock; - - if (!roc->started) { - struct ieee80211_roc_work *dep; - - WARN_ON(local->use_chanctx); + /* we'll notify about the start once the HW calls back */ + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->sdata != roc->sdata || tmp->chan != roc->chan) + break; + tmp->started = true; + } + } else { /* If actually operating on the desired channel (with at least * 20 MHz channel width) don't stop all the operations but still * treat it as though the ROC operation started properly, so @@ -377,27 +384,72 @@ static void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_hw_config(local, 0); } - /* tell userspace or send frame */ - ieee80211_handle_roc_started(roc); - list_for_each_entry(dep, &roc->dependents, list) - ieee80211_handle_roc_started(dep); + ieee80211_queue_delayed_work(&local->hw, &local->roc_work, + msecs_to_jiffies(min_dur)); + + /* tell userspace or send frame(s) */ + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->sdata != roc->sdata || tmp->chan != roc->chan) + break; + + tmp->on_channel = roc->on_channel; + ieee80211_handle_roc_started(tmp, jiffies); + } + } +} + +void ieee80211_start_next_roc(struct ieee80211_local *local) +{ + struct ieee80211_roc_work *roc; + + lockdep_assert_held(&local->mtx); + + if (list_empty(&local->roc_list)) { + ieee80211_run_deferred_scan(local); + return; + } + + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); + + if (WARN_ON_ONCE(roc->started)) + return; + + if (local->ops->remain_on_channel) { + _ieee80211_start_next_roc(local); + } else { + /* delay it a bit */ + ieee80211_queue_delayed_work(&local->hw, &local->roc_work, + round_jiffies_relative(HZ/2)); + } +} + +static void __ieee80211_roc_work(struct ieee80211_local *local) +{ + struct ieee80211_roc_work *roc; + bool on_channel; + + lockdep_assert_held(&local->mtx); + + if (WARN_ON(local->ops->remain_on_channel)) + return; - /* if it was pure TX, just finish right away */ - if (!roc->duration) - goto finish; + roc = list_first_entry_or_null(&local->roc_list, + struct ieee80211_roc_work, list); + if (!roc) + return; - roc->started = true; - ieee80211_queue_delayed_work(&local->hw, &roc->work, - msecs_to_jiffies(roc->duration)); + if (!roc->started) { + WARN_ON(local->use_chanctx); + _ieee80211_start_next_roc(local); } else { - /* finish this ROC */ - finish: - list_del(&roc->list); - started = roc->started; on_channel = roc->on_channel; - ieee80211_roc_notify_destroy(roc, !roc->abort); + if (ieee80211_recalc_sw_work(local, jiffies)) + return; - if (started && !on_channel) { + /* careful - roc pointer became invalid during recalc */ + + if (!on_channel) { ieee80211_flush_queues(local, NULL, false); local->tmp_channel = NULL; @@ -407,14 +459,17 @@ static void ieee80211_sw_roc_work(struct work_struct *work) } ieee80211_recalc_idle(local); - - if (started) - ieee80211_start_next_roc(local); - else if (list_empty(&local->roc_list)) - ieee80211_run_deferred_scan(local); + ieee80211_start_next_roc(local); } +} - out_unlock: +static void ieee80211_roc_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, roc_work.work); + + mutex_lock(&local->mtx); + __ieee80211_roc_work(local); mutex_unlock(&local->mtx); } @@ -422,27 +477,14 @@ static void ieee80211_hw_roc_done(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_done); - struct ieee80211_roc_work *roc; mutex_lock(&local->mtx); - if (list_empty(&local->roc_list)) - goto out_unlock; - - roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, - list); - - if (!roc->started) - goto out_unlock; - - list_del(&roc->list); - - ieee80211_roc_notify_destroy(roc, true); + ieee80211_end_finished_rocs(local, jiffies); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); - out_unlock: mutex_unlock(&local->mtx); } @@ -456,26 +498,41 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); -static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local, - struct ieee80211_roc_work *new_roc, - struct ieee80211_roc_work *cur_roc) +static bool +ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local, + struct ieee80211_roc_work *new_roc, + struct ieee80211_roc_work *cur_roc) { unsigned long now = jiffies; - unsigned long remaining = cur_roc->hw_start_time + - msecs_to_jiffies(cur_roc->duration) - - now; + unsigned long remaining; + + if (WARN_ON(!cur_roc->started)) + return false; - if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun)) + /* if it was scheduled in the hardware, but not started yet, + * we can only combine if the older one had a longer duration + */ + if (!cur_roc->hw_begun && new_roc->duration > cur_roc->duration) return false; + remaining = cur_roc->start_time + + msecs_to_jiffies(cur_roc->duration) - + now; + /* if it doesn't fit entirely, schedule a new one */ if (new_roc->duration > jiffies_to_msecs(remaining)) return false; - ieee80211_handle_roc_started(new_roc); + /* add just after the current one so we combine their finish later */ + list_add(&new_roc->list, &cur_roc->list); + + /* if the existing one has already begun then let this one also + * begin, otherwise they'll both be marked properly by the work + * struct that runs once the driver notifies us of the beginning + */ + if (cur_roc->hw_begun) + ieee80211_handle_roc_started(new_roc, now); - /* add to dependents so we send the expired event properly */ - list_add_tail(&new_roc->list, &cur_roc->dependents); return true; } @@ -487,7 +544,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, enum ieee80211_roc_type type) { struct ieee80211_roc_work *roc, *tmp; - bool queued = false; + bool queued = false, combine_started = true; int ret; lockdep_assert_held(&local->mtx); @@ -517,8 +574,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->frame = txskb; roc->type = type; roc->sdata = sdata; - INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); - INIT_LIST_HEAD(&roc->dependents); /* * cookie is either the roc cookie (for normal roc) @@ -531,95 +586,88 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->mgmt_tx_cookie = *cookie; } - /* if there's one pending or we're scanning, queue this one */ - if (!list_empty(&local->roc_list) || - local->scanning || ieee80211_is_radar_required(local)) - goto out_check_combine; - - /* if not HW assist, just queue & schedule work */ - if (!local->ops->remain_on_channel) { - ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); - goto out_queue; - } - - /* otherwise actually kick it off here (for error handling) */ + /* if there's no need to queue, handle it immediately */ + if (list_empty(&local->roc_list) && + !local->scanning && !ieee80211_is_radar_required(local)) { + /* if not HW assist, just queue & schedule work */ + if (!local->ops->remain_on_channel) { + list_add_tail(&roc->list, &local->roc_list); + ieee80211_queue_delayed_work(&local->hw, + &local->roc_work, 0); + } else { + /* otherwise actually kick it off here + * (for error handling) + */ + ret = drv_remain_on_channel(local, sdata, channel, + duration, type); + if (ret) { + kfree(roc); + return ret; + } + roc->started = true; + list_add_tail(&roc->list, &local->roc_list); + } - ret = drv_remain_on_channel(local, sdata, channel, duration, type); - if (ret) { - kfree(roc); - return ret; + return 0; } - roc->started = true; - goto out_queue; + /* otherwise handle queueing */ - out_check_combine: list_for_each_entry(tmp, &local->roc_list, list) { if (tmp->chan != channel || tmp->sdata != sdata) continue; /* - * Extend this ROC if possible: - * - * If it hasn't started yet, just increase the duration - * and add the new one to the list of dependents. - * If the type of the new ROC has higher priority, modify the - * type of the previous one to match that of the new one. + * Extend this ROC if possible: If it hasn't started, add + * just after the new one to combine. */ if (!tmp->started) { - list_add_tail(&roc->list, &tmp->dependents); - tmp->duration = max(tmp->duration, roc->duration); - tmp->type = max(tmp->type, roc->type); + list_add(&roc->list, &tmp->list); queued = true; break; } - /* If it has already started, it's more difficult ... */ - if (local->ops->remain_on_channel) { - /* - * In the offloaded ROC case, if it hasn't begun, add - * this new one to the dependent list to be handled - * when the master one begins. If it has begun, - * check if it fits entirely within the existing one, - * in which case it will just be dependent as well. - * Otherwise, schedule it by itself. - */ - if (!tmp->hw_begun) { - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - break; - } - - if (ieee80211_coalesce_started_roc(local, roc, tmp)) - queued = true; - } else if (del_timer_sync(&tmp->work.timer)) { - unsigned long new_end; + if (!combine_started) + continue; - /* - * In the software ROC case, cancel the timer, if - * that fails then the finish work is already - * queued/pending and thus we queue the new ROC - * normally, if that succeeds then we can extend - * the timer duration and TX the frame (if any.) + if (!local->ops->remain_on_channel) { + /* If there's no hardware remain-on-channel, and + * doing so won't push us over the maximum r-o-c + * we allow, then we can just add the new one to + * the list and mark it as having started now. + * If it would push over the limit, don't try to + * combine with other started ones (that haven't + * been running as long) but potentially sort it + * with others that had the same fate. */ + unsigned long now = jiffies; + u32 elapsed = jiffies_to_msecs(now - tmp->start_time); + struct wiphy *wiphy = local->hw.wiphy; + u32 max_roc = wiphy->max_remain_on_channel_duration; - list_add_tail(&roc->list, &tmp->dependents); - queued = true; - - new_end = jiffies + msecs_to_jiffies(roc->duration); - - /* ok, it was started & we canceled timer */ - if (time_after(new_end, tmp->work.timer.expires)) - mod_timer(&tmp->work.timer, new_end); - else - add_timer(&tmp->work.timer); + if (elapsed + roc->duration > max_roc) { + combine_started = false; + continue; + } - ieee80211_handle_roc_started(roc); + list_add(&roc->list, &tmp->list); + queued = true; + roc->on_channel = tmp->on_channel; + ieee80211_handle_roc_started(roc, now); + break; } - break; + + queued = ieee80211_coalesce_hw_started_roc(local, roc, tmp); + if (queued) + break; + /* if it wasn't queued, perhaps it can be combined with + * another that also couldn't get combined previously, + * but no need to check for already started ones, since + * that can't work. + */ + combine_started = false; } - out_queue: if (!queued) list_add_tail(&roc->list, &local->roc_list); @@ -651,21 +699,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { - struct ieee80211_roc_work *dep, *tmp2; - - list_for_each_entry_safe(dep, tmp2, &roc->dependents, list) { - if (!mgmt_tx && dep->cookie != cookie) - continue; - else if (mgmt_tx && dep->mgmt_tx_cookie != cookie) - continue; - /* found dependent item -- just remove it */ - list_del(&dep->list); - mutex_unlock(&local->mtx); - - ieee80211_roc_notify_destroy(dep, true); - return 0; - } - if (!mgmt_tx && roc->cookie != cookie) continue; else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) @@ -680,42 +713,44 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, return -ENOENT; } - /* - * We found the item to cancel, so do that. Note that it - * may have dependents, which we also cancel (and send - * the expired signal for.) Not doing so would be quite - * tricky here, but we may need to fix it later. - */ + if (!found->started) { + ieee80211_roc_notify_destroy(found); + goto out_unlock; + } if (local->ops->remain_on_channel) { - if (found->started) { - ret = drv_cancel_remain_on_channel(local); - if (WARN_ON_ONCE(ret)) { - mutex_unlock(&local->mtx); - return ret; - } + ret = drv_cancel_remain_on_channel(local); + if (WARN_ON_ONCE(ret)) { + mutex_unlock(&local->mtx); + return ret; } - list_del(&found->list); + /* TODO: + * if multiple items were combined here then we really shouldn't + * cancel them all - we should wait for as much time as needed + * for the longest remaining one, and only then cancel ... + */ + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (!roc->started) + break; + if (roc == found) + found = NULL; + ieee80211_roc_notify_destroy(roc); + } - if (found->started) - ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); + /* that really must not happen - it was started */ + WARN_ON(found); - ieee80211_roc_notify_destroy(found, true); + ieee80211_start_next_roc(local); } else { - /* work may be pending so use it all the time */ + /* go through work struct to return to the operating channel */ found->abort = true; - ieee80211_queue_delayed_work(&local->hw, &found->work, 0); - - mutex_unlock(&local->mtx); - - /* work will clean up etc */ - flush_delayed_work(&found->work); - WARN_ON(!found->to_be_freed); - kfree(found); + mod_delayed_work(local->workqueue, &local->roc_work, 0); } + out_unlock: + mutex_unlock(&local->mtx); + return 0; } @@ -925,6 +960,7 @@ void ieee80211_roc_setup(struct ieee80211_local *local) { INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); + INIT_DELAYED_WORK(&local->roc_work, ieee80211_roc_work); INIT_LIST_HEAD(&local->roc_list); } @@ -932,36 +968,27 @@ void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { struct ieee80211_roc_work *roc, *tmp; - LIST_HEAD(tmp_list); + bool work_to_do = false; mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (sdata && roc->sdata != sdata) continue; - if (roc->started && local->ops->remain_on_channel) { - /* can race, so ignore return value */ - drv_cancel_remain_on_channel(local); - } - - list_move_tail(&roc->list, &tmp_list); - roc->abort = true; - } - mutex_unlock(&local->mtx); - - list_for_each_entry_safe(roc, tmp, &tmp_list, list) { - if (local->ops->remain_on_channel) { - list_del(&roc->list); - ieee80211_roc_notify_destroy(roc, true); + if (roc->started) { + if (local->ops->remain_on_channel) { + /* can race, so ignore return value */ + drv_cancel_remain_on_channel(local); + ieee80211_roc_notify_destroy(roc); + } else { + roc->abort = true; + work_to_do = true; + } } else { - ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); - - /* work will clean up etc */ - flush_delayed_work(&roc->work); - WARN_ON(!roc->to_be_freed); - kfree(roc); + ieee80211_roc_notify_destroy(roc); } } - - WARN_ON_ONCE(!list_empty(&tmp_list)); + if (work_to_do) + __ieee80211_roc_work(local); + mutex_unlock(&local->mtx); } -- cgit v1.1 From 1aeb135f84fe40cf6ba1e3610ad2ca4cb9628089 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Mon, 23 Nov 2015 19:27:16 +0100 Subject: cfg80211: reg: Refactor calculation of bandwidth flags The same piece of code appears at two places. Make a function from it. Signed-off-by: Michal Sojka Signed-off-by: Johannes Berg --- net/wireless/reg.c | 91 ++++++++++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 43b3e57..0a4f548 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1166,6 +1166,41 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, #endif } +static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd, + const struct ieee80211_reg_rule *reg_rule, + const struct ieee80211_channel *chan) +{ + const struct ieee80211_freq_range *freq_range = NULL; + u32 max_bandwidth_khz, bw_flags = 0; + + freq_range = ®_rule->freq_range; + + max_bandwidth_khz = freq_range->max_bandwidth_khz; + /* Check if auto calculation requested */ + if (reg_rule->flags & NL80211_RRF_AUTO_BW) + max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); + + /* If we get a reg_rule we can assume that at least 5Mhz fit */ + if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), + MHZ_TO_KHZ(10))) + bw_flags |= IEEE80211_CHAN_NO_10MHZ; + if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), + MHZ_TO_KHZ(20))) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; + + if (max_bandwidth_khz < MHZ_TO_KHZ(10)) + bw_flags |= IEEE80211_CHAN_NO_10MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(20)) + bw_flags |= IEEE80211_CHAN_NO_20MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(40)) + bw_flags |= IEEE80211_CHAN_NO_HT40; + if (max_bandwidth_khz < MHZ_TO_KHZ(80)) + bw_flags |= IEEE80211_CHAN_NO_80MHZ; + if (max_bandwidth_khz < MHZ_TO_KHZ(160)) + bw_flags |= IEEE80211_CHAN_NO_160MHZ; + return bw_flags; +} + /* * Note that right now we assume the desired channel bandwidth * is always 20 MHz for each individual channel (HT40 uses 20 MHz @@ -1178,11 +1213,9 @@ static void handle_channel(struct wiphy *wiphy, u32 flags, bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; - const struct ieee80211_freq_range *freq_range = NULL; struct wiphy *request_wiphy = NULL; struct regulatory_request *lr = get_last_request(); const struct ieee80211_regdomain *regd; - u32 max_bandwidth_khz; request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); @@ -1223,31 +1256,7 @@ static void handle_channel(struct wiphy *wiphy, chan_reg_rule_print_dbg(regd, chan, reg_rule); power_rule = ®_rule->power_rule; - freq_range = ®_rule->freq_range; - - max_bandwidth_khz = freq_range->max_bandwidth_khz; - /* Check if auto calculation requested */ - if (reg_rule->flags & NL80211_RRF_AUTO_BW) - max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - - /* If we get a reg_rule we can assume that at least 5Mhz fit */ - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(10))) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(20))) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - - if (max_bandwidth_khz < MHZ_TO_KHZ(10)) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(20)) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags |= IEEE80211_CHAN_NO_HT40; - if (max_bandwidth_khz < MHZ_TO_KHZ(80)) - bw_flags |= IEEE80211_CHAN_NO_80MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(160)) - bw_flags |= IEEE80211_CHAN_NO_160MHZ; + bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && @@ -1760,8 +1769,6 @@ static void handle_channel_custom(struct wiphy *wiphy, u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; - const struct ieee80211_freq_range *freq_range = NULL; - u32 max_bandwidth_khz; u32 bw; for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { @@ -1786,31 +1793,7 @@ static void handle_channel_custom(struct wiphy *wiphy, chan_reg_rule_print_dbg(regd, chan, reg_rule); power_rule = ®_rule->power_rule; - freq_range = ®_rule->freq_range; - - max_bandwidth_khz = freq_range->max_bandwidth_khz; - /* Check if auto calculation requested */ - if (reg_rule->flags & NL80211_RRF_AUTO_BW) - max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); - - /* If we get a reg_rule we can assume that at least 5Mhz fit */ - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(10))) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq), - MHZ_TO_KHZ(20))) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - - if (max_bandwidth_khz < MHZ_TO_KHZ(10)) - bw_flags |= IEEE80211_CHAN_NO_10MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(20)) - bw_flags |= IEEE80211_CHAN_NO_20MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(40)) - bw_flags |= IEEE80211_CHAN_NO_HT40; - if (max_bandwidth_khz < MHZ_TO_KHZ(80)) - bw_flags |= IEEE80211_CHAN_NO_80MHZ; - if (max_bandwidth_khz < MHZ_TO_KHZ(160)) - bw_flags |= IEEE80211_CHAN_NO_160MHZ; + bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); chan->dfs_state_entered = jiffies; chan->dfs_state = NL80211_DFS_USABLE; -- cgit v1.1 From c39b336deb2ec92751488531a05fd14bbe7fa89e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 26 Nov 2015 20:49:38 +0200 Subject: mac80211: Allow a STA to join an IBSS with 80+80 MHz channel While it was possible to create an IBSS with 80+80 MHz channel, joining such an IBSS resulted in falling back to 20 MHz channel with VHT disabled due to a missing switch case for 80+80. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 337bb5d..f7fc0e0 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -428,6 +428,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, chandef.width = sdata->u.ibss.chandef.width; break; case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_160: chandef = sdata->u.ibss.chandef; chandef.chan = cbss->channel; -- cgit v1.1 From 7d37fcd409199f76da522e6f6670a354ac468002 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Dec 2015 23:15:26 +0100 Subject: mac80211: reject zero cookie in mgmt-tx/roc cancel When cancelling, you can cancel "any" (first in list) mgmt-tx or remain-on-channel operation by using the value 0 for the cookie along with the *opposite* operation, i.e. * cancel the first mgmt-tx by cancelling roc with 0 cookie * cancel the first roc by cancelling mgmt-tx with 0 cookie This isn't really that bad since userspace should only pass cookies that we gave it, but could lead to hard-to-debug issues so better prevent it and reject zero values since we never hand those out. Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cfd3356..6719b27 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -697,6 +697,9 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp, *found = NULL; int ret; + if (!cookie) + return -ENOENT; + mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (!mgmt_tx && roc->cookie != cookie) -- cgit v1.1 From 3ef0952ca85e28226b09a6d833c30e3e604a63c8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 3 Dec 2015 21:12:32 +0100 Subject: ipv6: Only act upon NETDEV_*_TYPE_CHANGE if we have ipv6 addresses An interface changing type may not have IPv6 addresses. Don't call the address configuration type change in this case. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6936d0d..5e9111d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3287,7 +3287,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_PRE_TYPE_CHANGE: case NETDEV_POST_TYPE_CHANGE: - addrconf_type_change(dev, event); + if (idev) + addrconf_type_change(dev, event); break; } -- cgit v1.1 From a1a66b1100373ead1fa2383bc3dee42c508bb504 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 3 Dec 2015 21:12:33 +0100 Subject: batman-adv: Act on NETDEV_*_TYPE_CHANGE events A network interface can change type. It may change from a type which batman does not support, e.g. hdlc, to one it does, e.g. hdlc-eth. When an interface changes type, it sends two notifications. Handle these notifications. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/batman-adv/hard-interface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index aa8867e..a58184f 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -709,7 +709,8 @@ static int batadv_hard_if_event(struct notifier_block *this, } hard_iface = batadv_hardif_get_by_netdev(net_dev); - if (!hard_iface && event == NETDEV_REGISTER) + if (!hard_iface && (event == NETDEV_REGISTER || + event == NETDEV_POST_TYPE_CHANGE)) hard_iface = batadv_hardif_add_interface(net_dev); if (!hard_iface) @@ -724,6 +725,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: + case NETDEV_PRE_TYPE_CHANGE: list_del_rcu(&hard_iface->list); batadv_hardif_remove_interface(hard_iface); -- cgit v1.1 From b618aaa91b5870e7bd139987ac4b7bf0851142d0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Dec 2015 15:01:31 +0100 Subject: net: constify netif_is_* helpers net_device param As suggested by Eric, these helpers should have const dev param. Suggested-by: Eric Dumazet Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d1706e8..e5c3954 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5734,7 +5734,7 @@ EXPORT_SYMBOL(netdev_lower_dev_get_private); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)) + bool (*type_check)(const struct net_device *dev)) { struct net_device *lower = NULL; struct list_head *iter; -- cgit v1.1 From 474fe9f7f551b151222db07a968b15bf05ffe4c4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 21:06:44 -0400 Subject: 9p/trans_virtio: don't bother with p9_tag_lookup() Just store the pointer to req instead of that to req->tc as opaque data. Signed-off-by: Al Viro --- net/9p/trans_virtio.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 6e70ddb..9fc6a56 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -143,7 +143,6 @@ static void p9_virtio_close(struct p9_client *client) static void req_done(struct virtqueue *vq) { struct virtio_chan *chan = vq->vdev->priv; - struct p9_fcall *rc; unsigned int len; struct p9_req_t *req; unsigned long flags; @@ -152,8 +151,8 @@ static void req_done(struct virtqueue *vq) while (1) { spin_lock_irqsave(&chan->lock, flags); - rc = virtqueue_get_buf(chan->vq, &len); - if (rc == NULL) { + req = virtqueue_get_buf(chan->vq, &len); + if (req == NULL) { spin_unlock_irqrestore(&chan->lock, flags); break; } @@ -161,9 +160,6 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ wake_up(chan->vc_wq); - p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); - p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); - req = p9_tag_lookup(chan->client, rc->tag); p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } } @@ -284,7 +280,7 @@ req_retry: if (in) sgs[out_sgs + in_sgs++] = chan->sg + out; - err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { @@ -469,7 +465,7 @@ req_retry_pinned: } BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); - err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { -- cgit v1.1 From ea3793ee29d3621faf857fa8ef5425e9ff9a756d Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Sun, 6 Dec 2015 21:11:34 +0000 Subject: core: enable more fine-grained datagram reception control The __skb_recv_datagram routine in core/ datagram.c provides a general skb reception factility supposed to be utilized by protocol modules providing datagram sockets. It encompasses both the actual recvmsg code and a surrounding 'sleep until data is available' loop. This is inconvenient if a protocol module has to use additional locking in order to maintain some per-socket state the generic datagram socket code is unaware of (as the af_unix code does). The patch below moves the recvmsg proper code into a new __skb_try_recv_datagram routine which doesn't sleep and renames wait_for_more_packets to __skb_wait_for_more_packets, both routines being exported interfaces. The original __skb_recv_datagram routine is reimplemented on top of these two functions such that its user-visible behaviour remains unchanged. Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller --- net/core/datagram.c | 77 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index d62af69..7daff66 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -83,8 +83,8 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn /* * Wait for the last received packet to be different from skb */ -static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, - const struct sk_buff *skb) +int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, + const struct sk_buff *skb) { int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); @@ -130,6 +130,7 @@ out_noerr: error = 1; goto out; } +EXPORT_SYMBOL(__skb_wait_for_more_packets); static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { @@ -161,13 +162,15 @@ done: } /** - * __skb_recv_datagram - Receive a datagram skbuff + * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts * @err: error code returned + * @last: set to last peeked message to inform the wait function + * what to look for when peeking * * Get a datagram skbuff, understands the peeking, nonblocking wakeups * and possible races. This replaces identical code in packet, raw and @@ -175,9 +178,11 @@ done: * the long standing peek and read race for datagram sockets. If you * alter this routine remember it must be re-entrant. * - * This function will lock the socket if a skb is returned, so the caller - * needs to unlock the socket in that case (usually by calling - * skb_free_datagram) + * This function will lock the socket if a skb is returned, so + * the caller needs to unlock the socket in that case (usually by + * calling skb_free_datagram). Returns NULL with *err set to + * -EAGAIN if no data was available or to some other value if an + * error was detected. * * * It does not lock socket since today. This function is * * free of race conditions. This measure should/can improve @@ -191,13 +196,13 @@ done: * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, - int *peeked, int *off, int *err) +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err, + struct sk_buff **last) { struct sk_buff_head *queue = &sk->sk_receive_queue; - struct sk_buff *skb, *last; + struct sk_buff *skb; unsigned long cpu_flags; - long timeo; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() */ @@ -206,8 +211,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (error) goto no_packet; - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - do { /* Again only user level code calls this function, so nothing * interrupt level will suddenly eat the receive_queue. @@ -217,10 +220,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, */ int _off = *off; - last = (struct sk_buff *)queue; + *last = (struct sk_buff *)queue; spin_lock_irqsave(&queue->lock, cpu_flags); skb_queue_walk(queue, skb) { - last = skb; + *last = skb; *peeked = skb->peeked; if (flags & MSG_PEEK) { if (_off >= skb->len && (skb->len || _off || @@ -231,8 +234,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb = skb_set_peeked(skb); error = PTR_ERR(skb); - if (IS_ERR(skb)) - goto unlock_err; + if (IS_ERR(skb)) { + spin_unlock_irqrestore(&queue->lock, + cpu_flags); + goto no_packet; + } atomic_inc(&skb->users); } else @@ -242,25 +248,38 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, *off = _off; return skb; } + spin_unlock_irqrestore(&queue->lock, cpu_flags); + } while (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)); - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)) - continue; + error = -EAGAIN; - /* User doesn't want to wait */ - error = -EAGAIN; - if (!timeo) - goto no_packet; +no_packet: + *err = error; + return NULL; +} +EXPORT_SYMBOL(__skb_try_recv_datagram); - } while (!wait_for_more_packets(sk, err, &timeo, last)); +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + int *peeked, int *off, int *err) +{ + struct sk_buff *skb, *last; + long timeo; - return NULL; + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, + &last); + if (skb) + return skb; + + if (*err != EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, err, &timeo, last)); -unlock_err: - spin_unlock_irqrestore(&queue->lock, cpu_flags); -no_packet: - *err = error; return NULL; } EXPORT_SYMBOL(__skb_recv_datagram); -- cgit v1.1 From 64874280889e7c0b2c9266705363627d4c92cf01 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Sun, 6 Dec 2015 21:11:38 +0000 Subject: af_unix: fix unix_dgram_recvmsg entry locking The current unix_dgram_recvsmg code acquires the u->readlock mutex in order to protect access to the peek offset prior to calling __skb_recv_datagram for actually receiving data. This implies that a blocking reader will go to sleep with this mutex held if there's presently no data to return to userspace. Two non-desirable side effects of this are that a later non-blocking read call on the same socket will block on the ->readlock mutex until the earlier blocking call releases it (or the readers is interrupted) and that later blocking read calls will wait longer than the effective socket read timeout says they should: The timeout will only start 'ticking' once such a reader hits the schedule_timeout in wait_for_more_packets (core.c) while the time it already had to wait until it could acquire the mutex is unaccounted for. The patch avoids both by using the __skb_try_recv_datagram and __skb_wait_for_more packets functions created by the first patch to implement a unix_dgram_recvmsg read loop which releases the readlock mutex prior to going to sleep and reacquires it as needed afterwards. Non-blocking readers will thus immediately return with -EAGAIN if there's no data available regardless of any concurrent blocking readers and all blocking readers will end up sleeping via schedule_timeout, thus honouring the configured socket receive timeout. Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller --- net/unix/af_unix.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 502e572..1c3c1f3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2078,8 +2078,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); - int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; + struct sk_buff *skb, *last; + long timeo; int err; int peeked, skip; @@ -2087,26 +2087,32 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, if (flags&MSG_OOB) goto out; - err = mutex_lock_interruptible(&u->readlock); - if (unlikely(err)) { - /* recvmsg() in non blocking mode is supposed to return -EAGAIN - * sk_rcvtimeo is not honored by mutex_lock_interruptible() - */ - err = noblock ? -EAGAIN : -ERESTARTSYS; - goto out; - } + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - skip = sk_peek_offset(sk, flags); + do { + mutex_lock(&u->readlock); - skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); - if (!skb) { + skip = sk_peek_offset(sk, flags); + skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, + &last); + if (skb) + break; + + mutex_unlock(&u->readlock); + + if (err != -EAGAIN) + break; + } while (timeo && + !__skb_wait_for_more_packets(sk, &err, &timeo, last)); + + if (!skb) { /* implies readlock unlocked */ unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && (sk->sk_shutdown & RCV_SHUTDOWN)) err = 0; unix_state_unlock(sk); - goto out_unlock; + goto out; } if (wq_has_sleeper(&u->peer_wait)) @@ -2164,7 +2170,6 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, out_free: skb_free_datagram(sk, skb); -out_unlock: mutex_unlock(&u->readlock); out: return err; -- cgit v1.1 From 1b894521e60c1b91db1e8ba1278660e5c89f1b5f Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 6 Dec 2015 21:19:15 +0200 Subject: mac80211: handle HW ROC expired properly In case of HW ROC, when the driver reports that the ROC expired, it is not sufficient to purge the ROCs based on the remaining time, as it possible that the device finished the ROC session before the actual requested duration. To handle such cases, in case of ROC expired notification from the driver, complete all the ROCs which are marked with hw_begun, regardless of the remaining duration. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6719b27..8b2f4ea 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -224,7 +224,11 @@ static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local, msecs_to_jiffies(roc->duration) - now; - if (roc->abort || remaining <= 0) + /* In case of HW ROC, it is possible that the HW finished the + * ROC session before the actual requested time. In such a case + * end the ROC session (disregarding the remaining time). + */ + if (roc->abort || roc->hw_begun || remaining <= 0) ieee80211_roc_notify_destroy(roc); else remaining_dur_min = min(remaining_dur_min, remaining); -- cgit v1.1 From 4baee937b8d551c89f61542a575378e407b63415 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:32 +0100 Subject: net: dsa: remove DSA link polling Since no more DSA driver uses the polling callback, and since the phylib handles the link detection, remove the link polling work and timer code. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b7448c8..0f41f71 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -508,33 +508,6 @@ static int dsa_switch_resume(struct dsa_switch *ds) } #endif - -/* link polling *************************************************************/ -static void dsa_link_poll_work(struct work_struct *ugly) -{ - struct dsa_switch_tree *dst; - int i; - - dst = container_of(ugly, struct dsa_switch_tree, link_poll_work); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL && ds->drv->poll_link != NULL) - ds->drv->poll_link(ds); - } - - mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ)); -} - -static void dsa_link_poll_timer(unsigned long _dst) -{ - struct dsa_switch_tree *dst = (void *)_dst; - - schedule_work(&dst->link_poll_work); -} - - /* platform driver init and cleanup *****************************************/ static int dev_is_class(struct device *dev, void *class) { @@ -877,8 +850,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, } dst->ds[i] = ds; - if (ds->drv->poll_link != NULL) - dst->link_poll_needed = 1; ++configured; } @@ -897,15 +868,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = (void *)dst; - if (dst->link_poll_needed) { - INIT_WORK(&dst->link_poll_work, dsa_link_poll_work); - init_timer(&dst->link_poll_timer); - dst->link_poll_timer.data = (unsigned long)dst; - dst->link_poll_timer.function = dsa_link_poll_timer; - dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); - add_timer(&dst->link_poll_timer); - } - return 0; } @@ -972,11 +934,6 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - if (dst->link_poll_needed) - del_timer_sync(&dst->link_poll_timer); - - flush_work(&dst->link_poll_work); - for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; -- cgit v1.1 From b0dc635d923cd5aafa4e99973f529bf68c582738 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:33 +0100 Subject: net: dsa: cleanup resources upon module removal Make sure that we unassign the master_netdev dsa_ptr to make the packet processing go through the regular Ethernet receive path. Suggested-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0f41f71..d9e0172 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -985,6 +985,14 @@ static int dsa_suspend(struct device *d) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); int i, ret = 0; + dst->master_netdev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); + for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; -- cgit v1.1 From 679fb46c57859b59a70257477bfbdfc7edfac4f5 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:34 +0100 Subject: net: dsa: Add missing master netdev dev_put() calls Upon probe failure or unbinding, add missing dev_put() calls. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d9e0172..d22d303e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -919,8 +919,10 @@ static int dsa_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dst); ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) + if (ret) { + dev_put(dev); goto out; + } return 0; @@ -940,6 +942,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) if (ds) dsa_switch_destroy(ds); } + + dev_put(dst->master_netdev); } static int dsa_remove(struct platform_device *pdev) -- cgit v1.1 From cda5c15b23fb9d683a491e8bd137d11d8552ac02 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:35 +0100 Subject: net: dsa: move dsa slave destroy code to slave.c Move dsa slave dedicated code from dsa_switch_destroy to a new dsa_slave_destroy function in slave.c. Add the netif_carrier_off and phy_disconnect calls in order to correctly cleanup the netdev state and PHY state machine. Signed-off-by: Frode Isaksen Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 3 +-- net/dsa/dsa_priv.h | 1 + net/dsa/slave.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d22d303e..208d1b2 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -456,8 +456,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (!ds->ports[port]) continue; - unregister_netdev(ds->ports[port]); - free_netdev(ds->ports[port]); + dsa_slave_destroy(ds->ports[port]); } mdiobus_unregister(ds->slave_mii_bus); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 311796c8..1d1a546 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -61,6 +61,7 @@ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); int dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name); +void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_netdevice_event(struct notifier_block *unused, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7bc787b..1e9e942 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1212,6 +1212,17 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, return 0; } +void dsa_slave_destroy(struct net_device *slave_dev) +{ + struct dsa_slave_priv *p = netdev_priv(slave_dev); + + netif_carrier_off(slave_dev); + if (p->phy) + phy_disconnect(p->phy); + unregister_netdev(slave_dev); + free_netdev(slave_dev); +} + static bool dsa_slave_dev_check(struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; -- cgit v1.1 From 760a4322470e3990b14e09bfe80c9c75c77f33dd Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Tue, 8 Dec 2015 14:47:56 +0000 Subject: net: Fix inverted test in __skb_recv_datagram As the kernel generally uses negated error numbers, *err needs to be compared with -EAGAIN (d'oh). Signed-off-by: Rainer Weikusat Fixes: ea3793ee29d3 ("core: enable more fine-grained datagram reception control") Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 7daff66..fa9dc64 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -275,7 +275,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, if (skb) return skb; - if (*err != EAGAIN) + if (*err != -EAGAIN) break; } while (timeo && !__skb_wait_for_more_packets(sk, err, &timeo, last)); -- cgit v1.1 From 8ac2837c89c8c0fcad557e4380aeef80580390f9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Dec 2015 10:51:12 +0800 Subject: Revert "Merge branch 'vsock-virtio'" This reverts commit 0d76d6e8b2507983a2cae4c09880798079007421 and merge commit c402293bd76fbc93e52ef8c0947ab81eea3ae019, reversing changes made to c89359a42e2a49656451569c382eed63e781153c. The virtio-vsock device specification is not finalized yet. Michael Tsirkin voiced concerned about merging this code when the hardware interface (and possibly the userspace interface) could still change. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/Kconfig | 18 - net/vmw_vsock/Makefile | 2 - net/vmw_vsock/af_vsock.c | 70 -- net/vmw_vsock/virtio_transport.c | 466 ----------- net/vmw_vsock/virtio_transport_common.c | 1272 ------------------------------- 5 files changed, 1828 deletions(-) delete mode 100644 net/vmw_vsock/virtio_transport.c delete mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'net') diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 74e0bc8..14810ab 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,21 +26,3 @@ config VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. - -config VIRTIO_VSOCKETS - tristate "virtio transport for Virtual Sockets" - depends on VSOCKETS && VIRTIO - select VIRTIO_VSOCKETS_COMMON - help - This module implements a virtio transport for Virtual Sockets. - - Enable this transport if your Virtual Machine runs on Qemu/KVM. - - To compile this driver as a module, choose M here: the module - will be called virtio_vsock_transport. If unsure, say N. - -config VIRTIO_VSOCKETS_COMMON - tristate - ---help--- - This option is selected by any driver which needs to access - the virtio_vsock. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index cf4c294..2ce52d7 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,7 +1,5 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 77247a2..7fd1220 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,17 +223,6 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } -static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct vsock_sock *vsk; - - list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) - if (addr->svm_port == vsk->local_addr.svm_port) - return sk_vsock(vsk); - - return NULL; -} - static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -309,21 +298,6 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct sock *sk; - - spin_lock_bh(&vsock_table_lock); - sk = __vsock_find_unbound_socket(addr); - if (sk) - sock_hold(sk); - - spin_unlock_bh(&vsock_table_lock); - - return sk; -} -EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -558,50 +532,6 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) -{ - static u32 port = LAST_RESERVED_PORT + 1; - struct sockaddr_vm new_addr; - - vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); - - if (addr->svm_port == VMADDR_PORT_ANY) { - bool found = false; - unsigned int i; - - for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) - port = LAST_RESERVED_PORT + 1; - - new_addr.svm_port = port++; - - if (!__vsock_find_unbound_socket(&new_addr)) { - found = true; - break; - } - } - - if (!found) - return -EADDRNOTAVAIL; - } else { - /* If port is in reserved range, ensure caller - * has necessary privileges. - */ - if (addr->svm_port <= LAST_RESERVED_PORT && - !capable(CAP_NET_BIND_SERVICE)) { - return -EACCES; - } - - if (__vsock_find_unbound_socket(&new_addr)) - return -EADDRINUSE; - } - - vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); - - return 0; -} -EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); - static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c deleted file mode 100644 index df65dca..0000000 --- a/net/vmw_vsock/virtio_transport.c +++ /dev/null @@ -1,466 +0,0 @@ -/* - * virtio transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * Some of the code is take from Gerd Hoffmann 's - * early virtio-vsock proof-of-concept bits. - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; -static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); - -struct virtio_vsock { - /* Virtio device */ - struct virtio_device *vdev; - /* Virtio virtqueue */ - struct virtqueue *vqs[VSOCK_VQ_MAX]; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Work item to send pkt */ - struct work_struct tx_work; - /* Work item to recv pkt */ - struct work_struct rx_work; - /* Mutex to protect send pkt*/ - struct mutex tx_lock; - /* Mutex to protect recv pkt*/ - struct mutex rx_lock; - /* Number of recv buffers */ - int rx_buf_nr; - /* Number of max recv buffers */ - int rx_buf_max_nr; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest context id, just like guest ip address */ - u32 guest_cid; -}; - -static struct virtio_vsock *virtio_vsock_get(void) -{ - return the_virtio_vsock; -} - -static u32 virtio_transport_get_local_cid(void) -{ - struct virtio_vsock *vsock = virtio_vsock_get(); - - return vsock->guest_cid; -} - -static int -virtio_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - int ret, in_sg = 0, out_sg = 0; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct virtio_vsock *vsock; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - vsock = virtio_vsock_get(); - if (!vsock) - return -ENODEV; - - src_cid = virtio_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - trans = vsk->trans; - vq = vsock->vqs[VSOCK_VQ_TX]; - - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - pkt_len = virtio_transport_get_credit(trans, pkt_len); - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vsock->tx_lock); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vsock->tx_lock); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vsock->tx_lock); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vsock->tx_lock); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); - - /* Will be released in virtio_transport_send_pkt_work */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Put pkt in the virtqueue */ - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[out_sg++] = &hdr; - if (info->msg && info->pkt_len > 0) { - sg_init_one(&buf, pkt->buf, pkt->len); - sgs[out_sg++] = &buf; - } - - mutex_lock(&vsock->tx_lock); - while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, - GFP_KERNEL)) < 0) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - virtqueue_kick(vq); - mutex_unlock(&vsock->tx_lock); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops virtio_ops = { - .send_pkt = virtio_transport_send_pkt, -}; - -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -{ - int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - int ret; - - vq = vsock->vqs[VSOCK_VQ_RX]; - - do { - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - pr_debug("%s: fail to allocate pkt\n", __func__); - goto out; - } - - /* TODO: use mergeable rx buffer */ - pkt->buf = kmalloc(buf_len, GFP_KERNEL); - if (!pkt->buf) { - pr_debug("%s: fail to allocate pkt->buf\n", __func__); - goto err; - } - - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[0] = &hdr; - - sg_init_one(&buf, pkt->buf, buf_len); - sgs[1] = &buf; - ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); - if (ret) - goto err; - vsock->rx_buf_nr++; - } while (vq->num_free); - if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) - vsock->rx_buf_max_nr = vsock->rx_buf_nr; -out: - virtqueue_kick(vq); - return; -err: - virtqueue_kick(vq); - virtio_transport_free_pkt(pkt); - return; -} - -static void virtio_transport_send_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, tx_work); - struct virtio_vsock_pkt *pkt; - bool added = false; - struct virtqueue *vq; - unsigned int len; - struct sock *sk; - - vq = vsock->vqs[VSOCK_VQ_TX]; - mutex_lock(&vsock->tx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - sk = &pkt->trans->vsk->sk; - virtio_transport_dec_tx_pkt(pkt); - /* Release refcnt taken in virtio_transport_send_pkt */ - sock_put(sk); - vsock->total_tx_buf -= pkt->len; - virtio_transport_free_pkt(pkt); - added = true; - } - } while (!virtqueue_enable_cb(vq)); - mutex_unlock(&vsock->tx_lock); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void virtio_transport_recv_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, rx_work); - struct virtio_vsock_pkt *pkt; - struct virtqueue *vq; - unsigned int len; - - vq = vsock->vqs[VSOCK_VQ_RX]; - mutex_lock(&vsock->rx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - pkt->len = len; - virtio_transport_recv_pkt(pkt); - vsock->rx_buf_nr--; - } - } while (!virtqueue_enable_cb(vq)); - - if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); -} - -static void virtio_vsock_ctrl_done(struct virtqueue *vq) -{ -} - -static void virtio_vsock_tx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->tx_work); -} - -static void virtio_vsock_rx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->rx_work); -} - -static int -virtio_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return ret; - - trans = vsk->trans; - trans->ops = &virtio_ops; - return ret; -} - -static struct vsock_transport virtio_transport = { - .get_local_cid = virtio_transport_get_local_cid, - - .init = virtio_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_bind = virtio_transport_dgram_bind, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int virtio_vsock_probe(struct virtio_device *vdev) -{ - vq_callback_t *callbacks[] = { - virtio_vsock_ctrl_done, - virtio_vsock_rx_done, - virtio_vsock_tx_done, - }; - const char *names[] = { - "ctrl", - "rx", - "tx", - }; - struct virtio_vsock *vsock = NULL; - u32 guest_cid; - int ret; - - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (the_virtio_vsock) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); - if (ret < 0) - goto out; - - vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), - &guest_cid, sizeof(guest_cid)); - vsock->guest_cid = le32_to_cpu(guest_cid); - pr_debug("%s:guest_cid=%d\n", __func__, vsock->guest_cid); - - ret = vsock_core_init(&virtio_transport); - if (ret < 0) - goto out_vqs; - - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - - vdev->priv = the_virtio_vsock = vsock; - init_waitqueue_head(&vsock->queue_wait); - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work); - INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work); - - mutex_lock(&vsock->rx_lock); - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); - - mutex_unlock(&the_virtio_vsock_mutex); - return 0; - -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; -} - -static void virtio_vsock_remove(struct virtio_device *vdev) -{ - struct virtio_vsock *vsock = vdev->priv; - - mutex_lock(&the_virtio_vsock_mutex); - the_virtio_vsock = NULL; - vsock_core_exit(); - mutex_unlock(&the_virtio_vsock_mutex); - - kfree(vsock); -} - -static struct virtio_device_id id_table[] = { - { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; - -static unsigned int features[] = { -}; - -static struct virtio_driver virtio_vsock_driver = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtio_vsock_probe, - .remove = virtio_vsock_remove, -}; - -static int __init virtio_vsock_init(void) -{ - int ret; - - virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); - if (!virtio_vsock_workqueue) - return -ENOMEM; - ret = register_virtio_driver(&virtio_vsock_driver); - if (ret) - destroy_workqueue(virtio_vsock_workqueue); - return ret; -} - -static void __exit virtio_vsock_exit(void) -{ - unregister_virtio_driver(&virtio_vsock_driver); - destroy_workqueue(virtio_vsock_workqueue); -} - -module_init(virtio_vsock_init); -module_exit(virtio_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("virtio transport for vsock"); -MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c deleted file mode 100644 index 28f790d..0000000 --- a/net/vmw_vsock/virtio_transport_common.c +++ /dev/null @@ -1,1272 +0,0 @@ -/* - * common code for virtio vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define COOKIEBITS 24 -#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) -#define VSOCK_TIMEOUT_INIT 4 - -#define SHA_MESSAGE_WORDS 16 -#define SHA_VSOCK_WORDS 5 - -static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + - SHA_DIGEST_WORDS]; - -static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + - SHA_WORKSPACE_WORDS], vsock_cookie_scratch); - -static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, - u32 count, int c) -{ - __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); - - memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], - sizeof(vsockcookie_secret[c])); - tmp[0] = saddr; - tmp[1] = daddr; - tmp[2] = sport; - tmp[3] = dport; - tmp[4] = count; - sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, - tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); - - return tmp[17]; -} - -static u32 -virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count) -{ - u32 h1, h2; - - h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); - h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); - - return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); -} - -static u32 -virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count, u32 cookie, u32 maxdiff) -{ - u32 diff; - u32 ret; - - cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); - - diff = (count - (cookie >> COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); - pr_debug("%s: diff=%x\n", __func__, diff); - if (diff >= maxdiff) - return (u32)-1; - - ret = (cookie - - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) - & COOKIEMASK; - pr_debug("%s: ret=%x\n", __func__, diff); - - return ret; -} - -void virtio_vsock_dumppkt(const char *func, const struct virtio_vsock_pkt *pkt) -{ - pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", - func, pkt, - le16_to_cpu(pkt->hdr.op), - le32_to_cpu(pkt->hdr.len), - le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port), - pkt->len); -} -EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); - -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - int err; - - BUG_ON(!trans); - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - pkt->hdr.type = cpu_to_le16(info->type); - pkt->hdr.op = cpu_to_le16(info->op); - pkt->hdr.src_cid = cpu_to_le32(src_cid); - pkt->hdr.src_port = cpu_to_le32(src_port); - pkt->hdr.dst_cid = cpu_to_le32(dst_cid); - pkt->hdr.dst_port = cpu_to_le32(dst_port); - pkt->hdr.flags = cpu_to_le32(info->flags); - pkt->len = len; - pkt->trans = trans; - if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); - else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) - pkt->hdr.len = cpu_to_le32(len); - - if (info->msg && len > 0) { - pkt->buf = kmalloc(len, GFP_KERNEL); - if (!pkt->buf) - goto out_pkt; - err = memcpy_from_msg(pkt->buf, info->msg, len); - if (err) - goto out; - } - - return pkt; - -out: - kfree(pkt->buf); -out_pkt: - kfree(pkt); - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vlistener; - struct vsock_sock *vpending; - struct sockaddr_vm src; - struct sockaddr_vm dst; - struct sock *pending; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - vlistener = vsock_sk(listener); - list_for_each_entry(vpending, &vlistener->pending_links, - pending_links) { - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { - pending = sk_vsock(vpending); - sock_hold(pending); - return pending; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_pending); - -static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes += pkt->len; -} - -static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes -= pkt->len; - pkt->trans->fwd_cnt += pkt->len; -} - -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) -{ - mutex_lock(&pkt->trans->tx_lock); - pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); - pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); - mutex_unlock(&pkt->trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); - -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) -{ -} -EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); - -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) -{ - u32 ret; - - mutex_lock(&trans->tx_lock); - ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (ret > credit) - ret = credit; - trans->tx_cnt += ret; - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," - "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, - ret, trans->buf_alloc, trans->peer_buf_alloc, - trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); - - return ret; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_credit); - -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) -{ - mutex_lock(&trans->tx_lock); - trans->tx_cnt -= credit; - mutex_unlock(&trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_put_credit); - -static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, - .type = type, - }; - - if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { - info.remote_cid = le32_to_cpu(hdr->src_cid); - info.remote_port = le32_to_cpu(hdr->src_port); - } - - pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, - .type = type, - }; - - pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static ssize_t -virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - size_t bytes, total = 0; - int err = -EFAULT; - - mutex_lock(&trans->rx_lock); - while (total < len && trans->rx_bytes > 0 && - !list_empty(&trans->rx_queue)) { - pkt = list_first_entry(&trans->rx_queue, - struct virtio_vsock_pkt, list); - - bytes = len - total; - if (bytes > pkt->len - pkt->off) - bytes = pkt->len - pkt->off; - - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); - if (err) - goto out; - total += bytes; - pkt->off += bytes; - if (pkt->off == pkt->len) { - virtio_transport_dec_rx_pkt(pkt); - list_del(&pkt->list); - virtio_transport_free_pkt(pkt); - } - } - mutex_unlock(&trans->rx_lock); - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); - - return total; - -out: - mutex_unlock(&trans->rx_lock); - if (total) - err = total; - return err; -} - -ssize_t -virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - if (flags & MSG_PEEK) - return -EOPNOTSUPP; - - return virtio_transport_stream_do_dequeue(vsk, msg, len); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); - -struct dgram_skb { - struct list_head list; - struct sk_buff *skb; - u16 id; -}; - -static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, - u16 id) -{ - struct dgram_skb *dgram_skb; - - list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { - if (dgram_skb->id == id) - return dgram_skb; - } - - return NULL; -} - -static void -virtio_transport_recv_dgram(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct sk_buff *skb = NULL; - struct vsock_sock *vsk; - struct virtio_transport *trans; - size_t size; - u16 dgram_id, pkt_off, dgram_len, pkt_len; - u32 flags, len; - struct dgram_skb *dgram_skb; - - vsk = vsock_sk(sk); - trans = vsk->trans; - - /* len: dgram_len | pkt_len */ - len = le32_to_cpu(pkt->hdr.len); - dgram_len = len >> 16; - pkt_len = len & 0xFFFF; - - /* flags: dgram_id | pkt_off */ - flags = le32_to_cpu(pkt->hdr.flags); - dgram_id = flags >> 16; - pkt_off = flags & 0xFFFF; - - pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", __func__, - dgram_len, pkt_len, dgram_id, pkt_off); - - dgram_skb = dgram_id_to_skb(trans, dgram_id); - if (dgram_skb) { - /* This pkt is for a existing dgram */ - skb = dgram_skb->skb; - pr_debug("%s:found skb\n", __func__); - } - - /* Packet payload must be within datagram bounds */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - goto drop; - if (pkt_len > dgram_len) - goto drop; - if (pkt_off > dgram_len) - goto drop; - if (dgram_len - pkt_off < pkt_len) - goto drop; - - if (!skb) { - /* This pkt is for a new dgram */ - pr_debug("%s:create skb\n", __func__); - - size = sizeof(pkt->hdr) + dgram_len; - /* Attach the packet to the socket's receive queue as an sk_buff. */ - dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); - if (!dgram_skb) - goto drop; - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - kfree(dgram_skb); - dgram_skb = NULL; - goto drop; - } - dgram_skb->id = dgram_id; - dgram_skb->skb = skb; - list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); - - /* sk_receive_skb() will do a sock_put(), so hold here. */ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); - } - - memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); - - pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, - pkt_off, pkt_len, dgram_len); - - /* We are done with this dgram */ - if (pkt_off + pkt_len == dgram_len) { - pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); - list_del(&dgram_skb->list); - kfree(dgram_skb); - sk_receive_skb(sk, skb, 0); - } - virtio_transport_free_pkt(pkt); - return; - -drop: - if (dgram_skb) { - list_del(&dgram_skb->list); - kfree(dgram_skb); - kfree_skb(skb); - sock_put(sk); - } - virtio_transport_free_pkt(pkt); -} - -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - int noblock; - int err; - int dgram_len; - - noblock = flags & MSG_DONTWAIT; - - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) - return -EOPNOTSUPP; - - /* Retrieve the head sk_buff from the socket's receive queue. */ - err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; - - hdr = (struct virtio_vsock_hdr *)skb->data; - if (!hdr) - goto out; - - dgram_len = le32_to_cpu(hdr->len) >> 16; - /* Place the datagram payload in the user's iovec. */ - err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); - if (err) - goto out; - - if (msg->msg_name) { - /* Provide the address of the sender. */ - DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); - vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); - msg->msg_namelen = sizeof(*vm_addr); - } - err = dgram_len; - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); - - pr_debug("%s:done, recved =%d\n", __func__, dgram_len); -out: - skb_free_datagram(&vsk->sk, skb); - return err; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->rx_lock); - bytes = trans->rx_bytes; - mutex_unlock(&trans->rx_lock); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); - -static s64 virtio_transport_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (bytes < 0) - bytes = 0; - - return bytes; -} - -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->tx_lock); - bytes = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: bytes=%lld\n", __func__, bytes); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk) -{ - struct virtio_transport *trans; - - trans = kzalloc(sizeof(*trans), GFP_KERNEL); - if (!trans) - return -ENOMEM; - - vsk->trans = trans; - trans->vsk = vsk; - if (psk) { - struct virtio_transport *ptrans = psk->trans; - trans->buf_size = ptrans->buf_size; - trans->buf_size_min = ptrans->buf_size_min; - trans->buf_size_max = ptrans->buf_size_max; - trans->peer_buf_alloc = ptrans->peer_buf_alloc; - } else { - trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; - trans->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; - trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; - } - - trans->buf_alloc = trans->buf_size; - - pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); - - mutex_init(&trans->rx_lock); - mutex_init(&trans->tx_lock); - INIT_LIST_HEAD(&trans->rx_queue); - INIT_LIST_HEAD(&trans->incomplete_dgrams); - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); - -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); - -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_min; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); - -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_max; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); - -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size_min) - trans->buf_size_min = val; - if (val > trans->buf_size_max) - trans->buf_size_max = val; - trans->buf_size = val; - trans->buf_alloc = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); - -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val > trans->buf_size) - trans->buf_size = val; - trans->buf_size_min = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); - -void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size) - trans->buf_size = val; - trans->buf_size_max = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); - -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now) -{ - if (vsock_stream_has_data(vsk)) - *data_ready_now = true; - else - *data_ready_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); - -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_avail_now) -{ - s64 free_space; - - free_space = vsock_stream_has_space(vsk); - if (free_space > 0) - *space_avail_now = true; - else if (free_space == 0) - *space_avail_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); - -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); - -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); - -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); - -int virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); - -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); - -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); - -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); - -bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); - -bool virtio_transport_stream_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); - -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr) -{ - return vsock_bind_dgram_generic(vsk, addr); -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); - -bool virtio_transport_dgram_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); - -int virtio_transport_connect(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_REQUEST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s: vsk=%p send_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_connect); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_SHUTDOWN, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = (mode & RCV_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | - (mode & SEND_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_SEND : 0), - }; - - pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_shutdown); - -void virtio_transport_release(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct sock *sk = &vsk->sk; - struct dgram_skb *dgram_skb; - struct dgram_skb *dgram_skb_tmp; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - - /* Tell other side to terminate connection */ - if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { - virtio_transport_shutdown(vsk, SHUTDOWN_MASK); - } - - /* Free incomplete dgrams */ - lock_sock(sk); - list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, - &trans->incomplete_dgrams, list) { - list_del(&dgram_skb->list); - kfree_skb(dgram_skb->skb); - kfree(dgram_skb); - sock_put(sk); /* held in virtio_transport_recv_dgram() */ - } - release_sock(sk); -} -EXPORT_SYMBOL_GPL(virtio_transport_release); - -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t dgram_len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_DGRAM, - .msg = msg, - }; - size_t total_written = 0, pkt_off = 0, written; - u16 dgram_id; - - /* The max size of a single dgram we support is 64KB */ - if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) - return -EMSGSIZE; - - info.dgram_len = dgram_len; - vsk->remote_addr = *remote_addr; - - dgram_id = trans->dgram_id++; - - /* TODO: To optimize, if we have enough credit to send the pkt already, - * do not ask the peer to send credit to use */ - virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); - - while (total_written < dgram_len) { - info.pkt_len = dgram_len - total_written; - info.flags = dgram_id << 16 | pkt_off; - written = trans->ops->send_pkt(vsk, &info); - if (written < 0) - return -ENOMEM; - if (written == 0) { - /* TODO: if written = 0, we need a sleep & wakeup - * instead of sleep */ - pr_debug("%s: SHOULD WAIT written==0", __func__); - msleep(10); - } - total_written += written; - pkt_off += written; - pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", - __func__, dgram_id, dgram_len, pkt_off, total_written, written); - } - - return dgram_len; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .msg = msg, - .pkt_len = len, - }; - - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); - -void virtio_transport_destruct(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - kfree(trans); -} -EXPORT_SYMBOL_GPL(virtio_transport_destruct); - -static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_ACK, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = cpu_to_le32(cookie), - }; - - pr_debug("%s: sk=%p send_offer\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_reset(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s\n", __func__); - - /* Send RST only if the original pkt is not a RST pkt */ - if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) - return 0; - - return trans->ops->send_pkt(vsk, &info); -} - -static int -virtio_transport_recv_connecting(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - int err; - int skerr; - u32 cookie; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RESPONSE: - cookie = le32_to_cpu(pkt->hdr.flags); - pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); - err = virtio_transport_send_ack(vsk, cookie); - if (err < 0) { - skerr = -err; - goto destroy; - } - sk->sk_state = SS_CONNECTED; - sk->sk_socket->state = SS_CONNECTED; - vsock_insert_connected(vsk); - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_INVALID: - pr_debug("%s: got invalid\n", __func__); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - skerr = ECONNRESET; - err = 0; - goto destroy; - default: - pr_debug("%s: got def\n", __func__); - skerr = EPROTO; - err = -EINVAL; - goto destroy; - } - return 0; - -destroy: - virtio_transport_send_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = skerr; - sk->sk_error_report(sk); - return err; -} - -static int -virtio_transport_recv_connected(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - int err = 0; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - pkt->trans = trans; - - mutex_lock(&trans->rx_lock); - virtio_transport_inc_rx_pkt(pkt); - list_add_tail(&pkt->list, &trans->rx_queue); - mutex_unlock(&trans->rx_lock); - - sk->sk_data_ready(sk); - return err; - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - sk->sk_write_space(sk); - break; - case VIRTIO_VSOCK_OP_SHUTDOWN: - pr_debug("%s: got shutdown\n", __func__); - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags)) - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; - sk->sk_state_change(sk); - break; - default: - err = -EINVAL; - break; - } - - virtio_transport_free_pkt(pkt); - return err; -} - -static int -virtio_transport_send_response(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RESPONSE, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .remote_cid = le32_to_cpu(pkt->hdr.src_cid), - .remote_port = le32_to_cpu(pkt->hdr.src_port), - }; - u32 cookie; - - cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60)); - info.flags = cpu_to_le32(cookie); - - pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); - - return trans->ops->send_pkt(vsk, &info); -} - -/* Handle server socket */ -static int -virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct vsock_sock *vpending; - struct sock *pending; - int err; - u32 cookie; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_REQUEST: - err = virtio_transport_send_response(vsk, pkt); - if (err < 0) { - // FIXME vsk should be vpending - virtio_transport_send_reset(vsk, pkt); - return err; - } - break; - case VIRTIO_VSOCK_OP_ACK: - cookie = le32_to_cpu(pkt->hdr.flags); - err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60), - le32_to_cpu(pkt->hdr.flags), - VSOCK_TIMEOUT_INIT); - pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); - if (err) - return err; - - /* So no pending socket are responsible for this pkt, create one */ - pr_debug("%s: create pending\n", __func__); - pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type, 0); - if (!pending) { - virtio_transport_send_reset(vsk, pkt); - return -ENOMEM; - } - sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; - - vpending = vsock_sk(pending); - vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port)); - vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port)); - vsock_add_pending(sk, pending); - - pr_debug("%s: get pending\n", __func__); - pending = virtio_transport_get_pending(sk, pkt); - vpending = vsock_sk(pending); - lock_sock(pending); - switch (pending->sk_state) { - case SS_CONNECTING: - if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { - pr_debug("%s: op=%d != OP_ACK\n", __func__, - le16_to_cpu(pkt->hdr.op)); - virtio_transport_send_reset(vpending, pkt); - pending->sk_err = EPROTO; - pending->sk_state = SS_UNCONNECTED; - sock_put(pending); - } else { - pending->sk_state = SS_CONNECTED; - vsock_insert_connected(vpending); - - vsock_remove_pending(sk, pending); - vsock_enqueue_accept(sk, pending); - - sk->sk_data_ready(sk); - } - err = 0; - break; - default: - pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, - sk->sk_ack_backlog); - virtio_transport_send_reset(vpending, pkt); - err = -EINVAL; - break; - } - if (err < 0) - vsock_remove_pending(sk, pending); - release_sock(pending); - - /* Release refcnt obtained in virtio_transport_get_pending */ - sock_put(pending); - break; - default: - break; - } - - return 0; -} - -static void virtio_transport_space_update(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - bool space_available; - - /* buf_alloc and fwd_cnt is always included in the hdr */ - mutex_lock(&trans->tx_lock); - trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); - trans->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); - space_available = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - if (space_available) - sk->sk_write_space(sk); -} - -/* We are under the virtio-vsock's vsock->rx_lock or - * vhost-vsock's vq->mutex lock */ -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans; - struct sockaddr_vm src, dst; - struct vsock_sock *vsk; - struct sock *sk; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - virtio_vsock_dumppkt(__func__, pkt); - - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { - sk = vsock_find_unbound_socket(&dst); - if (!sk) - goto free_pkt; - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_CREDIT_REQUEST: - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, - &pkt->hdr); - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_dgram(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of - * the unbound list. - */ - sock_put(sk); - return; - } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { - /* The socket must be in connected or bound table - * otherwise send reset back - */ - sk = vsock_find_connected_socket(&src, &dst); - if (!sk) { - sk = vsock_find_bound_socket(&dst); - if (!sk) { - pr_debug("%s: can not find bound_socket\n", __func__); - virtio_vsock_dumppkt(__func__, pkt); - /* Ignore this pkt instead of sending reset back */ - /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ - goto free_pkt; - } - } - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (sk->sk_state) { - case VSOCK_SS_LISTEN: - virtio_transport_recv_listen(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTING: - virtio_transport_recv_connecting(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTED: - virtio_transport_recv_connected(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of the - * bound or connected list. - */ - sock_put(sk); - } - return; - -free_pkt: - virtio_transport_free_pkt(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); - -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -{ - kfree(pkt->buf); - kfree(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); - -static int __init virtio_vsock_common_init(void) -{ - get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); - return 0; -} - -static void __exit virtio_vsock_common_exit(void) -{ -} - -module_init(virtio_vsock_common_init); -module_exit(virtio_vsock_common_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.1 From 297dbde19cf6a0ccb6fd4396c6220a5912ed61e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:51 -0500 Subject: netprio_cgroup: limit the maximum css->id to USHRT_MAX netprio builds per-netdev contiguous priomap array which is indexed by css->id. The array is allocated using kzalloc() effectively limiting the maximum ID supported to some thousand range. This patch caps the maximum supported css->id to USHRT_MAX which should be way above what is actually useable. This allows reducing sock->sk_cgrp_prioidx to u16 from u32. The freed up part will be used to overload the cgroup related fields. sock->sk_cgrp_prioidx's position is swapped with sk_mark so that the two cgroup related fields are adjacent. Signed-off-by: Tejun Heo Acked-by: Daniel Wagner Cc: Daniel Borkmann CC: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index cbd0a19..2b9159b 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -27,6 +27,12 @@ #include +/* + * netprio allocates per-net_device priomap array which is indexed by + * css->id. Limiting css ID to 16bits doesn't lose anything. + */ +#define NETPRIO_ID_MAX USHRT_MAX + #define PRIOMAP_MIN_SZ 128 /* @@ -144,6 +150,9 @@ static int cgrp_css_online(struct cgroup_subsys_state *css) struct net_device *dev; int ret = 0; + if (css->id > NETPRIO_ID_MAX) + return -ENOSPC; + if (!parent_css) return 0; -- cgit v1.1 From 2a56a1fec290bf0bc4676bbf4efdb3744953a3e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:52 -0500 Subject: net: wrap sock->sk_cgrp_prioidx and ->sk_classid inside a struct Introduce sock->sk_cgrp_data which is a struct sock_cgroup_data. ->sk_cgroup_prioidx and ->sk_classid are moved into it. The struct and its accessors are defined in cgroup-defs.h. This is to prepare for overloading the fields with a cgroup pointer. This patch mostly performs equivalent conversions but the followings are noteworthy. * Equality test before updating classid is removed from sock_update_classid(). This shouldn't make any noticeable difference and a similar test will be implemented on the helper side later. * sock_update_netprioidx() now takes struct sock_cgroup_data and can be moved to netprio_cgroup.h without causing include dependency loop. Moved. * The dummy version of sock_update_netprioidx() converted to a static inline function while at it. Signed-off-by: Tejun Heo Signed-off-by: David S. Miller --- net/Kconfig | 6 ++++++ net/core/dev.c | 3 ++- net/core/netclassid_cgroup.c | 4 ++-- net/core/netprio_cgroup.c | 3 ++- net/core/scm.c | 4 ++-- net/core/sock.c | 15 ++------------- net/netfilter/nft_meta.c | 2 +- net/netfilter/xt_cgroup.c | 3 ++- 8 files changed, 19 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 127da94..11f8c22 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -250,9 +250,14 @@ config XPS depends on SMP default y +config SOCK_CGROUP_DATA + bool + default n + config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis. @@ -260,6 +265,7 @@ config CGROUP_NET_PRIO config CGROUP_NET_CLASSID bool "Network classid cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching. diff --git a/net/core/dev.c b/net/core/dev.c index e5c3954..8f705fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2929,7 +2929,8 @@ static void skb_update_prio(struct sk_buff *skb) struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); if (!skb->priority && skb->sk && map) { - unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + unsigned int prioidx = + sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); if (prioidx < map->priomap_len) skb->priority = map->priomap[prioidx]; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 2e4df84..e60ded4 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -62,8 +62,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2b9159b..de42aa7 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -223,7 +223,8 @@ static int update_netprio(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b..14596fb 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -289,8 +289,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk); - sock_update_classid(sock->sk); + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 7965ef4..947741d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1393,17 +1393,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) -void sock_update_netprioidx(struct sock *sk) -{ - if (in_interrupt()) - return; - - sk->sk_cgrp_prioidx = task_netprioidx(current); -} -EXPORT_SYMBOL_GPL(sock_update_netprioidx); -#endif - /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1432,8 +1421,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); } return sk; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d..1915cab 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -174,7 +174,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) goto err; - *dest = sk->sk_classid; + *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif default: diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a1d126f..54eaeb4 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -42,7 +42,8 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; - return (info->id == skb->sk->sk_classid) ^ info->invert; + return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ + info->invert; } static struct xt_match cgroup_mt_reg __read_mostly = { -- cgit v1.1 From bd1060a1d67128bb8fbe2e1384c518912cbe54e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:53 -0500 Subject: sock, cgroup: add sock->sk_cgroup In cgroup v1, dealing with cgroup membership was difficult because the number of membership associations was unbound. As a result, cgroup v1 grew several controllers whose primary purpose is either tagging membership or pull in configuration knobs from other subsystems so that cgroup membership test can be avoided. net_cls and net_prio controllers are examples of the latter. They allow configuring network-specific attributes from cgroup side so that network subsystem can avoid testing cgroup membership; unfortunately, these are not only cumbersome but also problematic. Both net_cls and net_prio aren't properly hierarchical. Both inherit configuration from the parent on creation but there's no interaction afterwards. An ancestor doesn't restrict the behavior in its subtree in anyway and configuration changes aren't propagated downwards. Especially when combined with cgroup delegation, this is problematic because delegatees can mess up whatever network configuration implemented at the system level. net_prio would allow the delegatees to set whatever priority value regardless of CAP_NET_ADMIN and net_cls the same for classid. While it is possible to solve these issues from controller side by implementing hierarchical allowable ranges in both controllers, it would involve quite a bit of complexity in the controllers and further obfuscate network configuration as it becomes even more difficult to tell what's actually being configured looking from the network side. While not much can be done for v1 at this point, as membership handling is sane on cgroup v2, it'd be better to make cgroup matching behave like other network matches and classifiers than introducing further complications. In preparation, this patch updates sock->sk_cgrp_data handling so that it points to the v2 cgroup that sock was created in until either net_prio or net_cls is used. Once either of the two is used, sock->sk_cgrp_data reverts to its previous role of carrying prioidx and classid. This is to avoid adding yet another cgroup related field to struct sock. As the mode switching can happen at most once per boot, the switching mechanism is aimed at lowering hot path overhead. It may leak a finite, likely small, number of cgroup refs and report spurious prioidx or classid on switching; however, dynamic updates of prioidx and classid have always been racy and lossy - socks between creation and fd installation are never updated, config changes don't update existing sockets at all, and prioidx may index with dead and recycled cgroup IDs. Non-critical inaccuracies from small race windows won't make any noticeable difference. This patch doesn't make use of the pointer yet. The following patch will implement netfilter match for cgroup2 membership. v2: Use sock_cgroup_data to avoid inflating struct sock w/ another cgroup specific field. v3: Add comments explaining why sock_data_prioidx() and sock_data_classid() use different fallback values. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Signed-off-by: David S. Miller --- net/core/netclassid_cgroup.c | 7 ++++++- net/core/netprio_cgroup.c | 7 ++++++- net/core/sock.c | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index e60ded4..04257a0 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -61,9 +61,12 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); - if (sock) + if (sock) { + spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } @@ -98,6 +101,8 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, { struct cgroup_cls_state *cs = css_cls_state(css); + cgroup_sk_alloc_disable(); + cs->classid = (u32)value; update_classid(css, (void *)(unsigned long)cs->classid); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index de42aa7..053d60c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -209,6 +209,8 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; + cgroup_sk_alloc_disable(); + rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -222,9 +224,12 @@ static int update_netprio(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); - if (sock) + if (sock) { + spin_lock(&cgroup_sk_update_lock); sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); + spin_unlock(&cgroup_sk_update_lock); + } return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 947741d..1278d7b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1363,6 +1363,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); + cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1385,6 +1386,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) owner = prot->owner; slab = prot->slab; + cgroup_sk_free(&sk->sk_cgrp_data); security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); -- cgit v1.1 From 33d5a7b14bfd02e60af9d223db8dfff0cbcabe6b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:04 +0100 Subject: netfilter: nf_tables: extend tracing infrastructure nft monitor mode can then decode and display this trace data. Parts of LL/Network/Transport headers are provided as separate attributes. Otherwise, printing IP address data becomes virtually impossible for userspace since in the case of the netdev family we really don't want userspace to have to know all the possible link layer types and/or sizes just to display/print an ip address. We also don't want userspace to have to follow ipv6 header chains to get the s/dport info, the kernel already did this work for us. To avoid bloating nft_do_chain all data required for tracing is encapsulated in nft_traceinfo. The structure is initialized unconditionally(!) for each nft_do_chain invocation. This unconditionall call will be moved under a static key in a followup patch. With lots of help from Patrick McHardy and Pablo Neira. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 12 +- net/netfilter/nf_tables_core.c | 45 +++++-- net/netfilter/nf_tables_trace.c | 271 ++++++++++++++++++++++++++++++++++++++++ net/netfilter/nfnetlink.c | 1 + 5 files changed, 312 insertions(+), 19 deletions(-) create mode 100644 net/netfilter/nf_tables_trace.c (limited to 'net') diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7638c36..2293484 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 93cc473..c4969a0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4446,22 +4446,22 @@ static void nft_verdict_uninit(const struct nft_data *data) } } -static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + nest = nla_nest_start(skb, type); if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; - switch (data->verdict.code) { + switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, - data->verdict.chain->name)) + v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4572,7 +4572,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: - err = nft_verdict_dump(skb, data); + err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f3695a4..2395de7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,22 +44,36 @@ static struct nf_loginfo trace_loginfo = { }, }; -static void __nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static noinline void __nft_trace_packet(struct nft_traceinfo *info, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { + const struct nft_pktinfo *pkt = info->pkt; + + if (!pkt->skb->nf_trace) + return; + + info->chain = chain; + info->type = type; + + nft_trace_notify(info); + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, +static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + const struct nft_rule *rule, + int rulenum, + enum nft_trace_types type) { - if (unlikely(pkt->skb->nf_trace)) - __nft_trace_packet(pkt, chain, rulenum, type); + if (unlikely(info->trace)) { + info->rule = rule; + __nft_trace_packet(info, chain, rulenum, type); + } } static void nft_cmp_fast_eval(const struct nft_expr *expr, @@ -121,7 +135,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); + struct nft_traceinfo info; + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); @@ -151,7 +167,8 @@ next_rule: regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); continue; } break; @@ -161,7 +178,8 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -174,7 +192,8 @@ next_rule: stackptr++; /* fall through */ case NFT_GOTO: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; @@ -182,7 +201,8 @@ next_rule: rulenum++; /* fall through */ case NFT_RETURN: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -196,7 +216,8 @@ next_rule: goto next_rule; } - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(&info, basechain, NULL, -1, + NFT_TRACETYPE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c new file mode 100644 index 0000000..36fd7ad --- /dev/null +++ b/net/netfilter/nf_tables_trace.c @@ -0,0 +1,271 @@ +/* + * (C) 2015 Red Hat GmbH + * Author: Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_TRACETYPE_LL_HSIZE 20 +#define NFT_TRACETYPE_NETWORK_HSIZE 40 +#define NFT_TRACETYPE_TRANSPORT_HSIZE 20 + +static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) +{ + __be32 id; + + /* using skb address as ID results in a limited number of + * values (and quick reuse). + * + * So we attempt to use as many skb members that will not + * change while skb is with netfilter. + */ + id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), + skb->skb_iif); + + return nla_put_be32(nlskb, NFTA_TRACE_ID, id); +} + +static int trace_fill_header(struct sk_buff *nlskb, u16 type, + const struct sk_buff *skb, + int off, unsigned int len) +{ + struct nlattr *nla; + + if (len == 0) + return 0; + + nla = nla_reserve(nlskb, type, len); + if (!nla || skb_copy_bits(skb, off, nla_data(nla), len)) + return -1; + + return 0; +} + +static int nf_trace_fill_ll_header(struct sk_buff *nlskb, + const struct sk_buff *skb) +{ + struct vlan_ethhdr veth; + int off; + + BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); + + off = skb_mac_header(skb) - skb->data; + if (off != -ETH_HLEN) + return -1; + + if (skb_copy_bits(skb, off, &veth, ETH_HLEN)) + return -1; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth); +} + +static int nf_trace_fill_dev_info(struct sk_buff *nlskb, + const struct net_device *indev, + const struct net_device *outdev) +{ + if (indev) { + if (nla_put_be32(nlskb, NFTA_TRACE_IIF, + htonl(indev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, + htons(indev->type))) + return -1; + } + + if (outdev) { + if (nla_put_be32(nlskb, NFTA_TRACE_OIF, + htonl(outdev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, + htons(outdev->type))) + return -1; + } + + return 0; +} + +static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, + const struct nft_pktinfo *pkt) +{ + const struct sk_buff *skb = pkt->skb; + unsigned int len = min_t(unsigned int, + pkt->xt.thoff - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); + int off = skb_network_offset(skb); + + if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) + return -1; + + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_get(skb)) + return nf_trace_fill_ll_header(nlskb, skb); + + off = skb_mac_header(skb) - skb->data; + len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); + return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, + skb, off, len); +} + +static int nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_traceinfo *info) +{ + if (!info->rule) + return 0; + + /* a continue verdict with ->type == RETURN means that this is + * an implicit return (end of chain reached). + * + * Since no rule matched, the ->rule pointer is invalid. + */ + if (info->type == NFT_TRACETYPE_RETURN && + info->verdict->code == NFT_CONTINUE) + return 0; + + return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, + cpu_to_be64(info->rule->handle)); +} + +void nft_trace_notify(struct nft_traceinfo *info) +{ + const struct nft_pktinfo *pkt = info->pkt; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct sk_buff *skb; + unsigned int size; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + + if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + return; + + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + + nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size(sizeof(__be32)) + /* trace type */ + nla_total_size(0) + /* VERDICT, nested */ + nla_total_size(sizeof(u32)) + /* verdict code */ + nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ + nla_total_size(sizeof(u32)) + /* id */ + nla_total_size(NFT_TRACETYPE_LL_HSIZE) + + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + + nla_total_size(sizeof(u32)) + /* iif */ + nla_total_size(sizeof(__be16)) + /* iiftype */ + nla_total_size(sizeof(u32)) + /* oif */ + nla_total_size(sizeof(__be16)) + /* oiftype */ + nla_total_size(sizeof(u32)) + /* mark */ + nla_total_size(sizeof(u32)) + /* nfproto */ + nla_total_size(sizeof(u32)); /* policy */ + + skb = nlmsg_new(size, GFP_ATOMIC); + if (!skb) + return; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + if (!nlh) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = info->basechain->type->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) + goto nla_put_failure; + + if (trace_fill_id(skb, pkt->skb)) + goto nla_put_failure; + + if (info->chain) { + if (nla_put_string(skb, NFTA_TRACE_CHAIN, + info->chain->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_TRACE_TABLE, + info->chain->table->name)) + goto nla_put_failure; + } + + if (nf_trace_fill_rule_info(skb, info)) + goto nla_put_failure; + + switch (info->type) { + case NFT_TRACETYPE_UNSPEC: + case __NFT_TRACETYPE_MAX: + break; + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + goto nla_put_failure; + break; + case NFT_TRACETYPE_POLICY: + if (nla_put_be32(skb, NFTA_TRACE_POLICY, + info->basechain->policy)) + goto nla_put_failure; + break; + } + + if (pkt->skb->mark && + nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + goto nla_put_failure; + + if (!info->packet_dumped) { + if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + goto nla_put_failure; + + if (nf_trace_fill_pkt_info(skb, pkt)) + goto nla_put_failure; + info->packet_dumped = true; + } + + nlmsg_end(skb, nlh); + nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + return; + + nla_put_failure: + WARN_ON_ONCE(1); + kfree_skb(skb); +} + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *chain) +{ + info->basechain = nft_base_chain(chain); + info->trace = true; + info->packet_dumped = false; + info->pkt = pkt; + info->verdict = verdict; +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 46453ab..28591fa 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -49,6 +49,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, + [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; void nfnl_lock(__u8 subsys_id) -- cgit v1.1 From e639f7ab079b5256660018511d87aa34b54f1a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:05 +0100 Subject: netfilter: nf_tables: wrap tracing with a static key Only needed when meta nftrace rule(s) were added. The assumption is that no such rules are active, so the call to nft_trace_init is "never" needed. When nftrace rules are active, we always call the nft_trace_* functions, but will only send netlink messages when all of the following are true: - traceinfo structure was initialised - skb->nf_trace == 1 - at least one subscriber to trace group. Adding an extra conditional (static_branch ... && skb->nf_trace) nft_trace_init( ..) Is possible but results in a larger nft_do_chain footprint. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_meta_bridge.c | 1 + net/netfilter/nf_tables_core.c | 9 ++++++--- net/netfilter/nf_tables_trace.c | 4 ++++ net/netfilter/nft_meta.c | 16 ++++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index a21269b..4b901d9 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2395de7..67fa41d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, { const struct nft_pktinfo *pkt = info->pkt; - if (!pkt->skb->nf_trace) + if (!info->trace || !pkt->skb->nf_trace) return; info->chain = chain; @@ -70,7 +71,7 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, int rulenum, enum nft_trace_types type) { - if (unlikely(info->trace)) { + if (static_branch_unlikely(&nft_trace_enabled)) { info->rule = rule; __nft_trace_packet(info, chain, rulenum, type); } @@ -137,7 +138,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; - nft_trace_init(&info, pkt, ®s.verdict, basechain); + info.trace = false; + if (static_branch_unlikely(&nft_trace_enabled)) + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 36fd7ad..e9e959f 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,9 @@ #define NFT_TRACETYPE_NETWORK_HSIZE 40 #define NFT_TRACETYPE_TRANSPORT_HSIZE 20 +DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); +EXPORT_SYMBOL_GPL(nft_trace_enabled); + static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) { __be32 id; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d..85a465b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include /* for TCP_TIME_WAIT */ #include +#include #include void nft_meta_get_eval(const struct nft_expr *expr, @@ -297,6 +299,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_META_NFTRACE) + static_branch_inc(&nft_trace_enabled); + return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); @@ -334,6 +339,16 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_meta_set_dump); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (priv->key == NFT_META_NFTRACE) + static_branch_dec(&nft_trace_enabled); +} +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); + static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -348,6 +363,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; -- cgit v1.1 From e97ac12859dbf4d3ee0eddb9798867541d1d1e1e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Dec 2015 23:35:19 +0100 Subject: netfilter: ipv6: nf_defrag: fix NULL deref panic Valdis reports NULL deref in nf_ct_frag6_gather. Problem is bogus use of skb_queue_walk() -- we miss first skb in the list since we start with head->next instead of head. In case the element we're looking for was head->next we won't find a result and then trip over NULL iter. (defrag uses plain NULL-terminated list rather than one terminated by head-of-list-pointer, which is what skb_queue_walk expects). Fixes: 029f7f3b8701cc7a ("netfilter: ipv6: nf_defrag: avoid/free clone operations") Reported-by: Valdis Kletnieks Tested-by: Valdis Kletnieks Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 912bc3a..6e5f0e0d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -441,11 +441,14 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic return false; fp->next = prev->next; - skb_queue_walk(head, iter) { - if (iter->next != prev) - continue; - iter->next = fp; - break; + + iter = head; + while (iter) { + if (iter->next == prev) { + iter->next = fp; + break; + } + iter = iter->next; } skb_morph(prev, head); -- cgit v1.1 From 23509fcd4ec5eadcca7a958b354f79dedc2765cc Mon Sep 17 00:00:00 2001 From: "Rosen, Rami" Date: Tue, 8 Dec 2015 07:09:24 -0500 Subject: netfilter: nfnetlink_log: Change setter functions to be void Change return type of nfulnl_set_timeout() and nfulnl_set_qthresh() to be void. This patch changes the return type of the static methods nfulnl_set_timeout() and nfulnl_set_qthresh() to be void, as there is no justification and no need for these methods to return int. Signed-off-by: Rami Rosen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index dea4676..70b6bd3 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) return status; } -static int +static void nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) { spin_lock_bh(&inst->lock); inst->flushtimeout = timeout; spin_unlock_bh(&inst->lock); - - return 0; } -static int +static void nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) { spin_lock_bh(&inst->lock); inst->qthreshold = qthresh; spin_unlock_bh(&inst->lock); - - return 0; } static int -- cgit v1.1 From 9fb0b519c7e094e741a3fc3fd4d854a8bc74b6dc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Dec 2015 16:31:21 +0100 Subject: netfilter: nf_tables: fix nf_log_trace based tracing nf_log_trace() outputs bogus 'TRACE:' strings because I forgot to update the comments array. Fixes: 33d5a7b14bfd0 ("netfilter: nf_tables: extend tracing infrastructure") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67fa41d..e9f8dff 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -23,16 +23,10 @@ #include #include -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", +static const char *const comments[__NFT_TRACETYPE_MAX] = { + [NFT_TRACETYPE_POLICY] = "policy", + [NFT_TRACETYPE_RETURN] = "return", + [NFT_TRACETYPE_RULE] = "rule", }; static struct nf_loginfo trace_loginfo = { @@ -47,7 +41,7 @@ static struct nf_loginfo trace_loginfo = { static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + int rulenum, enum nft_trace_types type) { const struct nft_pktinfo *pkt = info->pkt; -- cgit v1.1 From 01b1cb87d37fb19cdaa5e7002416fdde156873d0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 16 Nov 2015 12:52:21 +0200 Subject: Bluetooth: Run page scan updates through hdev->req_workqueue Since Add/Remove Device perform the page scan updates independently from the HCI command completion we've introduced a potential race when multiple mgmt commands are queued. Doing the page scan updates through the req_workqueue ensures that the state changes are performed in a race-free manner. At the same time, to make the request helper more widely usable, extend it to also cover Inquiry Scan changes since those are behind the same HCI command. This is also reflected in the new name of the API as well as the work struct name. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/hci_request.c | 27 ++++++++++++++++++--------- net/bluetooth/hci_request.h | 8 ++++++-- net/bluetooth/mgmt.c | 16 ++++++++-------- 4 files changed, 34 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d57c11c..703e37f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2176,7 +2176,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); } /* Set packet type for incoming connection */ @@ -2362,7 +2362,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e639671..78c026b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -637,7 +637,7 @@ static bool disconnected_whitelist_entries(struct hci_dev *hdev) return false; } -void __hci_update_page_scan(struct hci_request *req) +void __hci_req_update_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 scan; @@ -657,22 +657,29 @@ void __hci_update_page_scan(struct hci_request *req) else scan = SCAN_DISABLED; - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) && + test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY)) + return; + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -void hci_update_page_scan(struct hci_dev *hdev) +static int update_scan(struct hci_request *req, unsigned long opt) { - struct hci_request req; + hci_dev_lock(req->hdev); + __hci_req_update_scan(req); + hci_dev_unlock(req->hdev); + return 0; +} - hci_req_init(&req, hdev); - __hci_update_page_scan(&req); - hci_req_run(&req, NULL); +static void scan_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, scan_update); + + hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } /* This function controls the background scanning based on hdev->pend_le_conns @@ -1270,6 +1277,7 @@ void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); + INIT_WORK(&hdev->scan_update, scan_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); } @@ -1280,6 +1288,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); + cancel_work_sync(&hdev->scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6b9e59f..cc82755 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -61,8 +61,12 @@ void hci_req_add_le_passive_scan(struct hci_request *req); /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); -void hci_update_page_scan(struct hci_dev *hdev); -void __hci_update_page_scan(struct hci_request *req); +static inline void hci_req_update_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->scan_update); +} + +void __hci_req_update_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3d9d2e4..0d20e13 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1810,7 +1810,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, * entries. */ hci_req_init(&req, hdev); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -2058,7 +2058,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -2092,7 +2092,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -5041,7 +5041,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. @@ -5927,7 +5927,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); goto added; } @@ -6024,7 +6024,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -6089,7 +6089,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -7397,7 +7397,7 @@ static int powered_update_hci(struct hci_dev *hdev) write_fast_connectable(&req, true); else write_fast_connectable(&req, false); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); update_class(&req); update_name(&req); update_eir(&req); -- cgit v1.1 From 196a5e97d13092f783e41001c1112d7f31518ea2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 18:55:44 +0200 Subject: Bluetooth: Move __hci_update_background_scan up in hci_request.c This way we avoid the need to do a forward declaration in later patches. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 146 ++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 78c026b..7c85435 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -346,6 +346,79 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, hci_req_add_ev(req, opcode, plen, param, 0); } +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. + */ +static void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_SETUP) || + hci_dev_test_flag(hdev, HCI_CONFIG) || + hci_dev_test_flag(hdev, HCI_AUTO_OFF) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + if (hci_lookup_le_connect(hdev)) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_cp_le_set_scan_enable cp; @@ -682,79 +755,6 @@ static void scan_update_work(struct work_struct *work) hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. - */ -static void __hci_update_background_scan(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - hci_dev_test_flag(hdev, HCI_SETUP) || - hci_dev_test_flag(hdev, HCI_CONFIG) || - hci_dev_test_flag(hdev, HCI_AUTO_OFF) || - hci_dev_test_flag(hdev, HCI_UNREGISTER)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only. - * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. - */ - hci_discovery_filter_clear(hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - hci_req_add_le_scan_disable(req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - if (hci_lookup_le_connect(hdev)) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). - */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); - - hci_req_add_le_passive_scan(req); - - BT_DBG("%s starting background scanning", hdev->name); - } -} - void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { -- cgit v1.1 From f22525700b2ae34eb97a29a91e2eee902062b484 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 18 Nov 2015 12:49:20 +0200 Subject: Bluetooth: Move advertising instance management to hci_request.c This paves the way for eventually performing advertising changes through the hdev->req_workqueue. Some new APIs need to be exposed from mgmt.c to hci_request.c and vice-versa, but many of them will go away once hdev->req_workqueue gets used. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 19 +- net/bluetooth/hci_event.c | 4 +- net/bluetooth/hci_request.c | 533 +++++++++++++++++++++++++++++++++++++++- net/bluetooth/hci_request.h | 14 ++ net/bluetooth/mgmt.c | 574 +++----------------------------------------- 6 files changed, 583 insertions(+), 563 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2d334e0..e260021 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -683,7 +683,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) /* Re-enable advertising in case this was a failed connection * attempt as a peripheral. */ - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); } static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 89af7e4..bab8958 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1549,11 +1549,6 @@ int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); - if (hdev->adv_instance_timeout) { - cancel_delayed_work_sync(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; - } - /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ @@ -1774,7 +1769,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - mgmt_update_adv_data(hdev); + hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); mgmt_new_settings(hdev); } @@ -2112,17 +2107,6 @@ static void hci_discov_off(struct work_struct *work) mgmt_discoverable_timeout(hdev); } -static void hci_adv_timeout_expire(struct work_struct *work) -{ - struct hci_dev *hdev; - - hdev = container_of(work, struct hci_dev, adv_instance_expire.work); - - BT_DBG("%s", hdev->name); - - mgmt_adv_timeout_expired(hdev); -} - void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -3003,7 +2987,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); - INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 703e37f..7554da5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1183,7 +1183,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, hci_discovery_set_state(hdev, DISCOVERY_STOPPED); else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && hdev->discovery.state == DISCOVERY_FINDING) - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); break; @@ -2401,7 +2401,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * is timed out due to Directed Advertising." */ if (type == LE_LINK) - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); unlock: hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7c85435..e6622bd1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -23,6 +23,7 @@ #include #include +#include #include "smp.h" #include "hci_request.h" @@ -580,6 +581,524 @@ void hci_req_add_le_passive_scan(struct hci_request *req) &enable_cp); } +static u8 get_current_adv_instance(struct hci_dev *hdev) +{ + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the advertising data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return hdev->cur_adv_instance; + + return 0x00; +} + +static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) +{ + u8 instance = get_current_adv_instance(hdev); + struct adv_info *adv_instance; + + /* Ignore instance 0 */ + if (instance == 0x00) + return 0; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + /* TODO: Take into account the "appearance" and "local-name" flags here. + * These are currently being ignored as they are not supported. + */ + return adv_instance->scan_rsp_len; +} + +void __hci_req_disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + struct adv_info *adv_instance; + + if (instance == 0x00) { + /* Instance 0 always manages the "Tx Power" and "Flags" + * fields + */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting + * corresponds to the "connectable" instance flag. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + return flags; + } + + adv_instance = hci_find_adv_instance(hdev, instance); + + /* Return 0 when we got an invalid instance identifier. */ + if (!adv_instance) + return 0; + + return adv_instance->flags; +} + +void __hci_req_enable_advertising(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_param cp; + u8 own_addr_type, enable = 0x01; + bool connectable; + u8 instance; + u32 flags; + + if (hci_conn_num(hdev, LE_LINK) > 0) + return; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(req); + + /* Clear the HCI_LE_ADV bit temporarily so that the + * hci_update_random_address knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + hci_dev_clear_flag(hdev, HCI_LE_ADV); + + instance = get_current_adv_instance(hdev); + flags = get_adv_instance_flags(hdev, instance); + + /* If the "connectable" instance flag was not set, then choose between + * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. + */ + connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || + mgmt_get_connectable(hdev); + + /* Set require_privacy to true only when non-connectable + * advertising is used. In that case it is fine to use a + * non-resolvable private address. + */ + if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); + cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); + + if (connectable) + cp.type = LE_ADV_IND; + else if (get_cur_adv_instance_scan_rsp_len(hdev)) + cp.type = LE_ADV_SCAN_IND; + else + cp.type = LE_ADV_NONCONN_IND; + + cp.own_address_type = own_addr_type; + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + u8 ad_len = 0; + size_t name_len; + + name_len = strlen(hdev->dev_name); + if (name_len > 0) { + size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + + if (name_len > max_len) { + name_len = max_len; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ad_len += (name_len + 2); + ptr += (name_len + 2); + } + + return ad_len; +} + +static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, + u8 *ptr) +{ + struct adv_info *adv_instance; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + /* TODO: Set the appropriate entries based on advertising instance flags + * here once flags other than 0 are supported. + */ + memcpy(ptr, adv_instance->scan_rsp_data, + adv_instance->scan_rsp_len); + + return adv_instance->scan_rsp_len; +} + +static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_rsp_data cp; + u8 len; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + memset(&cp, 0, sizeof(cp)); + + if (instance) + len = create_instance_scan_rsp_data(hdev, instance, cp.data); + else + len = create_default_scan_rsp_data(hdev, cp.data); + + if (hdev->scan_rsp_data_len == len && + !memcmp(cp.data, hdev->scan_rsp_data, len)) + return; + + memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); + hdev->scan_rsp_data_len = len; + + cp.length = len; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); +} + +void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) +{ + if (instance == HCI_ADV_CURRENT) + instance = get_current_adv_instance(req->hdev); + + update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); +} + +static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv_instance = NULL; + u8 ad_len = 0, flags = 0; + u32 instance_flags; + + /* Return 0 when the current instance identifier is invalid. */ + if (instance) { + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + } + + instance_flags = get_adv_instance_flags(hdev, instance); + + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; + + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; + + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. + */ + if (!flags) + flags |= mgmt_get_adv_discov_flags(hdev); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". + */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; + + ad_len += 3; + ptr += 3; + } + } + + if (adv_instance) { + memcpy(ptr, adv_instance->adv_data, + adv_instance->adv_data_len); + ad_len += adv_instance->adv_data_len; + ptr += adv_instance->adv_data_len; + } + + /* Provide Tx Power only if we can provide a valid value for it */ + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && + (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { + ptr[0] = 0x02; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8)hdev->adv_tx_power; + + ad_len += 3; + ptr += 3; + } + + return ad_len; +} + +static void update_inst_adv_data(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_data cp; + u8 len; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + memset(&cp, 0, sizeof(cp)); + + len = create_instance_adv_data(hdev, instance, cp.data); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); +} + +void __hci_req_update_adv_data(struct hci_request *req, int instance) +{ + if (instance == HCI_ADV_CURRENT) + instance = get_current_adv_instance(req->hdev); + + update_inst_adv_data(req, instance); +} + +int hci_req_update_adv_data(struct hci_dev *hdev, int instance) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_req_update_adv_data(&req, instance); + + return hci_req_run(&req, NULL); +} + +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + BT_DBG("%s status %u", hdev->name, status); +} + +void hci_req_reenable_advertising(struct hci_dev *hdev) +{ + struct hci_request req; + u8 instance; + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return; + + instance = get_current_adv_instance(hdev); + + hci_req_init(&req, hdev); + + if (instance) { + __hci_req_schedule_adv_instance(&req, instance, true); + } else { + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_enable_advertising(&req); + } + + hci_req_run(&req, adv_enable_complete); +} + +static void adv_timeout_expire(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance_expire.work); + + struct hci_request req; + u8 instance; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + hdev->adv_instance_timeout = 0; + + instance = get_current_adv_instance(hdev); + if (instance == 0x00) + goto unlock; + + hci_req_init(&req, hdev); + + hci_req_clear_adv_instance(hdev, &req, instance, false); + + if (list_empty(&hdev->adv_instances)) + __hci_req_disable_advertising(&req); + + if (!skb_queue_empty(&req.cmd_q)) + hci_req_run(&req, NULL); + +unlock: + hci_dev_unlock(hdev); +} + +int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, + bool force) +{ + struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance = NULL; + u16 timeout; + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return -EPERM; + + if (hdev->adv_instance_timeout) + return -EBUSY; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return -ENOENT; + + /* A zero timeout means unlimited advertising. As long as there is + * only one instance, duration should be ignored. We still set a timeout + * in case further instances are being added later on. + * + * If the remaining lifetime of the instance is more than the duration + * then the timeout corresponds to the duration, otherwise it will be + * reduced to the remaining instance lifetime. + */ + if (adv_instance->timeout == 0 || + adv_instance->duration <= adv_instance->remaining_time) + timeout = adv_instance->duration; + else + timeout = adv_instance->remaining_time; + + /* The remaining time is being reduced unless the instance is being + * advertised without time limit. + */ + if (adv_instance->timeout) + adv_instance->remaining_time = + adv_instance->remaining_time - timeout; + + hdev->adv_instance_timeout = timeout; + queue_delayed_work(hdev->req_workqueue, + &hdev->adv_instance_expire, + msecs_to_jiffies(timeout * 1000)); + + /* If we're just re-scheduling the same instance again then do not + * execute any HCI commands. This happens when a single instance is + * being advertised. + */ + if (!force && hdev->cur_adv_instance == instance && + hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + + hdev->cur_adv_instance = instance; + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + __hci_req_enable_advertising(req); + + return 0; +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +/* For a single instance: + * - force == true: The instance will be removed even when its remaining + * lifetime is not zero. + * - force == false: the instance will be deactivated but kept stored unless + * the remaining lifetime is zero. + * + * For instance == 0x00: + * - force == true: All instances will be removed regardless of their timeout + * setting. + * - force == false: Only instances that have a timeout will be removed. + */ +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, + u8 instance, bool force) +{ + struct adv_info *adv_instance, *n, *next_instance = NULL; + int err; + u8 rem_inst; + + /* Cancel any timeout concerning the removed instance(s). */ + if (!instance || hdev->cur_adv_instance == instance) + cancel_adv_timeout(hdev); + + /* Get the next instance to advertise BEFORE we remove + * the current one. This can be the same instance again + * if there is only one instance. + */ + if (instance && hdev->cur_adv_instance == instance) + next_instance = hci_get_next_instance(hdev, instance); + + if (instance == 0x00) { + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, + list) { + if (!(force || adv_instance->timeout)) + continue; + + rem_inst = adv_instance->instance; + err = hci_remove_adv_instance(hdev, rem_inst); + if (!err) + mgmt_advertising_removed(NULL, hdev, rem_inst); + } + hdev->cur_adv_instance = 0x00; + } else { + adv_instance = hci_find_adv_instance(hdev, instance); + + if (force || (adv_instance && adv_instance->timeout && + !adv_instance->remaining_time)) { + /* Don't advertise a removed instance. */ + if (next_instance && + next_instance->instance == instance) + next_instance = NULL; + + err = hci_remove_adv_instance(hdev, instance); + if (!err) + mgmt_advertising_removed(NULL, hdev, instance); + } + } + + if (list_empty(&hdev->adv_instances)) { + hdev->cur_adv_instance = 0x00; + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + } + + if (!req || !hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return; + + if (next_instance) + __hci_req_schedule_adv_instance(req, next_instance->instance, + false); +} + static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) { struct hci_dev *hdev = req->hdev; @@ -1031,14 +1550,6 @@ unlock: hci_dev_unlock(hdev); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - static void disable_advertising(struct hci_request *req) { u8 enable = 0x00; @@ -1280,6 +1791,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->scan_update, scan_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); + INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } void hci_request_cancel_all(struct hci_dev *hdev) @@ -1291,4 +1803,9 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); + + if (hdev->adv_instance_timeout) { + cancel_delayed_work_sync(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index cc82755..5358b1b 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -58,6 +58,20 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +#define HCI_ADV_CURRENT (-1) + +void hci_req_reenable_advertising(struct hci_dev *hdev); +void __hci_req_enable_advertising(struct hci_request *req); +void __hci_req_disable_advertising(struct hci_request *req); +void __hci_req_update_adv_data(struct hci_request *req, int instance); +int hci_req_update_adv_data(struct hci_dev *hdev, int instance); +void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance); + +int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, + bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, + u8 instance, bool force); + /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0d20e13..6d0f002 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -841,98 +841,7 @@ static struct mgmt_pending_cmd *pending_find_data(u16 opcode, return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data); } -static u8 get_current_adv_instance(struct hci_dev *hdev) -{ - /* The "Set Advertising" setting supersedes the "Add Advertising" - * setting. Here we set the advertising data based on which - * setting was set. When neither apply, default to the global settings, - * represented by instance "0". - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return hdev->cur_adv_instance; - - return 0x00; -} - -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) -{ - u8 ad_len = 0; - size_t name_len; - - name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - - if (name_len > max_len) { - name_len = max_len; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - ad_len += (name_len + 2); - ptr += (name_len + 2); - } - - return ad_len; -} - -static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, - u8 *ptr) -{ - struct adv_info *adv_instance; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. - */ - memcpy(ptr, adv_instance->scan_rsp_data, - adv_instance->scan_rsp_len); - - return adv_instance->scan_rsp_len; -} - -static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_rsp_data cp; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - memset(&cp, 0, sizeof(cp)); - - if (instance) - len = create_instance_scan_rsp_data(hdev, instance, cp.data); - else - len = create_default_scan_rsp_data(hdev, cp.data); - - if (hdev->scan_rsp_data_len == len && - !memcmp(cp.data, hdev->scan_rsp_data, len)) - return; - - memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); - hdev->scan_rsp_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); -} - -static void update_scan_rsp_data(struct hci_request *req) -{ - update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); -} - -static u8 get_adv_discov_flags(struct hci_dev *hdev) +u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -956,7 +865,7 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev) return 0; } -static bool get_connectable(struct hci_dev *hdev) +bool mgmt_get_connectable(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -973,163 +882,6 @@ static bool get_connectable(struct hci_dev *hdev) return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } -static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) -{ - u32 flags; - struct adv_info *adv_instance; - - if (instance == 0x00) { - /* Instance 0 always manages the "Tx Power" and "Flags" - * fields - */ - flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; - - /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting - * corresponds to the "connectable" instance flag. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) - flags |= MGMT_ADV_FLAG_CONNECTABLE; - - return flags; - } - - adv_instance = hci_find_adv_instance(hdev, instance); - - /* Return 0 when we got an invalid instance identifier. */ - if (!adv_instance) - return 0; - - return adv_instance->flags; -} - -static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) -{ - u8 instance = get_current_adv_instance(hdev); - struct adv_info *adv_instance; - - /* Ignore instance 0 */ - if (instance == 0x00) - return 0; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Take into account the "appearance" and "local-name" flags here. - * These are currently being ignored as they are not supported. - */ - return adv_instance->scan_rsp_len; -} - -static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) -{ - struct adv_info *adv_instance = NULL; - u8 ad_len = 0, flags = 0; - u32 instance_flags; - - /* Return 0 when the current instance identifier is invalid. */ - if (instance) { - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - } - - instance_flags = get_adv_instance_flags(hdev, instance); - - /* The Add Advertising command allows userspace to set both the general - * and limited discoverable flags. - */ - if (instance_flags & MGMT_ADV_FLAG_DISCOV) - flags |= LE_AD_GENERAL; - - if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) - flags |= LE_AD_LIMITED; - - if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { - /* If a discovery flag wasn't provided, simply use the global - * settings. - */ - if (!flags) - flags |= get_adv_discov_flags(hdev); - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - flags |= LE_AD_NO_BREDR; - - /* If flags would still be empty, then there is no need to - * include the "Flags" AD field". - */ - if (flags) { - ptr[0] = 0x02; - ptr[1] = EIR_FLAGS; - ptr[2] = flags; - - ad_len += 3; - ptr += 3; - } - } - - if (adv_instance) { - memcpy(ptr, adv_instance->adv_data, - adv_instance->adv_data_len); - ad_len += adv_instance->adv_data_len; - ptr += adv_instance->adv_data_len; - } - - /* Provide Tx Power only if we can provide a valid value for it */ - if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && - (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { - ptr[0] = 0x02; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8)hdev->adv_tx_power; - - ad_len += 3; - ptr += 3; - } - - return ad_len; -} - -static void update_inst_adv_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_adv_data cp; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - memset(&cp, 0, sizeof(cp)); - - len = create_instance_adv_data(hdev, instance, cp.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -} - -static void update_adv_data(struct hci_request *req) -{ - update_inst_adv_data(req, get_current_adv_instance(req->hdev)); -} - -int mgmt_update_adv_data(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_req_init(&req, hdev); - update_adv_data(&req); - - return hci_req_run(&req, NULL); -} - static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; @@ -1247,70 +999,6 @@ static void update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static void disable_advertising(struct hci_request *req) -{ - u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - -static void enable_advertising(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_adv_param cp; - u8 own_addr_type, enable = 0x01; - bool connectable; - u8 instance; - u32 flags; - - if (hci_conn_num(hdev, LE_LINK) > 0) - return; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(req); - - /* Clear the HCI_LE_ADV bit temporarily so that the - * hci_update_random_address knows that it's safe to go ahead - * and write a new random address. The flag will be set back on - * as soon as the SET_ADV_ENABLE HCI command completes. - */ - hci_dev_clear_flag(hdev, HCI_LE_ADV); - - instance = get_current_adv_instance(hdev); - flags = get_adv_instance_flags(hdev, instance); - - /* If the "connectable" instance flag was not set, then choose between - * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. - */ - connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || - get_connectable(hdev); - - /* Set require_privacy to true only when non-connectable - * advertising is used. In that case it is fine to use a - * non-resolvable private address. - */ - if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) - return; - - memset(&cp, 0, sizeof(cp)); - cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); - cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); - - if (connectable) - cp.type = LE_ADV_IND; - else if (get_cur_adv_instance_scan_rsp_len(hdev)) - cp.type = LE_ADV_SCAN_IND; - else - cp.type = LE_ADV_NONCONN_IND; - - cp.own_address_type = own_addr_type; - cp.channel_map = hdev->le_adv_channel_map; - - hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1346,10 +1034,11 @@ static void rpa_expired(struct work_struct *work) return; /* The generation of a new RPA and programming it into the - * controller happens in the enable_advertising() function. + * controller happens in the hci_req_enable_advertising() + * function. */ hci_req_init(&req, hdev); - enable_advertising(&req); + __hci_req_enable_advertising(&req); hci_req_run(&req, NULL); } @@ -1417,8 +1106,7 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static void advertising_added(struct sock *sk, struct hci_dev *hdev, - u8 instance) +void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { struct mgmt_ev_advertising_added ev; @@ -1427,8 +1115,8 @@ static void advertising_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk); } -static void advertising_removed(struct sock *sk, struct hci_dev *hdev, - u8 instance) +void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance) { struct mgmt_ev_advertising_removed ev; @@ -1437,65 +1125,6 @@ static void advertising_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk); } -static int schedule_adv_instance(struct hci_request *req, u8 instance, - bool force) { - struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance = NULL; - u16 timeout; - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - return -EPERM; - - if (hdev->adv_instance_timeout) - return -EBUSY; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return -ENOENT; - - /* A zero timeout means unlimited advertising. As long as there is - * only one instance, duration should be ignored. We still set a timeout - * in case further instances are being added later on. - * - * If the remaining lifetime of the instance is more than the duration - * then the timeout corresponds to the duration, otherwise it will be - * reduced to the remaining instance lifetime. - */ - if (adv_instance->timeout == 0 || - adv_instance->duration <= adv_instance->remaining_time) - timeout = adv_instance->duration; - else - timeout = adv_instance->remaining_time; - - /* The remaining time is being reduced unless the instance is being - * advertised without time limit. - */ - if (adv_instance->timeout) - adv_instance->remaining_time = - adv_instance->remaining_time - timeout; - - hdev->adv_instance_timeout = timeout; - queue_delayed_work(hdev->workqueue, - &hdev->adv_instance_expire, - msecs_to_jiffies(timeout * 1000)); - - /* If we're just re-scheduling the same instance again then do not - * execute any HCI commands. This happens when a single instance is - * being advertised. - */ - if (!force && hdev->cur_adv_instance == instance && - hci_dev_test_flag(hdev, HCI_LE_ADV)) - return 0; - - hdev->cur_adv_instance = instance; - update_adv_data(req); - update_scan_rsp_data(req); - enable_advertising(req); - - return 0; -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -1504,76 +1133,6 @@ static void cancel_adv_timeout(struct hci_dev *hdev) } } -/* For a single instance: - * - force == true: The instance will be removed even when its remaining - * lifetime is not zero. - * - force == false: the instance will be deactivated but kept stored unless - * the remaining lifetime is zero. - * - * For instance == 0x00: - * - force == true: All instances will be removed regardless of their timeout - * setting. - * - force == false: Only instances that have a timeout will be removed. - */ -static void clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) -{ - struct adv_info *adv_instance, *n, *next_instance = NULL; - int err; - u8 rem_inst; - - /* Cancel any timeout concerning the removed instance(s). */ - if (!instance || hdev->cur_adv_instance == instance) - cancel_adv_timeout(hdev); - - /* Get the next instance to advertise BEFORE we remove - * the current one. This can be the same instance again - * if there is only one instance. - */ - if (instance && hdev->cur_adv_instance == instance) - next_instance = hci_get_next_instance(hdev, instance); - - if (instance == 0x00) { - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, - list) { - if (!(force || adv_instance->timeout)) - continue; - - rem_inst = adv_instance->instance; - err = hci_remove_adv_instance(hdev, rem_inst); - if (!err) - advertising_removed(NULL, hdev, rem_inst); - } - hdev->cur_adv_instance = 0x00; - } else { - adv_instance = hci_find_adv_instance(hdev, instance); - - if (force || (adv_instance && adv_instance->timeout && - !adv_instance->remaining_time)) { - /* Don't advertise a removed instance. */ - if (next_instance && - next_instance->instance == instance) - next_instance = NULL; - - err = hci_remove_adv_instance(hdev, instance); - if (!err) - advertising_removed(NULL, hdev, instance); - } - } - - if (list_empty(&hdev->adv_instances)) { - hdev->cur_adv_instance = 0x00; - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - } - - if (!req || !hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return; - - if (next_instance) - schedule_adv_instance(req, next_instance->instance, false); -} - static int clean_up_hci_state(struct hci_dev *hdev) { struct hci_request req; @@ -1589,10 +1148,10 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); discov_stopped = hci_req_stop_discovery(&req); @@ -1975,7 +1534,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); update_ad: - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_discoverable_complete); if (err < 0) @@ -2060,7 +1619,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, new_settings(hdev, cmd->sk); hci_req_update_scan(hdev); if (discov_changed) - mgmt_update_adv_data(hdev); + hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); hci_update_background_scan(hdev); } @@ -2151,7 +1710,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { if (cp->val) { scan = SCAN_PAGE; @@ -2181,7 +1740,7 @@ no_scan_update: /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - enable_advertising(&req); + __hci_req_enable_advertising(&req); err = hci_req_run(&req, set_connectable_complete); if (err < 0) { @@ -2466,8 +2025,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct hci_request req; hci_req_init(&req, hdev); - update_adv_data(&req); - update_scan_rsp_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); hci_req_run(&req, NULL); hci_update_background_scan(hdev); } @@ -2518,7 +2077,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -2565,7 +2124,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_cp.simul = 0x00; } else { if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); } hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp), @@ -3856,7 +3415,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, * no need to udpate the advertising data here. */ if (lmp_le_capable(hdev)) - update_scan_rsp_data(&req); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_name_complete); if (err < 0) @@ -4600,7 +4159,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, hci_req_init(&req, hdev); - err = schedule_adv_instance(&req, instance, true); + err = __hci_req_schedule_adv_instance(&req, instance, true); if (!err) err = hci_req_run(&req, enable_advertising_instance); @@ -4697,11 +4256,11 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, * We cannot use update_[adv|scan_rsp]_data() here as the * HCI_ADVERTISING flag is not yet set. */ - update_inst_adv_data(&req, 0x00); - update_inst_scan_rsp_data(&req, 0x00); - enable_advertising(&req); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); + __hci_req_enable_advertising(&req); } else { - disable_advertising(&req); + __hci_req_disable_advertising(&req); } err = hci_req_run(&req, set_advertising_complete); @@ -5033,8 +4592,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto unlock; } - /* We need to flip the bit already here so that update_adv_data - * generates the correct flags. + /* We need to flip the bit already here so that + * hci_req_update_adv_data generates the correct flags. */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); @@ -5046,7 +4605,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Since only the advertising data flags will change, there * is no need to update the scan response data. */ - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_bredr_complete); if (err < 0) @@ -6583,7 +6142,7 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, rand, sizeof(rand)); } - flags = get_adv_discov_flags(hdev); + flags = mgmt_get_adv_discov_flags(hdev); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) flags |= LE_AD_NO_BREDR; @@ -6772,7 +6331,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cancel_adv_timeout(hdev); hci_remove_adv_instance(hdev, instance); - advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); + mgmt_advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); } if (!cmd) @@ -6794,31 +6353,6 @@ unlock: hci_dev_unlock(hdev); } -void mgmt_adv_timeout_expired(struct hci_dev *hdev) -{ - u8 instance; - struct hci_request req; - - hdev->adv_instance_timeout = 0; - - instance = get_current_adv_instance(hdev); - if (instance == 0x00) - return; - - hci_dev_lock(hdev); - hci_req_init(&req, hdev); - - clear_adv_instance(hdev, &req, instance, false); - - if (list_empty(&hdev->adv_instances)) - disable_advertising(&req); - - if (!skb_queue_empty(&req.cmd_q)) - hci_req_run(&req, NULL); - - hci_dev_unlock(hdev); -} - static int add_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -6897,7 +6431,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, * actually added. */ if (hdev->adv_instance_cnt > prev_instance_cnt) - advertising_added(sk, hdev, cp->instance); + mgmt_advertising_added(sk, hdev, cp->instance); hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); @@ -6944,7 +6478,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - err = schedule_adv_instance(&req, schedule_instance, true); + err = __hci_req_schedule_adv_instance(&req, schedule_instance, true); if (!err) err = hci_req_run(&req, add_advertising_complete); @@ -7024,10 +6558,10 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); /* If no HCI commands have been collected so far or the HCI_ADVERTISING * flag is set or the device isn't powered then we have no HCI @@ -7367,8 +6901,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { - update_adv_data(&req); - update_scan_rsp_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); } if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && @@ -7380,11 +6914,12 @@ static int powered_update_hci(struct hci_dev *hdev) } if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - enable_advertising(&req); + __hci_req_enable_advertising(&req); else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && hdev->cur_adv_instance) - schedule_adv_instance(&req, hdev->cur_adv_instance, - true); + __hci_req_schedule_adv_instance(&req, + hdev->cur_adv_instance, + true); } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); @@ -7505,7 +7040,7 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) * only update AD if advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); hci_req_run(&req, NULL); @@ -8352,35 +7887,6 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - BT_DBG("%s status %u", hdev->name, status); -} - -void mgmt_reenable_advertising(struct hci_dev *hdev) -{ - struct hci_request req; - u8 instance; - - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - return; - - instance = get_current_adv_instance(hdev); - - hci_req_init(&req, hdev); - - if (instance) { - schedule_adv_instance(&req, instance, true); - } else { - update_adv_data(&req); - update_scan_rsp_data(&req); - enable_advertising(&req); - } - - hci_req_run(&req, adv_enable_complete); -} - static struct hci_mgmt_chan chan = { .channel = HCI_CHANNEL_CONTROL, .handler_count = ARRAY_SIZE(mgmt_handlers), -- cgit v1.1 From 53c0ba74510c1182786dcd1e3710215467777601 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 16:43:43 +0300 Subject: Bluetooth: Move connectable changes to hdev->req_workqueue This way the connectable changes are synchronized against each other, which helps avoid potential races. The connectable mode is also linked together with LE advertising which makes is more convenient to have it behind the same workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 39 ++++++++++++++++++++ net/bluetooth/mgmt.c | 86 +++++++-------------------------------------- 2 files changed, 51 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e6622bd1..167c906 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1274,6 +1274,43 @@ static void scan_update_work(struct work_struct *work) hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } +static int connectable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + __hci_req_update_scan(req); + + /* If BR/EDR is not enabled and we disable advertising as a + * by-product of disabling connectable, we need to update the + * advertising flags. + */ + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + + /* Update the advertising parameters if necessary */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + __hci_req_enable_advertising(req); + + __hci_update_background_scan(req); + + hci_dev_unlock(hdev); + + return 0; +} + +static void connectable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + connectable_update); + u8 status; + + hci_req_sync(hdev, connectable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_connectable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -1789,6 +1826,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); + INIT_WORK(&hdev->connectable_update, connectable_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1801,6 +1839,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); + cancel_work_sync(&hdev->connectable_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6d0f002..d8b76ca 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1580,12 +1580,9 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - struct mgmt_mode *cp; - bool conn_changed, discov_changed; BT_DBG("status 0x%02x", status); @@ -1601,27 +1598,8 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, goto remove_cmd; } - cp = cmd->param; - if (cp->val) { - conn_changed = !hci_dev_test_and_set_flag(hdev, - HCI_CONNECTABLE); - discov_changed = false; - } else { - conn_changed = hci_dev_test_and_clear_flag(hdev, - HCI_CONNECTABLE); - discov_changed = hci_dev_test_and_clear_flag(hdev, - HCI_DISCOVERABLE); - } - send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); - - if (conn_changed || discov_changed) { - new_settings(hdev, cmd->sk); - hci_req_update_scan(hdev); - if (discov_changed) - hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); - hci_update_background_scan(hdev); - } + new_settings(hdev, cmd->sk); remove_cmd: mgmt_pending_remove(cmd); @@ -1664,8 +1642,6 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; - u8 scan; int err; BT_DBG("request for %s", hdev->name); @@ -1699,57 +1675,19 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - hci_req_init(&req, hdev); - - /* If BR/EDR is not enabled and we disable advertising as a - * by-product of disabling connectable, we need to update the - * advertising flags. - */ - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - if (!cp->val) { - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - } - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { - if (cp->val) { - scan = SCAN_PAGE; - } else { - /* If we don't have any whitelist entries just - * disable all scanning. If there are entries - * and we had both page and inquiry scanning - * enabled then fall back to only page scanning. - * Otherwise no changes are needed. - */ - if (list_empty(&hdev->whitelist)) - scan = SCAN_DISABLED; - else if (test_bit(HCI_ISCAN, &hdev->flags)) - scan = SCAN_PAGE; - else - goto no_scan_update; - - if (test_bit(HCI_ISCAN, &hdev->flags) && - hdev->discov_timeout > 0) - cancel_delayed_work(&hdev->discov_off); - } + if (cp->val) { + hci_dev_set_flag(hdev, HCI_CONNECTABLE); + } else { + if (hdev->discov_timeout > 0) + cancel_delayed_work(&hdev->discov_off); - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_CONNECTABLE); } -no_scan_update: - /* Update the advertising parameters if necessary */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - __hci_req_enable_advertising(&req); - - err = hci_req_run(&req, set_connectable_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - if (err == -ENODATA) - err = set_connectable_update_settings(hdev, sk, - cp->val); - goto failed; - } + queue_work(hdev->req_workqueue, &hdev->connectable_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.1 From 14bf5eac7a4f4bf0729ff8eb358de4fab967cee1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 19:00:22 +0200 Subject: Bluetooth: Perform Class of Device changes through hdev->req_workqueue The Class of Device needs to be changed e.g. for limited discoverable mode. In preparation of moving the discoverable mode to hci_request.c and hdev->req_workqueue, move the Class of Device helpers there first. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 40 +++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 2 ++ net/bluetooth/mgmt.c | 54 ++++++--------------------------------------- 3 files changed, 49 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 167c906..e5e827b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1311,6 +1311,46 @@ static void connectable_update_work(struct work_struct *work) mgmt_set_connectable_complete(hdev, status); } +static u8 get_service_classes(struct hci_dev *hdev) +{ + struct bt_uuid *uuid; + u8 val = 0; + + list_for_each_entry(uuid, &hdev->uuids, list) + val |= uuid->svc_hint; + + return val; +} + +void __hci_req_update_class(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 cod[3]; + + BT_DBG("%s", hdev->name); + + if (!hdev_is_powered(hdev)) + return; + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return; + + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) + return; + + cod[0] = hdev->minor_class; + cod[1] = hdev->major_class; + cod[2] = get_service_classes(hdev); + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) + cod[1] |= 0x20; + + if (memcmp(cod, hdev->dev_class, 3) == 0) + return; + + hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 5358b1b..4192034 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -72,6 +72,8 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, u8 instance, bool force); +void __hci_req_update_class(struct hci_request *req); + /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d8b76ca..f5a4ee9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -959,46 +959,6 @@ static void update_eir(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } -static u8 get_service_classes(struct hci_dev *hdev) -{ - struct bt_uuid *uuid; - u8 val = 0; - - list_for_each_entry(uuid, &hdev->uuids, list) - val |= uuid->svc_hint; - - return val; -} - -static void update_class(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 cod[3]; - - BT_DBG("%s", hdev->name); - - if (!hdev_is_powered(hdev)) - return; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) - return; - - cod[0] = hdev->minor_class; - cod[1] = hdev->major_class; - cod[2] = get_service_classes(hdev); - - if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) - cod[1] |= 0x20; - - if (memcmp(cod, hdev->dev_class, 3) == 0) - return; - - hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1013,7 +973,7 @@ static void service_cache_off(struct work_struct *work) hci_dev_lock(hdev); update_eir(&req); - update_class(&req); + __hci_req_update_class(&req); hci_dev_unlock(hdev); @@ -1370,7 +1330,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, */ hci_req_init(&req, hdev); __hci_req_update_scan(&req); - update_class(&req); + __hci_req_update_class(&req); hci_req_run(&req, NULL); remove_cmd: @@ -2177,7 +2137,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); - update_class(&req); + __hci_req_update_class(&req); update_eir(&req); err = hci_req_run(&req, add_uuid_complete); @@ -2277,7 +2237,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, update_class: hci_req_init(&req, hdev); - update_class(&req); + __hci_req_update_class(&req); update_eir(&req); err = hci_req_run(&req, remove_uuid_complete); @@ -2356,7 +2316,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, update_eir(&req); } - update_class(&req); + __hci_req_update_class(&req); err = hci_req_run(&req, set_class_complete); if (err < 0) { @@ -6871,7 +6831,7 @@ static int powered_update_hci(struct hci_dev *hdev) else write_fast_connectable(&req, false); __hci_req_update_scan(&req); - update_class(&req); + __hci_req_update_class(&req); update_name(&req); update_eir(&req); } @@ -6972,7 +6932,7 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); } - update_class(&req); + __hci_req_update_class(&req); /* Advertising instances don't use the global discoverable setting, so * only update AD if advertising was enabled using Set Advertising. -- cgit v1.1 From aed1a8851db022c3bd22af41a343068b8c6e40c1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 17:24:44 +0300 Subject: Bluetooth: Move discoverable changes to hdev->req_workqueue The discoverable mode is intrinsically linked with the connectable mode e.g. through sharing the same HCI command (Write Scan Enable) for BR/EDR. It makes therefore sense to move it to hci_request.c and run the changes through the same hdev->req_workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 64 ++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 90 +++++++-------------------------------------- 2 files changed, 77 insertions(+), 77 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e5e827b..8f72218 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1351,6 +1351,68 @@ void __hci_req_update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } +static void write_iac(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_current_iac_lap cp; + + if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) + return; + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { + /* Limited discoverable mode */ + cp.num_iac = min_t(u8, hdev->num_iac, 2); + cp.iac_lap[0] = 0x00; /* LIAC */ + cp.iac_lap[1] = 0x8b; + cp.iac_lap[2] = 0x9e; + cp.iac_lap[3] = 0x33; /* GIAC */ + cp.iac_lap[4] = 0x8b; + cp.iac_lap[5] = 0x9e; + } else { + /* General discoverable mode */ + cp.num_iac = 1; + cp.iac_lap[0] = 0x33; /* GIAC */ + cp.iac_lap[1] = 0x8b; + cp.iac_lap[2] = 0x9e; + } + + hci_req_add(req, HCI_OP_WRITE_CURRENT_IAC_LAP, + (cp.num_iac * 3) + 1, &cp); +} + +static int discoverable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + write_iac(req); + __hci_req_update_scan(req); + __hci_req_update_class(req); + } + + /* Advertising instances don't use the global discoverable setting, so + * only update AD if advertising was enabled using Set Advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + + hci_dev_unlock(hdev); + + return 0; +} + +static void discoverable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discoverable_update); + u8 status; + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_discoverable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -1867,6 +1929,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); INIT_WORK(&hdev->connectable_update, connectable_update_work); + INIT_WORK(&hdev->discoverable_update, discoverable_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1880,6 +1943,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); cancel_work_sync(&hdev->connectable_update); + cancel_work_sync(&hdev->discoverable_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f5a4ee9..8846cb3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1282,13 +1282,9 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - struct mgmt_mode *cp; - struct hci_request req; - bool changed; BT_DBG("status 0x%02x", status); @@ -1305,33 +1301,14 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, goto remove_cmd; } - cp = cmd->param; - if (cp->val) { - changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); - - if (hdev->discov_timeout > 0) { - int to = msecs_to_jiffies(hdev->discov_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->discov_off, - to); - } - } else { - changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && + hdev->discov_timeout > 0) { + int to = msecs_to_jiffies(hdev->discov_timeout * 1000); + queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); - - if (changed) - new_settings(hdev, cmd->sk); - - /* When the discoverable mode gets changed, make sure - * that class of device has the limited discoverable - * bit correctly set. Also update page scan based on whitelist - * entries. - */ - hci_req_init(&req, hdev); - __hci_req_update_scan(&req); - __hci_req_update_class(&req); - hci_req_run(&req, NULL); + new_settings(hdev, cmd->sk); remove_cmd: mgmt_pending_remove(cmd); @@ -1345,9 +1322,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_discoverable *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; u16 timeout; - u8 scan; int err; BT_DBG("request for %s", hdev->name); @@ -1447,58 +1422,19 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; + if (cp->val) + hci_dev_set_flag(hdev, HCI_DISCOVERABLE); + else + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + /* Limited discoverable mode */ if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE); else hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_req_init(&req, hdev); - - /* The procedure for LE-only controllers is much simpler - just - * update the advertising data. - */ - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - goto update_ad; - - scan = SCAN_PAGE; - - if (cp->val) { - struct hci_cp_write_current_iac_lap hci_cp; - - if (cp->val == 0x02) { - /* Limited discoverable mode */ - hci_cp.num_iac = min_t(u8, hdev->num_iac, 2); - hci_cp.iac_lap[0] = 0x00; /* LIAC */ - hci_cp.iac_lap[1] = 0x8b; - hci_cp.iac_lap[2] = 0x9e; - hci_cp.iac_lap[3] = 0x33; /* GIAC */ - hci_cp.iac_lap[4] = 0x8b; - hci_cp.iac_lap[5] = 0x9e; - } else { - /* General discoverable mode */ - hci_cp.num_iac = 1; - hci_cp.iac_lap[0] = 0x33; /* GIAC */ - hci_cp.iac_lap[1] = 0x8b; - hci_cp.iac_lap[2] = 0x9e; - } - - hci_req_add(&req, HCI_OP_WRITE_CURRENT_IAC_LAP, - (hci_cp.num_iac * 3) + 1, &hci_cp); - - scan |= SCAN_INQUIRY; - } else { - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - } - - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); - -update_ad: - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - - err = hci_req_run(&req, set_discoverable_complete); - if (err < 0) - mgmt_pending_remove(cmd); + queue_work(hdev->req_workqueue, &hdev->discoverable_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.1 From c366f555b8df67633b849a5088bb897d6c63aaa5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 15:43:06 +0200 Subject: Bluetooth: Move discoverable timeout behind hdev->req_workqueue Since the other discoverable changes are behind req_workqueue now it only makes sense to move the discoverable timeout there as well. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 13 ------------- net/bluetooth/hci_request.c | 26 ++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 41 ++--------------------------------------- 3 files changed, 28 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bab8958..484c75f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1537,7 +1537,6 @@ int hci_dev_do_close(struct hci_dev *hdev) flush_work(&hdev->rx_work); if (hdev->discov_timeout > 0) { - cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = 0; hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); @@ -2096,17 +2095,6 @@ static void hci_error_reset(struct work_struct *work) hci_dev_do_open(hdev); } -static void hci_discov_off(struct work_struct *work) -{ - struct hci_dev *hdev; - - hdev = container_of(work, struct hci_dev, discov_off.work); - - BT_DBG("%s", hdev->name); - - mgmt_discoverable_timeout(hdev); -} - void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -2986,7 +2974,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); - INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 8f72218..fe14fd1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1923,6 +1923,30 @@ static void discov_update(struct work_struct *work) } } +static void discov_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_off.work); + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + /* When discoverable timeout triggers, then just make sure + * the limited discoverable flag is cleared. Even in the case + * of a timeout triggered from general discoverable, it is + * safe to unconditionally clear the flag. + */ + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hdev->discov_timeout = 0; + + hci_dev_unlock(hdev); + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, NULL); + mgmt_new_settings(hdev); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); @@ -1930,6 +1954,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->scan_update, scan_update_work); INIT_WORK(&hdev->connectable_update, connectable_update_work); INIT_WORK(&hdev->discoverable_update, discoverable_update_work); + INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1944,6 +1969,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->scan_update); cancel_work_sync(&hdev->connectable_update); cancel_work_sync(&hdev->discoverable_update); + cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8846cb3..29b3bb7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1401,8 +1401,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val && hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->discov_off, - to); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, to); } err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev); @@ -6848,43 +6848,6 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err) mgmt_pending_remove(cmd); } -void mgmt_discoverable_timeout(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_dev_lock(hdev); - - /* When discoverable timeout triggers, then just make sure - * the limited discoverable flag is cleared. Even in the case - * of a timeout triggered from general discoverable, it is - * safe to unconditionally clear the flag. - */ - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - - hci_req_init(&req, hdev); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - u8 scan = SCAN_PAGE; - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, - sizeof(scan), &scan); - } - __hci_req_update_class(&req); - - /* Advertising instances don't use the global discoverable setting, so - * only update AD if advertising was enabled using Set Advertising. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - - hci_req_run(&req, NULL); - - hdev->discov_timeout = 0; - - new_settings(hdev, NULL); - - hci_dev_unlock(hdev); -} - void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { -- cgit v1.1 From 00cf5040b39638588cd10ae4ffcc76a1be6ecf2c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:41 +0200 Subject: Bluetooth: HCI name update to hci_request.c We'll soon need this both from hci_request.c and mgmt.c so move it as a request helper function to hci_request.c. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 10 ++++++++++ net/bluetooth/hci_request.h | 2 ++ net/bluetooth/mgmt.c | 14 ++------------ 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index fe14fd1..3150461 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -420,6 +420,16 @@ static void __hci_update_background_scan(struct hci_request *req) } } +void __hci_req_update_name(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_local_name cp; + + memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); + + hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); +} + void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_cp_le_set_scan_enable cp; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 4192034..4e65a9c 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,8 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +void __hci_req_update_name(struct hci_request *req); + void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 29b3bb7..001a29a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3153,16 +3153,6 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } -static void update_name(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_local_name cp; - - memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); - - hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); -} - static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; @@ -3241,7 +3231,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_init(&req, hdev); if (lmp_bredr_capable(hdev)) { - update_name(&req); + __hci_req_update_name(&req); update_eir(&req); } @@ -6768,7 +6758,7 @@ static int powered_update_hci(struct hci_dev *hdev) write_fast_connectable(&req, false); __hci_req_update_scan(&req); __hci_req_update_class(&req); - update_name(&req); + __hci_req_update_name(&req); update_eir(&req); } -- cgit v1.1 From b1a8917c9bcbf42113dfacb6492228e094c96862 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:42 +0200 Subject: Bluetooth: Move EIR update to hci_request.c We'll soon need to update the EIR both from hci_request.c and mgmt.c so move update_eir() as a more generic request helper to hci_request.c. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 189 +++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 1 + net/bluetooth/mgmt.c | 203 ++------------------------------------------ 3 files changed, 198 insertions(+), 195 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 3150461..030a1bb 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include #include @@ -430,6 +432,193 @@ void __hci_req_update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } +#define PNP_INFO_SVCLASS_ID 0x1200 + +static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 4) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + u16 uuid16; + + if (uuid->size != 16) + continue; + + uuid16 = get_unaligned_le16(&uuid->uuid[12]); + if (uuid16 < 0x1100) + continue; + + if (uuid16 == PNP_INFO_SVCLASS_ID) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID16_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u16) > len) { + uuids_start[1] = EIR_UUID16_SOME; + break; + } + + *ptr++ = (uuid16 & 0x00ff); + *ptr++ = (uuid16 & 0xff00) >> 8; + uuids_start[0] += sizeof(uuid16); + } + + return ptr; +} + +static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 6) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 32) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID32_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + sizeof(u32) > len) { + uuids_start[1] = EIR_UUID32_SOME; + break; + } + + memcpy(ptr, &uuid->uuid[12], sizeof(u32)); + ptr += sizeof(u32); + uuids_start[0] += sizeof(u32); + } + + return ptr; +} + +static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) +{ + u8 *ptr = data, *uuids_start = NULL; + struct bt_uuid *uuid; + + if (len < 18) + return ptr; + + list_for_each_entry(uuid, &hdev->uuids, list) { + if (uuid->size != 128) + continue; + + if (!uuids_start) { + uuids_start = ptr; + uuids_start[0] = 1; + uuids_start[1] = EIR_UUID128_ALL; + ptr += 2; + } + + /* Stop if not enough space to put next UUID */ + if ((ptr - data) + 16 > len) { + uuids_start[1] = EIR_UUID128_SOME; + break; + } + + memcpy(ptr, uuid->uuid, 16); + ptr += 16; + uuids_start[0] += 16; + } + + return ptr; +} + +static void create_eir(struct hci_dev *hdev, u8 *data) +{ + u8 *ptr = data; + size_t name_len; + + name_len = strlen(hdev->dev_name); + + if (name_len > 0) { + /* EIR Data type */ + if (name_len > 48) { + name_len = 48; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + /* EIR Data length */ + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ptr += (name_len + 2); + } + + if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) { + ptr[0] = 2; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8) hdev->inq_tx_power; + + ptr += 3; + } + + if (hdev->devid_source > 0) { + ptr[0] = 9; + ptr[1] = EIR_DEVICE_ID; + + put_unaligned_le16(hdev->devid_source, ptr + 2); + put_unaligned_le16(hdev->devid_vendor, ptr + 4); + put_unaligned_le16(hdev->devid_product, ptr + 6); + put_unaligned_le16(hdev->devid_version, ptr + 8); + + ptr += 10; + } + + ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); + ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); +} + +void __hci_req_update_eir(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_eir cp; + + if (!hdev_is_powered(hdev)) + return; + + if (!lmp_ext_inq_capable(hdev)) + return; + + if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) + return; + + if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) + return; + + memset(&cp, 0, sizeof(cp)); + + create_eir(hdev, cp.data); + + if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) + return; + + memcpy(hdev->eir, cp.data, sizeof(cp.data)); + + hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +} + void hci_req_add_le_scan_disable(struct hci_request *req) { struct hci_cp_le_set_scan_enable cp; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 4e65a9c..5af4039 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -56,6 +56,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); void __hci_req_update_name(struct hci_request *req); +void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 001a29a..fa5dc67 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -719,116 +719,6 @@ static u32 get_current_settings(struct hci_dev *hdev) return settings; } -#define PNP_INFO_SVCLASS_ID 0x1200 - -static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 4) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - u16 uuid16; - - if (uuid->size != 16) - continue; - - uuid16 = get_unaligned_le16(&uuid->uuid[12]); - if (uuid16 < 0x1100) - continue; - - if (uuid16 == PNP_INFO_SVCLASS_ID) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID16_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + sizeof(u16) > len) { - uuids_start[1] = EIR_UUID16_SOME; - break; - } - - *ptr++ = (uuid16 & 0x00ff); - *ptr++ = (uuid16 & 0xff00) >> 8; - uuids_start[0] += sizeof(uuid16); - } - - return ptr; -} - -static u8 *create_uuid32_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 6) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - if (uuid->size != 32) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID32_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + sizeof(u32) > len) { - uuids_start[1] = EIR_UUID32_SOME; - break; - } - - memcpy(ptr, &uuid->uuid[12], sizeof(u32)); - ptr += sizeof(u32); - uuids_start[0] += sizeof(u32); - } - - return ptr; -} - -static u8 *create_uuid128_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len) -{ - u8 *ptr = data, *uuids_start = NULL; - struct bt_uuid *uuid; - - if (len < 18) - return ptr; - - list_for_each_entry(uuid, &hdev->uuids, list) { - if (uuid->size != 128) - continue; - - if (!uuids_start) { - uuids_start = ptr; - uuids_start[0] = 1; - uuids_start[1] = EIR_UUID128_ALL; - ptr += 2; - } - - /* Stop if not enough space to put next UUID */ - if ((ptr - data) + 16 > len) { - uuids_start[1] = EIR_UUID128_SOME; - break; - } - - memcpy(ptr, uuid->uuid, 16); - ptr += 16; - uuids_start[0] += 16; - } - - return ptr; -} - static struct mgmt_pending_cmd *pending_find(u16 opcode, struct hci_dev *hdev) { return mgmt_pending_find(HCI_CHANNEL_CONTROL, opcode, hdev); @@ -882,83 +772,6 @@ bool mgmt_get_connectable(struct hci_dev *hdev) return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } -static void create_eir(struct hci_dev *hdev, u8 *data) -{ - u8 *ptr = data; - size_t name_len; - - name_len = strlen(hdev->dev_name); - - if (name_len > 0) { - /* EIR Data type */ - if (name_len > 48) { - name_len = 48; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - /* EIR Data length */ - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - ptr += (name_len + 2); - } - - if (hdev->inq_tx_power != HCI_TX_POWER_INVALID) { - ptr[0] = 2; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8) hdev->inq_tx_power; - - ptr += 3; - } - - if (hdev->devid_source > 0) { - ptr[0] = 9; - ptr[1] = EIR_DEVICE_ID; - - put_unaligned_le16(hdev->devid_source, ptr + 2); - put_unaligned_le16(hdev->devid_vendor, ptr + 4); - put_unaligned_le16(hdev->devid_product, ptr + 6); - put_unaligned_le16(hdev->devid_version, ptr + 8); - - ptr += 10; - } - - ptr = create_uuid16_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); - ptr = create_uuid32_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); - ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data)); -} - -static void update_eir(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_eir cp; - - if (!hdev_is_powered(hdev)) - return; - - if (!lmp_ext_inq_capable(hdev)) - return; - - if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) - return; - - if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) - return; - - memset(&cp, 0, sizeof(cp)); - - create_eir(hdev, cp.data); - - if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) - return; - - memcpy(hdev->eir, cp.data, sizeof(cp.data)); - - hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -972,7 +785,7 @@ static void service_cache_off(struct work_struct *work) hci_dev_lock(hdev); - update_eir(&req); + __hci_req_update_eir(&req); __hci_req_update_class(&req); hci_dev_unlock(hdev); @@ -2074,7 +1887,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); __hci_req_update_class(&req); - update_eir(&req); + __hci_req_update_eir(&req); err = hci_req_run(&req, add_uuid_complete); if (err < 0) { @@ -2174,7 +1987,7 @@ update_class: hci_req_init(&req, hdev); __hci_req_update_class(&req); - update_eir(&req); + __hci_req_update_eir(&req); err = hci_req_run(&req, remove_uuid_complete); if (err < 0) { @@ -2249,7 +2062,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); cancel_delayed_work_sync(&hdev->service_cache); hci_dev_lock(hdev); - update_eir(&req); + __hci_req_update_eir(&req); } __hci_req_update_class(&req); @@ -3232,7 +3045,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (lmp_bredr_capable(hdev)) { __hci_req_update_name(&req); - update_eir(&req); + __hci_req_update_eir(&req); } /* The name is stored in the scan response data and so @@ -3917,7 +3730,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, NULL, 0); hci_req_init(&req, hdev); - update_eir(&req); + __hci_req_update_eir(&req); hci_req_run(&req, NULL); hci_dev_unlock(hdev); @@ -6759,7 +6572,7 @@ static int powered_update_hci(struct hci_dev *hdev) __hci_req_update_scan(&req); __hci_req_update_class(&req); __hci_req_update_name(&req); - update_eir(&req); + __hci_req_update_eir(&req); } return hci_req_run(&req, powered_complete); @@ -7380,7 +7193,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) hci_req_add(&req, HCI_OP_WRITE_SSP_DEBUG_MODE, sizeof(enable), &enable); - update_eir(&req); + __hci_req_update_eir(&req); } else { clear_eir(&req); } -- cgit v1.1 From bf943cbf76ecd3b9838a80d5e08777b0f4ccc665 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:43 +0200 Subject: Bluetooth: Move fast connectable code to hci_request.c We'll soon need this both in hci_request.c and mgmt.c so move it to hci_request.c as a generic helper. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 35 +++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 1 + net/bluetooth/mgmt.c | 43 ++++--------------------------------------- 3 files changed, 40 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 030a1bb..0abd83d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -349,6 +349,41 @@ void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, hci_req_add_ev(req, opcode, plen, param, 0); } +void __hci_req_write_fast_connectable(struct hci_request *req, bool enable) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_page_scan_activity acp; + u8 type; + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + return; + + if (hdev->hci_ver < BLUETOOTH_VER_1_2) + return; + + if (enable) { + type = PAGE_SCAN_TYPE_INTERLACED; + + /* 160 msec page scan interval */ + acp.interval = cpu_to_le16(0x0100); + } else { + type = PAGE_SCAN_TYPE_STANDARD; /* default */ + + /* default 1.28 sec page scan */ + acp.interval = cpu_to_le16(0x0800); + } + + acp.window = cpu_to_le16(0x0012); + + if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || + __cpu_to_le16(hdev->page_scan_window) != acp.window) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, + sizeof(acp), &acp); + + if (hdev->page_scan_type != type) + hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); +} + /* This function controls the background scanning based on hdev->pend_le_conns * list. If there are pending LE connection we start the background scanning, * otherwise we stop it. diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 5af4039..d3dd24d 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,7 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fa5dc67..0a7e6f4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1254,41 +1254,6 @@ failed: return err; } -static void write_fast_connectable(struct hci_request *req, bool enable) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_page_scan_activity acp; - u8 type; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - if (enable) { - type = PAGE_SCAN_TYPE_INTERLACED; - - /* 160 msec page scan interval */ - acp.interval = cpu_to_le16(0x0100); - } else { - type = PAGE_SCAN_TYPE_STANDARD; /* default */ - - /* default 1.28 sec page scan */ - acp.interval = cpu_to_le16(0x0800); - } - - acp.window = cpu_to_le16(0x0012); - - if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || - __cpu_to_le16(hdev->page_scan_window) != acp.window) - hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, - sizeof(acp), &acp); - - if (hdev->page_scan_type != type) - hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); -} - void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; @@ -4094,7 +4059,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - write_fast_connectable(&req, cp->val); + __hci_req_write_fast_connectable(&req, cp->val); err = hci_req_run(&req, fast_connectable_complete); if (err < 0) { @@ -4236,7 +4201,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); - write_fast_connectable(&req, false); + __hci_req_write_fast_connectable(&req, false); __hci_req_update_scan(&req); /* Since only the advertising data flags will change, there @@ -6566,9 +6531,9 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) - write_fast_connectable(&req, true); + __hci_req_write_fast_connectable(&req, true); else - write_fast_connectable(&req, false); + __hci_req_write_fast_connectable(&req, false); __hci_req_update_scan(&req); __hci_req_update_class(&req); __hci_req_update_name(&req); -- cgit v1.1 From 2ff13894cfb877cb3d02d96a8402202f0a6f3efd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:44 +0200 Subject: Bluetooth: Perform HCI update for power on synchronously The request to update HCI during power on is always coming either from hdev->req_workqueue or through an ioctl, so it's safe to use hci_req_sync for it. This way we also eliminate potential races with incoming mgmt commands or other actions while powering on. Part of this refactoring is the splitting of mgmt_powered() into mgmt_power_on() and __mgmt_power_off() functions. The main reason is the different requirements as far as hdev locking is concerned, as highlighted with the __ prefix of the power off API. Since the power on in the case of clearing the AUTO_OFF flag cannot be done synchronously in the set_powered mgmt handler, the hci_power_on work callback is extended to cover this (which also simplifies the set_powered helper a lot). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 21 +++++-- net/bluetooth/hci_request.c | 100 ++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 2 + net/bluetooth/mgmt.c | 136 +++----------------------------------------- 4 files changed, 126 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 484c75f..eac3f6f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1399,10 +1399,10 @@ static int hci_dev_do_open(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_dev_test_flag(hdev, HCI_MGMT) && hdev->dev_type == HCI_BREDR) { - hci_dev_lock(hdev); - mgmt_powered(hdev, 1); - hci_dev_unlock(hdev); + ret = __hci_req_hci_power_on(hdev); + mgmt_power_on(hdev, ret); } } else { /* Init failed, cleanup */ @@ -1559,8 +1559,9 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); - if (!auto_off && hdev->dev_type == HCI_BREDR) - mgmt_powered(hdev, 0); + if (!auto_off && hdev->dev_type == HCI_BREDR && + hci_dev_test_flag(hdev, HCI_MGMT)) + __mgmt_power_off(hdev); hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); @@ -2013,6 +2014,16 @@ static void hci_power_on(struct work_struct *work) BT_DBG("%s", hdev->name); + if (test_bit(HCI_UP, &hdev->flags) && + hci_dev_test_flag(hdev, HCI_MGMT) && + hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { + hci_req_sync_lock(hdev); + err = __hci_req_hci_power_on(hdev); + hci_req_sync_unlock(hdev); + mgmt_power_on(hdev, err); + return; + } + err = hci_dev_do_open(hdev); if (err < 0) { hci_dev_lock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0abd83d..7cc24f1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2181,6 +2181,106 @@ static void discov_off(struct work_struct *work) mgmt_new_settings(hdev); } +static int powered_update_hci(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance; + u8 link_sec; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && + !lmp_host_ssp_capable(hdev)) { + u8 mode = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } + } + + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + lmp_bredr_capable(hdev)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 0x01; + cp.simul = 0x00; + + /* Check first if we already have the right + * host state (host features set) + */ + if (cp.le != lmp_host_le_capable(hdev) || + cp.simul != lmp_host_le_br_capable(hdev)) + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + + if (lmp_le_capable(hdev)) { + /* Make sure the controller has a good default for + * advertising data. This also applies to the case + * where BR/EDR was toggled during the AUTO_OFF phase. + */ + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + } + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + hdev->cur_adv_instance == 0x00 && + !list_empty(&hdev->adv_instances)) { + adv_instance = list_first_entry(&hdev->adv_instances, + struct adv_info, list); + hdev->cur_adv_instance = adv_instance->instance; + } + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_enable_advertising(req); + else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + hdev->cur_adv_instance) + __hci_req_schedule_adv_instance(req, + hdev->cur_adv_instance, + true); + } + + link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + + if (lmp_bredr_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) + __hci_req_write_fast_connectable(req, true); + else + __hci_req_write_fast_connectable(req, false); + __hci_req_update_scan(req); + __hci_req_update_class(req); + __hci_req_update_name(req); + __hci_req_update_eir(req); + } + + hci_dev_unlock(hdev); + return 0; +} + +int __hci_req_hci_power_on(struct hci_dev *hdev) +{ + /* Register the available SMP channels (BR/EDR and LE) only when + * successfully powering on the controller. This late + * registration is required so that LE SMP can clearly decide if + * the public address or static address is used. + */ + smp_register(hdev); + + return __hci_req_sync(hdev, powered_update_hci, 0, HCI_CMD_TIMEOUT, + NULL); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index d3dd24d..a24d3b5 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,8 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +int __hci_req_hci_power_on(struct hci_dev *hdev); + void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0a7e6f4..468402a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -961,17 +961,6 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { - cancel_delayed_work(&hdev->power_off); - - if (cp->val) { - mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, - data, len); - err = mgmt_powered(hdev, 1); - goto failed; - } - } - if (!!cp->val == hdev_is_powered(hdev)) { err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); goto failed; @@ -6434,139 +6423,33 @@ static void restart_le_actions(struct hci_dev *hdev) } } -static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) +void mgmt_power_on(struct hci_dev *hdev, int err) { struct cmd_lookup match = { NULL, hdev }; - BT_DBG("status 0x%02x", status); + BT_DBG("err %d", err); - if (!status) { + hci_dev_lock(hdev); + + if (!err) { restart_le_actions(hdev); hci_update_background_scan(hdev); } - hci_dev_lock(hdev); - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); new_settings(hdev, match.sk); - hci_dev_unlock(hdev); - if (match.sk) sock_put(match.sk); -} -static int powered_update_hci(struct hci_dev *hdev) -{ - struct hci_request req; - struct adv_info *adv_instance; - u8 link_sec; - - hci_req_init(&req, hdev); - - if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && - !lmp_host_ssp_capable(hdev)) { - u8 mode = 0x01; - - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); - - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 support = 0x01; - - hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, - sizeof(support), &support); - } - } - - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - lmp_bredr_capable(hdev)) { - struct hci_cp_write_le_host_supported cp; - - cp.le = 0x01; - cp.simul = 0x00; - - /* Check first if we already have the right - * host state (host features set) - */ - if (cp.le != lmp_host_le_capable(hdev) || - cp.simul != lmp_host_le_br_capable(hdev)) - hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, - sizeof(cp), &cp); - } - - if (lmp_le_capable(hdev)) { - /* Make sure the controller has a good default for - * advertising data. This also applies to the case - * where BR/EDR was toggled during the AUTO_OFF phase. - */ - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance == 0x00 && - !list_empty(&hdev->adv_instances)) { - adv_instance = list_first_entry(&hdev->adv_instances, - struct adv_info, list); - hdev->cur_adv_instance = adv_instance->instance; - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_enable_advertising(&req); - else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance) - __hci_req_schedule_adv_instance(&req, - hdev->cur_adv_instance, - true); - } - - link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); - if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) - hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, - sizeof(link_sec), &link_sec); - - if (lmp_bredr_capable(hdev)) { - if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) - __hci_req_write_fast_connectable(&req, true); - else - __hci_req_write_fast_connectable(&req, false); - __hci_req_update_scan(&req); - __hci_req_update_class(&req); - __hci_req_update_name(&req); - __hci_req_update_eir(&req); - } - - return hci_req_run(&req, powered_complete); + hci_dev_unlock(hdev); } -int mgmt_powered(struct hci_dev *hdev, u8 powered) +void __mgmt_power_off(struct hci_dev *hdev) { struct cmd_lookup match = { NULL, hdev }; u8 status, zero_cod[] = { 0, 0, 0 }; - int err; - - if (!hci_dev_test_flag(hdev, HCI_MGMT)) - return 0; - - if (powered) { - /* Register the available SMP channels (BR/EDR and LE) only - * when successfully powering on the controller. This late - * registration is required so that LE SMP can clearly - * decide if the public address or static address is used. - */ - smp_register(hdev); - - if (powered_update_hci(hdev) == 0) - return 0; - - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, - &match); - goto new_settings; - } mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -6588,13 +6471,10 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), NULL); -new_settings: - err = new_settings(hdev, match.sk); + new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); - - return err; } void mgmt_set_powered_failed(struct hci_dev *hdev, int err) -- cgit v1.1 From 02c04afea93fbba7925984df455bc63e7d92da97 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Nov 2015 12:15:58 +0200 Subject: Bluetooth: Simplify read_adv_features code The code in the Read Advertising Features mgmt command handler is unnecessarily complicated. Clean it up and remove unnecessary variables & branches. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 468402a..9ce2bb2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5788,10 +5788,10 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, { struct mgmt_rp_read_adv_features *rp; size_t rp_len; - int err, i; - bool instance; + int err; struct adv_info *adv_instance; u32 supported_flags; + u8 *instance; BT_DBG("%s", hdev->name); @@ -5801,12 +5801,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - rp_len = sizeof(*rp); - - instance = hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE); - if (instance) - rp_len += hdev->adv_instance_cnt; - + rp_len = sizeof(*rp) + hdev->adv_instance_cnt; rp = kmalloc(rp_len, GFP_ATOMIC); if (!rp) { hci_dev_unlock(hdev); @@ -5819,19 +5814,12 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, rp->max_adv_data_len = HCI_MAX_AD_LENGTH; rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH; rp->max_instances = HCI_MAX_ADV_INSTANCES; + rp->num_instances = hdev->adv_instance_cnt; - if (instance) { - i = 0; - list_for_each_entry(adv_instance, &hdev->adv_instances, list) { - if (i >= hdev->adv_instance_cnt) - break; - - rp->instance[i] = adv_instance->instance; - i++; - } - rp->num_instances = hdev->adv_instance_cnt; - } else { - rp->num_instances = 0; + instance = rp->instance; + list_for_each_entry(adv_instance, &hdev->adv_instances, list) { + *instance = adv_instance->instance; + instance++; } hci_dev_unlock(hdev); -- cgit v1.1 From 17fd08ffb5981cff2c921eb479f46b872b02b2b9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Nov 2015 12:15:59 +0200 Subject: Bluetooth: Remove unnecessary HCI_ADVERTISING_INSTANCE flag This flag just tells us whether hdev->adv_instances is empty or not. We can equally well use the list_empty() function to get this information. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 19 ++++++++----------- net/bluetooth/mgmt.c | 8 +------- 2 files changed, 9 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7cc24f1..adfcd6f 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -822,7 +822,7 @@ static u8 get_current_adv_instance(struct hci_dev *hdev) * setting was set. When neither apply, default to the global settings, * represented by instance "0". */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + if (!list_empty(&hdev->adv_instances) && !hci_dev_test_flag(hdev, HCI_ADVERTISING)) return hdev->cur_adv_instance; @@ -1144,7 +1144,7 @@ void hci_req_reenable_advertising(struct hci_dev *hdev) u8 instance; if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + list_empty(&hdev->adv_instances)) return; instance = get_current_adv_instance(hdev); @@ -1202,7 +1202,7 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, u16 timeout; if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + list_empty(&hdev->adv_instances)) return -EPERM; if (hdev->adv_instance_timeout) @@ -1319,10 +1319,8 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, } } - if (list_empty(&hdev->adv_instances)) { + if (list_empty(&hdev->adv_instances)) hdev->cur_adv_instance = 0x00; - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - } if (!req || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) @@ -1525,7 +1523,7 @@ static int connectable_update(struct hci_request *req, unsigned long opt) /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + !list_empty(&hdev->adv_instances)) __hci_req_enable_advertising(req); __hci_update_background_scan(req); @@ -2226,13 +2224,12 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) */ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { + list_empty(&hdev->adv_instances))) { __hci_req_update_adv_data(req, HCI_ADV_CURRENT); __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); } - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance == 0x00 && + if (hdev->cur_adv_instance == 0x00 && !list_empty(&hdev->adv_instances)) { adv_instance = list_first_entry(&hdev->adv_instances, struct adv_info, list); @@ -2241,7 +2238,7 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) __hci_req_enable_advertising(req); - else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + else if (!list_empty(&hdev->adv_instances) && hdev->cur_adv_instance) __hci_req_schedule_adv_instance(req, hdev->cur_adv_instance, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9ce2bb2..03a65e8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3734,7 +3734,6 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, * set up earlier, then re-enable multi-instance advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) || list_empty(&hdev->adv_instances)) goto unlock; @@ -5892,9 +5891,6 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); - if (status) - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { if (!adv_instance->pending) continue; @@ -6012,8 +6008,6 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, if (hdev->adv_instance_cnt > prev_instance_cnt) mgmt_advertising_added(sk, hdev, cp->instance); - hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); - if (hdev->cur_adv_instance == cp->instance) { /* If the currently advertised instance is being changed then * cancel the current advertising and schedule the next @@ -6129,7 +6123,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) { + if (list_empty(&hdev->adv_instances)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); goto unlock; -- cgit v1.1 From d6dac32e84e407ba15f257b5df2f4cb263005ab4 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 27 Nov 2015 10:52:39 +0200 Subject: Bluetooth: Fix updating wrong instance's scan_rsp data The __hci_req_update_scan_rsp_data gets the instance to be updated which should get passed to update_inst_scan_rsp_data() instead of always enabling the current instance. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index adfcd6f..edf2199 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1018,7 +1018,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) if (instance == HCI_ADV_CURRENT) instance = get_current_adv_instance(req->hdev); - update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); + update_inst_scan_rsp_data(req, instance); } static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) -- cgit v1.1 From 550a8ca765a154ca38dcd888b4f12a173e761bdc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 27 Nov 2015 11:11:52 +0200 Subject: Bluetooth: Remove redundant check for req.cmd_q The hci_req_run() function already checks for empty cmd_q and bails out if necessary. Also, req.cmd_q should really be treated as private data of the request and not accessed directly. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index edf2199..f1529d7 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1187,8 +1187,7 @@ static void adv_timeout_expire(struct work_struct *work) if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); - if (!skb_queue_empty(&req.cmd_q)) - hci_req_run(&req, NULL); + hci_req_run(&req, NULL); unlock: hci_dev_unlock(hdev); -- cgit v1.1 From d6b7e2cddb72a87c2597af43ba9f5f2b03a2208b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 30 Nov 2015 11:21:44 +0200 Subject: Bluetooth: Clean up advertising initialization in powered_update_hci() The logic in powered_update_hci() to initialize the advertising data & state is a bit more complicated than it needs to be. It was previously not doing anything if HCI_LE_ENABLED wasn't set, but this was not obvious by quickly looking at the code. Now the conditions for the various actions are more explicit. Another simplification is due to the fact that __hci_req_schedule_adv_instance() takes care of setting hdev->cur_adv_instance so there's no need to set it before calling the function. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index f1529d7..14db777 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2181,7 +2181,6 @@ static void discov_off(struct work_struct *work) static int powered_update_hci(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance; u8 link_sec; hci_dev_lock(hdev); @@ -2216,32 +2215,27 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) sizeof(cp), &cp); } - if (lmp_le_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { /* Make sure the controller has a good default for * advertising data. This also applies to the case * where BR/EDR was toggled during the AUTO_OFF phase. */ - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - list_empty(&hdev->adv_instances))) { - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); - } + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + list_empty(&hdev->adv_instances)) { + __hci_req_update_adv_data(req, 0x00); + __hci_req_update_scan_rsp_data(req, 0x00); + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_enable_advertising(req); + } else if (!list_empty(&hdev->adv_instances)) { + struct adv_info *adv_instance; - if (hdev->cur_adv_instance == 0x00 && - !list_empty(&hdev->adv_instances)) { adv_instance = list_first_entry(&hdev->adv_instances, struct adv_info, list); - hdev->cur_adv_instance = adv_instance->instance; - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_enable_advertising(req); - else if (!list_empty(&hdev->adv_instances) && - hdev->cur_adv_instance) __hci_req_schedule_adv_instance(req, - hdev->cur_adv_instance, + adv_instance->instance, true); + } } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); -- cgit v1.1 From cab054ab47fa3fdf1c597a9874363680bfdab33e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 30 Nov 2015 11:21:45 +0200 Subject: Bluetooth: Clean up current advertising instance tracking We can simplify a lot of code by making sure hdev->cur_adv_instance is always up-to-date. This allows e.g. the removal of the get_current_adv_instance() helper function and the special HCI_ADV_CURRENT value. This patch also makes selecting instance 0x00 explicit in the various calls where advertising instances aren't enabled, e.g. when HCI_ADVERTISING is set or we've just finished enabling LE. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 12 +++++--- net/bluetooth/hci_request.c | 68 ++++++++++----------------------------------- net/bluetooth/hci_request.h | 8 ++---- net/bluetooth/mgmt.c | 10 ++++--- 4 files changed, 32 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index eac3f6f..9fb443a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1769,7 +1769,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); + hci_req_update_adv_data(hdev, hdev->cur_adv_instance); mgmt_new_settings(hdev); } @@ -2610,9 +2610,12 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance) BT_DBG("%s removing %dMR", hdev->name, instance); - if (hdev->cur_adv_instance == instance && hdev->adv_instance_timeout) { - cancel_delayed_work(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; + if (hdev->cur_adv_instance == instance) { + if (hdev->adv_instance_timeout) { + cancel_delayed_work(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } + hdev->cur_adv_instance = 0x00; } list_del(&adv_instance->list); @@ -2639,6 +2642,7 @@ void hci_adv_instances_clear(struct hci_dev *hdev) } hdev->adv_instance_cnt = 0; + hdev->cur_adv_instance = 0x00; } /* This function requires the caller holds hdev->lock */ diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 14db777..9997c31 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -815,23 +815,9 @@ void hci_req_add_le_passive_scan(struct hci_request *req) &enable_cp); } -static u8 get_current_adv_instance(struct hci_dev *hdev) -{ - /* The "Set Advertising" setting supersedes the "Add Advertising" - * setting. Here we set the advertising data based on which - * setting was set. When neither apply, default to the global settings, - * represented by instance "0". - */ - if (!list_empty(&hdev->adv_instances) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return hdev->cur_adv_instance; - - return 0x00; -} - static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) { - u8 instance = get_current_adv_instance(hdev); + u8 instance = hdev->cur_adv_instance; struct adv_info *adv_instance; /* Ignore instance 0 */ @@ -890,7 +876,6 @@ void __hci_req_enable_advertising(struct hci_request *req) struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; - u8 instance; u32 flags; if (hci_conn_num(hdev, LE_LINK) > 0) @@ -906,8 +891,7 @@ void __hci_req_enable_advertising(struct hci_request *req) */ hci_dev_clear_flag(hdev, HCI_LE_ADV); - instance = get_current_adv_instance(hdev); - flags = get_adv_instance_flags(hdev, instance); + flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); /* If the "connectable" instance flag was not set, then choose between * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. @@ -985,7 +969,7 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, return adv_instance->scan_rsp_len; } -static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) +void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_scan_rsp_data cp; @@ -1013,14 +997,6 @@ static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); } -void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) -{ - if (instance == HCI_ADV_CURRENT) - instance = get_current_adv_instance(req->hdev); - - update_inst_scan_rsp_data(req, instance); -} - static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv_instance = NULL; @@ -1089,7 +1065,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -static void update_inst_adv_data(struct hci_request *req, u8 instance) +void __hci_req_update_adv_data(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; struct hci_cp_le_set_adv_data cp; @@ -1115,15 +1091,7 @@ static void update_inst_adv_data(struct hci_request *req, u8 instance) hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } -void __hci_req_update_adv_data(struct hci_request *req, int instance) -{ - if (instance == HCI_ADV_CURRENT) - instance = get_current_adv_instance(req->hdev); - - update_inst_adv_data(req, instance); -} - -int hci_req_update_adv_data(struct hci_dev *hdev, int instance) +int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance) { struct hci_request req; @@ -1141,21 +1109,19 @@ static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) void hci_req_reenable_advertising(struct hci_dev *hdev) { struct hci_request req; - u8 instance; if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && list_empty(&hdev->adv_instances)) return; - instance = get_current_adv_instance(hdev); - hci_req_init(&req, hdev); - if (instance) { - __hci_req_schedule_adv_instance(&req, instance, true); + if (hdev->cur_adv_instance) { + __hci_req_schedule_adv_instance(&req, hdev->cur_adv_instance, + true); } else { - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); __hci_req_enable_advertising(&req); } @@ -1176,7 +1142,7 @@ static void adv_timeout_expire(struct work_struct *work) hdev->adv_instance_timeout = 0; - instance = get_current_adv_instance(hdev); + instance = hdev->cur_adv_instance; if (instance == 0x00) goto unlock; @@ -1246,8 +1212,8 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, return 0; hdev->cur_adv_instance = instance; - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(req, instance); + __hci_req_update_scan_rsp_data(req, instance); __hci_req_enable_advertising(req); return 0; @@ -1301,7 +1267,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, if (!err) mgmt_advertising_removed(NULL, hdev, rem_inst); } - hdev->cur_adv_instance = 0x00; } else { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1318,9 +1283,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, } } - if (list_empty(&hdev->adv_instances)) - hdev->cur_adv_instance = 0x00; - if (!req || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; @@ -1518,7 +1480,7 @@ static int connectable_update(struct hci_request *req, unsigned long opt) * advertising flags. */ if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(req, hdev->cur_adv_instance); /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || @@ -1627,7 +1589,7 @@ static int discoverable_update(struct hci_request *req, unsigned long opt) * only update AD if advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(req, 0x00); hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index a24d3b5..64ff8c0 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,14 +64,12 @@ void __hci_req_update_eir(struct hci_request *req); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); -#define HCI_ADV_CURRENT (-1) - void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); -void __hci_req_update_adv_data(struct hci_request *req, int instance); -int hci_req_update_adv_data(struct hci_dev *hdev, int instance); -void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance); +void __hci_req_update_adv_data(struct hci_request *req, u8 instance); +int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance); +void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 03a65e8..621f6fd 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1626,8 +1626,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct hci_request req; hci_req_init(&req, hdev); - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); hci_req_run(&req, NULL); hci_update_background_scan(hdev); } @@ -3006,7 +3006,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, * no need to udpate the advertising data here. */ if (lmp_le_capable(hdev)) - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_name_complete); if (err < 0) @@ -3799,6 +3799,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, bool changed; if (cp->val) { + hdev->cur_adv_instance = 0x00; changed = !hci_dev_test_and_set_flag(hdev, HCI_ADVERTISING); if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_ADVERTISING_CONNECTABLE); @@ -3846,6 +3847,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, * We cannot use update_[adv|scan_rsp]_data() here as the * HCI_ADVERTISING flag is not yet set. */ + hdev->cur_adv_instance = 0x00; __hci_req_update_adv_data(&req, 0x00); __hci_req_update_scan_rsp_data(&req, 0x00); __hci_req_enable_advertising(&req); @@ -4195,7 +4197,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Since only the advertising data flags will change, there * is no need to update the scan response data. */ - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_adv_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_bredr_complete); if (err < 0) -- cgit v1.1 From 2f99536a5b34d5b0f54723067d68f6cef3f0fdc6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 3 Dec 2015 12:45:19 +0200 Subject: Bluetooth: Use continuous scanning when creating LE connections All LE connections are now triggered through a preceding passive scan and waiting for a connectable advertising report. This means we've got the best possible guarantee that the device is within range and should be able to request the controller to perform continuous scanning. This way we minimize the risk that we miss out on any advertising packets. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org # 4.3+ --- net/bluetooth/hci_conn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e260021..48a7eac 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -726,8 +726,12 @@ static void hci_req_add_le_create_conn(struct hci_request *req, if (hci_update_random_address(req, false, &own_addr_type)) return; + /* Set window to be the same value as the interval to enable + * continuous scanning. + */ cp.scan_interval = cpu_to_le16(hdev->le_scan_interval); - cp.scan_window = cpu_to_le16(hdev->le_scan_window); + cp.scan_window = cp.scan_interval; + bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; cp.own_address_type = own_addr_type; -- cgit v1.1 From acb9f911ea1f828822001d72b21f7cc06e6718c7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 3 Dec 2015 12:45:20 +0200 Subject: Bluetooth: Don't treat connection timeout as a failure When we're doing background scanning and connection attempts it's possible we timeout trying to connect and go back to scanning again. The timeout triggers a HCI_LE_Create_Connection_Cancel which will trigger a Connection Complete with "Unknown Connection Identifier" error status. Since we go back to scanning this isn't really a failure and shouldn't be presented as such to user space through mgmt. The exception to this is if the connection attempt was due to an explicit request on an L2CAP socket (indicated by params->explicit_connect being true). Since the socket will get an error it's consistent to also notify the failure on mgmt in this case. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 48a7eac..32575b4 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -668,8 +668,16 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) conn->state = BT_CLOSED; - mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, - status); + /* If the status indicates successful cancellation of + * the attempt (i.e. Unkown Connection Id) there's no point of + * notifying failure since we'll go back to keep trying to + * connect. The only exception is explicit connect requests + * where a timeout + cancel does indicate an actual failure. + */ + if (status != HCI_ERROR_UNKNOWN_CONN_ID || + (params && params->explicit_connect)) + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); hci_connect_cfm(conn, status); -- cgit v1.1 From 1a11ec89dba7bcfb1cb502fc5945533f317ddd03 Mon Sep 17 00:00:00 2001 From: Yichen Zhao Date: Tue, 1 Dec 2015 11:11:01 -0800 Subject: Bluetooth: Fix locking in bt_accept_dequeue after disconnection Fix a crash that may happen when bt_accept_dequeue is run after a Bluetooth connection has been disconnected. bt_accept_unlink was called after release_sock, permitting bt_accept_unlink to run twice on the same socket and cause a NULL pointer dereference. [50510.241632] BUG: unable to handle kernel NULL pointer dereference at 00000000000001a8 [50510.241694] IP: [] bt_accept_unlink+0x47/0xa0 [bluetooth] [50510.241759] PGD 0 [50510.241776] Oops: 0002 [#1] SMP [50510.241802] Modules linked in: rtl8192cu rtl_usb rtlwifi rtl8192c_common 8021q garp stp mrp llc rfcomm bnep nls_iso8859_1 intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp arc4 ath9k ath9k_common ath9k_hw ath kvm eeepc_wmi asus_wmi mac80211 snd_hda_codec_hdmi snd_hda_codec_realtek sparse_keymap crct10dif_pclmul snd_hda_codec_generic crc32_pclmul snd_hda_intel snd_hda_controller cfg80211 snd_hda_codec i915 snd_hwdep snd_pcm ghash_clmulni_intel snd_timer snd soundcore serio_raw cryptd drm_kms_helper drm i2c_algo_bit shpchp ath3k mei_me lpc_ich btusb bluetooth 6lowpan_iphc mei lp parport wmi video mac_hid psmouse ahci libahci r8169 mii [50510.242279] CPU: 0 PID: 934 Comm: krfcommd Not tainted 3.16.0-49-generic #65~14.04.1-Ubuntu [50510.242327] Hardware name: ASUSTeK Computer INC. VM40B/VM40B, BIOS 1501 12/09/2014 [50510.242370] task: ffff8800d9068a30 ti: ffff8800d7a54000 task.ti: ffff8800d7a54000 [50510.242413] RIP: 0010:[] [] bt_accept_unlink+0x47/0xa0 [bluetooth] [50510.242480] RSP: 0018:ffff8800d7a57d58 EFLAGS: 00010246 [50510.242511] RAX: 0000000000000000 RBX: ffff880119bb8c00 RCX: ffff880119bb8eb0 [50510.242552] RDX: ffff880119bb8eb0 RSI: 00000000fffffe01 RDI: ffff880119bb8c00 [50510.242592] RBP: ffff8800d7a57d60 R08: 0000000000000283 R09: 0000000000000001 [50510.242633] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800d8da9eb0 [50510.242673] R13: ffff8800d74fdb80 R14: ffff880119bb8c00 R15: ffff8800d8da9c00 [50510.242715] FS: 0000000000000000(0000) GS:ffff88011fa00000(0000) knlGS:0000000000000000 [50510.242761] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [50510.242794] CR2: 00000000000001a8 CR3: 0000000001c13000 CR4: 00000000001407f0 [50510.242835] Stack: [50510.242849] ffff880119bb8eb0 ffff8800d7a57da0 ffffffffc0124506 ffff8800d8da9eb0 [50510.242899] ffff8800d8da9c00 ffff8800d9068a30 0000000000000000 ffff8800d74fdb80 [50510.242949] ffff8800d6f85208 ffff8800d7a57e08 ffffffffc0159985 000000000000001f [50510.242999] Call Trace: [50510.243027] [] bt_accept_dequeue+0xb6/0x180 [bluetooth] [50510.243085] [] l2cap_sock_accept+0x125/0x220 [bluetooth] [50510.243128] [] ? wake_up_state+0x20/0x20 [50510.243163] [] kernel_accept+0x4e/0xa0 [50510.243200] [] rfcomm_run+0x1ad/0x890 [rfcomm] [50510.243238] [] ? rfcomm_process_rx+0x8a0/0x8a0 [rfcomm] [50510.243281] [] kthread+0xd2/0xf0 [50510.243312] [] ? kthread_create_on_node+0x1c0/0x1c0 [50510.243353] [] ret_from_fork+0x58/0x90 [50510.243387] [] ? kthread_create_on_node+0x1c0/0x1c0 [50510.243424] Code: 00 48 8b 93 b8 02 00 00 48 8d 83 b0 02 00 00 48 89 51 08 48 89 0a 48 89 83 b0 02 00 00 48 89 83 b8 02 00 00 48 8b 83 c0 02 00 00 <66> 83 a8 a8 01 00 00 01 48 c7 83 c0 02 00 00 00 00 00 00 f0 ff [50510.243685] RIP [] bt_accept_unlink+0x47/0xa0 [bluetooth] [50510.243737] RSP [50510.243758] CR2: 00000000000001a8 [50510.249457] ---[ end trace bb984f932c4e3ab3 ]--- Signed-off-by: Yichen Zhao Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 5785e8e..cb4e8d4 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -186,8 +186,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { - release_sock(sk); bt_accept_unlink(sk); + release_sock(sk); continue; } -- cgit v1.1 From 5e5c08cbee7d75d026ff50a5051f2ed19b4ba301 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:22 +0100 Subject: 6lowpan: clarify Kconfig entries for upcoming GHC support Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 7fa0f38..6af7a46 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -6,11 +6,12 @@ menuconfig 6LOWPAN "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. menuconfig 6LOWPAN_NHC - tristate "Next Header Compression Support" + tristate "Next Header and Generic Header Compression Support" depends on 6LOWPAN default y ---help--- - Support for next header compression. + Support for next header and generic header compression defined in + RFC6282 and RFC7400. if 6LOWPAN_NHC -- cgit v1.1 From 7e568f50c19c731938fee24a0f048f35120080f3 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:23 +0100 Subject: 6lowpan: add nhc module for GHC hop-by-hopextension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 3 +++ net/6lowpan/nhc_ghc_ext_hop.c | 27 +++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_hop.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 6af7a46..1bd49eb 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -59,4 +59,10 @@ config 6LOWPAN_NHC_UDP ---help--- 6LoWPAN IPv6 UDP Header compression according to RFC6282. +config 6LOWPAN_GHC_EXT_HDR_HOP + tristate "GHC Hop-by-Hop Options Header Support" + ---help--- + 6LoWPAN IPv6 Hop-by-Hop option generic header compression according + to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index c6ffc55..ba20e01 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -10,3 +10,6 @@ obj-$(CONFIG_6LOWPAN_NHC_IPV6) += nhc_ipv6.o obj-$(CONFIG_6LOWPAN_NHC_MOBILITY) += nhc_mobility.o obj-$(CONFIG_6LOWPAN_NHC_ROUTING) += nhc_routing.o obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o + +#rfc7400 ghcs +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o diff --git a/net/6lowpan/nhc_ghc_ext_hop.c b/net/6lowpan/nhc_ghc_ext_hop.c new file mode 100644 index 0000000..baec86f --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_hop.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_HOP_IDLEN 1 +#define LOWPAN_GHC_EXT_HOP_ID_0 0xb0 +#define LOWPAN_GHC_EXT_HOP_MASK_0 0xfe + +static void hop_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_HOP_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_HOP_MASK_0; +} + +LOWPAN_NHC(ghc_ext_hop, "RFC7400 Hop-by-Hop Extension Header", NEXTHDR_HOP, 0, + hop_ghid_setup, LOWPAN_GHC_EXT_HOP_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_hop); +MODULE_DESCRIPTION("6LoWPAN generic header hop-by-hop extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 70cc86752e59ec26fcd31679b1eef23e8cb4b516 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:24 +0100 Subject: 6lowpan: add nhc module for GHC UDP detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 5 +++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_udp.c | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_udp.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 1bd49eb..94d5178 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -65,4 +65,9 @@ config 6LOWPAN_GHC_EXT_HDR_HOP 6LoWPAN IPv6 Hop-by-Hop option generic header compression according to RFC7400. +config 6LOWPAN_GHC_UDP + tristate "GHC UDP Support" + ---help--- + 6LoWPAN IPv6 UDP generic header compression according to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index ba20e01..5e4f2f3 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o #rfc7400 ghcs obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o +obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o diff --git a/net/6lowpan/nhc_ghc_udp.c b/net/6lowpan/nhc_ghc_udp.c new file mode 100644 index 0000000..17beefa --- /dev/null +++ b/net/6lowpan/nhc_ghc_udp.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN UDP compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_UDP_IDLEN 1 +#define LOWPAN_GHC_UDP_ID_0 0xd0 +#define LOWPAN_GHC_UDP_MASK_0 0xf8 + +static void udp_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_UDP_ID_0; + nhc->idmask[0] = LOWPAN_GHC_UDP_MASK_0; +} + +LOWPAN_NHC(ghc_udp, "RFC7400 UDP", NEXTHDR_UDP, 0, + udp_ghid_setup, LOWPAN_GHC_UDP_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_udp); +MODULE_DESCRIPTION("6LoWPAN generic header UDP compression"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From c39da3bb5b978ca03f1702c99965f3db1204516a Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:25 +0100 Subject: 6lowpan: add nhc module for GHC ICMPv6 detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 5 +++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_icmpv6.c | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_icmpv6.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 94d5178..0a3f5a8 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -70,4 +70,9 @@ config 6LOWPAN_GHC_UDP ---help--- 6LoWPAN IPv6 UDP generic header compression according to RFC7400. +config 6LOWPAN_GHC_ICMPV6 + tristate "GHC ICMPv6 Support" + ---help--- + 6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 5e4f2f3..86af3fd 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o #rfc7400 ghcs obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o +obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o diff --git a/net/6lowpan/nhc_ghc_icmpv6.c b/net/6lowpan/nhc_ghc_icmpv6.c new file mode 100644 index 0000000..32e7c2c --- /dev/null +++ b/net/6lowpan/nhc_ghc_icmpv6.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN ICMPv6 compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_ICMPV6_IDLEN 1 +#define LOWPAN_GHC_ICMPV6_ID_0 0xdf +#define LOWPAN_GHC_ICMPV6_MASK_0 0xff + +static void icmpv6_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_ICMPV6_ID_0; + nhc->idmask[0] = LOWPAN_GHC_ICMPV6_MASK_0; +} + +LOWPAN_NHC(ghc_icmpv6, "RFC7400 ICMPv6", NEXTHDR_ICMP, 0, + icmpv6_ghid_setup, LOWPAN_GHC_ICMPV6_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_icmpv6); +MODULE_DESCRIPTION("6LoWPAN generic header ICMPv6 compression"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 20616a5a1e3bb47c385c6d5f27520e7a3cc82864 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:26 +0100 Subject: 6lowpan: add nhc module for GHC destination extension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_ext_dest.c | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_dest.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 0a3f5a8..e5184e6 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -75,4 +75,10 @@ config 6LOWPAN_GHC_ICMPV6 ---help--- 6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400. +config 6LOWPAN_GHC_EXT_HDR_DEST + tristate "GHC Destination Options Header Support" + ---help--- + 6LoWPAN IPv6 destination option generic header compression according + to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 86af3fd..fc4bac0 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_6LOWPAN_NHC_UDP) += nhc_udp.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o diff --git a/net/6lowpan/nhc_ghc_ext_dest.c b/net/6lowpan/nhc_ghc_ext_dest.c new file mode 100644 index 0000000..9887b3a --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_dest.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_DEST_IDLEN 1 +#define LOWPAN_GHC_EXT_DEST_ID_0 0xb6 +#define LOWPAN_GHC_EXT_DEST_MASK_0 0xfe + +static void dest_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_DEST_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_DEST_MASK_0; +} + +LOWPAN_NHC(ghc_ext_dest, "RFC7400 Destination Extension Header", NEXTHDR_DEST, + 0, dest_ghid_setup, LOWPAN_GHC_EXT_DEST_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_dest); +MODULE_DESCRIPTION("6LoWPAN generic header destination extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 2f4799478c94928802c79edd12711a0e9e8b6f1b Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:27 +0100 Subject: 6lowpan: add nhc module for GHC fragmentation extension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_ext_frag.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_frag.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index e5184e6..13abcc5 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -81,4 +81,10 @@ config 6LOWPAN_GHC_EXT_HDR_DEST 6LoWPAN IPv6 destination option generic header compression according to RFC7400. +config 6LOWPAN_GHC_EXT_HDR_FRAG + tristate "GHC Fragmentation Options Header Support" + ---help--- + 6LoWPAN IPv6 fragmentation option generic header compression + according to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index fc4bac0..fb3f48d 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_HOP) += nhc_ghc_ext_hop.o obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG) += nhc_ghc_ext_frag.o diff --git a/net/6lowpan/nhc_ghc_ext_frag.c b/net/6lowpan/nhc_ghc_ext_frag.c new file mode 100644 index 0000000..1308b79 --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_frag.c @@ -0,0 +1,28 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_FRAG_IDLEN 1 +#define LOWPAN_GHC_EXT_FRAG_ID_0 0xb4 +#define LOWPAN_GHC_EXT_FRAG_MASK_0 0xfe + +static void frag_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_FRAG_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_FRAG_MASK_0; +} + +LOWPAN_NHC(ghc_ext_frag, "RFC7400 Fragmentation Extension Header", + NEXTHDR_FRAGMENT, 0, frag_ghid_setup, + LOWPAN_GHC_EXT_FRAG_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_frag); +MODULE_DESCRIPTION("6LoWPAN generic header fragmentation extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 43f26e17d02f5c772cedc3ee16b192ed79764474 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Wed, 9 Dec 2015 22:46:28 +0100 Subject: 6lowpan: add nhc module for GHC routing extension header detection Acked-by: Jukka Rissanen Signed-off-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/Kconfig | 6 ++++++ net/6lowpan/Makefile | 1 + net/6lowpan/nhc_ghc_ext_route.c | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 net/6lowpan/nhc_ghc_ext_route.c (limited to 'net') diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 13abcc5..bcb9d8a 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -87,4 +87,10 @@ config 6LOWPAN_GHC_EXT_HDR_FRAG 6LoWPAN IPv6 fragmentation option generic header compression according to RFC7400. +config 6LOWPAN_GHC_EXT_HDR_ROUTE + tristate "GHC Routing Options Header Support" + ---help--- + 6LoWPAN IPv6 routing option generic header compression according + to RFC7400. + endif diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index fb3f48d..9e35a5d 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_6LOWPAN_GHC_UDP) += nhc_ghc_udp.o obj-$(CONFIG_6LOWPAN_GHC_ICMPV6) += nhc_ghc_icmpv6.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_DEST) += nhc_ghc_ext_dest.o obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_FRAG) += nhc_ghc_ext_frag.o +obj-$(CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE) += nhc_ghc_ext_route.o diff --git a/net/6lowpan/nhc_ghc_ext_route.c b/net/6lowpan/nhc_ghc_ext_route.c new file mode 100644 index 0000000..d7e5bd7 --- /dev/null +++ b/net/6lowpan/nhc_ghc_ext_route.c @@ -0,0 +1,27 @@ +/* + * 6LoWPAN Extension Header compression according to RFC7400 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "nhc.h" + +#define LOWPAN_GHC_EXT_ROUTE_IDLEN 1 +#define LOWPAN_GHC_EXT_ROUTE_ID_0 0xb2 +#define LOWPAN_GHC_EXT_ROUTE_MASK_0 0xfe + +static void route_ghid_setup(struct lowpan_nhc *nhc) +{ + nhc->id[0] = LOWPAN_GHC_EXT_ROUTE_ID_0; + nhc->idmask[0] = LOWPAN_GHC_EXT_ROUTE_MASK_0; +} + +LOWPAN_NHC(ghc_ext_route, "RFC7400 Routing Extension Header", NEXTHDR_ROUTING, + 0, route_ghid_setup, LOWPAN_GHC_EXT_ROUTE_IDLEN, NULL, NULL); + +module_lowpan_nhc(ghc_ext_route); +MODULE_DESCRIPTION("6LoWPAN generic header routing extension compression"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 00f59314111a6b18ee65b238b38c470dbdbf3be5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:29 +0100 Subject: 6lowpan: add lowpan dev register helpers This patch introduces register and unregister functionality for lowpan interfaces. While register a lowpan interface there are several things which need to be initialize by the 6lowpan subsystem. Upcoming functionality need to register/unregister per interface components e.g. debugfs entry. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/core.c | 33 +++++++++++++++++++++++++++++++-- net/bluetooth/6lowpan.c | 8 +++----- net/ieee802154/6lowpan/core.c | 6 ++---- 3 files changed, 36 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 83b19e0..80fc509 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -15,7 +15,8 @@ #include -void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) +int lowpan_register_netdevice(struct net_device *dev, + enum lowpan_lltypes lltype) { dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; @@ -23,8 +24,36 @@ void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) dev->priv_flags |= IFF_NO_QUEUE; lowpan_priv(dev)->lltype = lltype; + + return register_netdevice(dev); +} +EXPORT_SYMBOL(lowpan_register_netdevice); + +int lowpan_register_netdev(struct net_device *dev, + enum lowpan_lltypes lltype) +{ + int ret; + + rtnl_lock(); + ret = lowpan_register_netdevice(dev, lltype); + rtnl_unlock(); + return ret; +} +EXPORT_SYMBOL(lowpan_register_netdev); + +void lowpan_unregister_netdevice(struct net_device *dev) +{ + unregister_netdevice(dev); +} +EXPORT_SYMBOL(lowpan_unregister_netdevice); + +void lowpan_unregister_netdev(struct net_device *dev) +{ + rtnl_lock(); + lowpan_unregister_netdevice(dev); + rtnl_unlock(); } -EXPORT_SYMBOL(lowpan_netdev_setup); +EXPORT_SYMBOL(lowpan_unregister_netdev); static int __init lowpan_module_init(void) { diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9e9cca3..d040365 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -825,9 +825,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) list_add_rcu(&(*dev)->list, &bt_6lowpan_devices); spin_unlock(&devices_lock); - lowpan_netdev_setup(netdev, LOWPAN_LLTYPE_BTLE); - - err = register_netdev(netdev); + err = lowpan_register_netdev(netdev, LOWPAN_LLTYPE_BTLE); if (err < 0) { BT_INFO("register_netdev failed %d", err); spin_lock(&devices_lock); @@ -890,7 +888,7 @@ static void delete_netdev(struct work_struct *work) struct lowpan_dev *entry = container_of(work, struct lowpan_dev, delete_netdev); - unregister_netdev(entry->netdev); + lowpan_unregister_netdev(entry->netdev); /* The entry pointer is deleted by the netdev destructor. */ } @@ -1348,7 +1346,7 @@ static void disconnect_devices(void) ifdown(entry->netdev); BT_DBG("Unregistering netdev %s %p", entry->netdev->name, entry->netdev); - unregister_netdev(entry->netdev); + lowpan_unregister_netdev(entry->netdev); kfree(entry); } } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 20c49c7..737c87a 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -161,9 +161,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, wdev->needed_headroom; ldev->needed_tailroom = wdev->needed_tailroom; - lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154); - - ret = register_netdevice(ldev); + ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154); if (ret < 0) { dev_put(wdev); return ret; @@ -180,7 +178,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head) ASSERT_RTNL(); wdev->ieee802154_ptr->lowpan_dev = NULL; - unregister_netdevice(ldev); + lowpan_unregister_netdevice(ldev); dev_put(wdev); } -- cgit v1.1 From b1815fd949e5bd06d118019acf68f87c9414f705 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:30 +0100 Subject: 6lowpan: add debugfs support This patch will introduce a 6lowpan entry into the debugfs if enabled. Inside this 6lowpan directory we create a subdirectories of all 6lowpan interfaces to offer a per interface debugfs support. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/6lowpan/6lowpan_i.h | 28 ++++++++++++++++++++++++++ net/6lowpan/Kconfig | 8 ++++++++ net/6lowpan/Makefile | 1 + net/6lowpan/core.c | 28 +++++++++++++++++++++++++- net/6lowpan/debugfs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 net/6lowpan/6lowpan_i.h create mode 100644 net/6lowpan/debugfs.c (limited to 'net') diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h new file mode 100644 index 0000000..d16bb4b --- /dev/null +++ b/net/6lowpan/6lowpan_i.h @@ -0,0 +1,28 @@ +#ifndef __6LOWPAN_I_H +#define __6LOWPAN_I_H + +#include + +#ifdef CONFIG_6LOWPAN_DEBUGFS +int lowpan_dev_debugfs_init(struct net_device *dev); +void lowpan_dev_debugfs_exit(struct net_device *dev); + +int __init lowpan_debugfs_init(void); +void lowpan_debugfs_exit(void); +#else +static inline int lowpan_dev_debugfs_init(struct net_device *dev) +{ + return 0; +} + +static inline void lowpan_dev_debugfs_exit(struct net_device *dev) { } + +static inline int __init lowpan_debugfs_init(void) +{ + return 0; +} + +static inline void lowpan_debugfs_exit(void) { } +#endif /* CONFIG_6LOWPAN_DEBUGFS */ + +#endif /* __6LOWPAN_I_H */ diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index bcb9d8a..9c05151 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -5,6 +5,14 @@ menuconfig 6LOWPAN This enables IPv6 over Low power Wireless Personal Area Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. +config 6LOWPAN_DEBUGFS + bool "6LoWPAN debugfs support" + depends on 6LOWPAN + depends on DEBUG_FS + ---help--- + This enables 6LoWPAN debugfs support. For example to manipulate + IPHC context information at runtime. + menuconfig 6LOWPAN_NHC tristate "Next Header and Generic Header Compression Support" depends on 6LOWPAN diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 9e35a5d..e44f3bf 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_6LOWPAN) += 6lowpan.o 6lowpan-y := core.o iphc.o nhc.o +6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o #rfc6282 nhcs obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 80fc509..c7f06f5 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -15,9 +15,13 @@ #include +#include "6lowpan_i.h" + int lowpan_register_netdevice(struct net_device *dev, enum lowpan_lltypes lltype) { + int ret; + dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; dev->mtu = IPV6_MIN_MTU; @@ -25,7 +29,15 @@ int lowpan_register_netdevice(struct net_device *dev, lowpan_priv(dev)->lltype = lltype; - return register_netdevice(dev); + ret = lowpan_dev_debugfs_init(dev); + if (ret < 0) + return ret; + + ret = register_netdevice(dev); + if (ret < 0) + lowpan_dev_debugfs_exit(dev); + + return ret; } EXPORT_SYMBOL(lowpan_register_netdevice); @@ -44,6 +56,7 @@ EXPORT_SYMBOL(lowpan_register_netdev); void lowpan_unregister_netdevice(struct net_device *dev) { unregister_netdevice(dev); + lowpan_dev_debugfs_exit(dev); } EXPORT_SYMBOL(lowpan_unregister_netdevice); @@ -57,6 +70,12 @@ EXPORT_SYMBOL(lowpan_unregister_netdev); static int __init lowpan_module_init(void) { + int ret; + + ret = lowpan_debugfs_init(); + if (ret < 0) + return ret; + request_module_nowait("ipv6"); request_module_nowait("nhc_dest"); @@ -69,6 +88,13 @@ static int __init lowpan_module_init(void) return 0; } + +static void __exit lowpan_module_exit(void) +{ + lowpan_debugfs_exit(); +} + module_init(lowpan_module_init); +module_exit(lowpan_module_exit); MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c new file mode 100644 index 0000000..88eef84 --- /dev/null +++ b/net/6lowpan/debugfs.c @@ -0,0 +1,53 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: + * (C) 2015 Pengutronix, Alexander Aring + * Copyright (c) 2015 Nordic Semiconductor. All Rights Reserved. + */ + +#include + +#include "6lowpan_i.h" + +static struct dentry *lowpan_debugfs; + +int lowpan_dev_debugfs_init(struct net_device *dev) +{ + struct lowpan_priv *lpriv = lowpan_priv(dev); + + /* creating the root */ + lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); + if (!lpriv->iface_debugfs) + goto fail; + + return 0; + +fail: + return -EINVAL; +} + +void lowpan_dev_debugfs_exit(struct net_device *dev) +{ + debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs); +} + +int __init lowpan_debugfs_init(void) +{ + lowpan_debugfs = debugfs_create_dir("6lowpan", NULL); + if (!lowpan_debugfs) + return -EINVAL; + + return 0; +} + +void lowpan_debugfs_exit(void) +{ + debugfs_remove_recursive(lowpan_debugfs); +} -- cgit v1.1 From c38383530fb5e160b739aff4bf08c1cc2dfcc659 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 23:23:56 +0100 Subject: mac802154: tx: fix synced xmit deadlock This patch reverts 6001d52 ("mac802154: tx: don't allow if down while sync tx"). This has side effects with stop callback which flush the transmit workqueue. The stop callback will wait until the workqueue is flushed and holding the rtnl lock. That means it can happen that the stop callback waits forever because it try to lock the rtnl mutex which is already hold by stop callback. Cc: Michael Hennerich Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/driver-ops.h | 3 --- net/mac802154/tx.c | 9 --------- 2 files changed, 12 deletions(-) (limited to 'net') diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index 0550f33..fd9daf2 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -18,9 +18,6 @@ drv_xmit_async(struct ieee802154_local *local, struct sk_buff *skb) static inline int drv_xmit_sync(struct ieee802154_local *local, struct sk_buff *skb) { - /* don't allow other operations while sync xmit */ - ASSERT_RTNL(); - might_sleep(); return local->ops->xmit_sync(&local->hw, skb); diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 3827f35..7e25345 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -38,12 +38,6 @@ void ieee802154_xmit_worker(struct work_struct *work) struct net_device *dev = skb->dev; int res; - rtnl_lock(); - - /* check if ifdown occurred while schedule */ - if (!netif_running(dev)) - goto err_tx; - res = drv_xmit_sync(local, skb); if (res) goto err_tx; @@ -53,14 +47,11 @@ void ieee802154_xmit_worker(struct work_struct *work) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - rtnl_unlock(); - return; err_tx: /* Restart the netif queue on each sub_if_data object. */ ieee802154_wake_queue(&local->hw); - rtnl_unlock(); kfree_skb(skb); netdev_dbg(dev, "transmission failed\n"); } -- cgit v1.1 From 4ada1282d86865671abdfcf9410b895af8491213 Mon Sep 17 00:00:00 2001 From: Danny Schweizer Date: Fri, 11 Dec 2015 10:04:54 +0100 Subject: Bluetooth: Do not filter multicast addresses by default A Linux PC is connected with another device over Bluetooth PAN using a BNEP interface. Whenever a packet is tried to be sent over the BNEP interface, the function "bnep_net_xmit()" in "net/bluetooth/bnep/netdev.c" is called. This function calls "bnep_net_mc_filter()", which checks (if the destination address is multicast) if the address is set in a certain multicast filter (&s->mc_filter). If it is not, then it is not sent out. This filter is only changed in two other functions, found in net/bluetooth/bnep/core.c": in "bnep_ctrl_set_mc_filter()", which is only called if a message of type "BNEP_FILTER_MULTI_ADDR_SET" is received. Otherwise, it is set in "bnep_add_connection()", where it is set to a default value which only adds the broadcast address to the filter: set_bit(bnep_mc_hash(dev->broadcast), (ulong *) &s->mc_filter); To sum up, if the BNEP interface does not receive any message of type "BNEP_FILTER_MULTI_ADDR_SET", it will not send out any messages with multicast destination addresses except for broadcast. However, in the BNEP specification (page 27 in http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf), it is said that per default, all multicast addresses should not be filtered, i.e. the BNEP interface should be able to send packets with any multicast destination address. It seems that the default case is wrong: the multicast filter should not block almost all multicast addresses, but should not filter out any. This leads to the problem that e.g. Neighbor Solicitation messages sent with Bluetooth PAN over the BNEP interface to a multicast destination address other than broadcast are blocked and not sent out. Therefore, in the default case, we set the mc_filter to ~0LL to not filter out any multicast addresses. Signed-off-by: Danny Schweizer Signed-off-by: Marcel Holtmann --- net/bluetooth/bnep/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 1641367..fbf251f 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -608,8 +608,11 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) s->msg.msg_flags = MSG_NOSIGNAL; #ifdef CONFIG_BT_BNEP_MC_FILTER - /* Set default mc filter */ - set_bit(bnep_mc_hash(dev->broadcast), (ulong *) &s->mc_filter); + /* Set default mc filter to not filter out any mc addresses + * as defined in the BNEP specification (revision 0.95a) + * http://grouper.ieee.org/groups/802/15/Bluetooth/BNEP.pdf + */ + s->mc_filter = ~0LL; #endif #ifdef CONFIG_BT_BNEP_PROTO_FILTER -- cgit v1.1 From 6e71b29908e9e9bffc03b8e991c9e58a0fa92d9c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 9 Dec 2015 06:56:41 -0800 Subject: mpls_iptunnel: add static qualifier to mpls_output This gets rid of the following compile warn: net/mpls/mpls_iptunnel.c:40:5: warning: no previous prototype for mpls_output [-Wmissing-prototypes] Signed-off-by: Roopa Prabhu Acked-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/mpls_iptunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 67591ae..cdd01e6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) return en->labels * sizeof(struct mpls_shim_hdr); } -int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct mpls_iptunnel_encap *tun_encap_info; struct mpls_shim_hdr *hdr; -- cgit v1.1 From 19576c9478682a398276c994ea0d2696474df32b Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 9 Dec 2015 14:07:40 +0100 Subject: netfilter: cttimeout: add netns support Add a per-netns list of timeout objects and adjust code to use it. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_timeout.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 82 ++++++++++++++++++++++-------------- net/netfilter/xt_CT.c | 2 +- 3 files changed, 53 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 93da609..26e7420 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,7 +25,7 @@ #include struct ctnl_timeout * -(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly; +(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c7a2d0e..3921d54 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -38,8 +38,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); -static LIST_HEAD(cttimeout_list); - static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, @@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &cttimeout_list, head) { + list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, timeout->l3num = l3num; timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &cttimeout_list); + list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; err: @@ -209,6 +207,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) @@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &cttimeout_list, head) { + list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i, RCU_INIT_POINTER(timeout_ext->timeout, NULL); } -static void ctnl_untimeout(struct ctnl_timeout *timeout) +static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; int i; local_bh_disable(); - for (i = 0; i < init_net.ct.htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < init_net.ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout) } /* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; @@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(timeout); + ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); char *name; struct ctnl_timeout *cur; int ret = -ENOENT; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &cttimeout_list, head) - ctnl_timeout_try_del(cur); + list_for_each_entry(cur, &net->nfct_timeout_list, head) + ctnl_timeout_try_del(net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(cur); + ret = ctnl_timeout_try_del(net, cur); if (ret < 0) return ret; @@ -511,12 +512,13 @@ err: } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) +static struct ctnl_timeout * +ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; rcu_read_lock(); - list_for_each_entry_rcu(timeout, &cttimeout_list, head) { + list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); +static int __net_init cttimeout_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfct_timeout_list); + + return 0; +} + +static void __net_exit cttimeout_net_exit(struct net *net) +{ + struct ctnl_timeout *cur, *tmp; + + ctnl_untimeout(net, NULL); + + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + nf_ct_l4proto_put(cur->l4proto); + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations cttimeout_ops = { + .init = cttimeout_net_init, + .exit = cttimeout_net_exit, +}; + static int __init cttimeout_init(void) { int ret; + ret = register_pernet_subsys(&cttimeout_ops); + if (ret < 0) + return ret; + ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " @@ -586,28 +617,17 @@ static int __init cttimeout_init(void) return 0; err_out: + unregister_pernet_subsys(&cttimeout_ops); return ret; } static void __exit cttimeout_exit(void) { - struct ctnl_timeout *cur, *tmp; - pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); - /* Make sure no conntrack objects refer to custom timeouts anymore. */ - ctnl_untimeout(NULL); - - list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. - */ - nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&cttimeout_ops); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index e7ac07e..6669e68 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto out; } - timeout = timeout_find_get(timeout_name); + timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", timeout_name); -- cgit v1.1 From 4ec8ff0edccffe7a77f18e2a1e2ce86f03e08b5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:54 -0500 Subject: netfilter: prepare xt_cgroup for multi revisions xt_cgroup will grow cgroup2 path based match. Postfix existing symbols with _v0 and prepare for multi revision registration. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Cc: Jan Engelhardt Cc: Pablo Neira Ayuso Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 54eaeb4..1730025 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching"); MODULE_ALIAS("ipt_cgroup"); MODULE_ALIAS("ip6t_cgroup"); -static int cgroup_mt_check(const struct xt_mtchk_param *par) +static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) { - struct xt_cgroup_info *info = par->matchinfo; + struct xt_cgroup_info_v0 *info = par->matchinfo; if (info->invert & ~1) return -EINVAL; @@ -35,9 +35,9 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) } static bool -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - const struct xt_cgroup_info *info = par->matchinfo; + const struct xt_cgroup_info_v0 *info = par->matchinfo; if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; @@ -46,27 +46,29 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } -static struct xt_match cgroup_mt_reg __read_mostly = { - .name = "cgroup", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = cgroup_mt_check, - .match = cgroup_mt, - .matchsize = sizeof(struct xt_cgroup_info), - .me = THIS_MODULE, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_IN), +static struct xt_match cgroup_mt_reg[] __read_mostly = { + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v0, + .match = cgroup_mt_v0, + .matchsize = sizeof(struct xt_cgroup_info_v0), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) { - return xt_register_match(&cgroup_mt_reg); + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } static void __exit cgroup_mt_exit(void) { - xt_unregister_match(&cgroup_mt_reg); + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } module_init(cgroup_mt_init); -- cgit v1.1 From c38c4597e4bf3e99860eac98211748e1ecb0e139 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:55 -0500 Subject: netfilter: implement xt_cgroup cgroup2 path match This patch implements xt_cgroup path match which matches cgroup2 membership of the associated socket. The match is recursive and invertible. For rationales on introducing another cgroup based match, please refer to a preceding commit "sock, cgroup: add sock->sk_cgroup". v3: Folded into xt_cgroup as a new revision interface as suggested by Pablo. v2: Included linux/limits.h from xt_cgroup2.h for PATH_MAX. Added explicit alignment to the priv field. Both suggested by Jan. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Cc: Jan Engelhardt Cc: Pablo Neira Ayuso Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 1730025..a086a91 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -34,6 +34,37 @@ static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) return 0; } +static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + struct cgroup *cgrp; + + if ((info->invert_path & ~1) || (info->invert_classid & ~1)) + return -EINVAL; + + if (!info->has_path && !info->has_classid) { + pr_info("xt_cgroup: no path or classid specified\n"); + return -EINVAL; + } + + if (info->has_path && info->has_classid) { + pr_info("xt_cgroup: both path and classid specified\n"); + return -EINVAL; + } + + if (info->has_path) { + cgrp = cgroup_get_from_path(info->path); + if (IS_ERR(cgrp)) { + pr_info("xt_cgroup: invalid path, errno=%ld\n", + PTR_ERR(cgrp)); + return -EINVAL; + } + info->priv = cgrp; + } + + return 0; +} + static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { @@ -46,6 +77,31 @@ cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } +static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info_v1 *info = par->matchinfo; + struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; + struct cgroup *ancestor = info->priv; + + if (!skb->sk || !sk_fullsock(skb->sk)) + return false; + + if (ancestor) + return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ + info->invert_path; + else + return (info->classid == sock_cgroup_classid(skcd)) ^ + info->invert_classid; +} + +static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + + if (info->priv) + cgroup_put(info->priv); +} + static struct xt_match cgroup_mt_reg[] __read_mostly = { { .name = "cgroup", @@ -59,6 +115,19 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), }, + { + .name = "cgroup", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v1, + .match = cgroup_mt_v1, + .matchsize = sizeof(struct xt_cgroup_info_v1), + .destroy = cgroup_mt_destroy_v1, + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) -- cgit v1.1 From 5241c2d7c52757e6df79877ba282762df0caea9f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 14 Dec 2015 20:55:22 +0100 Subject: ipv6: addrconf: drop ieee802154 specific things This patch removes ARPHRD_IEEE802154 from addrconf handling. In the earlier days of 802.15.4 6LoWPAN, the interface type was ARPHRD_IEEE802154 which introduced several issues, because 802.15.4 interfaces used the same type. Since commit 965e613d299c ("ieee802154: 6lowpan: fix ARPHRD to ARPHRD_6LOWPAN") we use ARPHRD_6LOWPAN for 6LoWPAN interfaces. This patch will remove ARPHRD_IEEE802154 which is currently deadcode, because ARPHRD_IEEE802154 doesn't reach the minimum 1280 MTU of IPv6. Also we use 6LoWPAN EUI64 specific defines instead using link-layer constanst from 802.15.4 link-layer header. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5e9111d..7082fb7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,7 +70,7 @@ #include #include -#include +#include #include #include #include @@ -1947,9 +1947,9 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) { - if (dev->addr_len != IEEE802154_ADDR_LEN) + if (dev->addr_len != EUI64_ADDR_LEN) return -1; - memcpy(eui, dev->dev_addr, 8); + memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN); eui[0] ^= 2; return 0; } @@ -2041,7 +2041,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: - case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); @@ -3066,7 +3065,6 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && - (dev->type != ARPHRD_IEEE802154) && (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN)) { -- cgit v1.1 From 0506eb01f70bd4d7e999c11488a6a892e01c42e2 Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Fri, 11 Dec 2015 12:27:50 +0100 Subject: iucv: prevent information leak in iucv_message Initialize storage for the future IUCV header that will be included in the transmitted packet. Some of the header fields are unused with HiperSockets transport, and will contain data left from some other functions. Signed-off-by: Eugene Crosser Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 3ea4c98..5bc473b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1031,7 +1031,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); struct sk_buff *skb; - struct iucv_message txmsg; + struct iucv_message txmsg = {0}; struct cmsghdr *cmsg; int cmsg_done; long timeo; -- cgit v1.1 From 979f66b32dbbf928635dbf44fd9843d27c4ed8f9 Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Fri, 11 Dec 2015 12:27:51 +0100 Subject: iucv: call skb_linearize() when needed When the linear buffer of the received sk_buff is shorter than the header, use skb_linearize(). sk_buffs with short linear buffer happen on the sending side under high traffic, and some kernel configurations, when allocated buffer starts just before page boundary, and IUCV transport has to send it as two separate QDIO buffer elements, with fist element shorter than the header. Signed-off-by: Eugene Crosser Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 5bc473b..ef50a94 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2084,11 +2084,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; } - /* write stuff from iucv_msg to skb cb */ - if (skb->len < sizeof(struct af_iucv_trans_hdr)) { - kfree_skb(skb); - return NET_RX_SUCCESS; - } + /* write stuff from iucv_msg to skb cb */ skb_pull(skb, sizeof(struct af_iucv_trans_hdr)); skb_reset_transport_header(skb); skb_reset_network_header(skb); @@ -2119,6 +2115,20 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, char nullstring[8]; int err = 0; + if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) { + WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d", + (int)skb->len, + (int)(ETH_HLEN + sizeof(struct af_iucv_trans_hdr))); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + if (skb_headlen(skb) < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) + if (skb_linearize(skb)) { + WARN_ONCE(1, "AF_IUCV skb_linearize failed, len=%d", + (int)skb->len); + kfree_skb(skb); + return NET_RX_SUCCESS; + } skb_pull(skb, ETH_HLEN); trans_hdr = (struct af_iucv_trans_hdr *)skb->data; EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName)); -- cgit v1.1 From 9c55d3b5459bffe8ac098175e672a90260c0cfa4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Dec 2015 10:49:42 +0100 Subject: nfnetlink: add nfnl_dereference_protected helper to avoid overly long line in followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 28591fa..aebf5cd 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -33,6 +33,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); +#define nfnl_dereference_protected(id) \ + rcu_dereference_protected(table[(id)].subsys, \ + lockdep_nfnl_is_held((id))) + static char __initdata nfversion[] = "0.30"; static struct { @@ -208,8 +212,7 @@ replay: } else { rcu_read_unlock(); nfnl_lock(subsys_id); - if (rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)) != ss || + if (nfnl_dereference_protected(subsys_id) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -299,15 +302,13 @@ replay: skb->sk = oskb->sk; nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) { #ifdef CONFIG_MODULES nfnl_unlock(subsys_id); request_module("nfnetlink-subsys-%d", subsys_id); nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) #endif { -- cgit v1.1 From cef63419f7dbd52e535d1932a88904b3facb1546 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:55 +0800 Subject: batman-adv: add list of unique single hop neighbors per hard-interface Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 4 + net/batman-adv/originator.c | 157 ++++++++++++++++++++++++++++++++++++++++ net/batman-adv/originator.h | 5 ++ net/batman-adv/types.h | 22 ++++++ 4 files changed, 188 insertions(+) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index a58184f..01acccc 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -639,9 +640,12 @@ batadv_hardif_add_interface(struct net_device *net_dev) goto free_sysfs; INIT_LIST_HEAD(&hard_iface->list); + INIT_HLIST_HEAD(&hard_iface->neigh_list); INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + spin_lock_init(&hard_iface->neigh_list_lock); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; if (batadv_is_wifi_netdev(net_dev)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 7486df9..a8671c6 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -202,6 +202,47 @@ void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) } /** + * batadv_hardif_neigh_free_rcu - free the hardif neigh_node + * @rcu: rcu pointer of the neigh_node + */ +static void batadv_hardif_neigh_free_rcu(struct rcu_head *rcu) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + + hardif_neigh = container_of(rcu, struct batadv_hardif_neigh_node, rcu); + + spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock); + hlist_del_init_rcu(&hardif_neigh->list); + spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock); + + batadv_hardif_free_ref_now(hardif_neigh->if_incoming); + kfree(hardif_neigh); +} + +/** + * batadv_hardif_neigh_free_now - decrement the hardif neighbors refcounter + * and possibly free it (without rcu callback) + * @hardif_neigh: hardif neigh neighbor to free + */ +static void +batadv_hardif_neigh_free_now(struct batadv_hardif_neigh_node *hardif_neigh) +{ + if (atomic_dec_and_test(&hardif_neigh->refcount)) + batadv_hardif_neigh_free_rcu(&hardif_neigh->rcu); +} + +/** + * batadv_hardif_neigh_free_ref - decrement the hardif neighbors refcounter + * and possibly free it + * @hardif_neigh: hardif neigh neighbor to free + */ +void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh) +{ + if (atomic_dec_and_test(&hardif_neigh->refcount)) + call_rcu(&hardif_neigh->rcu, batadv_hardif_neigh_free_rcu); +} + +/** * batadv_neigh_node_free_rcu - free the neigh_node * @rcu: rcu pointer of the neigh_node */ @@ -209,6 +250,7 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; + struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; @@ -220,6 +262,14 @@ static void batadv_neigh_node_free_rcu(struct rcu_head *rcu) batadv_neigh_ifinfo_free_ref_now(neigh_ifinfo); } + hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, + neigh_node->addr); + if (hardif_neigh) { + /* batadv_hardif_neigh_get() increases refcount too */ + batadv_hardif_neigh_free_now(hardif_neigh); + batadv_hardif_neigh_free_now(hardif_neigh); + } + if (bao->bat_neigh_free) bao->bat_neigh_free(neigh_node); @@ -479,6 +529,102 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, } /** + * batadv_hardif_neigh_create - create a hardif neighbour node + * @hard_iface: the interface this neighbour is connected to + * @neigh_addr: the interface address of the neighbour to retrieve + * + * Returns the hardif neighbour node if found or created or NULL otherwise. + */ +static struct batadv_hardif_neigh_node * +batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_hardif_neigh_node *hardif_neigh = NULL; + + spin_lock_bh(&hard_iface->neigh_list_lock); + + /* check if neighbor hasn't been added in the meantime */ + hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); + if (hardif_neigh) + goto out; + + if (!atomic_inc_not_zero(&hard_iface->refcount)) + goto out; + + hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); + if (!hardif_neigh) { + batadv_hardif_free_ref(hard_iface); + goto out; + } + + INIT_HLIST_NODE(&hardif_neigh->list); + ether_addr_copy(hardif_neigh->addr, neigh_addr); + hardif_neigh->if_incoming = hard_iface; + hardif_neigh->last_seen = jiffies; + + atomic_set(&hardif_neigh->refcount, 1); + + hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); + +out: + spin_unlock_bh(&hard_iface->neigh_list_lock); + return hardif_neigh; +} + +/** + * batadv_hardif_neigh_get_or_create - retrieve or create a hardif neighbour + * node + * @hard_iface: the interface this neighbour is connected to + * @neigh_addr: the interface address of the neighbour to retrieve + * + * Returns the hardif neighbour node if found or created or NULL otherwise. + */ +static struct batadv_hardif_neigh_node * +batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_hardif_neigh_node *hardif_neigh = NULL; + + /* first check without locking to avoid the overhead */ + hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr); + if (hardif_neigh) + return hardif_neigh; + + return batadv_hardif_neigh_create(hard_iface, neigh_addr); +} + +/** + * batadv_hardif_neigh_get - retrieve a hardif neighbour from the list + * @hard_iface: the interface where this neighbour is connected to + * @neigh_addr: the address of the neighbour + * + * Looks for and possibly returns a neighbour belonging to this hard interface. + * Returns NULL if the neighbour is not found. + */ +struct batadv_hardif_neigh_node * +batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_hardif_neigh_node *tmp_hardif_neigh, *hardif_neigh = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_hardif_neigh, + &hard_iface->neigh_list, list) { + if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr)) + continue; + + if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount)) + continue; + + hardif_neigh = tmp_hardif_neigh; + break; + } + rcu_read_unlock(); + + return hardif_neigh; +} + +/** * batadv_neigh_node_new - create and init a new neigh_node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to @@ -493,11 +639,17 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, const u8 *neigh_addr) { struct batadv_neigh_node *neigh_node; + struct batadv_hardif_neigh_node *hardif_neigh = NULL; neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); if (neigh_node) goto out; + hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface, + neigh_addr); + if (!hardif_neigh) + goto out; + neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); if (!neigh_node) goto out; @@ -523,11 +675,16 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); spin_unlock_bh(&orig_node->neigh_list_lock); + /* increment unique neighbor refcount */ + atomic_inc(&hardif_neigh->refcount); + batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); out: + if (hardif_neigh) + batadv_hardif_neigh_free_ref(hardif_neigh); return neigh_node; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index fa18f9b..eae0557 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -41,6 +41,11 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr); +struct batadv_hardif_neigh_node * +batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr); +void +batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d260efd..71c7d9f 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -100,6 +100,8 @@ struct batadv_hard_iface_bat_iv { * @bat_iv: BATMAN IV specific per hard interface data * @cleanup_work: work queue callback item for hard interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + * @neigh_list: list of unique single hop neighbors via this interface + * @neigh_list_lock: lock protecting neigh_list */ struct batadv_hard_iface { struct list_head list; @@ -115,6 +117,9 @@ struct batadv_hard_iface { struct batadv_hard_iface_bat_iv bat_iv; struct work_struct cleanup_work; struct dentry *debug_dir; + struct hlist_head neigh_list; + /* neigh_list_lock protects: neigh_list */ + spinlock_t neigh_list_lock; }; /** @@ -341,6 +346,23 @@ struct batadv_gw_node { }; /** + * batadv_hardif_neigh_node - unique neighbor per hard interface + * @list: list node for batadv_hard_iface::neigh_list + * @addr: the MAC address of the neighboring interface + * @if_incoming: pointer to incoming hard interface + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in a RCU-safe manner + */ +struct batadv_hardif_neigh_node { + struct hlist_node list; + u8 addr[ETH_ALEN]; + struct batadv_hard_iface *if_incoming; + unsigned long last_seen; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** * struct batadv_neigh_node - structure for single hops neighbors * @list: list node for batadv_orig_node::neigh_list * @orig_node: pointer to corresponding orig_node -- cgit v1.1 From 8248a4c7c866a9c15b6b379ca98aa8c95363f502 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:56 +0800 Subject: batman-adv: add bat_hardif_neigh_init algo ops call Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 4 ++++ net/batman-adv/types.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a8671c6..27dd326 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -539,6 +539,7 @@ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hardif_neigh_node *hardif_neigh = NULL; spin_lock_bh(&hard_iface->neigh_list_lock); @@ -564,6 +565,9 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, atomic_set(&hardif_neigh->refcount, 1); + if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) + bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); + hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); out: diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 71c7d9f..838d55e 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1153,6 +1153,7 @@ struct batadv_forw_packet { * @bat_primary_iface_set: called when primary interface is selected / changed * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue * @bat_ogm_emit: send scheduled OGM + * @bat_hardif_neigh_init: called on creation of single hop entry * @bat_neigh_cmp: compare the metrics of two neighbors for their respective * outgoing interfaces * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better @@ -1178,6 +1179,7 @@ struct batadv_algo_ops { void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); /* neigh_node handling API */ + void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh); int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, -- cgit v1.1 From 7587405ab93e5383e64ac311f460c30a02a8e9cb Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:57 +0800 Subject: batman-adv: export single hop neighbor list via debugfs Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ net/batman-adv/debugfs.c | 9 ++++++++ net/batman-adv/originator.c | 34 +++++++++++++++++++++++++++++ net/batman-adv/originator.h | 2 ++ net/batman-adv/types.h | 2 ++ 5 files changed, 100 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 912d9c3..1efdb5c 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1862,6 +1862,58 @@ next: } /** + * batadv_iv_hardif_neigh_print - print a single hop neighbour node + * @seq: neighbour table seq_file struct + * @hardif_neigh: hardif neighbour information + */ +static void +batadv_iv_hardif_neigh_print(struct seq_file *seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + int last_secs, last_msecs; + + last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000; + last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000; + + seq_printf(seq, " %10s %pM %4i.%03is\n", + hardif_neigh->if_incoming->net_dev->name, + hardif_neigh->addr, last_secs, last_msecs); +} + +/** + * batadv_iv_ogm_neigh_print - print the single hop neighbour list + * @bat_priv: the bat priv with all the soft interface information + * @seq: neighbour table seq_file struct + */ +static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_hardif_neigh_node *hardif_neigh; + struct batadv_hard_iface *hard_iface; + int batman_count = 0; + + seq_printf(seq, " %10s %-13s %s\n", + "IF", "Neighbor", "last-seen"); + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != net_dev) + continue; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + batadv_iv_hardif_neigh_print(seq, hardif_neigh); + batman_count++; + } + } + rcu_read_unlock(); + + if (batman_count == 0) + seq_puts(seq, "No batman nodes in range ...\n"); +} + +/** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor @@ -1954,6 +2006,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .bat_ogm_emit = batadv_iv_ogm_emit, .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob, + .bat_neigh_print = batadv_iv_neigh_print, .bat_orig_print = batadv_iv_ogm_orig_print, .bat_orig_free = batadv_iv_ogm_orig_free, .bat_orig_add_if = batadv_iv_ogm_orig_add_if, diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index c4c1e80..037ad0a 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -262,6 +262,13 @@ static int batadv_algorithms_open(struct inode *inode, struct file *file) return single_open(file, batadv_algo_seq_print_text, NULL); } +static int neighbors_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + + return single_open(file, batadv_hardif_neigh_seq_print_text, net_dev); +} + static int batadv_originators_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; @@ -375,6 +382,7 @@ static struct batadv_debuginfo *batadv_general_debuginfos[] = { }; /* The following attributes are per soft interface */ +static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open); static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open); static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open); static BATADV_DEBUGINFO(transtable_global, S_IRUGO, @@ -394,6 +402,7 @@ static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); #endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { + &batadv_debuginfo_neighbors, &batadv_debuginfo_originators, &batadv_debuginfo_gateways, &batadv_debuginfo_transtable_global, diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 27dd326..3c782a33 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -693,6 +693,40 @@ out: } /** + * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list + * @seq: neighbour table seq_file struct + * @offset: not used + * + * Always returns 0. + */ +int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + return 0; + + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", + BATADV_SOURCE_VERSION, primary_if->net_dev->name, + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->bat_algo_ops->name); + + batadv_hardif_free_ref(primary_if); + + if (!bat_priv->bat_algo_ops->bat_neigh_print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; + } + + bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq); + return 0; +} + +/** * batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object * @rcu: rcu pointer of the orig_ifinfo object */ diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index eae0557..2955775 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -62,6 +62,8 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo); +int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); + struct batadv_orig_ifinfo * batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 838d55e..815c960 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1159,6 +1159,7 @@ struct batadv_forw_packet { * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better * than neigh2 for their respective outgoing interface from the metric * prospective + * @bat_neigh_print: print the single hop neighbor list (optional) * @bat_neigh_free: free the resources allocated by the routing algorithm for a * neigh_node object * @bat_orig_print: print the originator table (optional) @@ -1189,6 +1190,7 @@ struct batadv_algo_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); + void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq); void (*bat_neigh_free)(struct batadv_neigh_node *neigh); /* orig_node handling API */ void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, -- cgit v1.1 From 4ff1e2a738c2c954ea2c0d6a7c2b06056d8d3849 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 4 Aug 2015 21:09:58 +0800 Subject: batman-adv: update last seen field of single hop originators Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1efdb5c..3738a2f 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1379,6 +1379,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); + struct batadv_hardif_neigh_node *hardif_neigh = NULL; struct batadv_neigh_node *router = NULL; struct batadv_neigh_node *router_router = NULL; struct batadv_orig_node *orig_neigh_node; @@ -1423,6 +1424,13 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, goto out; } + if (is_single_hop_neigh) { + hardif_neigh = batadv_hardif_neigh_get(if_incoming, + ethhdr->h_source); + if (hardif_neigh) + hardif_neigh->last_seen = jiffies; + } + router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { router_router = batadv_orig_router_get(router->orig_node, @@ -1557,6 +1565,8 @@ out: batadv_neigh_node_free_ref(router_router); if (orig_neigh_router) batadv_neigh_node_free_ref(orig_neigh_router); + if (hardif_neigh) + batadv_hardif_neigh_free_ref(hardif_neigh); kfree_skb(skb_priv); } -- cgit v1.1 From 18165f6f6570318ad0bb1e60c2ae597cdfd09a50 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 8 Aug 2015 02:01:50 +0200 Subject: batman-adv: rename equiv/equal or better to similar or better Since the function applies a threshold and also slightly worse values are accepted, ''equal or better'' does not represent the intention of the function. ''Similar or better'' represents that better. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 8 ++++---- net/batman-adv/main.c | 2 +- net/batman-adv/routing.c | 6 +++--- net/batman-adv/types.h | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 3738a2f..5677169 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1964,8 +1964,8 @@ out: } /** - * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than - * neigh2 from the metric prospective + * batadv_iv_ogm_neigh_is_sob - check if neigh1 is similarly good or better + * than neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison @@ -1975,7 +1975,7 @@ out: * the metric via neigh2, false otherwise. */ static bool -batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1, +batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) @@ -2015,7 +2015,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .bat_ogm_schedule = batadv_iv_ogm_schedule, .bat_ogm_emit = batadv_iv_ogm_emit, .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, - .bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob, + .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob, .bat_neigh_print = batadv_iv_neigh_print, .bat_orig_print = batadv_iv_ogm_orig_print, .bat_orig_free = batadv_iv_ogm_orig_free, diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d7f17c1..45952dc 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -552,7 +552,7 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) !bat_algo_ops->bat_ogm_schedule || !bat_algo_ops->bat_ogm_emit || !bat_algo_ops->bat_neigh_cmp || - !bat_algo_ops->bat_neigh_is_equiv_or_better) { + !bat_algo_ops->bat_neigh_is_similar_or_better) { pr_info("Routing algo '%s' does not implement required ops\n", bat_algo_ops->name); return -EINVAL; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8d990b0..a43f02e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -497,9 +497,9 @@ batadv_find_router(struct batadv_priv *bat_priv, /* alternative candidate should be good enough to be * considered */ - if (!bao->bat_neigh_is_equiv_or_better(cand_router, - cand->if_outgoing, - router, recv_if)) + if (!bao->bat_neigh_is_similar_or_better(cand_router, + cand->if_outgoing, + router, recv_if)) goto next; /* don't use the same router twice */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 815c960..9bdb21c 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1156,8 +1156,8 @@ struct batadv_forw_packet { * @bat_hardif_neigh_init: called on creation of single hop entry * @bat_neigh_cmp: compare the metrics of two neighbors for their respective * outgoing interfaces - * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or better - * than neigh2 for their respective outgoing interface from the metric + * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or + * better than neigh2 for their respective outgoing interface from the metric * prospective * @bat_neigh_print: print the single hop neighbor list (optional) * @bat_neigh_free: free the resources allocated by the routing algorithm for a @@ -1185,7 +1185,7 @@ struct batadv_algo_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); - bool (*bat_neigh_is_equiv_or_better) + bool (*bat_neigh_is_similar_or_better) (struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, -- cgit v1.1 From 9d547833f02fb8b52ab824adae8f850f3c22fd4f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Dec 2015 16:03:34 +0100 Subject: switchdev: vlan: Use switchdev_port* in vlan_netdev_ops We need to be able to propagate static FDB entries and certain bridge port attributes (e.g. learning, flooding) down to the port netdev driver when bridge port is a VLAN interface. Achieve that by setting ndo_bridge* and ndo_fdb* in vlan_netdev_ops to the corresponding switchdev_port* functions. This is consistent with team and bond devices. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index fded865..9f4bd13 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "vlan.h" #include "vlanproc.h" @@ -774,6 +775,12 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_fdb_add = switchdev_port_fdb_add, + .ndo_fdb_del = switchdev_port_fdb_del, + .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, .ndo_get_iflink = vlan_dev_get_iflink, }; -- cgit v1.1 From 6ff64f6f9242d7e50f3e99cb280f69d1927a5fa6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Dec 2015 16:03:35 +0100 Subject: switchdev: Pass original device to port netdev driver switchdev drivers need to know the netdev on which the switchdev op was invoked. For example, the STP state of a VLAN interface configured on top of a port can change while being member in a bridge. In this case, the underlying driver should only change the STP state of that particular VLAN and not of all the VLANs configured on the port. However, current switchdev infrastructure only passes the port netdev down to the driver. Solve that by passing the original device down to the driver as part of the required switchdev object / attribute. This doesn't entail any change in current switchdev drivers. It simply enables those supporting stacked devices to know the originating device and act accordingly. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 1 + net/bridge/br_stp.c | 2 ++ net/bridge/br_stp_if.c | 1 + net/bridge/br_vlan.c | 2 ++ net/core/net-sysfs.c | 1 + net/core/rtnetlink.c | 1 + net/switchdev/switchdev.c | 12 ++++++++++++ 7 files changed, 20 insertions(+) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a642bb8..82e3e97 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -135,6 +135,7 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { struct switchdev_obj_port_fdb fdb = { .obj = { + .orig_dev = f->dst->dev, .id = SWITCHDEV_OBJ_ID_PORT_FDB, .flags = SWITCHDEV_F_DEFER, }, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 5f3f645..b3cca12 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -40,6 +40,7 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, @@ -570,6 +571,7 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { + .orig_dev = br->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, .u.ageing_time = ageing_time, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 5396ff08..775e00f 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -37,6 +37,7 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) void br_init_port(struct net_bridge_port *p) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, .u.ageing_time = p->br->ageing_time, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 1394da6..66c4549 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -73,6 +73,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, u16 vid, u16 flags) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, .vid_begin = vid, @@ -120,6 +121,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, u16 vid) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid_begin = vid, .vid_end = vid, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f88a62a..bca8c35 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,6 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { + .orig_dev = netdev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34ba7a0..d8b0113 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1027,6 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f34e535..df790d3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -723,6 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, u32 filter_mask) { struct switchdev_vlan_dump dump = { + .vlan.obj.orig_dev = dev, .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .skb = skb, .filter_mask = filter_mask, @@ -757,6 +758,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int nlflags) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; @@ -778,6 +780,7 @@ static int switchdev_port_br_setflag(struct net_device *dev, unsigned long brport_flag) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u8 flag = nla_get_u8(nlattr); @@ -853,6 +856,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, struct nlattr *attr; struct bridge_vlan_info *vinfo; struct switchdev_obj_port_vlan vlan = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, }; int rem; @@ -975,6 +979,7 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], u16 vid, u16 nlm_flags) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1000,6 +1005,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1077,6 +1083,7 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *filter_dev, int idx) { struct switchdev_fdb_dump dump = { + .fdb.obj.orig_dev = dev, .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .dev = dev, .skb = skb, @@ -1135,6 +1142,7 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (!dev) return NULL; + attr.orig_dev = dev; if (switchdev_port_attr_get(dev, &attr)) return NULL; @@ -1194,6 +1202,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) fi->fib_flags |= RTNH_F_OFFLOAD; @@ -1238,6 +1247,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) fi->fib_flags &= ~RTNH_F_OFFLOAD; @@ -1270,10 +1280,12 @@ static bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct switchdev_attr a_attr = { + .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { + .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; -- cgit v1.1 From 53692b1de419c1b59106909c7f6b4dd3dbc768ac Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:41 -0800 Subject: sctp: Rename NETIF_F_SCTP_CSUM to NETIF_F_SCTP_CRC The SCTP checksum is really a CRC and is very different from the standards 1's complement checksum that serves as the checksum for IP protocols. This offload interface is also very different. Rename NETIF_F_SCTP_CSUM to NETIF_F_SCTP_CRC to highlight these differences. The term CSUM should be reserved in the stack to refer to the standard 1's complement IP checksum. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- net/core/ethtool.c | 4 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/sctp/output.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 9f4bd13..45b74e8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -545,7 +545,7 @@ static int vlan_dev_init(struct net_device *dev) dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | - NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM | + NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; dev->features |= real_dev->vlan_features | NETIF_F_LLTX | diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 29edf74..4a0cab8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -87,7 +87,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", - [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", + [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", [NETIF_F_RXHASH_BIT] = "rx-hashing", @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 010ddee..d952d67 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, /* Only update csum if we really have to */ if (sctph->dest != cp->dport || payload_csum || (skb->ip_summed == CHECKSUM_PARTIAL && - !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { diff --git a/net/sctp/output.c b/net/sctp/output.c index abe7c2d..9d610ed 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -534,7 +534,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in . */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + if (!(dst->dev->features & NETIF_F_SCTP_CRC) || (dst_xfrm(dst) != NULL) || packet->ipfragok) { sh->checksum = sctp_compute_cksum(nskb, 0); } else { -- cgit v1.1 From a188222b6ed29404ac2d4232d35d1fe0e77af370 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:43 -0800 Subject: net: Rename NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK The name NETIF_F_ALL_CSUM is a misnomer. This does not correspond to the set of features for offloading all checksums. This is a mask of the checksum offload related features bits. It is incorrect to set both NETIF_F_HW_CSUM and NETIF_F_IP_CSUM or NETIF_F_IPV6 at the same time for features of a device. This patch: - Changes instances of NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK (where NETIF_F_ALL_CSUM is being used as a mask). - Changes bonding, sfc/efx, ipvlan, macvlan, vlan, and team drivers to use NEITF_F_HW_CSUM in features list instead of NETIF_F_ALL_CSUM. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 10 +++++----- net/core/ethtool.c | 2 +- net/ipv4/tcp.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 45b74e8..ad5e2fd1 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -543,7 +543,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; diff --git a/net/core/dev.c b/net/core/dev.c index 8f705fc..5a3b5a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2645,7 +2645,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2792,7 +2792,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_ALL_CSUM) && + if (!(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto out_kfree_skb; } @@ -7572,15 +7572,15 @@ netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { if (mask & NETIF_F_GEN_CSUM) - mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; - all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. */ if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return all; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4a0cab8..09948a7 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; + return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca1..cf7ef7b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + if (sk->sk_route_caps & NETIF_F_CSUM_MASK) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.1 From c8cd0989bd151fda87bbf10887b3df18021284bc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:44 -0800 Subject: net: Eliminate NETIF_F_GEN_CSUM and NETIF_F_V[46]_CSUM These netif flags are unnecessary convolutions. It is more straightforward to just use NETIF_F_HW_CSUM, NETIF_F_IP_CSUM, and NETIF_F_IPV6_CSUM directly. This patch also: - Cleans up can_checksum_protocol - Simplifies netdev_intersect_features Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++------ net/core/pktgen.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 3 ++- net/ipv4/udp.c | 3 ++- net/ipv4/udp_offload.c | 5 +++-- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 3 ++- 8 files changed, 19 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5a3b5a4..45b013f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6467,9 +6467,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ - if (!((features & NETIF_F_GEN_CSUM) || - (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) - == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (!(features & NETIF_F_HW_CSUM) && + ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; @@ -7571,7 +7571,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { - if (mask & NETIF_F_GEN_CSUM) + if (mask & NETIF_F_HW_CSUM) mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; @@ -7579,8 +7579,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. */ - if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); + if (all & NETIF_F_HW_CSUM) + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM); return all; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de8d5cc..2be1444 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2898,7 +2898,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V4_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; udp4_hwcsum(skb, iph->saddr, iph->daddr); @@ -3032,7 +3032,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (!(pkt_dev->flags & F_UDPCSUM)) { skb->ip_summed = CHECKSUM_NONE; - } else if (odev->features & NETIF_F_V6_CSUM) { + } else if (odev->features & (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e0b94cd..568e2bc 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -911,7 +911,7 @@ static int __ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && !(flags & MSG_MORE) && !exthdrlen) csummode = CHECKSUM_PARTIAL; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 5075b7e..61c7cc2 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -132,7 +132,8 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c7b0e6..8841e98 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -772,7 +772,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, else if (skb_is_gso(skb)) uh->check = ~udp_v4_check(len, saddr, daddr, 0); else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + (skb_dst(skb)->dev->features & + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f938616..1300426 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -60,8 +60,9 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, /* Try to offload checksum if possible */ offload_csum = !!(need_csum && - (skb->dev->features & - (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM))); + ((skb->dev->features & NETIF_F_HW_CSUM) || + (skb->dev->features & (is_ipv6 ? + NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & features; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e6a7bd15..2f74845 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1322,7 +1322,7 @@ emsgsize: headersize == sizeof(struct ipv6hdr) && length < mtu - headersize && !(flags & MSG_MORE) && - rt->dst.dev->features & NETIF_F_V6_CSUM) + rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 238e70c..6ce3099 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -136,7 +136,8 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt6i_flags & RTF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + (!skb->dev || skb->dev->features & + (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + -- cgit v1.1 From 9a49850d0af7b9fd14d091dfe61ef6cb369f86b9 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:45 -0800 Subject: tcp: Fix conditions to determine checksum offload In tcp_send_sendpage and tcp_sendmsg we check the route capabilities to determine if checksum offload can be performed. This check currently does not take the IP protocol into account for devices that advertise only one of NETIF_F_IPV6_CSUM or NETIF_F_IP_CSUM. This patch adds a function to check capabilities for checksum offload with a socket called sk_check_csum_caps. This function checks for specific IPv4 or IPv6 offload support based on the family of the socket. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf7ef7b..92b3e61 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) + !sk_check_csum_caps(sk)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_CSUM_MASK) + if (sk_check_csum_caps(sk)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.1 From 6ae23ad36253a8033c5714c52b691b84456487c5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:46 -0800 Subject: net: Add driver helper functions to determine checksum offloadability Add skb_csum_offload_chk driver helper function to determine if a device with limited checksum offload capabilities is able to offload the checksum for a given packet. This patch includes: - The skb_csum_offload_chk function. Returns true if checksum is offloadable, else false. Optionally, in the case that the checksum is not offloable, the function can call skb_checksum_help to resolve the checksum. skb_csum_offload_chk also returns whether the checksum refers to an encapsulated checksum. - Definition of skb_csum_offl_spec structure that caller uses to indicate rules about what it can offload (e.g. IPv4/v6, TCP/UDP only, whether encapsulated checksums can be offloaded, whether checksum with IPv6 extension headers can be offloaded). - Ancilary functions called skb_csum_offload_chk_help, skb_csum_off_chk_help_cmn, skb_csum_off_chk_help_cmn_v4_only. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 45b013f..914b4a2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -138,6 +138,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2471,6 +2472,141 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +/* skb_csum_offload_check - Driver helper function to determine if a device + * with limited checksum offload capabilities is able to offload the checksum + * for a given packet. + * + * Arguments: + * skb - sk_buff for the packet in question + * spec - contains the description of what device can offload + * csum_encapped - returns true if the checksum being offloaded is + * encpasulated. That is it is checksum for the transport header + * in the inner headers. + * checksum_help - when set indicates that helper function should + * call skb_checksum_help if offload checks fail + * + * Returns: + * true: Packet has passed the checksum checks and should be offloadable to + * the device (a driver may still need to check for additional + * restrictions of its device) + * false: Checksum is not offloadable. If checksum_help was set then + * skb_checksum_help was called to resolve checksum for non-GSO + * packets and when IP protocol is not SCTP + */ +bool __skb_csum_offload_chk(struct sk_buff *skb, + const struct skb_csum_offl_spec *spec, + bool *csum_encapped, + bool csum_help) +{ + struct iphdr *iph; + struct ipv6hdr *ipv6; + void *nhdr; + int protocol; + u8 ip_proto; + + if (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) { + if (!spec->vlan_okay) + goto need_help; + } + + /* We check whether the checksum refers to a transport layer checksum in + * the outermost header or an encapsulated transport layer checksum that + * corresponds to the inner headers of the skb. If the checksum is for + * something else in the packet we need help. + */ + if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { + /* Non-encapsulated checksum */ + protocol = eproto_to_ipproto(vlan_get_protocol(skb)); + nhdr = skb_network_header(skb); + *csum_encapped = false; + if (spec->no_not_encapped) + goto need_help; + } else if (skb->encapsulation && spec->encap_okay && + skb_checksum_start_offset(skb) == + skb_inner_transport_offset(skb)) { + /* Encapsulated checksum */ + *csum_encapped = true; + switch (skb->inner_protocol_type) { + case ENCAP_TYPE_ETHER: + protocol = eproto_to_ipproto(skb->inner_protocol); + break; + case ENCAP_TYPE_IPPROTO: + protocol = skb->inner_protocol; + break; + } + nhdr = skb_inner_network_header(skb); + } else { + goto need_help; + } + + switch (protocol) { + case IPPROTO_IP: + if (!spec->ipv4_okay) + goto need_help; + iph = nhdr; + ip_proto = iph->protocol; + if (iph->ihl != 5 && !spec->ip_options_okay) + goto need_help; + break; + case IPPROTO_IPV6: + if (!spec->ipv6_okay) + goto need_help; + if (spec->no_encapped_ipv6 && *csum_encapped) + goto need_help; + ipv6 = nhdr; + nhdr += sizeof(*ipv6); + ip_proto = ipv6->nexthdr; + break; + default: + goto need_help; + } + +ip_proto_again: + switch (ip_proto) { + case IPPROTO_TCP: + if (!spec->tcp_okay || + skb->csum_offset != offsetof(struct tcphdr, check)) + goto need_help; + break; + case IPPROTO_UDP: + if (!spec->udp_okay || + skb->csum_offset != offsetof(struct udphdr, check)) + goto need_help; + break; + case IPPROTO_SCTP: + if (!spec->sctp_okay || + skb->csum_offset != offsetof(struct sctphdr, checksum)) + goto cant_help; + break; + case NEXTHDR_HOP: + case NEXTHDR_ROUTING: + case NEXTHDR_DEST: { + u8 *opthdr = nhdr; + + if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) + goto need_help; + + ip_proto = opthdr[0]; + nhdr += (opthdr[1] + 1) << 3; + + goto ip_proto_again; + } + default: + goto need_help; + } + + /* Passed the tests for offloading checksum */ + return true; + +need_help: + if (csum_help && !skb_shinfo(skb)->gso_size) + skb_checksum_help(skb); +cant_help: + return false; +} +EXPORT_SYMBOL(__skb_csum_offload_chk); + __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; -- cgit v1.1 From 33f11d16142b06588eedfc1dd8cf93790979a712 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:35 -0800 Subject: ila: Create net/ipv6/ila directory Create ila directory in preparation for supporting other hooks in the kernel than LWT for doing ILA. This includes: - Moving ila.c to ila/ila_lwt.c - Splitting out some common functions into ila_common.c Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/Makefile | 2 +- net/ipv6/ila.c | 229 ---------------------------------------------- net/ipv6/ila/Makefile | 7 ++ net/ipv6/ila/ila.h | 46 ++++++++++ net/ipv6/ila/ila_common.c | 95 +++++++++++++++++++ net/ipv6/ila/ila_lwt.c | 152 ++++++++++++++++++++++++++++++ 6 files changed, 301 insertions(+), 230 deletions(-) delete mode 100644 net/ipv6/ila.c create mode 100644 net/ipv6/ila/Makefile create mode 100644 net/ipv6/ila/ila.h create mode 100644 net/ipv6/ila/ila_common.c create mode 100644 net/ipv6/ila/ila_lwt.c (limited to 'net') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2c900c7..2fbd90b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o -obj-$(CONFIG_IPV6_ILA) += ila.o +obj-$(CONFIG_IPV6_ILA) += ila/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c deleted file mode 100644 index 1a6852e..0000000 --- a/net/ipv6/ila.c +++ /dev/null @@ -1,229 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct ila_params { - __be64 locator; - __be64 locator_match; - __wsum csum_diff; -}; - -static inline struct ila_params *ila_params_lwtunnel( - struct lwtunnel_state *lwstate) -{ - return (struct ila_params *)lwstate->data; -} - -static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) -{ - __be32 diff[] = { - ~from[0], ~from[1], to[0], to[1], - }; - - return csum_partial(diff, sizeof(diff), 0); -} - -static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) -{ - if (*(__be64 *)&ip6h->daddr == p->locator_match) - return p->csum_diff; - else - return compute_csum_diff8((__be32 *)&ip6h->daddr, - (__be32 *)&p->locator); -} - -static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) -{ - __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - size_t nhoff = sizeof(struct ipv6hdr); - - /* First update checksum */ - switch (ip6h->nexthdr) { - case NEXTHDR_TCP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { - struct tcphdr *th = (struct tcphdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&th->check, skb, - diff, true); - } - break; - case NEXTHDR_UDP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { - struct udphdr *uh = (struct udphdr *) - (skb_network_header(skb) + nhoff); - - if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&uh->check, skb, - diff, true); - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } - } - break; - case NEXTHDR_ICMP: - if (likely(pskb_may_pull(skb, - nhoff + sizeof(struct icmp6hdr)))) { - struct icmp6hdr *ih = (struct icmp6hdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, - diff, true); - } - break; - } - - /* Now change destination address */ - *(__be64 *)&ip6h->daddr = p->locator; -} - -static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (skb->protocol != htons(ETH_P_IPV6)) - goto drop; - - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - - return dst->lwtstate->orig_output(net, sk, skb); - -drop: - kfree_skb(skb); - return -EINVAL; -} - -static int ila_input(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (skb->protocol != htons(ETH_P_IPV6)) - goto drop; - - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - - return dst->lwtstate->orig_input(skb); - -drop: - kfree_skb(skb); - return -EINVAL; -} - -static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { - [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, -}; - -static int ila_build_state(struct net_device *dev, struct nlattr *nla, - unsigned int family, const void *cfg, - struct lwtunnel_state **ts) -{ - struct ila_params *p; - struct nlattr *tb[ILA_ATTR_MAX + 1]; - size_t encap_len = sizeof(*p); - struct lwtunnel_state *newts; - const struct fib6_config *cfg6 = cfg; - int ret; - - if (family != AF_INET6) - return -EINVAL; - - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, - ila_nl_policy); - if (ret < 0) - return ret; - - if (!tb[ILA_ATTR_LOCATOR]) - return -EINVAL; - - newts = lwtunnel_state_alloc(encap_len); - if (!newts) - return -ENOMEM; - - newts->len = encap_len; - p = ila_params_lwtunnel(newts); - - p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); - - if (cfg6->fc_dst_len > sizeof(__be64)) { - /* Precompute checksum difference for translation since we - * know both the old locator and the new one. - */ - p->locator_match = *(__be64 *)&cfg6->fc_dst; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - } - - newts->type = LWTUNNEL_ENCAP_ILA; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | - LWTUNNEL_STATE_INPUT_REDIRECT; - - *ts = newts; - - return 0; -} - -static int ila_fill_encap_info(struct sk_buff *skb, - struct lwtunnel_state *lwtstate) -{ - struct ila_params *p = ila_params_lwtunnel(lwtstate); - - if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) -{ - /* No encapsulation overhead */ - return 0; -} - -static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) -{ - struct ila_params *a_p = ila_params_lwtunnel(a); - struct ila_params *b_p = ila_params_lwtunnel(b); - - return (a_p->locator != b_p->locator); -} - -static const struct lwtunnel_encap_ops ila_encap_ops = { - .build_state = ila_build_state, - .output = ila_output, - .input = ila_input, - .fill_encap = ila_fill_encap_info, - .get_encap_size = ila_encap_nlsize, - .cmp_encap = ila_encap_cmp, -}; - -static int __init ila_init(void) -{ - return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); -} - -static void __exit ila_fini(void) -{ - lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); -} - -module_init(ila_init); -module_exit(ila_fini); -MODULE_AUTHOR("Tom Herbert "); -MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile new file mode 100644 index 0000000..31d136b --- /dev/null +++ b/net/ipv6/ila/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for ILA module +# + +obj-$(CONFIG_IPV6_ILA) += ila.o + +ila-objs := ila_common.o ila_lwt.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h new file mode 100644 index 0000000..b94081f --- /dev/null +++ b/net/ipv6/ila/ila.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 Tom Herbert + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + */ + +#ifndef __ILA_H +#define __ILA_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ila_params { + __be64 locator; + __be64 locator_match; + __wsum csum_diff; +}; + +static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], to[0], to[1], + }; + + return csum_partial(diff, sizeof(diff), 0); +} + +void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); + +int ila_lwt_init(void); +void ila_lwt_fini(void); + +#endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c new file mode 100644 index 0000000..64e1904 --- /dev/null +++ b/net/ipv6/ila/ila_common.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + if (*(__be64 *)&ip6h->daddr == p->locator_match) + return p->csum_diff; + else + return compute_csum_diff8((__be32 *)&ip6h->daddr, + (__be32 *)&p->locator); +} + +void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) +{ + __wsum diff; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + size_t nhoff = sizeof(struct ipv6hdr); + + /* First update checksum */ + switch (ip6h->nexthdr) { + case NEXTHDR_TCP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { + struct tcphdr *th = (struct tcphdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, + diff, true); + } + break; + case NEXTHDR_UDP: + if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { + struct udphdr *uh = (struct udphdr *) + (skb_network_header(skb) + nhoff); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, + diff, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + break; + case NEXTHDR_ICMP: + if (likely(pskb_may_pull(skb, + nhoff + sizeof(struct icmp6hdr)))) { + struct icmp6hdr *ih = (struct icmp6hdr *) + (skb_network_header(skb) + nhoff); + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, + diff, true); + } + break; + } + + /* Now change destination address */ + *(__be64 *)&ip6h->daddr = p->locator; +} + +static int __init ila_init(void) +{ + int ret; + + ret = ila_lwt_init(); + + if (ret) + goto fail_lwt; + +fail_lwt: + return ret; +} + +static void __exit ila_fini(void) +{ + ila_lwt_fini(); +} + +module_init(ila_init); +module_exit(ila_fini); +MODULE_AUTHOR("Tom Herbert "); +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c new file mode 100644 index 0000000..2ae3c4f --- /dev/null +++ b/net/ipv6/ila/ila_lwt.c @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +static inline struct ila_params *ila_params_lwtunnel( + struct lwtunnel_state *lwstate) +{ + return (struct ila_params *)lwstate->data; +} + +static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + + return dst->lwtstate->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int ila_input(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); + + return dst->lwtstate->orig_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, +}; + +static int ila_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct ila_params *p; + struct nlattr *tb[ILA_ATTR_MAX + 1]; + size_t encap_len = sizeof(*p); + struct lwtunnel_state *newts; + const struct fib6_config *cfg6 = cfg; + int ret; + + if (family != AF_INET6) + return -EINVAL; + + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, + ila_nl_policy); + if (ret < 0) + return ret; + + if (!tb[ILA_ATTR_LOCATOR]) + return -EINVAL; + + newts = lwtunnel_state_alloc(encap_len); + if (!newts) + return -ENOMEM; + + newts->len = encap_len; + p = ila_params_lwtunnel(newts); + + p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); + + if (cfg6->fc_dst_len > sizeof(__be64)) { + /* Precompute checksum difference for translation since we + * know both the old locator and the new one. + */ + p->locator_match = *(__be64 *)&cfg6->fc_dst; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator_match, (__be32 *)&p->locator); + } + + newts->type = LWTUNNEL_ENCAP_ILA; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + + *ts = newts; + + return 0; +} + +static int ila_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ila_params *p = ila_params_lwtunnel(lwtstate); + + if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + /* No encapsulation overhead */ + return 0; +} + +static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ila_params *a_p = ila_params_lwtunnel(a); + struct ila_params *b_p = ila_params_lwtunnel(b); + + return (a_p->locator != b_p->locator); +} + +static const struct lwtunnel_encap_ops ila_encap_ops = { + .build_state = ila_build_state, + .output = ila_output, + .input = ila_input, + .fill_encap = ila_fill_encap_info, + .get_encap_size = ila_encap_nlsize, + .cmp_encap = ila_encap_cmp, +}; + +int ila_lwt_init(void) +{ + return lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} + +void ila_lwt_fini(void) +{ + lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA); +} -- cgit v1.1 From fc9e50f5a5a4e1fa9ba2756f745a13e693cf6a06 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:37 -0800 Subject: netlink: add a start callback for starting a netlink dump The start callback allows the caller to set up a context for the dump callbacks. Presumably, the context can then be destroyed in the done callback. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++++ net/netlink/genetlink.c | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af..81dc1bb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504..8e63662 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; -- cgit v1.1 From 7f00feaf107645d95a6d87e99b4d141ac0a08efd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:38 -0800 Subject: ila: Add generic ILA translation facility This patch implements an ILA tanslation table. This table can be configured with identifier to locator mappings, and can be be queried to resolve a mapping. Queries can be parameterized based on interface, direction (incoming or outoing), and matching locator. The table is implemented using rhashtable and is configured via netlink (through "ip ila .." in iproute). The table may be used as alternative means to do do ILA tanslations other than the lw tunnels Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/ila/Makefile | 2 +- net/ipv6/ila/ila.h | 2 + net/ipv6/ila/ila_common.c | 8 + net/ipv6/ila/ila_xlat.c | 680 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 691 insertions(+), 1 deletion(-) create mode 100644 net/ipv6/ila/ila_xlat.c (limited to 'net') diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile index 31d136b..4b32e59 100644 --- a/net/ipv6/ila/Makefile +++ b/net/ipv6/ila/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IPV6_ILA) += ila.o -ila-objs := ila_common.o ila_lwt.o +ila-objs := ila_common.o ila_lwt.o ila_xlat.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index b94081f..28542cb 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -42,5 +42,7 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); int ila_lwt_init(void); void ila_lwt_fini(void); +int ila_xlat_init(void); +void ila_xlat_fini(void); #endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 64e1904..32dc9aa 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -80,12 +80,20 @@ static int __init ila_init(void) if (ret) goto fail_lwt; + ret = ila_xlat_init(); + if (ret) + goto fail_xlat; + + return 0; +fail_xlat: + ila_lwt_fini(); fail_lwt: return ret; } static void __exit ila_fini(void) { + ila_xlat_fini(); ila_lwt_fini(); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c new file mode 100644 index 0000000..295ca29 --- /dev/null +++ b/net/ipv6/ila/ila_xlat.c @@ -0,0 +1,680 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +struct ila_xlat_params { + struct ila_params ip; + __be64 identifier; + int ifindex; + unsigned int dir; +}; + +struct ila_map { + struct ila_xlat_params p; + struct rhash_head node; + struct ila_map __rcu *next; + struct rcu_head rcu; +}; + +static unsigned int ila_net_id; + +struct ila_net { + struct rhashtable rhash_table; + spinlock_t *locks; /* Bucket locks for entry manipulation */ + unsigned int locks_mask; + bool hooks_registered; +}; + +#define LOCKS_PER_CPU 10 + +static int alloc_ila_locks(struct ila_net *ilan) +{ + unsigned int i, size; + unsigned int nr_pcpus = num_possible_cpus(); + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + ilan->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + ilan->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!ilan->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&ilan->locks[i]); + } + ilan->locks_mask = size - 1; + + return 0; +} + +static u32 hashrnd __read_mostly; +static __always_inline void __ila_hash_secret_init(void) +{ + net_get_random_once(&hashrnd, sizeof(hashrnd)); +} + +static inline u32 ila_identifier_hash(__be64 identifier) +{ + u32 *v = (u32 *)&identifier; + + return jhash_2words(v[0], v[1], hashrnd); +} + +static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) +{ + return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; +} + +static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, + int ifindex, unsigned int dir) +{ + return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || + (ila->p.ifindex && ila->p.ifindex != ifindex) || + !(ila->p.dir & dir); +} + +static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) +{ + return (ila->p.ip.locator_match != p->ip.locator_match) || + (ila->p.ifindex != p->ifindex) || + (ila->p.dir != p->dir); +} + +static int ila_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct ila_map *ila = obj; + + return (ila->p.identifier != *(__be64 *)arg->key); +} + +static inline int ila_order(struct ila_map *ila) +{ + int score = 0; + + if (ila->p.ip.locator_match) + score += 1 << 0; + + if (ila->p.ifindex) + score += 1 << 1; + + return score; +} + +static const struct rhashtable_params rht_params = { + .nelem_hint = 1024, + .head_offset = offsetof(struct ila_map, node), + .key_offset = offsetof(struct ila_map, p.identifier), + .key_len = sizeof(u64), /* identifier */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, + .obj_cmpfn = ila_cmpfn, +}; + +static struct genl_family ila_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, +}; + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, + [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, + [ILA_ATTR_DIR] = { .type = NLA_U32, }, +}; + +static int parse_nl_config(struct genl_info *info, + struct ila_xlat_params *p) +{ + memset(p, 0, sizeof(*p)); + + if (info->attrs[ILA_ATTR_IDENTIFIER]) + p->identifier = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_IDENTIFIER]); + + if (info->attrs[ILA_ATTR_LOCATOR]) + p->ip.locator = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR]); + + if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) + p->ip.locator_match = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR_MATCH]); + + if (info->attrs[ILA_ATTR_IFINDEX]) + p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); + + if (info->attrs[ILA_ATTR_DIR]) + p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); + + return 0; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, + int ifindex, + unsigned int dir, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); + while (ila) { + if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + while (ila) { + if (!ila_cmp_params(ila, p)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +static inline void ila_release(struct ila_map *ila) +{ + kfree_rcu(ila, rcu); +} + +static void ila_free_cb(void *ptr, void *arg) +{ + struct ila_map *ila = (struct ila_map *)ptr, *next; + + /* Assume rcu_readlock held */ + while (ila) { + next = rcu_access_pointer(ila->next); + ila_release(ila); + ila = next; + } +} + +static int ila_xlat_addr(struct sk_buff *skb, int dir); + +static unsigned int +ila_nf_input(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + ila_xlat_addr(skb, ILA_DIR_IN); + return NF_ACCEPT; +} + +static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { + { + .hook = ila_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = -1, + }, +}; + +static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = 0, order; + + if (!ilan->hooks_registered) { + /* We defer registering net hooks in the namespace until the + * first mapping is added. + */ + err = nf_register_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); + if (err) + return err; + + ilan->hooks_registered = true; + } + + ila = kzalloc(sizeof(*ila), GFP_KERNEL); + if (!ila) + return -ENOMEM; + + ila->p = *p; + + if (p->ip.locator_match) { + /* Precompute checksum difference for translation since we + * know both the old identifier and the new one. + */ + ila->p.ip.csum_diff = compute_csum_diff8( + (__be32 *)&p->ip.locator_match, + (__be32 *)&p->ip.locator); + } + + order = ila_order(ila); + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + if (!head) { + /* New entry for the rhash_table */ + err = rhashtable_lookup_insert_fast(&ilan->rhash_table, + &ila->node, rht_params); + } else { + struct ila_map *tila = head, *prev = NULL; + + do { + if (!ila_cmp_params(tila, p)) { + err = -EEXIST; + goto out; + } + + if (order > ila_order(tila)) + break; + + prev = tila; + tila = rcu_dereference_protected(tila->next, + lockdep_is_held(lock)); + } while (tila); + + if (prev) { + /* Insert in sub list of head */ + RCU_INIT_POINTER(ila->next, tila); + rcu_assign_pointer(prev->next, ila); + } else { + /* Make this ila new head */ + RCU_INIT_POINTER(ila->next, head); + err = rhashtable_replace_fast(&ilan->rhash_table, + &head->node, + &ila->node, rht_params); + if (err) + goto out; + } + } + +out: + spin_unlock(lock); + + if (err) + kfree(ila); + + return err; +} + +static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head, *prev; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = -ENOENT; + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, + &p->identifier, rht_params); + ila = head; + + prev = NULL; + + while (ila) { + if (ila_cmp_params(ila, p)) { + prev = ila; + ila = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + continue; + } + + err = 0; + + if (prev) { + /* Not head, just delete from list */ + rcu_assign_pointer(prev->next, ila->next); + } else { + /* It is the head. If there is something in the + * sublist we need to make a new head. + */ + head = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + if (head) { + /* Put first entry in the sublist into the + * table + */ + err = rhashtable_replace_fast( + &ilan->rhash_table, &ila->node, + &head->node, rht_params); + if (err) + goto out; + } else { + /* Entry no longer used */ + err = rhashtable_remove_fast(&ilan->rhash_table, + &ila->node, + rht_params); + } + } + + ila_release(ila); + + break; + } + +out: + spin_unlock(lock); + + return err; +} + +static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + return ila_add_mapping(net, &p); +} + +static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + ila_del_mapping(net, &p); + + return 0; +} + +static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) +{ + if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER, + (__force u64)ila->p.identifier) || + nla_put_u64(msg, ILA_ATTR_LOCATOR, + (__force u64)ila->p.ip.locator) || + nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH, + (__force u64)ila->p.ip.locator_match) || + nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || + nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) + return -1; + + return 0; +} + +static int ila_dump_info(struct ila_map *ila, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (ila_fill_info(ila, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct sk_buff *msg; + struct ila_xlat_params p; + struct ila_map *ila; + int ret; + + ret = parse_nl_config(info, &p); + if (ret) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + + ila = ila_lookup_by_params(&p, ilan); + if (ila) { + ret = ila_dump_info(ila, + info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + } + + rcu_read_unlock(); + + if (ret < 0) + goto out_free; + + return genlmsg_reply(msg, info); + +out_free: + nlmsg_free(msg); + return ret; +} + +struct ila_dump_iter { + struct rhashtable_iter rhiter; +}; + +static int ila_nl_dump_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); +} + +static int ila_nl_dump_done(struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + rhashtable_walk_exit(&iter->rhiter); + + return 0; +} + +static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct rhashtable_iter *rhiter = &iter->rhiter; + struct ila_map *ila; + int ret; + + ret = rhashtable_walk_start(rhiter); + if (ret && ret != -EAGAIN) + goto done; + + for (;;) { + ila = rhashtable_walk_next(rhiter); + + if (IS_ERR(ila)) { + if (PTR_ERR(ila) == -EAGAIN) + continue; + ret = PTR_ERR(ila); + goto done; + } else if (!ila) { + break; + } + + while (ila) { + ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, ILA_CMD_GET); + if (ret) + goto done; + + ila = rcu_access_pointer(ila->next); + } + } + + ret = skb->len; + +done: + rhashtable_walk_stop(rhiter); + return ret; +} + +static const struct genl_ops ila_nl_ops[] = { + { + .cmd = ILA_CMD_ADD, + .doit = ila_nl_cmd_add_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_DEL, + .doit = ila_nl_cmd_del_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_GET, + .doit = ila_nl_cmd_get_mapping, + .start = ila_nl_dump_start, + .dumpit = ila_nl_dump, + .done = ila_nl_dump_done, + .policy = ila_nl_policy, + }, +}; + +#define ILA_HASH_TABLE_SIZE 1024 + +static __net_init int ila_init_net(struct net *net) +{ + int err; + struct ila_net *ilan = net_generic(net, ila_net_id); + + err = alloc_ila_locks(ilan); + if (err) + return err; + + rhashtable_init(&ilan->rhash_table, &rht_params); + + return 0; +} + +static __net_exit void ila_exit_net(struct net *net) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + + rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL); + + kvfree(ilan->locks); + + if (ilan->hooks_registered) + nf_unregister_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); +} + +static struct pernet_operations ila_net_ops = { + .init = ila_init_net, + .exit = ila_exit_net, + .id = &ila_net_id, + .size = sizeof(struct ila_net), +}; + +static int ila_xlat_addr(struct sk_buff *skb, int dir) +{ + struct ila_map *ila; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ila_net *ilan = net_generic(net, ila_net_id); + __be64 identifier, locator_match; + size_t nhoff; + + /* Assumes skb contains a valid IPv6 header that is pulled */ + + identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; + locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0]; + nhoff = sizeof(struct ipv6hdr); + + rcu_read_lock(); + + ila = ila_lookup_wildcards(identifier, locator_match, + skb->dev->ifindex, dir, ilan); + if (ila) + update_ipv6_locator(skb, &ila->p.ip); + + rcu_read_unlock(); + + return 0; +} + +int ila_xlat_incoming(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_IN); +} +EXPORT_SYMBOL(ila_xlat_incoming); + +int ila_xlat_outgoing(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_OUT); +} +EXPORT_SYMBOL(ila_xlat_outgoing); + +int ila_xlat_init(void) +{ + int ret; + + ret = register_pernet_device(&ila_net_ops); + if (ret) + goto exit; + + ret = genl_register_family_with_ops(&ila_nl_family, + ila_nl_ops); + if (ret < 0) + goto unregister; + + return 0; + +unregister: + unregister_pernet_device(&ila_net_ops); +exit: + return ret; +} + +void ila_xlat_fini(void) +{ + genl_unregister_family(&ila_nl_family); + unregister_pernet_device(&ila_net_ops); +} -- cgit v1.1 From b613f56ec9baf30edf5d9d607b822532a273dad7 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:02 +0900 Subject: net: diag: split inet_diag_dump_one_icsk into two Currently, inet_diag_dump_one_icsk finds a socket and then dumps its information to userspace. Split it into a part that finds the socket and a part that dumps the information. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ab9f8a6..cfabb8f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - const struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, + struct inet_hashinfo *hashinfo, + const struct inet_diag_req_v2 *req) { - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; struct sock *sk; - int err; - err = -EINVAL; if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, req->id.idiag_if); #endif else - goto out_nosk; + return ERR_PTR(-EINVAL); - err = -ENOENT; if (!sk) - goto out_nosk; + return ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { @@ -409,7 +422,6 @@ out: if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -- cgit v1.1 From 64be0aed59ad519d6f2160868734f7e278290ac1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:03 +0900 Subject: net: diag: Add the ability to destroy a socket. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock_diag.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d..a996ce8 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { -- cgit v1.1 From 6eb5d2e08f071c05ecbe135369c9ad418826cab2 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:04 +0900 Subject: net: diag: Support SOCK_DESTROY for inet sockets. This passes the SOCK_DESTROY operation to the underlying protocol diag handler, or returns -EOPNOTSUPP if that handler does not define a destroy operation. Most of this patch is just renaming functions. This is not strictly necessary, but it would be fairly counterintuitive to have the code to destroy inet sockets be in a function whose name starts with inet_diag_get. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cfabb8f..8bb8e7a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -426,7 +426,7 @@ out: } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { @@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; @@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static @@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) -- cgit v1.1 From c1e64e298b8cad309091b95d8436a0255c84f54a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:05 +0900 Subject: net: diag: Support destroying TCP sockets. This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 13 +++++++++++++ net/ipv4/tcp.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 19 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 5 files changed, 66 insertions(+) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa0..c229205 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 92b3e61..2c0e340 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b316040..4d61093 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db00343..7aa13bd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2342,6 +2342,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c16e3fb..5382c26 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { -- cgit v1.1 From 6857a02af5386e9f5d11734363741dbe6b0a6959 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Dec 2015 15:33:39 -0800 Subject: sctp: use GFP_KERNEL in sctp_init() modules init functions being called from process context, we better use GFP_KERNEL allocations to increase our chances to get these high-order pages we want for SCTP hash tables. This mostly matters if SCTP module is loaded once memory got fragmented. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d9ea9a..6c2c0ac 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1419,7 +1419,7 @@ static __init int sctp_init(void) if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) continue; sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { pr_err("Failed association hash alloc\n"); @@ -1452,7 +1452,7 @@ static __init int sctp_init(void) if ((sctp_port_hashsize > (64 * 1024)) && order > 0) continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); -- cgit v1.1 From c169c59dd5177de2befcd5aa2cee9a1c8abeff61 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 2 Sep 2015 20:09:56 +0200 Subject: batman-adv: detect local excess vlans in TT request If the local representation of the global TT table of one originator has more VLAN entries than the respective TT update, there is some inconsistency present. By detecting and reporting this inconsistency, the global table gets updated and the excess VLAN will get removed in the process. Reported-by: Alessandro Bolletta Signed-off-by: Simon Wunderlich Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 4228b10..17822de 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2411,8 +2411,8 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, { struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; struct batadv_orig_node_vlan *vlan; + int i, orig_num_vlan; u32 crc; - int i; /* check if each received CRC matches the locally stored one */ for (i = 0; i < num_vlan; i++) { @@ -2438,6 +2438,18 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, return false; } + /* check if any excess VLANs exist locally for the originator + * which are not mentioned in the TVLV from the originator. + */ + rcu_read_lock(); + orig_num_vlan = 0; + hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) + orig_num_vlan++; + rcu_read_unlock(); + + if (orig_num_vlan > num_vlan) + return false; + return true; } -- cgit v1.1 From ad7e2c466d8b0a7056cd248e1df6bb7296e014f7 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Aug 2015 16:33:34 +0200 Subject: batman-adv: unify flags access style in tt global add This should slightly improve readability Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 17822de..5cf4311 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1435,7 +1435,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, * TT_CLIENT_WIFI, therefore they have to be copied in the * client entry */ - tt_global_entry->common.flags |= flags; + common->flags |= flags; /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a -- cgit v1.1 From 01f6b5c76a68e294e90a040c651adb90126e802d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 26 Aug 2015 10:31:50 +0200 Subject: batman-adv: Use chain pointer when purging fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chain pointer was already created in batadv_frag_purge_orig to make the checks more readable. Just use the chain pointer everywhere instead of having the same dereference + array access in the most lines of this function. Signed-off-by: Sven Eckelmann Acked-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/fragmentation.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 700c96c..20d9282 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -71,14 +71,14 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { chain = &orig_node->fragments[i]; - spin_lock_bh(&orig_node->fragments[i].lock); + spin_lock_bh(&chain->lock); if (!check_cb || check_cb(chain)) { - batadv_frag_clear_chain(&orig_node->fragments[i].head); - orig_node->fragments[i].size = 0; + batadv_frag_clear_chain(&chain->head); + chain->size = 0; } - spin_unlock_bh(&orig_node->fragments[i].lock); + spin_unlock_bh(&chain->lock); } } -- cgit v1.1 From c05a57f6fb6f398cde873c5ebe13ae26b3843b7e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 26 Aug 2015 10:31:51 +0200 Subject: batman-adv: Fix typo 'wether' -> 'whether' Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 45952dc..5dbcb2e 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -908,7 +908,7 @@ end: * appropriate handlers * @bat_priv: the bat priv with all the soft interface information * @tvlv_handler: tvlv callback function handling the tvlv content - * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet * @orig_node: orig node emitting the ogm packet * @src: source mac address of the unicast packet * @dst: destination mac address of the unicast packet @@ -961,7 +961,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, * batadv_tvlv_containers_process - parse the given tvlv buffer to call the * appropriate handlers * @bat_priv: the bat priv with all the soft interface information - * @ogm_source: flag indicating wether the tvlv is an ogm or a unicast packet + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet * @orig_node: orig node emitting the ogm packet * @src: source mac address of the unicast packet * @dst: destination mac address of the unicast packet -- cgit v1.1 From 5a1dd8a4773d4c24e925cc6154826d555a85c370 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 11 Sep 2015 18:04:13 +0200 Subject: batman-adv: lock crc access in bridge loop avoidance We have found some networks in which nodes were constantly requesting other nodes BLA claim tables to synchronize, just to ask for that again once completed. The reason was that the crc checksum of the asked nodes were out of sync due to missing locking and multiple writes to the same crc checksum when adding/removing entries. Therefore the asked nodes constantly reported the wrong crc, which caused repeating requests. To avoid multiple functions changing a backbone gateways crc entry at the same time, lock it using a spinlock. Signed-off-by: Simon Wunderlich Tested-by: Alfons Name Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 35 +++++++++++++++++++++++++++++----- net/batman-adv/types.h | 2 ++ 2 files changed, 32 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 191a702..99dcae3 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -260,7 +260,9 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) } /* all claims gone, initialize CRC */ + spin_lock_bh(&backbone_gw->crc_lock); backbone_gw->crc = BATADV_BLA_CRC_INIT; + spin_unlock_bh(&backbone_gw->crc_lock); } /** @@ -408,6 +410,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, entry->lasttime = jiffies; entry->crc = BATADV_BLA_CRC_INIT; entry->bat_priv = bat_priv; + spin_lock_init(&entry->crc_lock); atomic_set(&entry->request_sent, 0); atomic_set(&entry->wait_periods, 0); ether_addr_copy(entry->orig, orig); @@ -557,7 +560,9 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, __be16 crc; memcpy(mac, batadv_announce_mac, 4); + spin_lock_bh(&backbone_gw->crc_lock); crc = htons(backbone_gw->crc); + spin_unlock_bh(&backbone_gw->crc_lock); memcpy(&mac[4], &crc, 2); batadv_bla_send_claim(bat_priv, mac, backbone_gw->vid, @@ -618,14 +623,18 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", mac, BATADV_PRINT_VID(vid)); + spin_lock_bh(&claim->backbone_gw->crc_lock); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); + spin_unlock_bh(&claim->backbone_gw->crc_lock); batadv_backbone_gw_free_ref(claim->backbone_gw); } /* set (new) backbone gw */ atomic_inc(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; + spin_lock_bh(&backbone_gw->crc_lock); backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); + spin_unlock_bh(&backbone_gw->crc_lock); backbone_gw->lasttime = jiffies; claim_free_ref: @@ -653,7 +662,9 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_choose_claim, claim); batadv_claim_free_ref(claim); /* reference from the hash is gone */ + spin_lock_bh(&claim->backbone_gw->crc_lock); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); + spin_unlock_bh(&claim->backbone_gw->crc_lock); /* don't need the reference from hash_find() anymore */ batadv_claim_free_ref(claim); @@ -664,7 +675,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, u8 *backbone_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; - u16 crc; + u16 backbone_crc, crc; if (memcmp(an_addr, batadv_announce_mac, 4) != 0) return 0; @@ -683,12 +694,16 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", BATADV_PRINT_VID(vid), backbone_gw->orig, crc); - if (backbone_gw->crc != crc) { + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + + if (backbone_crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, BATADV_PRINT_VID(backbone_gw->vid), - backbone_gw->crc, crc); + backbone_crc, crc); batadv_bla_send_request(backbone_gw); } else { @@ -1647,6 +1662,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; struct hlist_head *head; + u16 backbone_crc; u32 i; bool is_own; u8 *primary_addr; @@ -1669,11 +1685,15 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); + + spin_lock_bh(&claim->backbone_gw->crc_lock); + backbone_crc = claim->backbone_gw->crc; + spin_unlock_bh(&claim->backbone_gw->crc_lock); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), - claim->backbone_gw->crc); + backbone_crc); } rcu_read_unlock(); } @@ -1692,6 +1712,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hard_iface *primary_if; struct hlist_head *head; int secs, msecs; + u16 backbone_crc; u32 i; bool is_own; u8 *primary_addr; @@ -1722,10 +1743,14 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", backbone_gw->orig, BATADV_PRINT_VID(backbone_gw->vid), secs, - msecs, backbone_gw->crc); + msecs, backbone_crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9bdb21c..7c386db 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -906,6 +906,7 @@ struct batadv_socket_packet { * backbone gateway - no bcast traffic is formwared until the situation was * resolved * @crc: crc16 checksum over all claims + * @crc_lock: lock protecting crc * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ @@ -919,6 +920,7 @@ struct batadv_bla_backbone_gw { atomic_t wait_periods; atomic_t request_sent; u16 crc; + spinlock_t crc_lock; /* protects crc */ atomic_t refcount; struct rcu_head rcu; }; -- cgit v1.1 From 566178f853c1aa57be9c16007c7cca07df5d51b6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 16 Dec 2015 13:55:04 +0800 Subject: net: sctp: dynamically enable or disable pf state As we all know, the value of pf_retrans >= max_retrans_path can disable pf state. The variables of pf_retrans and max_retrans_path can be changed by the userspace application. Sometimes the user expects to disable pf state while the 2 variables are changed to enable pf state. So it is necessary to introduce a new variable to disable pf state. According to the suggestions from Vlad Yasevich, extra1 and extra2 are removed. The initialization of pf_enable is added. Acked-by: Vlad Yasevich Signed-off-by: Zhu Yanjun Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 5 ++++- net/sctp/sysctl.c | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6c2c0ac..010aced 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + /* Enable pf state by default */ + net->sctp.pf_enable = 1; + /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) * Max.Init.Retransmits - 8 attempts diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6098d4c..05cd164 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, struct sctp_transport *transport, int is_hb) { + struct net *net = sock_net(asoc->base.sk); + /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ @@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, * is SCTP_ACTIVE, then mark this transport as Partially Failed, * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state == SCTP_ACTIVE) && + if (net->sctp.pf_enable && + (transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c5..ccbfc93 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = { .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, + { + .procname = "pf_enable", + .data = &init_net.sctp.pf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } }; -- cgit v1.1 From b3379041ddf68ba46c31fce741ddb71675b39d23 Mon Sep 17 00:00:00 2001 From: Hubert Sokolowski Date: Tue, 15 Dec 2015 13:20:30 +0000 Subject: net: Pass ndm_state to route netlink FDB notifications. Before this change applications monitoring FDB notifications were not able to determine whether a new FDB entry is permament or not: bridge fdb add f1:f2:f3:f4:f5:f8 dev sw0p1 temp self bridge fdb add f1:f2:f3:f4:f5:f9 dev sw0p1 self bridge monitor fdb f1:f2:f3:f4:f5:f8 dev sw0p1 self permanent f1:f2:f3:f4:f5:f9 dev sw0p1 self permanent With this change ndm_state from the original netlink message is passed to the new netlink message sent as notification. bridge fdb add f1:f2:f3:f4:f5:f6 dev sw0p1 self bridge fdb add f1:f2:f3:f4:f5:f7 dev sw0p1 temp self bridge monitor fdb f1:f2:f3:f4:f5:f6 dev sw0p1 self permanent f1:f2:f3:f4:f5:f7 dev sw0p1 self static Signed-off-by: Hubert Sokolowski Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d8b0113..baf49cb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2564,7 +2564,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u16 vid, u32 pid, u32 seq, int type, unsigned int flags, - int nlflags) + int nlflags, u16 ndm_state) { struct nlmsghdr *nlh; struct ndmsg *ndm; @@ -2580,7 +2580,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, ndm->ndm_flags = flags; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = NUD_PERMANENT; + ndm->ndm_state = ndm_state; if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; @@ -2601,7 +2601,8 @@ static inline size_t rtnl_fdb_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); } -static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) +static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, + u16 ndm_state) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2612,7 +2613,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) goto errout; err = nlmsg_populate_fdb_fill(skb, dev, addr, vid, - 0, 0, type, NTF_SELF, 0); + 0, 0, type, NTF_SELF, 0, ndm_state); if (err < 0) { kfree_skb(skb); goto errout; @@ -2747,7 +2748,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) nlh->nlmsg_flags); if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, + ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2848,7 +2850,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, + ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2876,7 +2879,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, portid, seq, RTM_NEWNEIGH, NTF_SELF, - NLM_F_MULTI); + NLM_F_MULTI, NUD_PERMANENT); if (err < 0) return err; skip: -- cgit v1.1 From 32bc201e1974976b7d3fea9a9b17bb7392ca6394 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 16 Dec 2015 17:50:11 +0800 Subject: ipv6: allow routes to be configured with expire values Add the support for adding expire value to routes, requested by Tom Gundersen for systemd-networkd, and NetworkManager wants it too. implement it by adding the new RTNETLINK attribute RTA_EXPIRES. Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c83b6a5..3c8834b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2709,6 +2709,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PREF] = { .type = NLA_U8 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_EXPIRES] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2809,6 +2810,15 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP_TYPE]) cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + if (tb[RTA_EXPIRES]) { + unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); + + if (addrconf_finite_timeout(timeout)) { + cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); + cfg->fc_flags |= RTF_EXPIRES; + } + } + err = 0; errout: return err; -- cgit v1.1 From 715f504b118998c41a2079a17e16bf5a8a114885 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 16 Dec 2015 17:22:47 +0100 Subject: ipv6: add IPV6_HDRINCL option for raw sockets Same as in Windows, we miss IPV6_HDRINCL for SOL_IPV6 and SOL_RAW. The SOL_IP/IP_HDRINCL is not available for IPv6 sockets. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/raw.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 9914098..fa59dd7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -972,6 +972,11 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, return -EFAULT; switch (optname) { + case IPV6_HDRINCL: + if (sk->sk_type != SOCK_RAW) + return -EINVAL; + inet_sk(sk)->hdrincl = !!val; + return 0; case IPV6_CHECKSUM: if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { @@ -1016,7 +1021,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -1037,7 +1043,8 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return compat_ipv6_setsockopt(sk, level, optname, @@ -1057,6 +1064,9 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; switch (optname) { + case IPV6_HDRINCL: + val = inet_sk(sk)->hdrincl; + break; case IPV6_CHECKSUM: /* * We allow getsockopt() for IPPROTO_IPV6-level @@ -1094,7 +1104,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); @@ -1115,7 +1126,8 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: - if (optname == IPV6_CHECKSUM) + if (optname == IPV6_CHECKSUM || + optname == IPV6_HDRINCL) break; default: return compat_ipv6_getsockopt(sk, level, optname, -- cgit v1.1 From b4aae759c22e71a3c32144f0b3bc4f2fa4aaae98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 10 Dec 2015 18:04:07 +0100 Subject: netfilter: meta: add support for setting skb->pkttype This allows to redirect bridged packets to local machine: ether type ip ether daddr set aa:53:08:12:34:56 meta pkttype set unicast Without 'set unicast', ip stack discards PACKET_OTHERHOST skbs. It is also useful to add support for a '-m cluster like' nft rule (where switch floods packets to several nodes, and each cluster node node processes a subset of packets for load distribution). Mangling is restricted to HOST/OTHER/BROAD/MULTICAST, i.e. you cannot set skb->pkt_type to PACKET_KERNEL or change PACKET_LOOPBACK to PACKET_HOST. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5bcd1b0..fe885bf 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -26,6 +26,8 @@ #include #include +#include /* NF_BR_PRE_ROUTING */ + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -190,6 +192,13 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); +/* don't change or set _LOOPBACK, _USER, etc. */ +static bool pkt_type_ok(u32 p) +{ + return p == PACKET_HOST || p == PACKET_BROADCAST || + p == PACKET_MULTICAST || p == PACKET_OTHERHOST; +} + void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -205,6 +214,11 @@ void nft_meta_set_eval(const struct nft_expr *expr, case NFT_META_PRIORITY: skb->priority = value; break; + case NFT_META_PKTTYPE: + if (skb->pkt_type != value && + pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb->pkt_type = value; + break; case NFT_META_NFTRACE: skb->nf_trace = 1; break; @@ -273,6 +287,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); +static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx) +{ + unsigned int hooks; + + switch (ctx->afi->family) { + case NFPROTO_BRIDGE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + case NFPROTO_NETDEV: + hooks = 1 << NF_NETDEV_INGRESS; + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + int nft_meta_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -290,6 +322,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx, case NFT_META_NFTRACE: len = sizeof(u8); break; + case NFT_META_PKTTYPE: + err = nft_meta_set_init_pkttype(ctx); + if (err) + return err; + len = sizeof(u8); + break; default: return -EOPNOTSUPP; } -- cgit v1.1 From 8cb964daeba8b626f8fbf779a50635684e42abdb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 18 Dec 2015 15:37:37 +0100 Subject: ila: add NETFILTER dependency The recently added generic ILA translation facility fails to build when CONFIG_NETFILTER is disabled: net/ipv6/ila/ila_xlat.c:229:20: warning: 'struct nf_hook_state' declared inside parameter list net/ipv6/ila/ila_xlat.c:235:27: error: array type has incomplete element type 'struct nf_hook_ops' static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { This adds an explicit Kconfig dependency to avoid that case. Signed-off-by: Arnd Bergmann Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 983bb99..bb7dabe 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -94,6 +94,7 @@ config IPV6_MIP6 config IPV6_ILA tristate "IPv6: Identifier Locator Addressing (ILA)" + depends on NETFILTER select LWTUNNEL ---help--- Support for IPv6 Identifier Locator Addressing (ILA). -- cgit v1.1 From cc9da6cc4f56e05cc9e591459fe0192727ff58b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 16 Dec 2015 16:44:38 +0100 Subject: ipv6: addrconf: use stable address generator for ARPHRD_NONE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new address generator mode, using the stable address generator with an automatically generated secret. This is intended as a default address generator mode for device types with no EUI64 implementation. The new generator is used for ARPHRD_NONE interfaces initially, adding default IPv6 autoconf support to e.g. tun interfaces. If the addrgenmode is set to 'random', either by default or manually, and no stable secret is available, then a random secret is used as input for the stable-privacy address generator. The secret can be read and modified like manually configured secrets, using the proc interface. Modifying the secret will change the addrgen mode to 'stable-privacy' to indicate that it operates on a known secret. Existing behaviour of the 'stable-privacy' mode is kept unchanged. If a known secret is available when the device is created, then the mode will default to 'stable-privacy' as before. The mode can be manually set to 'random' but it will behave exactly like 'stable-privacy' in this case. The secret will not change. Cc: Hannes Frederic Sowa Cc: 吉藤英明 Signed-off-by: Bjørn Mork Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 233efa6..819b777 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2319,6 +2319,12 @@ static void manage_tempaddrs(struct inet6_dev *idev, } } +static bool is_addr_mode_generate_stable(struct inet6_dev *idev) +{ + return idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY || + idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM; +} + void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) { struct prefix_info *pinfo; @@ -2432,8 +2438,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; - } else if (in6_dev->addr_gen_mode == - IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + } else if (is_addr_mode_generate_stable(in6_dev) && !ipv6_generate_stable_address(&addr, 0, in6_dev)) { addr_flags |= IFA_F_STABLE_PRIVACY; @@ -3033,6 +3038,17 @@ retry: return 0; } +static void ipv6_gen_mode_random_init(struct inet6_dev *idev) +{ + struct ipv6_stable_secret *s = &idev->cnf.stable_secret; + + if (s->initialized) + return; + s = &idev->cnf.stable_secret; + get_random_bytes(&s->secret, sizeof(s->secret)); + s->initialized = true; +} + static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; @@ -3043,13 +3059,18 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { + switch (idev->addr_gen_mode) { + case IN6_ADDR_GEN_MODE_RANDOM: + ipv6_gen_mode_random_init(idev); + /* fallthrough */ + case IN6_ADDR_GEN_MODE_STABLE_PRIVACY: if (!ipv6_generate_stable_address(&addr, 0, idev)) addrconf_add_linklocal(idev, &addr, IFA_F_STABLE_PRIVACY); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); - } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { + break; + case IN6_ADDR_GEN_MODE_EUI64: /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. @@ -3058,6 +3079,11 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + break; + case IN6_ADDR_GEN_MODE_NONE: + default: + /* will not add any link local address */ + break; } } @@ -3073,7 +3099,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && - (dev->type != ARPHRD_6LOWPAN)) { + (dev->type != ARPHRD_6LOWPAN) && + (dev->type != ARPHRD_NONE)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -3082,6 +3109,11 @@ static void addrconf_dev_config(struct net_device *dev) if (IS_ERR(idev)) return; + /* this device type has no EUI support */ + if (dev->type == ARPHRD_NONE && + idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) + idev->addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM; + addrconf_addr_gen(idev, false); } @@ -4926,7 +4958,8 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (mode != IN6_ADDR_GEN_MODE_EUI64 && mode != IN6_ADDR_GEN_MODE_NONE && - mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY) + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + mode != IN6_ADDR_GEN_MODE_RANDOM) return -EINVAL; if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && -- cgit v1.1 From 6dd9a14e92e54895e143f10fef4d0b9abe109aa9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 16 Dec 2015 13:20:44 -0800 Subject: net: Allow accepted sockets to be bound to l3mdev domain Allow accepted sockets to derive their sk_bound_dev_if setting from the l3mdev domain in which the packets originated. A sysctl setting is added to control the behavior which is similar to sk_mark and sysctl_tcp_fwmark_accept. This effectively allow a process to have a "VRF-global" listen socket, with child sockets bound to the VRF device in which the packet originated. A similar behavior can be achieved using sk_mark, but a solution using marks is incomplete as it does not handle duplicate addresses in different L3 domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from l3mdev domain provides a complete solution. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 4 ++-- 5 files changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4cbe9f0..643a86c 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -351,7 +351,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->tfo_listener = false; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -371,7 +371,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a0bd7a5..41ff1f8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -915,6 +915,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "tcp_l3mdev_accept", + .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { .procname = "tcp_mtu_probing", .data = &init_net.ipv4.sysctl_tcp_mtu_probing, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d656ee..7b1fddc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6204,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init(req, &tmp_opt, skb, sk); /* Note: tcp_v6_init_req() might override ir_iif for link locals */ - inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); af_ops->init_req(req, sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 205e674..46e92fb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1276,6 +1276,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, ireq = inet_rsk(req); sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index eaf7ac4..2906ef2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -193,7 +193,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -224,7 +224,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = ireq->ir_iif; fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; -- cgit v1.1 From 05c74e5e53f6cb07502c3e6a820f33e2777b6605 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Dec 2015 23:51:53 +0100 Subject: bpf: add bpf_skb_load_bytes helper When hacking tc programs with eBPF, one of the issues that come up from time to time is to load addresses from headers. In eBPF as in classic BPF, we have BPF_LD | BPF_ABS | BPF_{B,H,W} instructions that extract a byte, half-word or word out of the skb data though helpers such as bpf_load_pointer() (interpreter case). F.e. extracting a whole IPv6 address could possibly look like ... union v6addr { struct { __u32 p1; __u32 p2; __u32 p3; __u32 p4; }; __u8 addr[16]; }; [...] a.p1 = htonl(load_word(skb, off)); a.p2 = htonl(load_word(skb, off + 4)); a.p3 = htonl(load_word(skb, off + 8)); a.p4 = htonl(load_word(skb, off + 12)); [...] /* access to a.addr[...] */ This work adds a complementary helper bpf_skb_load_bytes() (we also have bpf_skb_store_bytes()) as an alternative where the same call would look like from an eBPF program: ret = bpf_skb_load_bytes(skb, off, addr, sizeof(addr)); Same verifier restrictions apply as in ffeedafbf023 ("bpf: introduce current->pid, tgid, uid, gid, comm accessors") case, where stack memory access needs to be statically verified and thus guaranteed to be initialized in first use (otherwise verifier cannot tell whether a subsequent access to it is valid or not as it's runtime dependent). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 672eefb..34bf6fc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1245,6 +1245,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) } #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) +#define BPF_LDST_LEN 16U static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { @@ -1252,7 +1253,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) int offset = (int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; - char buf[16]; + char buf[BPF_LDST_LEN]; void *ptr; /* bpf verifier guarantees that: @@ -1299,6 +1300,36 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; +static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; + int offset = (int) r2; + void *to = (void *)(unsigned long) r3; + unsigned int len = (unsigned int) r4; + void *ptr; + + if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) + return -EFAULT; + + ptr = skb_header_pointer(skb, offset, len, to); + if (unlikely(!ptr)) + return -EFAULT; + if (ptr != to) + memcpy(to, ptr, len); + + return 0; +} + +const struct bpf_func_proto bpf_skb_load_bytes_proto = { + .func = bpf_skb_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_STACK, + .arg4_type = ARG_CONST_STACK_SIZE, +}; + #define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) #define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) @@ -1654,6 +1685,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_store_bytes: return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: -- cgit v1.1 From 8b614aebecdf2b1f72d51b1527f5a75d218b78e2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Dec 2015 23:51:54 +0100 Subject: bpf: move clearing of A/X into classic to eBPF migration prologue Back in the days where eBPF (or back then "internal BPF" ;->) was not exposed to user space, and only the classic BPF programs internally translated into eBPF programs, we missed the fact that for classic BPF A and X needed to be cleared. It was fixed back then via 83d5b7ef99c9 ("net: filter: initialize A and X registers"), and thus classic BPF specifics were added to the eBPF interpreter core to work around it. This added some confusion for JIT developers later on that take the eBPF interpreter code as an example for deriving their JIT. F.e. in f75298f5c3fe ("s390/bpf: clear correct BPF accumulator register"), at least X could leak stack memory. Furthermore, since this is only needed for classic BPF translations and not for eBPF (verifier takes care that read access to regs cannot be done uninitialized), more complexity is added to JITs as they need to determine whether they deal with migrations or native eBPF where they can just omit clearing A/X in their prologue and thus reduce image size a bit, see f.e. cde66c2d88da ("s390/bpf: Only clear A and X for converted BPF programs"). In other cases (x86, arm64), A and X is being cleared in the prologue also for eBPF case, which is unnecessary. Lets move this into the BPF migration in bpf_convert_filter() where it actually belongs as long as the number of eBPF JITs are still few. It can thus be done generically; allowing us to remove the quirk from __bpf_prog_run() and to slightly reduce JIT image size in case of eBPF, while reducing code duplication on this matter in current(/future) eBPF JITs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Reviewed-by: Michael Holzheu Tested-by: Michael Holzheu Cc: Zi Shen Lim Cc: Yang Shi Acked-by: Yang Shi Acked-by: Zi Shen Lim Signed-off-by: David S. Miller --- net/core/filter.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 34bf6fc..b513eb8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -381,9 +381,22 @@ do_pass: new_insn = new_prog; fp = prog; - if (new_insn) - *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); - new_insn++; + /* Classic BPF related prologue emission. */ + if (new_insn) { + /* Classic BPF expects A and X to be reset first. These need + * to be guaranteed to be the first two instructions. + */ + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); + *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); + + /* All programs must keep CTX in callee saved BPF_REG_CTX. + * In eBPF case it's done by the compiler, here we need to + * do this ourself. Initial CTX is present in BPF_REG_ARG1. + */ + *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); + } else { + new_insn += 3; + } for (i = 0; i < len; fp++, i++) { struct bpf_insn tmp_insns[6] = { }; -- cgit v1.1 From 23bf88078afdb8f9b8071dd9f32754ebab7ba3dc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Dec 2015 23:51:55 +0100 Subject: bpf: fix misleading comment in bpf_convert_filter Comment says "User BPF's register A is mapped to our BPF register 6", which is actually wrong as the mapping is on register 0. This can already be inferred from the code itself. So just remove it before someone makes assumptions based on that. Only code tells truth. ;) Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index b513eb8..c770196 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,12 +348,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * jump offsets, 2nd pass remapping: * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); - * - * User BPF's register A is mapped to our BPF register 6, user BPF - * register X is mapped to BPF register 7; frame pointer is always - * register 10; Context 'void *ctx' is stored in register 1, that is, - * for socket filters: ctx == 'struct sk_buff *', for seccomp: - * ctx == 'struct seccomp_data *'. */ static int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len) -- cgit v1.1 From 07f6f4a31e5a8dee67960fc07bb0b37c5f879d4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Dec 2015 16:14:11 -0800 Subject: tcp: diag: add support for request sockets to tcp_abort() Adding support for SYN_RECV request sockets to tcp_abort() is quite easy after our tcp listener rewrite. Note that we also need to better handle listeners, or we might leak not yet accepted children, because of a missing inet_csk_listen_stop() call. Signed-off-by: Eric Dumazet Cc: Lorenzo Colitti Tested-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2c0e340..cc7aaa5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3083,6 +3083,15 @@ EXPORT_SYMBOL_GPL(tcp_done); int tcp_abort(struct sock *sk, int err) { if (!sk_fullsock(sk)) { + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + local_bh_disable(); + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + local_bh_enable(); + return 0; + } sock_gen_put(sk); return -EOPNOTSUPP; } -- cgit v1.1 From 7eb7404f7ee4bf59cb034897ab678aba2755c5e0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Dec 2015 23:33:25 +0800 Subject: Bluetooth: use list_for_each_entry* Use list_for_each_entry*() instead of list_for_each*() to simplify the code. Signed-off-by: Geliang Tang Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 12 ++++++------ net/bluetooth/cmtp/capi.c | 8 ++------ net/bluetooth/hci_core.c | 8 +++----- net/bluetooth/rfcomm/core.c | 46 ++++++++++++++------------------------------ 4 files changed, 25 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cb4e8d4..955eda9 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -174,13 +174,13 @@ EXPORT_SYMBOL(bt_accept_unlink); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { - struct list_head *p, *n; + struct bt_sock *s, *n; struct sock *sk; BT_DBG("parent %p", parent); - list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { - sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); + list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { + sk = (struct sock *)s; lock_sock(sk); @@ -388,11 +388,11 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline unsigned int bt_accept_poll(struct sock *parent) { - struct list_head *p, *n; + struct bt_sock *s, *n; struct sock *sk; - list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { - sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); + list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { + sk = (struct sock *)s; if (sk->sk_state == BT_CONNECTED || (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && sk->sk_state == BT_CONNECT2)) diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 9a503387..46ac686 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -100,10 +100,8 @@ static void cmtp_application_del(struct cmtp_session *session, struct cmtp_appli static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value) { struct cmtp_application *app; - struct list_head *p; - list_for_each(p, &session->applications) { - app = list_entry(p, struct cmtp_application, list); + list_for_each_entry(app, &session->applications, list) { switch (pattern) { case CMTP_MSGNUM: if (app->msgnum == value) @@ -511,14 +509,12 @@ static int cmtp_proc_show(struct seq_file *m, void *v) struct capi_ctr *ctrl = m->private; struct cmtp_session *session = ctrl->driverdata; struct cmtp_application *app; - struct list_head *p; seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl)); seq_printf(m, "addr %s\n", session->name); seq_printf(m, "ctrl %d\n", session->num); - list_for_each(p, &session->applications) { - app = list_entry(p, struct cmtp_application, list); + list_for_each_entry(app, &session->applications, list) { seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9fb443a..47bcef754 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2713,12 +2713,10 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, void hci_bdaddr_list_clear(struct list_head *bdaddr_list) { - struct list_head *p, *n; + struct bdaddr_list *b, *n; - list_for_each_safe(p, n, bdaddr_list) { - struct bdaddr_list *b = list_entry(p, struct bdaddr_list, list); - - list_del(p); + list_for_each_entry_safe(b, n, bdaddr_list, list) { + list_del(&b->list); kfree(b); } } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 29709fb..f7eb02f 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -692,11 +692,9 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s) static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) { - struct rfcomm_session *s; - struct list_head *p, *n; + struct rfcomm_session *s, *n; struct l2cap_chan *chan; - list_for_each_safe(p, n, &session_list) { - s = list_entry(p, struct rfcomm_session, list); + list_for_each_entry_safe(s, n, &session_list, list) { chan = l2cap_pi(s->sock->sk)->chan; if ((!bacmp(src, BDADDR_ANY) || !bacmp(&chan->src, src)) && @@ -709,16 +707,14 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, int err) { - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; s->state = BT_CLOSED; BT_DBG("session %p state %ld err %d", s, s->state, err); /* Close all dlcs */ - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); + list_for_each_entry_safe(d, n, &s->dlcs, list) { d->state = BT_CLOSED; __rfcomm_dlc_close(d, err); } @@ -1771,13 +1767,11 @@ static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, static void rfcomm_process_connect(struct rfcomm_session *s) { - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; BT_DBG("session %p state %ld", s, s->state); - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); + list_for_each_entry_safe(d, n, &s->dlcs, list) { if (d->state == BT_CONFIG) { d->mtu = s->mtu; if (rfcomm_check_security(d)) { @@ -1843,14 +1837,11 @@ static int rfcomm_process_tx(struct rfcomm_dlc *d) static void rfcomm_process_dlcs(struct rfcomm_session *s) { - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; BT_DBG("session %p state %ld", s, s->state); - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); - + list_for_each_entry_safe(d, n, &s->dlcs, list) { if (test_bit(RFCOMM_TIMED_OUT, &d->flags)) { __rfcomm_dlc_close(d, ETIMEDOUT); continue; @@ -1985,14 +1976,11 @@ static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s) static void rfcomm_process_sessions(void) { - struct list_head *p, *n; + struct rfcomm_session *s, *n; rfcomm_lock(); - list_for_each_safe(p, n, &session_list) { - struct rfcomm_session *s; - s = list_entry(p, struct rfcomm_session, list); - + list_for_each_entry_safe(s, n, &session_list, list) { if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); @@ -2075,15 +2063,12 @@ failed: static void rfcomm_kill_listener(void) { - struct rfcomm_session *s; - struct list_head *p, *n; + struct rfcomm_session *s, *n; BT_DBG(""); - list_for_each_safe(p, n, &session_list) { - s = list_entry(p, struct rfcomm_session, list); + list_for_each_entry_safe(s, n, &session_list, list) rfcomm_session_del(s); - } } static int rfcomm_run(void *unused) @@ -2113,8 +2098,7 @@ static int rfcomm_run(void *unused) static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; - struct rfcomm_dlc *d; - struct list_head *p, *n; + struct rfcomm_dlc *d, *n; BT_DBG("conn %p status 0x%02x encrypt 0x%02x", conn, status, encrypt); @@ -2122,9 +2106,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!s) return; - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); - + list_for_each_entry_safe(d, n, &s->dlcs, list) { if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) { rfcomm_dlc_clear_timer(d); if (status || encrypt == 0x00) { -- cgit v1.1 From 92e17ee72a60b126263cbcd749e5da688e0198a3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Tue, 15 Dec 2015 12:25:35 +0100 Subject: 6lowpan: fix debugfs interface entry name This patches moves the debugfs interface related register after netdevice register. The function lowpan_dev_debugfs_init will use "dev->name" which can be before register_netdevice a format string. The function register_netdevice will evaluate the format string if necessary and replace "dev->name" to the real interface name. Reported-by: Lukasz Duda Signed-off-by: Alexander Aring Acked-by: Lukasz Duda Signed-off-by: Marcel Holtmann --- net/6lowpan/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index c7f06f5..faf65ba 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -29,13 +29,13 @@ int lowpan_register_netdevice(struct net_device *dev, lowpan_priv(dev)->lltype = lltype; - ret = lowpan_dev_debugfs_init(dev); + ret = register_netdevice(dev); if (ret < 0) return ret; - ret = register_netdevice(dev); + ret = lowpan_dev_debugfs_init(dev); if (ret < 0) - lowpan_dev_debugfs_exit(dev); + unregister_netdevice(dev); return ret; } -- cgit v1.1 From 5c29482dd17835def5cb97918f8f83a881c9918a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 22 Dec 2015 23:11:49 +0800 Subject: net-sysfs: use to_net_dev in net_namespace() Use to_net_dev() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bca8c35..b6c8a66 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1453,8 +1453,8 @@ static void netdev_release(struct device *d) static const void *net_namespace(struct device *d) { - struct net_device *dev; - dev = container_of(d, struct net_device, dev); + struct net_device *dev = to_net_dev(d); + return dev_net(dev); } -- cgit v1.1 From f2830d09895a64ba7a1a2c6ef41106c72e3654b2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 19 Dec 2015 12:55:43 -0800 Subject: RDS: don't pretend to use cpu notifiers It looks like an attempt to use CPU notifier here which was never completed. Nobody tried to wire it up completely since 2k9. So I unwind this code and get rid of everything not required. Oh look! 19 lines were removed while code still does the same thing. Acked-by: Santosh Shilimkar Tested-by: Santosh Shilimkar Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- net/rds/page.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/rds/page.c b/net/rds/page.c index 9005a2c..5a14e6d 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -179,37 +179,18 @@ out: } EXPORT_SYMBOL_GPL(rds_page_remainder_alloc); -static int rds_page_remainder_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +void rds_page_exit(void) { - struct rds_page_remainder *rem; - long cpu = (long)hcpu; + unsigned int cpu; - rem = &per_cpu(rds_page_remainders, cpu); + for_each_possible_cpu(cpu) { + struct rds_page_remainder *rem; - rdsdebug("cpu %ld action 0x%lx\n", cpu, action); + rem = &per_cpu(rds_page_remainders, cpu); + rdsdebug("cpu %u\n", cpu); - switch (action) { - case CPU_DEAD: if (rem->r_page) __free_page(rem->r_page); rem->r_page = NULL; - break; } - - return 0; -} - -static struct notifier_block rds_page_remainder_nb = { - .notifier_call = rds_page_remainder_cpu_notify, -}; - -void rds_page_exit(void) -{ - int i; - - for_each_possible_cpu(i) - rds_page_remainder_cpu_notify(&rds_page_remainder_nb, - (unsigned long)CPU_DEAD, - (void *)(long)i); } -- cgit v1.1 From 2010b93e9317cc12acd20c4aed385af7f9d1681e Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 22 Dec 2015 00:03:44 +0900 Subject: net: tcp: deal with listen sockets properly in tcp_abort. When closing a listen socket, tcp_abort currently calls tcp_done without clearing the request queue. If the socket has a child socket that is established but not yet accepted, the child socket is then left without a parent, causing a leak. Fix this by setting the socket state to TCP_CLOSE and calling inet_csk_listen_stop with the socket lock held, like tcp_close does. Tested using net_test. With this patch, calling SOCK_DESTROY on a listen socket that has an established but not yet accepted child socket results in the parent and the child being closed, such that they no longer appear in sock_diag dumps. Reported-by: Eric Dumazet Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cc7aaa5..7bb1b09 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3099,6 +3099,11 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with userspace socket closes such as tcp_close. */ lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + inet_csk_listen_stop(sk); + } + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ local_bh_disable(); bh_lock_sock(sk); -- cgit v1.1 From e46787f0dd9385449fd77246d4fddb8634350af8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:25 +0100 Subject: tcp: send_reset: test for non-NULL sk first tcp_md5_do_lookup requires a full socket, so once we extend _send_reset() to also accept timewait socket we would have to change if (!sk && hash_location) to something like if ((!sk || !sk_fullsock(sk)) && hash_location) { ... } else { (sk && sk_fullsock(sk)) tcp_md5_do_lookup() } Switch the two branches: check if we have a socket first, then fall back to a listener lookup if we saw a md5 option (hash_location). Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 11 +++++------ net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 46e92fb..eb29c2f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -587,7 +587,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) } rep; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; + struct tcp_md5sig_key *key = NULL; const __u8 *hash_location = NULL; unsigned char newhash[16]; int genhash; @@ -627,7 +627,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk) { + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -651,10 +654,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) - &ip_hdr(skb)->saddr, - AF_INET) : NULL; } if (key) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f03d2b0..32fa0de 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,7 +854,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk) { + key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -877,8 +879,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL; } #endif -- cgit v1.1 From 271c3b9b7bdae09c7da467ac1ae96e3298754977 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:26 +0100 Subject: tcp: honour SO_BINDTODEVICE for TW_RST case too Hannes points out that when we generate tcp reset for timewait sockets we pretend we found no socket and pass NULL sk to tcp_vX_send_reset(). Make it cope with inet tw sockets and then provide tw sk. This makes RSTs appear on correct interface when SO_BINDTODEVICE is used. Packetdrill test case: // want default route to be used, we rely on BINDTODEVICE `ip route del 192.0.2.0/24 via 192.168.0.2 dev tun0` 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 // test case still works due to BINDTODEVICE 0.001 setsockopt(3, SOL_SOCKET, SO_BINDTODEVICE, "tun0", 4) = 0 0.100...0.200 connect(3, ..., ...) = 0 0.100 > S 0:0(0) 0.200 < S. 0:0(0) ack 1 win 32792 0.200 > . 1:1(0) ack 1 0.210 close(3) = 0 0.210 > F. 1:1(0) ack 1 win 29200 0.300 < . 1:1(0) ack 2 win 46 // more data while in FIN_WAIT2, expect RST 1.300 < P. 1:1001(1000) ack 1 win 46 // fails without this change -- default route is used 1.301 > R 1:1(0) win 0 Reported-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 12 +++++++++--- net/ipv4/tcp_minisocks.c | 7 ++----- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eb29c2f..fc4f726 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -627,7 +627,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (sk) { + if (sk && sk_fullsock(sk)) { key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) &ip_hdr(skb)->saddr, AF_INET); } else if (hash_location) { @@ -674,7 +674,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. @@ -682,6 +683,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; + BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != + offsetof(struct inet_timewait_sock, tw_bound_dev_if)); + arg.tos = ip_hdr(skb)->tos; ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -1705,7 +1709,9 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - goto no_tcp_socket; + tcp_v4_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS:; } goto discard_it; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ac6b196..75632a9 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -131,7 +131,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, goto kill; if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) - goto kill_with_rst; + return TCP_TW_RST; /* Dup ACK? */ if (!th->ack || @@ -145,11 +145,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * reset. */ if (!th->fin || - TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { -kill_with_rst: - inet_twsk_deschedule_put(tw); + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) return TCP_TW_RST; - } /* FIN arrived, enter true time-wait state. */ tw->tw_substate = TCP_TIME_WAIT; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 32fa0de..9ecb012 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,7 +854,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (sk) { + if (sk && sk_fullsock(sk)) { key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); } else if (hash_location) { /* @@ -1516,7 +1516,9 @@ do_time_wait: break; case TCP_TW_RST: tcp_v6_restore_cb(skb); - goto no_tcp_socket; + tcp_v6_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS: ; } -- cgit v1.1 From aeb7ed14fe5df3a4ce019c8d4ce0b2922a091196 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 23 Dec 2015 20:42:21 +0800 Subject: bridge: use kobj_to_dev instead of to_dev kobj_to_dev has been defined in linux/device.h, so I replace to_dev with it. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8365bd5..6b80914 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -22,7 +22,6 @@ #include "br_private.h" -#define to_dev(obj) container_of(obj, struct device, kobj) #define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* @@ -814,7 +813,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct device *dev = to_dev(kobj); + struct device *dev = kobj_to_dev(kobj); struct net_bridge *br = to_bridge(dev); int n; -- cgit v1.1 From 039f50629b7f860f36644ed1f34b27da9aa62f43 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 24 Dec 2015 14:34:54 -0800 Subject: ip_tunnel: Move stats update to iptunnel_xmit() By moving stats update into iptunnel_xmit(), we can simplify iptunnel_xmit() usage. With this change there is no need to call another function (iptunnel_xmit_stats()) to update stats in tunnel xmit code path. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 7 +++---- net/ipv4/ip_tunnel.c | 7 ++----- net/ipv4/ip_tunnel_core.c | 9 +++++---- net/ipv4/ip_vti.c | 2 +- net/ipv4/udp_tunnel.c | 11 +++++------ net/ipv6/sit.c | 7 ++----- net/tipc/udp_media.c | 12 +++--------- 7 files changed, 21 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 04a48c0..7c51c4e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -561,10 +561,9 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr, - key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + + iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, + key->tos, key->ttl, df, false); return; err_free_rt: diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0f6e9ee..c7bd72e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -656,7 +656,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int err; bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -794,10 +793,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, - tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 1db8418..eb52ce9 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -47,12 +47,13 @@ #include #include -int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet) +void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); + struct net_device *dev = skb->dev; struct iphdr *iph; int err; @@ -81,7 +82,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, err = ip_local_out(net, sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; - return pkt_len; + iptunnel_xmit_stats(dev, pkt_len); } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 02d9c21..5cf10b7 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -199,7 +199,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) err = skb->len; - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index aba4286..0ec0881 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -74,10 +74,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, - __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck) +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -91,8 +91,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, - tos, ttl, df, xnet); + iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dcccae8..e794ef6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -820,7 +820,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, const struct in6_addr *addr6; int addr_type; u8 ttl; - int err; u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + sizeof(struct iphdr); @@ -983,10 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, - protocol, tos, ttl, df, - !net_eq(tunnel->net, dev_net(dev))); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6af78c6..d63a911 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -182,15 +182,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, - src->ipv4.s_addr, - dst->ipv4.s_addr, 0, ttl, 0, - src->udp_port, dst->udp_port, - false, true); - if (err < 0) { - ip_rt_put(rt); - goto tx_error; - } + udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, + dst->udp_port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; -- cgit v1.1 From df05ef874b284d833c2d9795a6350c6a373ab6c9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:39:32 +0100 Subject: netfilter: nf_tables: release objects on netns destruction We have to release the existing objects on netns removal otherwise we leak them. Chains are unregistered in first place to make sure no packets are walking on our rules and sets anymore. The object release happens by when we unregister the family via nft_release_afinfo() which is called from nft_unregister_afinfo() from the corresponding __net_exit path in every family. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 2 +- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/ipv4/netfilter/nf_tables_ipv4.c | 2 +- net/ipv6/netfilter/nf_tables_ipv6.c | 2 +- net/netfilter/nf_tables_api.c | 47 +++++++++++++++++++++++++++++++-- net/netfilter/nf_tables_inet.c | 2 +- net/netfilter/nf_tables_netdev.c | 2 +- 7 files changed, 51 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 62f6b1b..7fcdd72 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -141,7 +141,7 @@ err: static void nf_tables_bridge_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.bridge); + nft_unregister_afinfo(net, net->nft.bridge); kfree(net->nft.bridge); } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 9d09d4f..cd84d42 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -57,7 +57,7 @@ err: static void nf_tables_arp_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.arp); + nft_unregister_afinfo(net, net->nft.arp); kfree(net->nft.arp); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index ca9dc3c..e44ba3b 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -78,7 +78,7 @@ err: static void nf_tables_ipv4_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.ipv4); + nft_unregister_afinfo(net, net->nft.ipv4); kfree(net->nft.ipv4); } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 120ea91..30b22f4 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -77,7 +77,7 @@ err: static void nf_tables_ipv6_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.ipv6); + nft_unregister_afinfo(net, net->nft.ipv6); kfree(net->nft.ipv6); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a23f77..8522731 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -41,6 +41,8 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi) } EXPORT_SYMBOL_GPL(nft_register_afinfo); +static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi); + /** * nft_unregister_afinfo - unregister nf_tables address family info * @@ -48,9 +50,10 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo); * * Unregister the address family for use with nf_tables. */ -void nft_unregister_afinfo(struct nft_af_info *afi) +void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi) { nfnl_lock(NFNL_SUBSYS_NFTABLES); + __nft_release_afinfo(net, afi); list_del_rcu(&afi->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } @@ -4579,7 +4582,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -static int nf_tables_init_net(struct net *net) +static int __net_init nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); INIT_LIST_HEAD(&net->nft.commit_list); @@ -4587,6 +4590,46 @@ static int nf_tables_init_net(struct net *net) return 0; } +/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ +static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) +{ + struct nft_table *table, *nt; + struct nft_chain *chain, *nc; + struct nft_rule *rule, *nr; + struct nft_set *set, *ns; + struct nft_ctx ctx = { + .net = net, + .afi = afi, + }; + + list_for_each_entry_safe(table, nt, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hooks(table, chain, afi->nops); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + chain->use--; + nf_tables_rule_destroy(&ctx, rule); + } + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + table->use--; + nft_set_destroy(set); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + list_del(&chain->list); + table->use--; + nf_tables_chain_destroy(chain); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); + } +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, }; diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 9dd2d21..6b5f762 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -57,7 +57,7 @@ err: static void __net_exit nf_tables_inet_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.inet); + nft_unregister_afinfo(net, net->nft.inet); kfree(net->nft.inet); } diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 7b9c053..2bfd1fb 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -139,7 +139,7 @@ err: static void nf_tables_netdev_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.netdev); + nft_unregister_afinfo(net, net->nft.netdev); kfree(net->nft.netdev); } -- cgit v1.1 From 5ebe0b0eec9d6f703b137f9b938c52f7b91dd9d6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:40:49 +0100 Subject: netfilter: nf_tables: destroy basechain and rules on netdevice removal If the netdevice is destroyed, the resources that are attached should be released too as they belong to the device that is now gone. Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 31 +++++++++++++++++++++------ net/netfilter/nf_tables_netdev.c | 45 ++++++++++++++++------------------------ 2 files changed, 43 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8522731..5729844 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,8 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +static int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { struct net *net = read_pnet(&basechain->pnet); @@ -141,10 +141,9 @@ int nft_register_basechain(struct nft_base_chain *basechain, return nf_register_net_hooks(net, basechain->ops, hook_nops); } -EXPORT_SYMBOL_GPL(nft_register_basechain); -void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +static void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { struct net *net = read_pnet(&basechain->pnet); @@ -153,7 +152,6 @@ void nft_unregister_basechain(struct nft_base_chain *basechain, nf_unregister_net_hooks(net, basechain->ops, hook_nops); } -EXPORT_SYMBOL_GPL(nft_unregister_basechain); static int nf_tables_register_hooks(const struct nft_table *table, struct nft_chain *chain, @@ -4590,6 +4588,27 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +int __nft_release_basechain(struct nft_ctx *ctx) +{ + struct nft_rule *rule, *nr; + + BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); + + nf_tables_unregister_hooks(ctx->chain->table, ctx->chain, + ctx->afi->nops); + list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { + list_del(&rule->list); + ctx->chain->use--; + nf_tables_rule_destroy(ctx, rule); + } + list_del(&ctx->chain->list); + ctx->table->use--; + nf_tables_chain_destroy(ctx->chain); + + return 0; +} +EXPORT_SYMBOL_GPL(__nft_release_basechain); + /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 2bfd1fb..3e9c87b 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -156,35 +156,17 @@ static const struct nf_chain_type nft_filter_chain_netdev = { .hook_mask = (1 << NF_NETDEV_INGRESS), }; -static void nft_netdev_event(unsigned long event, struct nft_af_info *afi, - struct net_device *dev, struct nft_table *table, - struct nft_base_chain *basechain) +static void nft_netdev_event(unsigned long event, struct net_device *dev, + struct nft_ctx *ctx) { - switch (event) { - case NETDEV_REGISTER: - if (strcmp(basechain->dev_name, dev->name) != 0) - return; + struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED)); - - dev_hold(dev); - basechain->ops[0].dev = dev; - basechain->flags &= ~NFT_BASECHAIN_DISABLED; - if (!(table->flags & NFT_TABLE_F_DORMANT)) - nft_register_basechain(basechain, afi->nops); - break; + switch (event) { case NETDEV_UNREGISTER: if (strcmp(basechain->dev_name, dev->name) != 0) return; - BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED); - - if (!(table->flags & NFT_TABLE_F_DORMANT)) - nft_unregister_basechain(basechain, afi->nops); - - dev_put(basechain->ops[0].dev); - basechain->ops[0].dev = NULL; - basechain->flags |= NFT_BASECHAIN_DISABLED; + __nft_release_basechain(ctx); break; case NETDEV_CHANGENAME: if (dev->ifindex != basechain->ops[0].dev->ifindex) @@ -201,20 +183,29 @@ static int nf_tables_netdev_event(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_af_info *afi; struct nft_table *table; - struct nft_chain *chain; + struct nft_chain *chain, *nr; + struct nft_ctx ctx = { + .net = dev_net(dev), + }; + + if (event != NETDEV_UNREGISTER && + event != NETDEV_CHANGENAME) + return NOTIFY_DONE; nfnl_lock(NFNL_SUBSYS_NFTABLES); list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { + ctx.afi = afi; if (afi->family != NFPROTO_NETDEV) continue; list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { + ctx.table = table; + list_for_each_entry_safe(chain, nr, &table->chains, list) { if (!(chain->flags & NFT_BASE_CHAIN)) continue; - nft_netdev_event(event, afi, dev, table, - nft_base_chain(chain)); + ctx.chain = chain; + nft_netdev_event(event, dev, &ctx); } } } -- cgit v1.1 From f4c756b4ea7d2921391febcaed4ce2511872a0e1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:40:50 +0100 Subject: netfilter: nf_tables: remove check against removal of inactive objects The following sequence inside a batch, although not very useful, is valid: add table foo ... delete table foo This may be generated by some robot while applying some incremental upgrade, so remove the defensive checks against this. This patch keeps the check on the get/dump path by now, we have to replace the inactive flag by introducing object generations. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5729844..28cbc45 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -832,8 +832,6 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; ctx.afi = afi; ctx.table = table; @@ -1493,14 +1491,10 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; if (chain->use > 0) return -EBUSY; @@ -2192,8 +2186,6 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; if (nla[NFTA_RULE_CHAIN]) { chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); @@ -2362,8 +2354,6 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; } nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); @@ -2898,8 +2888,6 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; if (!list_empty(&set->bindings)) return -EBUSY; @@ -3022,8 +3010,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - bool trans) + const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; @@ -3036,8 +3023,6 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); if (IS_ERR(table)) return PTR_ERR(table); - if (!trans && (table->flags & NFT_TABLE_INACTIVE)) - return -ENOENT; nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); return 0; @@ -3146,9 +3131,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return err; err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla, false); + (void *)nla); if (err < 0) return err; + if (ctx.table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) @@ -3212,9 +3199,11 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; + if (ctx.table->flags & NFT_TABLE_INACTIVE) + return -ENOENT; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) @@ -3536,7 +3525,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, true); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; @@ -3630,7 +3619,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); if (err < 0) return err; -- cgit v1.1 From 7b8002a1511fcbcb0596cac90d67ad5c8182d0aa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 18:41:56 +0100 Subject: netfilter: nfnetlink: pass down netns pointer to call() and call_rcu() Adapt callsites to avoid recurrent lookup of the netns pointer. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 108 +++++++++++++++-------------------- net/netfilter/nf_conntrack_netlink.c | 96 ++++++++++++++----------------- net/netfilter/nf_tables_api.c | 30 ++++------ net/netfilter/nfnetlink.c | 6 +- net/netfilter/nfnetlink_acct.c | 21 +++---- net/netfilter/nfnetlink_cthelper.c | 18 +++--- net/netfilter/nfnetlink_cttimeout.c | 42 ++++++-------- net/netfilter/nfnetlink_log.c | 15 ++--- net/netfilter/nfnetlink_queue.c | 36 +++++------- net/netfilter/nft_compat.c | 6 +- net/netfilter/xt_osf.c | 7 ++- 11 files changed, 169 insertions(+), 216 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 54f3d7c..95db43f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -825,20 +825,17 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, return 0; } -static int -ip_set_none(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { return -EOPNOTSUPP; } -static int -ip_set_create(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_create(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct net *net = sock_net(ctnl); struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; @@ -976,12 +973,11 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } -static int -ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_destroy(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -1052,12 +1048,11 @@ ip_set_flush_set(struct ip_set *set) spin_unlock_bh(&set->lock); } -static int -ip_set_flush(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *s; ip_set_id_t i; @@ -1092,12 +1087,11 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; -static int -ip_set_rename(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_rename(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -1142,12 +1136,11 @@ out: * so the ip_set_list always contains valid pointers to the sets. */ -static int -ip_set_swap(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1413,10 +1406,9 @@ out: return ret < 0 ? ret : skb->len; } -static int -ip_set_dump(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1500,12 +1492,11 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, return ret; } -static int -ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; @@ -1555,12 +1546,11 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -ip_set_udel(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; @@ -1610,12 +1600,11 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -ip_set_utest(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; @@ -1646,12 +1635,11 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, /* Get headed data of a set */ -static int -ip_set_header(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_header(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { - struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + struct ip_set_net *inst = ip_set_pernet(net); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1703,10 +1691,9 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, }; -static int -ip_set_type(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1762,10 +1749,9 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, }; -static int -ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const attr[]) +static int ip_set_protocol(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9f52729..dbb1bb3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1113,12 +1113,11 @@ static int ctnetlink_flush_conntrack(struct net *net, return 0; } -static int -ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -1168,12 +1167,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return 0; } -static int -ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; @@ -1330,10 +1328,10 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) return ctnetlink_dump_list(skb, cb, true); } -static int -ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1352,10 +1350,10 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) return ctnetlink_dump_list(skb, cb, false); } -static int -ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -1865,12 +1863,11 @@ err1: return ERR_PTR(err); } -static int -ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -2034,10 +2031,10 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -2080,10 +2077,9 @@ nlmsg_failure: return -1; } -static int -ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct sk_buff *skb2; int err; @@ -2729,12 +2725,12 @@ out: return skb->len; } -static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, +static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { int err; - struct net *net = sock_net(ctnl); struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_tuple tuple; @@ -2768,12 +2764,10 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, return err; } -static int -ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; @@ -2784,7 +2778,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { if (cda[CTA_EXPECT_MASTER]) - return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); + return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda); else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, @@ -2850,12 +2844,10 @@ out: return err == -EAGAIN ? -ENOBUFS : err; } -static int -ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -3136,12 +3128,10 @@ err_ct: return err; } -static int -ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_new_expect(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -3242,10 +3232,10 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 28cbc45..69cb5be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -543,15 +543,14 @@ done: return skb->len; } -static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_gettable(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1097,8 +1096,8 @@ done: return skb->len; } -static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getchain(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1106,7 +1105,6 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nft_table *table; const struct nft_chain *chain; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -1923,8 +1921,8 @@ done: return skb->len; } -static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getrule(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); @@ -1933,7 +1931,6 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_chain *chain; const struct nft_rule *rule; struct sk_buff *skb2; - struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; int err; @@ -2604,11 +2601,10 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb) return 0; } -static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getset(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -3190,11 +3186,10 @@ nla_put_failure: return -ENOSPC; } -static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); const struct nft_set *set; struct nft_ctx ctx; int err; @@ -3723,11 +3718,10 @@ err: return err; } -static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int nf_tables_getgen(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { - struct net *net = sock_net(skb->sk); struct sk_buff *skb2; int err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9ed4534..7012154 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -206,7 +206,7 @@ replay: } if (nc->call_rcu) { - err = nc->call_rcu(net->nfnl, skb, nlh, + err = nc->call_rcu(net, net->nfnl, skb, nlh, (const struct nlattr **)cda); rcu_read_unlock(); } else { @@ -216,8 +216,8 @@ replay: nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) - err = nc->call(net->nfnl, skb, nlh, - (const struct nlattr **)cda); + err = nc->call(net, net->nfnl, skb, nlh, + (const struct nlattr **)cda); else err = -EINVAL; nfnl_unlock(subsys_id); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index fefbf5f..5274b04 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -46,12 +46,11 @@ struct nfacct_filter { #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) #define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */ -static int -nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_new(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; - struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -253,11 +252,10 @@ nfacct_filter_alloc(const struct nlattr * const attr) return filter; } -static int -nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { - struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -333,11 +331,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur) return ret; } -static int -nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_acct_del(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { - struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 54330fb..e924e95 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -286,9 +286,9 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return 0; } -static int -nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; @@ -498,9 +498,9 @@ out: return skb->len; } -static int -nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; @@ -570,9 +570,9 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, return ret; } -static int -nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { char *helper_name = NULL; struct nf_conntrack_helper *cur; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3921d54..5d010f2 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -65,16 +65,15 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, return ret; } -static int -cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; - struct net *net = sock_net(skb->sk); char *name; int ret; @@ -239,12 +238,11 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static int -cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -339,15 +337,14 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) return ret; } -static int -cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { - struct net *net = sock_net(skb->sk); - char *name; struct ctnl_timeout *cur; int ret = -ENOENT; + char *name; if (!cda[CTA_TIMEOUT_NAME]) { list_for_each_entry(cur, &net->nfct_timeout_list, head) @@ -370,15 +367,14 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } -static int -cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const cda[]) +static int cttimeout_default_set(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; - struct net *net = sock_net(skb->sk); unsigned int *timeouts; int ret; @@ -460,14 +456,14 @@ nla_put_failure: return -1; } -static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, +static int cttimeout_default_get(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { __u16 l3num; __u8 l4num; struct nf_conntrack_l4proto *l4proto; - struct net *net = sock_net(skb->sk); struct sk_buff *skb2; int ret, err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 70b6bd3..6a57f10 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -785,10 +785,9 @@ static struct notifier_block nfulnl_rtnl_notifier = { .notifier_call = nfulnl_rcv_nl_event, }; -static int -nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -809,16 +808,14 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, }; -static int -nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfula[]) +static int nfulnl_recv_config(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfula[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; struct nfulnl_msg_config_cmd *cmd = NULL; - struct net *net = sock_net(ctnl); struct nfnl_log_net *log = nfnl_log_pernet(net); int ret = 0; u16 flags = 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 861c661..3d1f16c 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -957,10 +957,10 @@ static int nfq_id_after(unsigned int id, unsigned int max) return (int)(id - max) > 0; } -static int -nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_queue_entry *entry, *tmp; @@ -969,8 +969,6 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); queue = verdict_instance_lookup(q, queue_num, @@ -1029,14 +1027,13 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, return ct; } -static int -nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, + struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); - struct nfqnl_msg_verdict_hdr *vhdr; struct nfqnl_instance *queue; unsigned int verdict; @@ -1044,8 +1041,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, enum ip_conntrack_info uninitialized_var(ctinfo); struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; - - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); queue = instance_lookup(q, queue_num); @@ -1092,10 +1087,9 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, return 0; } -static int -nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -1110,16 +1104,14 @@ static const struct nf_queue_handler nfqh = { .nf_hook_drop = &nfqnl_nf_hook_drop, }; -static int -nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) +static int nfqnl_recv_config(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; - struct net *net = sock_net(ctnl); struct nfnl_queue_net *q = nfnl_queue_pernet(net); int ret = 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 9c8fab0..454841b 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -519,9 +519,9 @@ nla_put_failure: return -1; } -static int -nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +static int nfnl_compat_get(struct net *net, struct sock *nfnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const tb[]) { int ret = 0, target; struct nfgenmsg *nfmsg; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index df8801e..4e3c3af 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -61,8 +61,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, }; -static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, +static int xt_osf_add_callback(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; @@ -104,7 +104,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, return err; } -static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, +static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, + struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const osf_attrs[]) { -- cgit v1.1 From 5913beaf0d70f97135ed7191c028fd88b3848864 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:41:57 +0100 Subject: netfilter: nfnetlink: pass down netns pointer to commit() and abort() callbacks Adapt callsites to avoid recurrent lookup of the netns pointer. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++---- net/netfilter/nfnetlink.c | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 69cb5be..f5c3971 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3865,9 +3865,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) kfree(trans); } -static int nf_tables_commit(struct sk_buff *skb) +static int nf_tables_commit(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; @@ -4002,9 +4001,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } -static int nf_tables_abort(struct sk_buff *skb) +static int nf_tables_abort(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; struct nft_trans_elem *te; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 7012154..a7ba233 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -425,15 +425,15 @@ next: } done: if (status & NFNL_BATCH_REPLAY) { - ss->abort(oskb); + ss->abort(net, oskb); nfnl_err_reset(&err_list); nfnl_unlock(subsys_id); kfree_skb(skb); goto replay; } else if (status == NFNL_BATCH_DONE) { - ss->commit(oskb); + ss->commit(net, oskb); } else { - ss->abort(oskb); + ss->abort(net, oskb); } nfnl_err_deliver(&err_list, oskb); -- cgit v1.1 From ce2e56cdfbb010e22073d303161e74c144ebe731 Mon Sep 17 00:00:00 2001 From: Shikha Singh Date: Fri, 20 Nov 2015 06:40:19 -0500 Subject: NFC: digital: Add Type4A tags support The definition of DIGITAL_PROTO_NFCA_RF_TECH is modified to support ISO14443 Type4A tags. Without this change it is not possible to start polling for ISO14443 Type4A tags from the initiator side. Signed-off-by: Shikha Singh Signed-off-by: Samuel Ortiz --- net/nfc/digital_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 23c2a11..dd9003f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -20,7 +20,8 @@ #include "digital.h" #define DIGITAL_PROTO_NFCA_RF_TECH \ - (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_NFC_DEP_MASK) + (NFC_PROTO_JEWEL_MASK | NFC_PROTO_MIFARE_MASK | \ + NFC_PROTO_NFC_DEP_MASK | NFC_PROTO_ISO14443_MASK) #define DIGITAL_PROTO_NFCB_RF_TECH NFC_PROTO_ISO14443_B_MASK -- cgit v1.1 From 2a84193f14c4196ee94bf1d44c5f28bcabe7e840 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 23 Dec 2015 23:45:15 +0100 Subject: NFC: nci: Fix error check of nci_hci_create_pipe() result net/nfc/nci/hci.c: In function nci_hci_connect_gate : net/nfc/nci/hci.c:679: warning: comparison is always false due to limited range of data type In case of error, nci_hci_create_pipe() returns NCI_HCI_INVALID_PIPE, and not a negative error code. Correct the check to fix this. Acked-by: Geert Uytterhoeven Reported-by: Dan Carpenter Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/nci/hci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 2aedac1..a0ab26d 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -676,7 +676,7 @@ int nci_hci_connect_gate(struct nci_dev *ndev, break; default: pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); - if (pipe < 0) + if (pipe == NCI_HCI_INVALID_PIPE) return r; pipe_created = true; break; -- cgit v1.1 From 9afec6d3866b8451abcf1a7a1a381a3be6c83386 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 23 Dec 2015 23:45:18 +0100 Subject: nfc: netlink: HCI event connectivity implementation Add support for missing HCI event EVT_CONNECTIVITY and forward it to userspace. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 13 +++++++++++++ net/nfc/netlink.c | 37 +++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 1 + 3 files changed, 51 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 1fe3d3b..122bb81 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -953,6 +953,19 @@ out: } EXPORT_SYMBOL(nfc_se_transaction); +int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx) +{ + int rc; + + pr_debug("connectivity: %x\n", se_idx); + + device_lock(&dev->dev); + rc = nfc_genl_se_connectivity(dev, se_idx); + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_connectivity); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f58c1fb..ea023b3 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -552,6 +552,43 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_CONNECTIVITY); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index c20b784..6c6f76b 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -105,6 +105,7 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); +int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx); struct nfc_dev *nfc_get_device(unsigned int idx); -- cgit v1.1 From c6dc65d885b98898bf287aaf44e020077b41769f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Dec 2015 23:45:27 +0100 Subject: NFC: nci: memory leak in nci_core_conn_create() I've moved the check for "number_destination_params" forward a few lines to avoid leaking "cmd". Fixes: caa575a86ec1 ('NFC: nci: fix possible crash in nci_core_conn_create') Acked-by: Christophe Ricard Signed-off-by: Dan Carpenter Signed-off-by: Samuel Ortiz --- net/nfc/nci/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 10c99a5..fbb7a2b 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -610,14 +610,14 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, struct nci_core_conn_create_cmd *cmd; struct core_conn_create_data data; + if (!number_destination_params) + return -EINVAL; + data.length = params_len + sizeof(struct nci_core_conn_create_cmd); cmd = kzalloc(data.length, GFP_KERNEL); if (!cmd) return -ENOMEM; - if (!number_destination_params) - return -EINVAL; - cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; memcpy(cmd->params, params, params_len); -- cgit v1.1 From f3a4094558ddf8afa8bb58250d548e15e059c65a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 30 Dec 2015 16:28:25 +0100 Subject: ethtool: Add phy statistics Ethernet PHYs can maintain statistics, for example errors while idle and receive errors. Add an ethtool mechanism to retrieve these statistics, using the same model as MAC statistics. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/core/ethtool.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 09948a7..daf0470 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -191,6 +191,23 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) return ret; } +static int phy_get_sset_count(struct phy_device *phydev) +{ + int ret; + + if (phydev->drv->get_sset_count && + phydev->drv->get_strings && + phydev->drv->get_stats) { + mutex_lock(&phydev->lock); + ret = phydev->drv->get_sset_count(phydev); + mutex_unlock(&phydev->lock); + + return ret; + } + + return -EOPNOTSUPP; +} + static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -204,6 +221,13 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_TUNABLES) return ARRAY_SIZE(tunable_strings); + if (sset == ETH_SS_PHY_STATS) { + if (dev->phydev) + return phy_get_sset_count(dev->phydev); + else + return -EOPNOTSUPP; + } + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -223,7 +247,17 @@ static void __ethtool_get_strings(struct net_device *dev, sizeof(rss_hash_func_strings)); else if (stringset == ETH_SS_TUNABLES) memcpy(data, tunable_strings, sizeof(tunable_strings)); - else + else if (stringset == ETH_SS_PHY_STATS) { + struct phy_device *phydev = dev->phydev; + + if (phydev) { + mutex_lock(&phydev->lock); + phydev->drv->get_strings(phydev, data); + mutex_unlock(&phydev->lock); + } else { + return; + } + } else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); } @@ -1401,6 +1435,47 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_stats stats; + struct phy_device *phydev = dev->phydev; + u64 *data; + int ret, n_stats; + + if (!phydev) + return -EOPNOTSUPP; + + n_stats = phy_get_sset_count(phydev); + + if (n_stats < 0) + return n_stats; + WARN_ON(n_stats == 0); + + if (copy_from_user(&stats, useraddr, sizeof(stats))) + return -EFAULT; + + stats.n_stats = n_stats; + data = kmalloc_array(n_stats, sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + mutex_lock(&phydev->lock); + phydev->drv->get_stats(phydev, &stats, data); + mutex_unlock(&phydev->lock); + + ret = -EFAULT; + if (copy_to_user(useraddr, &stats, sizeof(stats))) + goto out; + useraddr += sizeof(stats); + if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) + goto out; + ret = 0; + + out: + kfree(data); + return ret; +} + static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) { struct ethtool_perm_addr epaddr; @@ -1779,6 +1854,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GSSET_INFO: case ETHTOOL_GSTRINGS: case ETHTOOL_GSTATS: + case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: @@ -1991,6 +2067,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_STUNABLE: rc = ethtool_set_tunable(dev, useraddr); break; + case ETHTOOL_GPHYSTATS: + rc = ethtool_get_phy_stats(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.1 From c7862a5f0de5f521c545f3436f0aa190964342dd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Dec 2015 18:21:44 +0100 Subject: netfilter: nft_limit: allow to invert matching criteria This patch allows you to invert the ratelimit matching criteria, so you can match packets over the ratelimit. This is required to support what hashlimit does. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 5d67938..99d1857 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -26,6 +26,7 @@ struct nft_limit { u64 rate; u64 nsecs; u32 burst; + bool invert; }; static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) @@ -44,11 +45,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) if (delta >= 0) { limit->tokens = delta; spin_unlock_bh(&limit_lock); - return false; + return limit->invert; } limit->tokens = tokens; spin_unlock_bh(&limit_lock); - return true; + return !limit->invert; } static int nft_limit_init(struct nft_limit *limit, @@ -78,6 +79,12 @@ static int nft_limit_init(struct nft_limit *limit, limit->rate = rate; } + if (tb[NFTA_LIMIT_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); + + if (flags & NFT_LIMIT_F_INV) + limit->invert = true; + } limit->last = ktime_get_ns(); return 0; @@ -86,13 +93,15 @@ static int nft_limit_init(struct nft_limit *limit, static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, enum nft_limit_type type) { + u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0; u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); u64 rate = limit->rate - limit->burst; if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || - nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type))) + nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) || + nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags))) goto nla_put_failure; return 0; @@ -120,6 +129,7 @@ static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, + [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 }, }; static int nft_limit_pkts_init(const struct nft_ctx *ctx, -- cgit v1.1 From 502061f81d3eb4518d2e72178e494a8547788ad0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Jan 2016 21:02:18 +0100 Subject: netfilter: nf_tables: add packet duplication to the netdev family You can use this to duplicate packets and inject them at the egress path of the specified interface. This duplication allows you to inspect traffic from the dummy or any other interface dedicated to this purpose. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 16 +++++++ net/netfilter/Makefile | 6 +++ net/netfilter/nf_dup_netdev.c | 40 +++++++++++++++++ net/netfilter/nft_dup_netdev.c | 97 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+) create mode 100644 net/netfilter/nf_dup_netdev.c create mode 100644 net/netfilter/nft_dup_netdev.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4692782..8514cc4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -563,6 +563,22 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +if NF_TABLES_NETDEV + +config NF_DUP_NETDEV + tristate "Netfilter packet duplication support" + help + This option enables the generic packet duplication infrastructure + for Netfilter. + +config NFT_DUP_NETDEV + tristate "Netfilter nf_tables netdev packet duplication support" + select NF_DUP_NETDEV + help + This option enables packet duplication for the "netdev" family. + +endif # NF_TABLES_NETDEV + endif # NF_TABLES config NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 2293484..5c99133 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -66,6 +66,9 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# generic packet duplication from netdev family +obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o + # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o @@ -90,6 +93,9 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o +# nf_tables netdev +obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c new file mode 100644 index 0000000..8414ee1 --- /dev/null +++ b/net/netfilter/nf_dup_netdev.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) +{ + struct net_device *dev; + struct sk_buff *skb; + + dev = dev_get_by_index_rcu(pkt->net, oif); + if (dev == NULL) + return; + + skb = skb_clone(pkt->skb, GFP_ATOMIC); + if (skb == NULL) + return; + + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + dev_queue_xmit(skb); +} +EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c new file mode 100644 index 0000000..2cc1e0ef --- /dev/null +++ b/net/netfilter/nft_dup_netdev.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_dup_netdev { + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + int oif = regs->data[priv->sreg_dev]; + + nf_dup_netdev_egress(pkt, oif); +} + +static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static int nft_dup_netdev_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + + if (tb[NFTA_DUP_SREG_DEV] == NULL) + return -EINVAL; + + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); +} + +static const struct nft_expr_ops nft_dup_netdev_ingress_ops; + +static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_netdev_type; +static const struct nft_expr_ops nft_dup_netdev_ops = { + .type = &nft_dup_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)), + .eval = nft_dup_netdev_eval, + .init = nft_dup_netdev_init, + .dump = nft_dup_netdev_dump, +}; + +static struct nft_expr_type nft_dup_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "dup", + .ops = &nft_dup_netdev_ops, + .policy = nft_dup_netdev_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_netdev_module_init(void) +{ + return nft_register_expr(&nft_dup_netdev_type); +} + +static void __exit nft_dup_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_dup_netdev_type); +} + +module_init(nft_dup_netdev_module_init); +module_exit(nft_dup_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_AF_EXPR(5, "dup"); -- cgit v1.1 From 16e5c1fc36040e592128a164499bc25eb138a80f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2015 00:06:05 -0500 Subject: convert a bunch of open-coded instances of memdup_user_nul() A _lot_ of ->write() instances were open-coding it; some are converted to memdup_user_nul(), a lot more remain... Signed-off-by: Al Viro --- net/rxrpc/ar-key.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index da3cc09..3f65716 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -896,15 +896,9 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen) if (optlen <= 0 || optlen > PAGE_SIZE - 1) return -EINVAL; - description = kmalloc(optlen + 1, GFP_KERNEL); - if (!description) - return -ENOMEM; - - if (copy_from_user(description, optval, optlen)) { - kfree(description); - return -EFAULT; - } - description[optlen] = 0; + description = memdup_user_nul(optval, optlen); + if (IS_ERR(description)) + return PTR_ERR(description); key = request_key(&key_type_rxrpc, description, NULL); if (IS_ERR(key)) { @@ -933,15 +927,9 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval, if (optlen <= 0 || optlen > PAGE_SIZE - 1) return -EINVAL; - description = kmalloc(optlen + 1, GFP_KERNEL); - if (!description) - return -ENOMEM; - - if (copy_from_user(description, optval, optlen)) { - kfree(description); - return -EFAULT; - } - description[optlen] = 0; + description = memdup_user_nul(optval, optlen); + if (IS_ERR(description)) + return PTR_ERR(description); key = request_key(&key_type_keyring, description, NULL); if (IS_ERR(key)) { -- cgit v1.1 From 222e4adec52418dc1c80a30eff80b33ec954745e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Jan 2016 13:31:21 -0500 Subject: ... and a couple in net/9p Signed-off-by: Al Viro --- net/9p/trans_virtio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 9fc6a56..199bc76 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -105,7 +105,7 @@ static struct list_head virtio_chan_list; /* How many bytes left in this page. */ static unsigned int rest_of_page(void *data) { - return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); + return PAGE_SIZE - offset_in_page(data); } /** @@ -365,7 +365,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, return -ENOMEM; *need_drop = 0; - p -= (*offs = (unsigned long)p % PAGE_SIZE); + p -= (*offs = offset_in_page(p)); for (index = 0; index < nr_pages; index++) { if (is_vmalloc_addr(p)) (*pages)[index] = vmalloc_to_page(p); -- cgit v1.1 From 39e6dea28adc874f7021e5580c13cab0b58407ea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Nov 2015 13:39:38 +0100 Subject: netfilter: nf_tables: add forward expression to the netdev family You can use this to forward packets from ingress to the egress path of the specified interface. This provides a fast path to bounce packets from one interface to another specific destination interface. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 6 +++ net/netfilter/Makefile | 1 + net/netfilter/nft_fwd_netdev.c | 98 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 net/netfilter/nft_fwd_netdev.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8514cc4..8c067e6 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -577,6 +577,12 @@ config NFT_DUP_NETDEV help This option enables packet duplication for the "netdev" family. +config NFT_FWD_NETDEV + tristate "Netfilter nf_tables netdev packet forwarding support" + select NF_DUP_NETDEV + help + This option enables packet forwarding for the "netdev" family. + endif # NF_TABLES_NETDEV endif # NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c99133..6913454 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o +obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c new file mode 100644 index 0000000..763ebc3 --- /dev/null +++ b/net/netfilter/nft_fwd_netdev.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_fwd_netdev { + enum nft_registers sreg_dev:8; +}; + +static void nft_fwd_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_fwd_netdev *priv = nft_expr_priv(expr); + int oif = regs->data[priv->sreg_dev]; + + nf_dup_netdev_egress(pkt, oif); + regs->verdict.code = NF_DROP; +} + +static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { + [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 }, +}; + +static int nft_fwd_netdev_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fwd_netdev *priv = nft_expr_priv(expr); + + if (tb[NFTA_FWD_SREG_DEV] == NULL) + return -EINVAL; + + priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); +} + +static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; + +static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_fwd_netdev *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_fwd_netdev_type; +static const struct nft_expr_ops nft_fwd_netdev_ops = { + .type = &nft_fwd_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)), + .eval = nft_fwd_netdev_eval, + .init = nft_fwd_netdev_init, + .dump = nft_fwd_netdev_dump, +}; + +static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "fwd", + .ops = &nft_fwd_netdev_ops, + .policy = nft_fwd_netdev_policy, + .maxattr = NFTA_FWD_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fwd_netdev_module_init(void) +{ + return nft_register_expr(&nft_fwd_netdev_type); +} + +static void __exit nft_fwd_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_fwd_netdev_type); +} + +module_init(nft_fwd_netdev_module_init); +module_exit(nft_fwd_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_AF_EXPR(5, "fwd"); -- cgit v1.1 From ad6d950393138830edae2efcc500aa69b467b89c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Jan 2016 22:41:24 +0100 Subject: netfilter: nf_ct_helper: define pr_fmt() Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 17 ++++++++--------- net/netfilter/nf_conntrack_irc.c | 7 ++++--- net/netfilter/nf_conntrack_sane.c | 19 +++++++++---------- net/netfilter/nf_conntrack_sip.c | 5 +++-- net/netfilter/nf_conntrack_tftp.c | 7 ++++--- 5 files changed, 28 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index b666959..883c691 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -505,11 +507,11 @@ skip_nl_seq: different IP address. Simply don't record it for NAT. */ if (cmd.l3num == PF_INET) { - pr_debug("conntrack_ftp: NOT RECORDING: %pI4 != %pI4\n", + pr_debug("NOT RECORDING: %pI4 != %pI4\n", &cmd.u3.ip, &ct->tuplehash[dir].tuple.src.u3.ip); } else { - pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n", + pr_debug("NOT RECORDING: %pI6 != %pI6\n", cmd.u3.ip6, ct->tuplehash[dir].tuple.src.u3.ip6); } @@ -586,8 +588,7 @@ static void nf_conntrack_ftp_fini(void) if (ftp[i][j].me == NULL) continue; - pr_debug("nf_ct_ftp: unregistering helper for pf: %d " - "port: %d\n", + pr_debug("unregistering helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&ftp[i][j]); } @@ -625,14 +626,12 @@ static int __init nf_conntrack_ftp_init(void) else sprintf(ftp[i][j].name, "ftp-%d", ports[i]); - pr_debug("nf_ct_ftp: registering helper for pf: %d " - "port: %d\n", + pr_debug("registering helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&ftp[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_ftp: failed to register" - " helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); + pr_err("failed to register helper for pf: %d port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_ftp_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 0fd2976..8b6da27 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -237,7 +239,7 @@ static int __init nf_conntrack_irc_init(void) int i, ret; if (max_dcc_channels < 1) { - printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n"); + pr_err("max_dcc_channels must not be zero\n"); return -EINVAL; } @@ -267,8 +269,7 @@ static int __init nf_conntrack_irc_init(void) ret = nf_conntrack_helper_register(&irc[i]); if (ret) { - printk(KERN_ERR "nf_ct_irc: failed to register helper " - "for pf: %u port: %u\n", + pr_err("failed to register helper for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); nf_conntrack_irc_fini(); return ret; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 4a2134f..7523a57 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -17,6 +17,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -120,14 +122,14 @@ static int help(struct sk_buff *skb, ct_sane_info->state = SANE_STATE_NORMAL; if (datalen < sizeof(struct sane_reply_net_start)) { - pr_debug("nf_ct_sane: NET_START reply too short\n"); + pr_debug("NET_START reply too short\n"); goto out; } reply = sb_ptr; if (reply->status != htonl(SANE_STATUS_SUCCESS)) { /* saned refused the command */ - pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n", + pr_debug("unsuccessful SANE_STATUS = %u\n", ntohl(reply->status)); goto out; } @@ -148,7 +150,7 @@ static int help(struct sk_buff *skb, &tuple->src.u3, &tuple->dst.u3, IPPROTO_TCP, NULL, &reply->port); - pr_debug("nf_ct_sane: expect: "); + pr_debug("expect: "); nf_ct_dump_tuple(&exp->tuple); /* Can't expect this? Best to drop packet now. */ @@ -178,8 +180,7 @@ static void nf_conntrack_sane_fini(void) for (i = 0; i < ports_c; i++) { for (j = 0; j < 2; j++) { - pr_debug("nf_ct_sane: unregistering helper for pf: %d " - "port: %d\n", + pr_debug("unregistering helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); nf_conntrack_helper_unregister(&sane[i][j]); } @@ -216,14 +217,12 @@ static int __init nf_conntrack_sane_init(void) else sprintf(sane[i][j].name, "sane-%d", ports[i]); - pr_debug("nf_ct_sane: registering helper for pf: %d " - "port: %d\n", + pr_debug("registering helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); ret = nf_conntrack_helper_register(&sane[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_sane: failed to " - "register helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); + pr_err("failed to register helper for pf: %d port: %d\n", + sane[i][j].tuple.src.l3num, ports[i]); nf_conntrack_sane_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 885b4ab..3e06402 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1665,8 +1667,7 @@ static int __init nf_conntrack_sip_init(void) ret = nf_conntrack_helper_register(&sip[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_sip: failed to register" - " helper for pf: %u port: %u\n", + pr_err("failed to register helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); nf_conntrack_sip_fini(); return ret; diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index e68ab4f..36f9640 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -5,6 +5,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -138,9 +140,8 @@ static int __init nf_conntrack_tftp_init(void) ret = nf_conntrack_helper_register(&tftp[i][j]); if (ret) { - printk(KERN_ERR "nf_ct_tftp: failed to register" - " helper for pf: %u port: %u\n", - tftp[i][j].tuple.src.l3num, ports[i]); + pr_err("failed to register helper for pf: %u port: %u\n", + tftp[i][j].tuple.src.l3num, ports[i]); nf_conntrack_tftp_fini(); return ret; } -- cgit v1.1 From d3d20725407955d0bb107939f23535d2e7dadbee Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 4 Jan 2016 14:23:21 +0200 Subject: Bluetooth: hci_bcm: move all Broadcom ACPI IDs to BCM HCI driver The IDs should all be for Broadcom BCM43241 module, and hci_bcm is now the proper driver for them. This removes one of two different ways of handling PM with the module. Cc: Johannes Berg Signed-off-by: Heikki Krogerus Signed-off-by: Marcel Holtmann --- net/rfkill/rfkill-gpio.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 9312722..4b1e3f3 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -163,10 +163,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev) #ifdef CONFIG_ACPI static const struct acpi_device_id rfkill_acpi_match[] = { - { "BCM2E1A", RFKILL_TYPE_BLUETOOTH }, - { "BCM2E3D", RFKILL_TYPE_BLUETOOTH }, - { "BCM2E40", RFKILL_TYPE_BLUETOOTH }, - { "BCM2E64", RFKILL_TYPE_BLUETOOTH }, { "BCM4752", RFKILL_TYPE_GPS }, { "LNV4752", RFKILL_TYPE_GPS }, { }, -- cgit v1.1 From 29663b0cc1d5b9b6e2f6caf41e86c599a0310def Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 Dec 2015 22:36:32 +0100 Subject: mac802154: constify ieee802154_llsec_ops structure The ieee802154_llsec_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- net/mac802154/mac_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 8606da4..3db1634 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -126,7 +126,7 @@ static void mac802154_get_mac_params(struct net_device *dev, params->lbt = wpan_dev->lbt; } -static struct ieee802154_llsec_ops mac802154_llsec_ops = { +static const struct ieee802154_llsec_ops mac802154_llsec_ops = { .get_params = mac802154_get_params, .set_params = mac802154_set_params, .add_key = mac802154_add_key, -- cgit v1.1 From 98f40b3e22aed519bc545ba3cc7d884ede9428c9 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 29 Dec 2015 13:06:59 +0100 Subject: l2tp: rely on ppp layer for skb scrubbing Since 79c441ae505c ("ppp: implement x-netns support"), the PPP layer calls skb_scrub_packet() whenever the skb is received on the PPP device. Manually resetting packet meta-data in the L2TP layer is thus redundant. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index d93f113..652c250 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -230,26 +230,11 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int if (sk->sk_state & PPPOX_BOUND) { struct pppox_sock *po; + l2tp_dbg(session, PPPOL2TP_MSG_DATA, "%s: recv %d byte data frame, passing to ppp\n", session->name, data_len); - /* We need to forget all info related to the L2TP packet - * gathered in the skb as we are going to reuse the same - * skb for the inner packet. - * Namely we need to: - * - reset xfrm (IPSec) information as it applies to - * the outer L2TP packet and not to the inner one - * - release the dst to force a route lookup on the inner - * IP packet since skb->dst currently points to the dst - * of the UDP tunnel - * - reset netfilter information as it doesn't apply - * to the inner packet either - */ - secpath_reset(skb); - skb_dst_drop(skb); - nf_reset(skb); - po = pppox_sk(sk); ppp_input(&po->chan, skb); } else { -- cgit v1.1 From 197c949e7798fbf28cfadc69d9ca0c2abbf93191 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Dec 2015 08:51:12 -0500 Subject: udp: properly support MSG_PEEK with truncated buffers Backport of this upstream commit into stable kernels : 89c22d8c3b27 ("net: Fix skb csum races when peeking") exposed a bug in udp stack vs MSG_PEEK support, when user provides a buffer smaller than skb payload. In this case, skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); returns -EFAULT. This bug does not happen in upstream kernels since Al Viro did a great job to replace this into : skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); This variant is safe vs short buffers. For the time being, instead reverting Herbert Xu patch and add back skb->ip_summed invalid changes, simply store the result of udp_lib_checksum_complete() so that we avoid computing the checksum a second time, and avoid the problematic skb_copy_and_csum_datagram_iovec() call. This patch can be applied on recent kernels as it avoids a double checksumming, then backported to stable kernels as a bug fix. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 ++++-- net/ipv6/udp.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8841e98..ac14ae4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1271,6 +1271,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; bool slow; if (flags & MSG_ERRQUEUE) @@ -1296,11 +1297,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9da3287..00775ee 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -402,6 +402,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); + bool checksum_valid = false; int is_udp4; bool slow; @@ -433,11 +434,12 @@ try_again: */ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) + checksum_valid = !udp_lib_checksum_complete(skb); + if (!checksum_valid) goto csum_copy_err; } - if (skb_csum_unnecessary(skb)) + if (checksum_valid || skb_csum_unnecessary(skb)) err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { -- cgit v1.1 From ef456144da8ef507c8cf504284b6042e9201a05c Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:45 -0500 Subject: soreuseport: define reuseport groups struct sock_reuseport is an optional shared structure referenced by each socket belonging to a reuseport group. When a socket is bound to an address/port not yet in use and the reuseport flag has been set, the structure will be allocated and attached to the newly bound socket. When subsequent calls to bind are made for the same address/port, the shared structure will be updated to include the new socket and the newly bound socket will reference the group structure. Usually, when an incoming packet was destined for a reuseport group, all sockets in the same group needed to be considered before a dispatching decision was made. With this structure, an appropriate socket can be found after looking up just one socket in the group. This shared structure will also allow for more complicated decisions to be made when selecting a socket (eg a BPF filter). This work is based off a similar implementation written by Ying Cai for implementing policy-based reuseport selection. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/Makefile | 2 +- net/core/sock_reuseport.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 net/core/sock_reuseport.c (limited to 'net') diff --git a/net/core/Makefile b/net/core/Makefile index 086b01f..0b835de 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o tso.o + sock_diag.o dev_ioctl.o tso.o sock_reuseport.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c new file mode 100644 index 0000000..963c8d5 --- /dev/null +++ b/net/core/sock_reuseport.c @@ -0,0 +1,173 @@ +/* + * To speed up listener socket lookup, create an array to store all sockets + * listening on the same port. This allows a decision to be made after finding + * the first socket. + */ + +#include +#include + +#define INIT_SOCKS 128 + +static DEFINE_SPINLOCK(reuseport_lock); + +static struct sock_reuseport *__reuseport_alloc(u16 max_socks) +{ + size_t size = sizeof(struct sock_reuseport) + + sizeof(struct sock *) * max_socks; + struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); + + if (!reuse) + return NULL; + + reuse->max_socks = max_socks; + + return reuse; +} + +int reuseport_alloc(struct sock *sk) +{ + struct sock_reuseport *reuse; + + /* bh lock used since this function call may precede hlist lock in + * soft irq of receive path or setsockopt from process context + */ + spin_lock_bh(&reuseport_lock); + WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + "multiple allocations for the same socket"); + reuse = __reuseport_alloc(INIT_SOCKS); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); + return -ENOMEM; + } + + reuse->socks[0] = sk; + reuse->num_socks = 1; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); + + return 0; +} +EXPORT_SYMBOL(reuseport_alloc); + +static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) +{ + struct sock_reuseport *more_reuse; + u32 more_socks_size, i; + + more_socks_size = reuse->max_socks * 2U; + if (more_socks_size > U16_MAX) + return NULL; + + more_reuse = __reuseport_alloc(more_socks_size); + if (!more_reuse) + return NULL; + + more_reuse->max_socks = more_socks_size; + more_reuse->num_socks = reuse->num_socks; + + memcpy(more_reuse->socks, reuse->socks, + reuse->num_socks * sizeof(struct sock *)); + + for (i = 0; i < reuse->num_socks; ++i) + rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, + more_reuse); + + kfree_rcu(reuse, rcu); + return more_reuse; +} + +/** + * reuseport_add_sock - Add a socket to the reuseport group of another. + * @sk: New socket to add to the group. + * @sk2: Socket belonging to the existing reuseport group. + * May return ENOMEM and not add socket to group under memory pressure. + */ +int reuseport_add_sock(struct sock *sk, const struct sock *sk2) +{ + struct sock_reuseport *reuse; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + "socket already in reuseport group"); + + if (reuse->num_socks == reuse->max_socks) { + reuse = reuseport_grow(reuse); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); + return -ENOMEM; + } + } + + reuse->socks[reuse->num_socks] = sk; + /* paired with smp_rmb() in reuseport_select_sock() */ + smp_wmb(); + reuse->num_socks++; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); + + return 0; +} +EXPORT_SYMBOL(reuseport_add_sock); + +void reuseport_detach_sock(struct sock *sk) +{ + struct sock_reuseport *reuse; + int i; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(sk->sk_reuseport_cb, NULL); + + for (i = 0; i < reuse->num_socks; i++) { + if (reuse->socks[i] == sk) { + reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; + reuse->num_socks--; + if (reuse->num_socks == 0) + kfree_rcu(reuse, rcu); + break; + } + } + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_detach_sock); + +/** + * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. + * @sk: First socket in the group. + * @hash: Use this hash to select. + * Returns a socket that should receive the packet (or NULL on error). + */ +struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +{ + struct sock_reuseport *reuse; + struct sock *sk2 = NULL; + u16 socks; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); + + /* if memory allocation failed or add call is not yet complete */ + if (!reuse) + goto out; + + socks = READ_ONCE(reuse->num_socks); + if (likely(socks)) { + /* paired with smp_wmb() in reuseport_add_sock() */ + smp_rmb(); + + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + } + +out: + rcu_read_unlock(); + return sk2; +} +EXPORT_SYMBOL(reuseport_select_sock); -- cgit v1.1 From e32ea7e747271a0abcd37e265005e97cc81d9df5 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:46 -0500 Subject: soreuseport: fast reuseport UDP socket selection Include a struct sock_reuseport instance when a UDP socket binds to a specific address for the first time with the reuseport flag set. When selecting a socket for an incoming UDP packet, use the information available in sock_reuseport if present. This required adding an additional field to the UDP source address equality function to differentiate between exact and wildcard matches. The original use case allowed wildcard matches when checking for existing port uses during bind. The new use case of adding a socket to a reuseport group requires exact address matching. Performance test (using a machine with 2 CPU sockets and a total of 48 cores): Create reuseport groups of varying size. Use one socket from this group per user thread (pinning each thread to a different core) calling recvmmsg in a tight loop. Record number of messages received per second while saturating a 10G link. 10 sockets: 18% increase (~2.8M -> 3.3M pkts/s) 20 sockets: 14% increase (~2.9M -> 3.3M pkts/s) 40 sockets: 13% increase (~3.0M -> 3.4M pkts/s) This work is based off a similar implementation written by Ying Cai for implementing policy-based reuseport selection. Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- net/ipv4/udp.c | 119 +++++++++++++++++++++++++++++++-------- net/ipv6/inet6_connection_sock.c | 4 +- net/ipv6/udp.c | 48 +++++++++++++--- 3 files changed, 138 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ac14ae4..762b01f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -113,6 +113,7 @@ #include #include #include "udp_impl.h" +#include struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -137,7 +138,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2), + const struct sock *sk2, + bool match_wildcard), unsigned int log) { struct sock *sk2; @@ -152,8 +154,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (!sk2->sk_reuseport || !sk->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || !uid_eq(uid, sock_i_uid(sk2))) && - saddr_comp(sk, sk2)) { + saddr_comp(sk, sk2, true)) { if (!bitmap) return 1; __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap); @@ -170,7 +173,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, struct udp_hslot *hslot2, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2, + bool match_wildcard)) { struct sock *sk2; struct hlist_nulls_node *node; @@ -186,8 +190,9 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (!sk2->sk_reuseport || !sk->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || !uid_eq(uid, sock_i_uid(sk2))) && - saddr_comp(sk, sk2)) { + saddr_comp(sk, sk2, true)) { res = 1; break; } @@ -196,6 +201,35 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, return res; } +static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)) +{ + struct net *net = sock_net(sk); + struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); + struct sock *sk2; + + sk_nulls_for_each(sk2, node, &hslot->head) { + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + sk2->sk_family == sk->sk_family && + ipv6_only_sock(sk2) == ipv6_only_sock(sk) && + (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) && + (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && + (*saddr_same)(sk, sk2, false)) { + return reuseport_add_sock(sk, sk2); + } + } + + /* Initial allocation may have already happened via setsockopt */ + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return reuseport_alloc(sk); + return 0; +} + /** * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * @@ -207,7 +241,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, */ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2), + const struct sock *sk2, + bool match_wildcard), unsigned int hash2_nulladdr) { struct udp_hslot *hslot, *hslot2; @@ -290,6 +325,14 @@ found: udp_sk(sk)->udp_port_hash = snum; udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { + if (sk->sk_reuseport && + udp_reuseport_add_sock(sk, hslot, saddr_comp)) { + inet_sk(sk)->inet_num = 0; + udp_sk(sk)->udp_port_hash = 0; + udp_sk(sk)->udp_portaddr_hash ^= snum; + goto fail_unlock; + } + sk_nulls_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -309,13 +352,22 @@ fail: } EXPORT_SYMBOL(udp_lib_get_port); -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_wildcard == false: addresses must be exactly the same, i.e. + * 0.0.0.0 only equals to 0.0.0.0 + */ +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, + bool match_wildcard) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - return (!ipv6_only_sock(sk2) && - (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || - inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); + if (!ipv6_only_sock(sk2)) { + if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr) + return 1; + if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr) + return match_wildcard; + } + return 0; } static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, @@ -459,8 +511,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -478,6 +536,7 @@ begin: if (get_nulls_value(node) != slot2) goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -540,8 +599,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -560,6 +625,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, @@ -587,7 +653,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, + &udp_table); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -1398,6 +1465,8 @@ void udp_lib_unhash(struct sock *sk) hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); if (sk_nulls_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; @@ -1425,22 +1494,28 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); udp_sk(sk)->udp_portaddr_hash = newhash; - if (hslot2 != nhslot2) { + + if (hslot2 != nhslot2 || + rcu_access_pointer(sk->sk_reuseport_cb)) { hslot = udp_hashslot(udptable, sock_net(sk), udp_sk(sk)->udp_port_hash); /* we must lock primary chain too */ spin_lock_bh(&hslot->lock); - - spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); - hslot2->count--; - spin_unlock(&hslot2->lock); - - spin_lock(&nhslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &nhslot2->head); - nhslot2->count++; - spin_unlock(&nhslot2->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); + + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + } spin_unlock_bh(&hslot->lock); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a7ca2cd..36c3f01 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -51,12 +51,12 @@ int inet6_csk_bind_conflict(const struct sock *sk, (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid((struct sock *)sk2))))) { - if (ipv6_rcv_saddr_equal(sk, sk2)) + if (ipv6_rcv_saddr_equal(sk, sk2, true)) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN && - ipv6_rcv_saddr_equal(sk, sk2)) + ipv6_rcv_saddr_equal(sk, sk2, true)) break; } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 00775ee..6204b89 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,14 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) +/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 + * only, and any IPv4 addresses if not IPv6 only + * match_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only + */ +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); int sk2_ipv6only = inet_v6_ipv6only(sk2); @@ -84,16 +92,24 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) - return (!sk2_ipv6only && - (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || - sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { + if (!sk2_ipv6only) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; + } + + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return 1; - if (addr_type2 == IPV6_ADDR_ANY && + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) return 1; - if (addr_type == IPV6_ADDR_ANY && + if (addr_type == IPV6_ADDR_ANY && match_wildcard && !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; @@ -253,8 +269,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -273,6 +295,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -332,8 +355,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -352,6 +381,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, @@ -549,8 +579,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct net *net = dev_net(skb->dev); - sk = __udp6_lib_lookup(net, daddr, uh->dest, - saddr, uh->source, inet6_iif(skb), udptable); + sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, + inet6_iif(skb), udptable); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.1 From 538950a1b7527a0a52ccd9337e3fcd304f027f13 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:47 -0500 Subject: soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF Expose socket options for setting a classic or extended BPF program for use when selecting sockets in an SO_REUSEPORT group. These options can be used on the first socket to belong to a group before bind or on any socket in the group after bind. This change includes refactoring of the existing sk_filter code to allow reuse of the existing BPF filter validation checks. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 121 +++++++++++++++++++++++++++++++++++++--------- net/core/sock.c | 29 +++++++++++ net/core/sock_reuseport.c | 88 +++++++++++++++++++++++++++++++-- net/ipv4/udp.c | 14 +++--- net/ipv4/udp_diag.c | 4 +- net/ipv6/udp.c | 14 +++--- 6 files changed, 229 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index c770196..35e6fed 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -50,6 +50,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1167,17 +1168,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return 0; } -/** - * sk_attach_filter - attach a socket filter - * @fprog: the filter program - * @sk: the socket to use - * - * Attach the user's filter code. We first run some sanity checks on - * it to make sure it does not explode on us later. If an error - * occurs or there is insufficient memory for the filter a negative - * errno code is returned. On success the return is zero. - */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct bpf_prog *old_prog; + int err; + + if (bpf_prog_size(prog->len) > sysctl_optmem_max) + return -ENOMEM; + + if (sk_unhashed(sk)) { + err = reuseport_alloc(sk); + if (err) + return err; + } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) { + /* The socket wasn't bound with SO_REUSEPORT */ + return -EINVAL; + } + + old_prog = reuseport_attach_prog(sk, prog); + if (old_prog) + bpf_prog_destroy(old_prog); + + return 0; +} + +static +struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1185,19 +1201,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) - return -EINVAL; + return ERR_PTR(-EINVAL); prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (copy_from_user(prog->insns, fprog->filter, fsize)) { __bpf_prog_free(prog); - return -EFAULT; + return ERR_PTR(-EFAULT); } prog->len = fprog->len; @@ -1205,13 +1221,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) err = bpf_prog_store_orig_filter(prog, fprog); if (err) { __bpf_prog_free(prog); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog, NULL); + return bpf_prepare_filter(prog, NULL); +} + +/** + * sk_attach_filter - attach a socket filter + * @fprog: the filter program + * @sk: the socket to use + * + * Attach the user's filter code. We first run some sanity checks on + * it to make sure it does not explode on us later. If an error + * occurs or there is insufficient memory for the filter a negative + * errno code is returned. On success the return is zero. + */ +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + struct bpf_prog *prog = __get_filter(fprog, sk); + int err; + if (IS_ERR(prog)) return PTR_ERR(prog); @@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); -int sk_attach_bpf(u32 ufd, struct sock *sk) +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct bpf_prog *prog; + struct bpf_prog *prog = __get_filter(fprog, sk); int err; + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + __bpf_prog_release(prog); + return err; + } + + return 0; +} + +static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); prog = bpf_prog_get(ufd); if (IS_ERR(prog)) - return PTR_ERR(prog); + return prog; if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); - return -EINVAL; + return ERR_PTR(-EINVAL); } + return prog; +} + +int sk_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); @@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + bpf_prog_put(prog); + return err; + } + + return 0; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) #define BPF_LDST_LEN 16U diff --git a/net/core/sock.c b/net/core/sock.c index 565bab7..5127023 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -134,6 +134,7 @@ #include #include +#include #include @@ -932,6 +933,32 @@ set_rcvbuf: } break; + case SO_ATTACH_REUSEPORT_CBPF: + ret = -EINVAL; + if (optlen == sizeof(struct sock_fprog)) { + struct sock_fprog fprog; + + ret = -EFAULT; + if (copy_from_user(&fprog, optval, sizeof(fprog))) + break; + + ret = sk_reuseport_attach_filter(&fprog, sk); + } + break; + + case SO_ATTACH_REUSEPORT_EBPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 ufd; + + ret = -EFAULT; + if (copy_from_user(&ufd, optval, sizeof(ufd))) + break; + + ret = sk_reuseport_attach_bpf(ufd, sk); + } + break; + case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; @@ -1443,6 +1470,8 @@ void sk_destruct(struct sock *sk) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 963c8d5..ae0969c 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -1,10 +1,12 @@ /* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding - * the first socket. + * the first socket. An optional BPF program can also be configured for + * selecting the socket index from the array of available sockets. */ #include +#include #include #define INIT_SOCKS 128 @@ -22,6 +24,7 @@ static struct sock_reuseport *__reuseport_alloc(u16 max_socks) reuse->max_socks = max_socks; + RCU_INIT_POINTER(reuse->prog, NULL); return reuse; } @@ -67,6 +70,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; + more_reuse->prog = reuse->prog; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); @@ -75,6 +79,10 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, more_reuse); + /* Note: we use kfree_rcu here instead of reuseport_free_rcu so + * that reuse and more_reuse can temporarily share a reference + * to prog. + */ kfree_rcu(reuse, rcu); return more_reuse; } @@ -116,6 +124,16 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2) } EXPORT_SYMBOL(reuseport_add_sock); +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; @@ -131,7 +149,7 @@ void reuseport_detach_sock(struct sock *sk) reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; if (reuse->num_socks == 0) - kfree_rcu(reuse, rcu); + call_rcu(&reuse->rcu, reuseport_free_rcu); break; } } @@ -139,15 +157,53 @@ void reuseport_detach_sock(struct sock *sk) } EXPORT_SYMBOL(reuseport_detach_sock); +static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks, + struct bpf_prog *prog, struct sk_buff *skb, + int hdr_len) +{ + struct sk_buff *nskb = NULL; + u32 index; + + if (skb_shared(skb)) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return NULL; + skb = nskb; + } + + /* temporarily advance data past protocol header */ + if (!pskb_pull(skb, hdr_len)) { + consume_skb(nskb); + return NULL; + } + index = bpf_prog_run_save_cb(prog, skb); + __skb_push(skb, hdr_len); + + consume_skb(nskb); + + if (index >= socks) + return NULL; + + return reuse->socks[index]; +} + /** * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * @sk: First socket in the group. - * @hash: Use this hash to select. + * @hash: When no BPF filter is available, use this hash to select. + * @skb: skb to run through BPF filter. + * @hdr_len: BPF filter expects skb data pointer at payload data. If + * the skb does not yet point at the payload, this parameter represents + * how far the pointer needs to advance to reach the payload. * Returns a socket that should receive the packet (or NULL on error). */ -struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len) { struct sock_reuseport *reuse; + struct bpf_prog *prog; struct sock *sk2 = NULL; u16 socks; @@ -158,12 +214,16 @@ struct sock *reuseport_select_sock(struct sock *sk, u32 hash) if (!reuse) goto out; + prog = rcu_dereference(reuse->prog); socks = READ_ONCE(reuse->num_socks); if (likely(socks)) { /* paired with smp_wmb() in reuseport_add_sock() */ smp_rmb(); - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (prog && skb) + sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); + else + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; } out: @@ -171,3 +231,21 @@ out: return sk2; } EXPORT_SYMBOL(reuseport_select_sock); + +struct bpf_prog * +reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog) +{ + struct sock_reuseport *reuse; + struct bpf_prog *old_prog; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + old_prog = rcu_dereference_protected(reuse->prog, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(reuse->prog, prog); + spin_unlock_bh(&reuseport_lock); + + return old_prog; +} +EXPORT_SYMBOL(reuseport_attach_prog); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 762b01f..8353783 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -514,7 +514,7 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -553,7 +553,7 @@ found: */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -602,7 +602,8 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -647,14 +648,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - udptable); + udptable, skb); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table); + &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -702,7 +703,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); + iph->saddr, uh->source, skb->dev->ifindex, udptable, + NULL); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 6116604..df1966f 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #endif else goto out_nosk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6204b89..56fcb55 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,7 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -310,7 +310,8 @@ found: struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -358,7 +359,8 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -407,13 +409,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - udptable); + udptable, skb); } struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); + return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup); @@ -580,7 +582,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable); + inet6_iif(skb), udptable, skb); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.1 From 0d3b7f64c84d53658daf28e2f9772e38acb9340d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Jan 2016 13:19:31 +0200 Subject: Bluetooth: Change eir_has_data_type() to more generic eir_get_data() To make the EIR parsing helper more general purpose, make it return the found data and its length rather than just saying whether the data was present or not. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 6 +++--- net/bluetooth/mgmt.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7554da5..c162af5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3833,9 +3833,9 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, data.ssp_mode = 0x01; if (hci_dev_test_flag(hdev, HCI_MGMT)) - name_known = eir_has_data_type(info->data, - sizeof(info->data), - EIR_NAME_COMPLETE); + name_known = eir_get_data(info->data, + sizeof(info->data), + EIR_NAME_COMPLETE, NULL); else name_known = true; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 621f6fd..3297a4e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7266,7 +7266,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* Copy EIR or advertising data into event */ memcpy(ev->eir, eir, eir_len); - if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV)) + if (dev_class && !eir_get_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, + NULL)) eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); -- cgit v1.1 From 78b781ca0d35191ebf8d8cad8beec810270f0f2e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Jan 2016 13:19:32 +0200 Subject: Bluetooth: Add support for Start Limited Discovery command This patch implements the mgmt Start Limited Discovery command. Most of existing Start Discovery code is reused since the only difference is the presence of a 'limited' flag as part of the discovery state. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 +++++++--- net/bluetooth/mgmt.c | 53 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9997c31..41b5f38 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1737,8 +1737,8 @@ static int le_scan_disable(struct hci_request *req, unsigned long opt) static int bredr_inquiry(struct hci_request *req, unsigned long opt) { u8 length = opt; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; + const u8 giac[3] = { 0x33, 0x8b, 0x9e }; + const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; BT_DBG("%s", req->hdev->name); @@ -1748,7 +1748,12 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) hci_dev_unlock(req->hdev); memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); + + if (req->hdev->discovery.limited) + memcpy(&cp.lap, liac, sizeof(cp.lap)); + else + memcpy(&cp.lap, giac, sizeof(cp.lap)); + cp.length = length; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3297a4e..5a5089c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -103,6 +103,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_OP_START_LIMITED_DISCOVERY, }; static const u16 mgmt_events[] = { @@ -3283,6 +3284,9 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) if (!cmd) cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); + if (!cmd) + cmd = pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev); + if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); @@ -3318,8 +3322,8 @@ static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, return true; } -static int start_discovery(struct sock *sk, struct hci_dev *hdev, - void *data, u16 len) +static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, + u16 op, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; @@ -3331,7 +3335,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_NOT_POWERED, &cp->type, sizeof(cp->type)); goto failed; @@ -3339,15 +3343,14 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, if (hdev->discovery.state != DISCOVERY_STOPPED || hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, + &cp->type, sizeof(cp->type)); goto failed; } if (!discovery_type_is_valid(hdev, cp->type, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, op, status, + &cp->type, sizeof(cp->type)); goto failed; } @@ -3358,8 +3361,12 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; + if (op == MGMT_OP_START_LIMITED_DISCOVERY) + hdev->discovery.limited = true; + else + hdev->discovery.limited = false; - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); + cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -3376,6 +3383,21 @@ failed: return err; } +static int start_discovery(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + return start_discovery_internal(sk, hdev, MGMT_OP_START_DISCOVERY, + data, len); +} + +static int start_limited_discovery(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + return start_discovery_internal(sk, hdev, + MGMT_OP_START_LIMITED_DISCOVERY, + data, len); +} + static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { @@ -6313,6 +6335,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, + { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -7237,6 +7260,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + if (hdev->discovery.limited) { + /* Check for limited discoverable bit */ + if (dev_class) { + if (!(dev_class[1] & 0x20)) + return; + } else { + u8 *flags = eir_get_data(eir, eir_len, EIR_FLAGS, NULL); + if (!flags || !(flags[0] & LE_AD_LIMITED)) + return; + } + } + /* Make sure that the buffer is big enough. The 5 extra bytes * are for the potential CoD field. */ -- cgit v1.1 From d6c0256a60e685214cc8cc2b886809f11efc0084 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:46 +0800 Subject: sctp: add the rhashtable apis for sctp global transport hashtable tranport hashtbale will replace the association hashtable to do the lookup for transport, and then get association by t->assoc, rhashtable apis will be used because of it's resizable, scalable and using rcu. lport + rport + paddr will be the base hashkey to locate the chain, with net to protect one netns from another, then plus the laddr to compare to get the target. this patch will provider the lookup functions: - sctp_epaddr_lookup_transport - sctp_addrs_lookup_transport hash/unhash functions: - sctp_hash_transport - sctp_unhash_transport init/destroy functions: - sctp_transport_hashtable_init - sctp_transport_hashtable_destroy Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index b6493b3..bac8278 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -782,6 +782,137 @@ hit: return ep; } +/* rhashtable for transport */ +struct sctp_hash_cmp_arg { + const union sctp_addr *laddr; + const union sctp_addr *paddr; + const struct net *net; +}; + +static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct sctp_hash_cmp_arg *x = arg->key; + const struct sctp_transport *t = ptr; + struct sctp_association *asoc = t->asoc; + const struct net *net = x->net; + + if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) + return 1; + if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) + return 1; + if (!net_eq(sock_net(asoc->base.sk), net)) + return 1; + if (!sctp_bind_addr_match(&asoc->base.bind_addr, + x->laddr, sctp_sk(asoc->base.sk))) + return 1; + + return 0; +} + +static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct sctp_transport *t = data; + const union sctp_addr *paddr = &t->ipaddr; + const struct net *net = sock_net(t->asoc->base.sk); + u16 lport = htons(t->asoc->base.bind_addr.port); + u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + +static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) +{ + const struct sctp_hash_cmp_arg *x = data; + const union sctp_addr *paddr = x->paddr; + const struct net *net = x->net; + u16 lport = x->laddr->v4.sin_port; + u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + +static const struct rhashtable_params sctp_hash_params = { + .head_offset = offsetof(struct sctp_transport, node), + .hashfn = sctp_hash_key, + .obj_hashfn = sctp_hash_obj, + .obj_cmpfn = sctp_hash_cmp, + .automatic_shrinking = true, +}; + +int sctp_transport_hashtable_init(void) +{ + return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params); +} + +void sctp_transport_hashtable_destroy(void) +{ + rhashtable_destroy(&sctp_transport_hashtable); +} + +void sctp_hash_transport(struct sctp_transport *t) +{ + struct sctp_sockaddr_entry *addr; + struct sctp_hash_cmp_arg arg; + + addr = list_entry(t->asoc->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list); + arg.laddr = &addr->a; + arg.paddr = &t->ipaddr; + arg.net = sock_net(t->asoc->base.sk); + +reinsert: + if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg, + &t->node, sctp_hash_params) == -EBUSY) + goto reinsert; +} + +void sctp_unhash_transport(struct sctp_transport *t) +{ + rhashtable_remove_fast(&sctp_transport_hashtable, &t->node, + sctp_hash_params); +} + +struct sctp_transport *sctp_addrs_lookup_transport( + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr) +{ + struct sctp_hash_cmp_arg arg = { + .laddr = laddr, + .paddr = paddr, + .net = net, + }; + + return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, + sctp_hash_params); +} + +struct sctp_transport *sctp_epaddr_lookup_transport( + const struct sctp_endpoint *ep, + const union sctp_addr *paddr) +{ + struct sctp_sockaddr_entry *addr; + struct net *net = sock_net(ep->base.sk); + + addr = list_entry(ep->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list); + + return sctp_addrs_lookup_transport(net, &addr->a, paddr); +} + /* Insert association into the hash table. */ static void __sctp_hash_established(struct sctp_association *asoc) { -- cgit v1.1 From 4f0087812648b7611157ae22954acfaed820d24e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:47 +0800 Subject: sctp: apply rhashtable api to send/recv path apply lookup apis to two functions, for __sctp_endpoint_lookup_assoc and __sctp_lookup_association, it's invoked in the protection of sock lock, it will be safe, but sctp_lookup_association need to call rcu_read_lock() and to detect the t->dead to protect it. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/associola.c | 5 +++++ net/sctp/endpointola.c | 35 ++++++++--------------------------- net/sctp/input.c | 39 ++++++++++----------------------------- net/sctp/protocol.c | 6 ++++++ 4 files changed, 29 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 559afd0..2bf8ec9 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -383,6 +383,7 @@ void sctp_association_free(struct sctp_association *asoc) list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); list_del_rcu(pos); + sctp_unhash_transport(transport); sctp_transport_free(transport); } @@ -500,6 +501,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, /* Remove this peer from the list. */ list_del_rcu(&peer->transports); + /* Remove this peer from the transport hashtable */ + sctp_unhash_transport(peer); /* Get the first transport of asoc. */ pos = asoc->peer.transport_addr_list.next; @@ -699,6 +702,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Attach the remote transport to our asoc. */ list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; + /* Add this peer into the transport hashtable */ + sctp_hash_transport(peer); /* If we do not yet have a primary path, set one. */ if (!asoc->peer.primary_path) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 9da76ba..8838bf4 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -314,8 +314,8 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, } /* Find the association that goes with this chunk. - * We do a linear search of the associations for this endpoint. - * We return the matching transport address too. + * We lookup the transport from hashtable at first, then get association + * through t->assoc. */ static struct sctp_association *__sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, @@ -323,12 +323,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport **transport) { struct sctp_association *asoc = NULL; - struct sctp_association *tmp; - struct sctp_transport *t = NULL; - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - int hash; - int rport; + struct sctp_transport *t; *transport = NULL; @@ -337,26 +332,12 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( */ if (!ep->base.bind_addr.port) goto out; + t = sctp_epaddr_lookup_transport(ep, paddr); + if (!t || t->asoc->temp) + goto out; - rport = ntohs(paddr->v4.sin_port); - - hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port, - rport); - head = &sctp_assoc_hashtable[hash]; - read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - tmp = sctp_assoc(epb); - if (tmp->ep != ep || rport != tmp->peer.port) - continue; - - t = sctp_assoc_lookup_paddr(tmp, paddr); - if (t) { - asoc = tmp; - *transport = t; - break; - } - } - read_unlock(&head->lock); + *transport = t; + asoc = t->asoc; out: return asoc; } diff --git a/net/sctp/input.c b/net/sctp/input.c index bac8278..6f075d8 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -981,38 +981,19 @@ static struct sctp_association *__sctp_lookup_association( const union sctp_addr *peer, struct sctp_transport **pt) { - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - struct sctp_association *asoc; - struct sctp_transport *transport; - int hash; + struct sctp_transport *t; - /* Optimize here for direct hit, only listening connections can - * have wildcards anyways. - */ - hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port), - ntohs(peer->v4.sin_port)); - head = &sctp_assoc_hashtable[hash]; - read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - asoc = sctp_assoc(epb); - transport = sctp_assoc_is_match(asoc, net, local, peer); - if (transport) - goto hit; - } + t = sctp_addrs_lookup_transport(net, local, peer); + if (!t || t->dead || t->asoc->temp) + return NULL; - read_unlock(&head->lock); + sctp_association_hold(t->asoc); + *pt = t; - return NULL; - -hit: - *pt = transport; - sctp_association_hold(asoc); - read_unlock(&head->lock); - return asoc; + return t->asoc; } -/* Look up an association. BH-safe. */ +/* Look up an association. protected by RCU read lock */ static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, @@ -1021,9 +1002,9 @@ struct sctp_association *sctp_lookup_association(struct net *net, { struct sctp_association *asoc; - local_bh_disable(); + rcu_read_lock(); asoc = __sctp_lookup_association(net, laddr, paddr, transportp); - local_bh_enable(); + rcu_read_unlock(); return asoc; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 010aced..631cfb3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1467,6 +1467,9 @@ static __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } + if (sctp_transport_hashtable_init()) + goto err_thash_alloc; + pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); @@ -1521,6 +1524,8 @@ err_register_defaults: get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); err_bhash_alloc: + sctp_transport_hashtable_destroy(); +err_thash_alloc: kfree(sctp_ep_hashtable); err_ehash_alloc: free_pages((unsigned long)sctp_assoc_hashtable, @@ -1567,6 +1572,7 @@ static __exit void sctp_exit(void) free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); + sctp_transport_hashtable_destroy(); percpu_counter_destroy(&sctp_sockets_allocated); -- cgit v1.1 From 39f66a7dce3213fb0a0c6256929c816df27c7548 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:48 +0800 Subject: sctp: apply rhashtable api to sctp procfs Traversal the transport rhashtable, get the association only once through the condition assoc->peer.primary_path != transport. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/proc.c | 316 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 173 insertions(+), 143 deletions(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0697eda..dfa7eec 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -281,88 +281,136 @@ void sctp_eps_proc_exit(struct net *net) remove_proc_entry("eps", net->sctp.proc_net_sctp); } +struct sctp_ht_iter { + struct seq_net_private p; + struct rhashtable_iter hti; +}; -static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +static struct sctp_transport *sctp_transport_get_next(struct seq_file *seq) { - if (*pos >= sctp_assoc_hashsize) - return NULL; + struct sctp_ht_iter *iter = seq->private; + struct sctp_transport *t; - if (*pos < 0) - *pos = 0; + t = rhashtable_walk_next(&iter->hti); + for (; t; t = rhashtable_walk_next(&iter->hti)) { + if (IS_ERR(t)) { + if (PTR_ERR(t) == -EAGAIN) + continue; + break; + } - if (*pos == 0) - seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " - "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " - "RPORT LADDRS <-> RADDRS " - "HBINT INS OUTS MAXRT T1X T2X RTXC " - "wmema wmemq sndbuf rcvbuf\n"); + if (net_eq(sock_net(t->asoc->base.sk), seq_file_net(seq)) && + t->asoc->peer.primary_path == t) + break; + } - return (void *)pos; + return t; } -static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) +static struct sctp_transport *sctp_transport_get_idx(struct seq_file *seq, + loff_t pos) +{ + void *obj; + + while (pos && (obj = sctp_transport_get_next(seq)) && !IS_ERR(obj)) + pos--; + + return obj; +} + +static int sctp_transport_walk_start(struct seq_file *seq) { + struct sctp_ht_iter *iter = seq->private; + int err; + + err = rhashtable_walk_init(&sctp_transport_hashtable, &iter->hti); + if (err) + return err; + + err = rhashtable_walk_start(&iter->hti); + + return err == -EAGAIN ? 0 : err; } +static void sctp_transport_walk_stop(struct seq_file *seq) +{ + struct sctp_ht_iter *iter = seq->private; + + rhashtable_walk_stop(&iter->hti); + rhashtable_walk_exit(&iter->hti); +} + +static void *sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +{ + int err = sctp_transport_walk_start(seq); + + if (err) + return ERR_PTR(err); + + return *pos ? sctp_transport_get_idx(seq, *pos) : SEQ_START_TOKEN; +} + +static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) +{ + sctp_transport_walk_stop(seq); +} static void *sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - if (++*pos >= sctp_assoc_hashsize) - return NULL; + ++*pos; - return pos; + return sctp_transport_get_next(seq); } /* Display sctp associations (/proc/net/sctp/assocs). */ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; + struct sctp_transport *transport; struct sctp_association *assoc; + struct sctp_ep_common *epb; struct sock *sk; - int hash = *(loff_t *)v; - - if (hash >= sctp_assoc_hashsize) - return -ENOMEM; - head = &sctp_assoc_hashtable[hash]; - local_bh_disable(); - read_lock(&head->lock); - sctp_for_each_hentry(epb, &head->chain) { - assoc = sctp_assoc(epb); - sk = epb->sk; - if (!net_eq(sock_net(sk), seq_file_net(seq))) - continue; - seq_printf(seq, - "%8pK %8pK %-3d %-3d %-2d %-4d " - "%4d %8d %8d %7u %5lu %-5d %5d ", - assoc, sk, sctp_sk(sk)->type, sk->sk_state, - assoc->state, hash, - assoc->assoc_id, - assoc->sndbuf_used, - atomic_read(&assoc->rmem_alloc), - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), - sock_i_ino(sk), - epb->bind_addr.port, - assoc->peer.port); - seq_printf(seq, " "); - sctp_seq_dump_local_addrs(seq, epb); - seq_printf(seq, "<-> "); - sctp_seq_dump_remote_addrs(seq, assoc); - seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " - "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, - assoc->init_retries, assoc->shutdown_retries, - assoc->rtx_data_chunks, - atomic_read(&sk->sk_wmem_alloc), - sk->sk_wmem_queued, - sk->sk_sndbuf, - sk->sk_rcvbuf); - seq_printf(seq, "\n"); + if (v == SEQ_START_TOKEN) { + seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " + "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " + "RPORT LADDRS <-> RADDRS " + "HBINT INS OUTS MAXRT T1X T2X RTXC " + "wmema wmemq sndbuf rcvbuf\n"); + return 0; } - read_unlock(&head->lock); - local_bh_enable(); + + transport = (struct sctp_transport *)v; + assoc = transport->asoc; + epb = &assoc->base; + sk = epb->sk; + + seq_printf(seq, + "%8pK %8pK %-3d %-3d %-2d %-4d " + "%4d %8d %8d %7u %5lu %-5d %5d ", + assoc, sk, sctp_sk(sk)->type, sk->sk_state, + assoc->state, 0, + assoc->assoc_id, + assoc->sndbuf_used, + atomic_read(&assoc->rmem_alloc), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), + epb->bind_addr.port, + assoc->peer.port); + seq_printf(seq, " "); + sctp_seq_dump_local_addrs(seq, epb); + seq_printf(seq, "<-> "); + sctp_seq_dump_remote_addrs(seq, assoc); + seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " + "%8d %8d %8d %8d", + assoc->hbinterval, assoc->c.sinit_max_instreams, + assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->init_retries, assoc->shutdown_retries, + assoc->rtx_data_chunks, + atomic_read(&sk->sk_wmem_alloc), + sk->sk_wmem_queued, + sk->sk_sndbuf, + sk->sk_rcvbuf); + seq_printf(seq, "\n"); return 0; } @@ -378,7 +426,7 @@ static const struct seq_operations sctp_assoc_ops = { static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_assoc_ops, - sizeof(struct seq_net_private)); + sizeof(struct sctp_ht_iter)); } static const struct file_operations sctp_assocs_seq_fops = { @@ -409,112 +457,94 @@ void sctp_assocs_proc_exit(struct net *net) static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) { - if (*pos >= sctp_assoc_hashsize) - return NULL; - - if (*pos < 0) - *pos = 0; + int err = sctp_transport_walk_start(seq); - if (*pos == 0) - seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " - "REM_ADDR_RTX START STATE\n"); + if (err) + return ERR_PTR(err); - return (void *)pos; + return *pos ? sctp_transport_get_idx(seq, *pos) : SEQ_START_TOKEN; } static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - if (++*pos >= sctp_assoc_hashsize) - return NULL; + ++*pos; - return pos; + return sctp_transport_get_next(seq); } static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) { + sctp_transport_walk_stop(seq); } static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; struct sctp_association *assoc; struct sctp_transport *tsp; - int hash = *(loff_t *)v; - if (hash >= sctp_assoc_hashsize) - return -ENOMEM; + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " + "REM_ADDR_RTX START STATE\n"); + return 0; + } - head = &sctp_assoc_hashtable[hash]; - local_bh_disable(); - read_lock(&head->lock); - rcu_read_lock(); - sctp_for_each_hentry(epb, &head->chain) { - if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) + tsp = (struct sctp_transport *)v; + assoc = tsp->asoc; + + list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, + transports) { + if (tsp->dead) continue; - assoc = sctp_assoc(epb); - list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, - transports) { - if (tsp->dead) - continue; + /* + * The remote address (ADDR) + */ + tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); + seq_printf(seq, " "); + /* + * The association ID (ASSOC_ID) + */ + seq_printf(seq, "%d ", tsp->asoc->assoc_id); + + /* + * If the Heartbeat is active (HB_ACT) + * Note: 1 = Active, 0 = Inactive + */ + seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); + + /* + * Retransmit time out (RTO) + */ + seq_printf(seq, "%lu ", tsp->rto); + + /* + * Maximum path retransmit count (PATH_MAX_RTX) + */ + seq_printf(seq, "%d ", tsp->pathmaxrxt); + + /* + * remote address retransmit count (REM_ADDR_RTX) + * Note: We don't have a way to tally this at the moment + * so lets just leave it as zero for the moment + */ + seq_puts(seq, "0 "); + + /* + * remote address start time (START). This is also not + * currently implemented, but we can record it with a + * jiffies marker in a subsequent patch + */ + seq_puts(seq, "0 "); + + /* + * The current state of this destination. I.e. + * SCTP_ACTIVE, SCTP_INACTIVE, ... + */ + seq_printf(seq, "%d", tsp->state); - /* - * The remote address (ADDR) - */ - tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); - seq_printf(seq, " "); - - /* - * The association ID (ASSOC_ID) - */ - seq_printf(seq, "%d ", tsp->asoc->assoc_id); - - /* - * If the Heartbeat is active (HB_ACT) - * Note: 1 = Active, 0 = Inactive - */ - seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); - - /* - * Retransmit time out (RTO) - */ - seq_printf(seq, "%lu ", tsp->rto); - - /* - * Maximum path retransmit count (PATH_MAX_RTX) - */ - seq_printf(seq, "%d ", tsp->pathmaxrxt); - - /* - * remote address retransmit count (REM_ADDR_RTX) - * Note: We don't have a way to tally this at the moment - * so lets just leave it as zero for the moment - */ - seq_puts(seq, "0 "); - - /* - * remote address start time (START). This is also not - * currently implemented, but we can record it with a - * jiffies marker in a subsequent patch - */ - seq_puts(seq, "0 "); - - /* - * The current state of this destination. I.e. - * SCTP_ACTIVE, SCTP_INACTIVE, ... - */ - seq_printf(seq, "%d", tsp->state); - - seq_printf(seq, "\n"); - } + seq_printf(seq, "\n"); } - rcu_read_unlock(); - read_unlock(&head->lock); - local_bh_enable(); - return 0; - } static const struct seq_operations sctp_remaddr_ops = { @@ -533,7 +563,7 @@ void sctp_remaddr_proc_exit(struct net *net) static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &sctp_remaddr_ops, - sizeof(struct seq_net_private)); + sizeof(struct sctp_ht_iter)); } static const struct file_operations sctp_remaddr_seq_fops = { -- cgit v1.1 From b5eff7128366c4a7a9b502097a968ec9cae2bea2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:49 +0800 Subject: sctp: drop the old assoc hashtable of sctp transport hashtable will replace the association hashtable, so association hashtable is not used in sctp any more, so drop the codes about that. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 61 ------------------------------------------------ net/sctp/protocol.c | 30 ++---------------------- net/sctp/sm_sideeffect.c | 2 -- net/sctp/socket.c | 6 +---- 4 files changed, 3 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 6f075d8..d9a6e66 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -913,67 +913,6 @@ struct sctp_transport *sctp_epaddr_lookup_transport( return sctp_addrs_lookup_transport(net, &addr->a, paddr); } -/* Insert association into the hash table. */ -static void __sctp_hash_established(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_ep_common *epb; - struct sctp_hashbucket *head; - - epb = &asoc->base; - - /* Calculate which chain this entry will belong to. */ - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, - asoc->peer.port); - - head = &sctp_assoc_hashtable[epb->hashent]; - - write_lock(&head->lock); - hlist_add_head(&epb->node, &head->chain); - write_unlock(&head->lock); -} - -/* Add an association to the hash. Local BH-safe. */ -void sctp_hash_established(struct sctp_association *asoc) -{ - if (asoc->temp) - return; - - local_bh_disable(); - __sctp_hash_established(asoc); - local_bh_enable(); -} - -/* Remove association from the hash table. */ -static void __sctp_unhash_established(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - - epb = &asoc->base; - - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, - asoc->peer.port); - - head = &sctp_assoc_hashtable[epb->hashent]; - - write_lock(&head->lock); - hlist_del_init(&epb->node); - write_unlock(&head->lock); -} - -/* Remove association from the hash table. Local BH-safe. */ -void sctp_unhash_established(struct sctp_association *asoc) -{ - if (asoc->temp) - return; - - local_bh_disable(); - __sctp_unhash_established(asoc); - local_bh_enable(); -} - /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( struct net *net, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 631cfb3..ab0d538 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1416,24 +1416,6 @@ static __init int sctp_init(void) for (order = 0; (1UL << order) < goal; order++) ; - do { - sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_hashbucket); - if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) - continue; - sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); - } while (!sctp_assoc_hashtable && --order > 0); - if (!sctp_assoc_hashtable) { - pr_err("Failed association hash alloc\n"); - status = -ENOMEM; - goto err_ahash_alloc; - } - for (i = 0; i < sctp_assoc_hashsize; i++) { - rwlock_init(&sctp_assoc_hashtable[i].lock); - INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain); - } - /* Allocate and initialize the endpoint hash table. */ sctp_ep_hashsize = 64; sctp_ep_hashtable = @@ -1470,8 +1452,7 @@ static __init int sctp_init(void) if (sctp_transport_hashtable_init()) goto err_thash_alloc; - pr_info("Hash tables configured (established %d bind %d)\n", - sctp_assoc_hashsize, sctp_port_hashsize); + pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize); sctp_sysctl_register(); @@ -1528,10 +1509,6 @@ err_bhash_alloc: err_thash_alloc: kfree(sctp_ep_hashtable); err_ehash_alloc: - free_pages((unsigned long)sctp_assoc_hashtable, - get_order(sctp_assoc_hashsize * - sizeof(struct sctp_hashbucket))); -err_ahash_alloc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: kmem_cache_destroy(sctp_chunk_cachep); @@ -1565,13 +1542,10 @@ static __exit void sctp_exit(void) sctp_sysctl_unregister(); - free_pages((unsigned long)sctp_assoc_hashtable, - get_order(sctp_assoc_hashsize * - sizeof(struct sctp_hashbucket))); - kfree(sctp_ep_hashtable); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); + kfree(sctp_ep_hashtable); sctp_transport_hashtable_destroy(); percpu_counter_destroy(&sctp_sockets_allocated); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 05cd164..4f170ad3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -866,7 +866,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1269,7 +1268,6 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc = cmd->obj.asoc; BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); - sctp_hash_established(asoc); break; case SCTP_CMD_UPDATE_ASSOC: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b5f4811..9bb80ec 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1228,7 +1228,6 @@ out_free: * To the hash table, try to unhash it, just in case, its a noop * if it wasn't hashed so we're safe */ - sctp_unhash_established(asoc); sctp_association_free(asoc); } return err; @@ -1504,7 +1503,6 @@ static void sctp_close(struct sock *sk, long timeout) * ABORT or SHUTDOWN based on the linger options. */ if (sctp_state(asoc, CLOSED)) { - sctp_unhash_established(asoc); sctp_association_free(asoc); continue; } @@ -1986,10 +1984,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_unlock; out_free: - if (new_asoc) { - sctp_unhash_established(asoc); + if (new_asoc) sctp_association_free(asoc); - } out_unlock: release_sock(sk); -- cgit v1.1 From c79c0666915418f9c0f01a6d0e93179416fb0c9e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:50 +0800 Subject: sctp: remove the local_bh_disable/enable in sctp_endpoint_lookup_assoc sctp_endpoint_lookup_assoc is called in the protection of sock lock there is no need to call local_bh_disable in this function. so remove them. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8838bf4..52838ea 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -317,7 +317,7 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, * We lookup the transport from hashtable at first, then get association * through t->assoc. */ -static struct sctp_association *__sctp_endpoint_lookup_assoc( +struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, const union sctp_addr *paddr, struct sctp_transport **transport) @@ -342,21 +342,6 @@ out: return asoc; } -/* Lookup association on an endpoint based on a peer address. BH-safe. */ -struct sctp_association *sctp_endpoint_lookup_assoc( - const struct sctp_endpoint *ep, - const union sctp_addr *paddr, - struct sctp_transport **transport) -{ - struct sctp_association *asoc; - - local_bh_disable(); - asoc = __sctp_endpoint_lookup_assoc(ep, paddr, transport); - local_bh_enable(); - - return asoc; -} - /* Look for any peeled off association from the endpoint that matches the * given peer address. */ -- cgit v1.1 From a72a5e2d34ec2921c0d9a7545093087e4cb90d0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jan 2016 22:17:55 +0100 Subject: inet: kill unused skb_free op The only user was removed in commit 029f7f3b8701cc7a ("netfilter: ipv6: nf_defrag: avoid/free clone operations"). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ieee802154/6lowpan/reassembly.c | 1 - net/ipv4/inet_fragment.c | 10 +--------- net/ipv4/ip_fragment.c | 1 - net/ipv6/reassembly.c | 1 - 4 files changed, 1 insertion(+), 12 deletions(-) (limited to 'net') diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6b437e8..30d875d 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -624,7 +624,6 @@ int __init lowpan_net_frag_init(void) lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; - lowpan_frags.skb_free = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index fe144da..3a88b0c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -285,14 +285,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) } EXPORT_SYMBOL(inet_frag_kill); -static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb) -{ - if (f->skb_free) - f->skb_free(skb); - kfree_skb(skb); -} - void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) { struct sk_buff *fp; @@ -309,7 +301,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) struct sk_buff *xp = fp->next; sum_truesize += fp->truesize; - frag_kfree_skb(nf, f, fp); + kfree_skb(fp); fp = xp; } sum = sum_truesize + f->qsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1fe55ae..3f00810 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -891,7 +891,6 @@ void __init ipfrag_init(void) ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; - ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 45f5ae5..18f3498 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -755,7 +755,6 @@ int __init ipv6_frag_init(void) ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; - ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; -- cgit v1.1 From 1134158ba3d656b8dbc79a23d482129a531ba0ae Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 5 Jan 2016 15:08:07 -0500 Subject: soreuseport: pass skb to secondary UDP socket lookup This socket-lookup path did not pass along the skb in question in my original BPF-based socket selection patch. The skb in the udpN_lib_lookup2 path can be used for BPF-based socket selection just like it is in the 'traditional' udpN_lib_lookup path. udpN_lib_lookup2 kicks in when there are greater than 10 sockets in the same hlist slot. Coincidentally, I chose 10 sockets per reuseport group in my functional test, so the lookup2 path was not excersised. This adds an additional set of tests with 20 sockets. Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") Fixes: 3ca8e4029969 ("soreuseport: BPF selection functional test") Suggested-by: Eric Dumazet Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 ++++++---- net/ipv6/udp.c | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8353783..3a66731 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -493,7 +493,8 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2) + struct udp_hslot *hslot2, unsigned int slot2, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -514,7 +515,8 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, NULL, 0); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -573,7 +575,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); if (!result) { hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; @@ -583,7 +585,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, htonl(INADDR_ANY), hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); } rcu_read_unlock(); return result; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 56fcb55..5d2c2af 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -251,7 +251,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2) + struct udp_hslot *hslot2, unsigned int slot2, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -272,7 +273,8 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, NULL, 0); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -331,7 +333,7 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); if (!result) { hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; @@ -341,7 +343,7 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, &in6addr_any, hnum, dif, - hslot2, slot2); + hslot2, slot2, skb); } rcu_read_unlock(); return result; -- cgit v1.1 From 00ce3a15d811978fcb204a1a3f5f8c059096fa5e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 5 Jan 2016 10:57:13 -0500 Subject: soreuseport: change consume_skb to kfree_skb in error case Fixes: 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") Suggested-by: Daniel Borkmann Signed-off-by: Craig Gallek Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/sock_reuseport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index ae0969c..1df98c5 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -173,7 +173,7 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks, /* temporarily advance data past protocol header */ if (!pskb_pull(skb, hdr_len)) { - consume_skb(nskb); + kfree_skb(nskb); return NULL; } index = bpf_prog_run_save_cb(prog, skb); -- cgit v1.1 From 787b306cf3296bdce5c8559206b237c1ae107484 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Jan 2016 14:38:40 +0100 Subject: Bluetooth: avoid rebuilding hci_sock all the time Instead, allow using string formatting with send_monitor_note() and access init_utsname(). Signed-off-by: Johannes Berg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 41f579b..1298d72 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -25,9 +25,8 @@ /* Bluetooth HCI sockets. */ #include +#include #include -#include -#include #include #include @@ -385,17 +384,26 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } -static void send_monitor_note(struct sock *sk, const char *text) +static void __printf(2, 3) +send_monitor_note(struct sock *sk, const char *fmt, ...) { - size_t len = strlen(text); + size_t len; struct hci_mon_hdr *hdr; struct sk_buff *skb; + va_list args; + + va_start(args, fmt); + len = vsnprintf(NULL, 0, fmt, args); + va_end(args); skb = bt_skb_alloc(len + 1, GFP_ATOMIC); if (!skb) return; - strcpy(skb_put(skb, len + 1), text); + va_start(args, fmt); + vsprintf(skb_put(skb, len), fmt, args); + *skb_put(skb, 1) = 0; + va_end(args); __net_timestamp(skb); @@ -897,10 +905,11 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); - send_monitor_note(sk, "Linux version " UTS_RELEASE - " (" UTS_MACHINE ")"); - send_monitor_note(sk, "Bluetooth subsystem version " - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Linux version %s (%s)", + init_utsname()->release, + init_utsname()->machine); + send_monitor_note(sk, "Bluetooth subsystem version %s", + BT_SUBSYS_VERSION); send_monitor_replay(sk); atomic_inc(&monitor_promisc); -- cgit v1.1 From 08474cc1e6ea71237cab7e4a651a623c9dea1084 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:04 +0100 Subject: bridge: Propagate vlan add failure to user Disallow adding interfaces to a bridge when vlan filtering operation failed. Send the failure code to the user. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_if.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8d1d4a2..c367b3e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -511,8 +511,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); - if (nbp_vlan_init(p)) + err = nbp_vlan_init(p); + if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); + goto err6; + } spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); @@ -533,6 +536,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; +err6: + list_del_rcu(&p->list); + br_fdb_delete_by_port(br, p, 0, 1); + nbp_update_port_count(br); + netdev_upper_dev_unlink(dev, br->dev); + err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); -- cgit v1.1 From 6b72a770202a0ad843312436dd50ed4690d7cc65 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:06 +0100 Subject: bridge: add vlan filtering change notification Notifying hardware about bridge vlan-aware changes. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 66c4549..190fb33 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -626,9 +626,21 @@ void br_recalculate_fwd_mask(struct net_bridge *br) int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { + struct switchdev_attr attr = { + .orig_dev = br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_filtering = val, + }; + int err; + if (br->vlan_enabled == val) return 0; + err = switchdev_port_attr_set(br->dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + br->vlan_enabled = val; br_manage_promisc(br); recalculate_group_addr(br); @@ -639,13 +651,15 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) { + int err; + if (!rtnl_trylock()) return restart_syscall(); - __br_vlan_filter_toggle(br, val); + err = __br_vlan_filter_toggle(br, val); rtnl_unlock(); - return 0; + return err; } int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) -- cgit v1.1 From 404cdbf0894a0707dd19179d2e21a3ab37f33f54 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:07 +0100 Subject: bridge: add vlan filtering change for new bridged device Notifying hardware about newly bridged port vlan-aware changes. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 190fb33..85e43af 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -907,6 +907,12 @@ err_rhtbl: int nbp_vlan_init(struct net_bridge_port *p) { + struct switchdev_attr attr = { + .orig_dev = p->br->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.vlan_filtering = p->br->vlan_enabled, + }; struct net_bridge_vlan_group *vg; int ret = -ENOMEM; @@ -914,6 +920,10 @@ int nbp_vlan_init(struct net_bridge_port *p) if (!vg) goto out; + ret = switchdev_port_attr_set(p->dev, &attr); + if (ret && ret != -EOPNOTSUPP) + goto err_vlan_enabled; + ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); if (ret) goto err_rhtbl; @@ -933,6 +943,7 @@ err_vlan_add: RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); rhashtable_destroy(&vg->vlan_hash); +err_vlan_enabled: err_rhtbl: kfree(vg); -- cgit v1.1 From c7f5d105495a38ed09e70d825f75d9d7d5407264 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 5 Nov 2015 11:34:57 -0500 Subject: net: Add eth_platform_get_mac_address() helper. A repeating pattern in drivers has become to use OF node information and, if not found, platform specific host information to extract the ethernet address for a given device. Currently this is done with a call to of_get_mac_address() and then some ifdef'd stuff for SPARC. Consolidate this into a portable routine, and provide the arch_get_platform_mac_address() weak function hook for all architectures to implement if they want. Signed-off-by: David S. Miller --- net/ethernet/eth.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 9e63f25..1038717 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -485,3 +487,32 @@ static int __init eth_offload_init(void) } fs_initcall(eth_offload_init); + +unsigned char * __weak arch_get_platform_mac_address(void) +{ + return NULL; +} + +int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) +{ + const unsigned char *addr; + struct device_node *dp; + + if (dev_is_pci(dev)) + dp = pci_device_to_OF_node(to_pci_dev(dev)); + else + dp = dev->of_node; + + addr = NULL; + if (dp) + addr = of_get_mac_address(dp); + if (!addr) + addr = arch_get_platform_mac_address(); + + if (!addr) + return -ENODEV; + + ether_addr_copy(mac_addr, addr); + return 0; +} +EXPORT_SYMBOL(eth_platform_get_mac_address); -- cgit v1.1 From 2220943a21e26d97d7fd8f83c004b947326b469d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:13 +0100 Subject: phy: Centralise print about attached phy Many Ethernet drivers contain the same netdev_info() print statement about the attached phy. Move it into the phy device code. Additionally add a varargs function which can be used to append additional information. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1e9e942..5f45e68 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1080,11 +1080,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret); return ret; } - } else { - netdev_info(slave_dev, "attached PHY at address %d [%s]\n", - p->phy->addr, p->phy->drv->name); } + phy_attached_info(p->phy); + return 0; } -- cgit v1.1 From e5a03bfd873c29eb786655ef2e95e53ed242b404 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:16 +0100 Subject: phy: Add an mdio_device structure Not all devices attached to an MDIO bus are phys. So add an mdio_device structure to represent the generic parts of an mdio device, and place this structure into the phy_device. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 208d1b2..fa4daba 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -439,7 +439,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (of_phy_is_fixed_link(port_dn)) { phydev = of_phy_find_device(port_dn); if (phydev) { - int addr = phydev->addr; + int addr = phydev->mdio.addr; phy_device_free(phydev); of_node_put(port_dn); -- cgit v1.1 From 7f854420fbfe9d49afe2ffb1df052cfe8e215541 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:18 +0100 Subject: phy: Add API for {un}registering an mdio device to a bus. Rather than have drivers directly manipulate the mii_bus structure, provide and API for registering and unregistering devices on an MDIO bus, and performing lookups. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 5f45e68..2771713 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -997,7 +998,7 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, { struct dsa_switch *ds = p->parent; - p->phy = ds->slave_mii_bus->phy_map[addr]; + p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); if (!p->phy) { netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; -- cgit v1.1 From 0071f56e46dadb88dc3ad1f8d9cf9c3ae014735d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 6 Jan 2016 20:11:20 +0100 Subject: dsa: Register netdev before phy When the phy is connected, an info message is printed. If the netdev it is attached to has not been registered yet, the name 'uninitialised' in the output. By registering the netdev first, then connecting they phy, we can avoid this. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2771713..40b9ca7 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1189,13 +1189,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, p->old_link = -1; p->old_duplex = -1; - ret = dsa_slave_phy_setup(p, slave_dev); - if (ret) { - netdev_err(master, "error %d setting up slave phy\n", ret); - free_netdev(slave_dev); - return ret; - } - ds->ports[port] = slave_dev; ret = register_netdev(slave_dev); if (ret) { @@ -1209,6 +1202,13 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); + ret = dsa_slave_phy_setup(p, slave_dev); + if (ret) { + netdev_err(master, "error %d setting up slave phy\n", ret); + free_netdev(slave_dev); + return ret; + } + return 0; } -- cgit v1.1 From 60d2c7f9ab1cac8b2d44307b660eb7813091dbf0 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:28:05 +0900 Subject: netfilter: nfnetlink_queue: validate dependencies to avoid breaking atomicity Check that dependencies are fulfilled before updating the queue instance, otherwise we can leave things in intermediate state on errors in nfqnl_recv_config(). Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 72 ++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3d1f16c..fe360f7 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1113,6 +1113,7 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, struct nfqnl_instance *queue; struct nfqnl_msg_config_cmd *cmd = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); + __u32 flags = 0, mask = 0; int ret = 0; if (nfqa[NFQA_CFG_CMD]) { @@ -1125,6 +1126,29 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, } } + /* Check if we support these flags in first place, dependencies should + * be there too not to break atomicity. + */ + if (nfqa[NFQA_CFG_FLAGS]) { + if (!nfqa[NFQA_CFG_MASK]) { + /* A mask is needed to specify which flags are being + * changed. + */ + return -EINVAL; + } + + flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); + mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); + + if (flags >= NFQA_CFG_F_MAX) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (flags & mask & NFQA_CFG_F_SECCTX) + return -EOPNOTSUPP; +#endif + } + rcu_read_lock(); queue = instance_lookup(q, queue_num); if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { @@ -1162,60 +1186,28 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, } } + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + if (nfqa[NFQA_CFG_PARAMS]) { - struct nfqnl_msg_config_params *params; + struct nfqnl_msg_config_params *params = + nla_data(nfqa[NFQA_CFG_PARAMS]); - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - params = nla_data(nfqa[NFQA_CFG_PARAMS]); nfqnl_set_mode(queue, params->copy_mode, ntohl(params->copy_range)); } if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { - __be32 *queue_maxlen; + __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); spin_lock_bh(&queue->lock); queue->queue_maxlen = ntohl(*queue_maxlen); spin_unlock_bh(&queue->lock); } if (nfqa[NFQA_CFG_FLAGS]) { - __u32 flags, mask; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - - if (!nfqa[NFQA_CFG_MASK]) { - /* A mask is needed to specify which flags are being - * changed. - */ - ret = -EINVAL; - goto err_out_unlock; - } - - flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); - mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); - - if (flags >= NFQA_CFG_F_MAX) { - ret = -EOPNOTSUPP; - goto err_out_unlock; - } -#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) - if (flags & mask & NFQA_CFG_F_SECCTX) { - ret = -EOPNOTSUPP; - goto err_out_unlock; - } -#endif spin_lock_bh(&queue->lock); queue->flags &= ~mask; queue->flags |= flags & mask; -- cgit v1.1 From 17bc6b4884340b045e779be38ba9f574256866a2 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:29:54 +0900 Subject: netfilter: nfnetlink_queue: don't handle options after unbind This patch stops processing after destroying a queue instance. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index fe360f7..57951ce 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1176,7 +1176,7 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, goto err_out_unlock; } instance_destroy(q, queue); - break; + goto err_out_unlock; case NFQNL_CFG_CMD_PF_BIND: case NFQNL_CFG_CMD_PF_UNBIND: break; -- cgit v1.1 From 21c3c971d1eb5d5598ddb1eda2fc3e4d2c992182 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:31:40 +0900 Subject: netfilter: nfnetlink_queue: just returns error for unknown command This patch stops processing options for unknown command. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 57951ce..c1f6df4 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1182,7 +1182,7 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, break; default: ret = -ENOTSUPP; - break; + goto err_out_unlock; } } -- cgit v1.1 From 71b2e5f5ca3b163b90e487a96fd0cabbaf16792b Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:32:59 +0900 Subject: netfilter: nfnetlink_queue: autoload nf_conntrack_netlink module NFQA_CFG_F_CONNTRACK config flag This patch enables to load nf_conntrack_netlink module if NFQA_CFG_F_CONNTRACK config flag is specified. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index c1f6df4..1d39365 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1147,6 +1147,17 @@ static int nfqnl_recv_config(struct net *net, struct sock *ctnl, if (flags & mask & NFQA_CFG_F_SECCTX) return -EOPNOTSUPP; #endif + if ((flags & mask & NFQA_CFG_F_CONNTRACK) && + !rcu_access_pointer(nfnl_ct_hook)) { +#ifdef CONFIG_MODULES + nfnl_unlock(NFNL_SUBSYS_QUEUE); + request_module("ip_conntrack_netlink"); + nfnl_lock(NFNL_SUBSYS_QUEUE); + if (rcu_access_pointer(nfnl_ct_hook)) + return -EAGAIN; +#endif + return -EOPNOTSUPP; + } } rcu_read_lock(); -- cgit v1.1 From eb075954e9fde114f57adc39a9ea6d379c13f81e Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Tue, 5 Jan 2016 09:34:34 +0900 Subject: netfilter: nfnetlink_log: just returns error for unknown command This patch stops processing options for unknown command. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6a57f10..8ca9320 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -888,7 +888,7 @@ static int nfulnl_recv_config(struct net *net, struct sock *ctnl, goto out_put; default: ret = -ENOTSUPP; - break; + goto out_put; } } else if (!inst) { ret = -ENODEV; -- cgit v1.1 From e6d8ecac9e68265aee9be711c5bd29406129666f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Falgueras=20Garc=C3=ADa?= Date: Tue, 5 Jan 2016 14:03:32 +0100 Subject: netfilter: nf_tables: Add new attributes into nft_set to store user data. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User data is stored at after 'nft_set_ops' private data into 'data[]' flexible array. The field 'udata' points to user data and 'udlen' stores its length. Add new flag NFTA_SET_USERDATA. Signed-off-by: Carlos Falgueras García Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f5c3971..2011977 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2323,6 +2323,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_ID] = { .type = NLA_U32 }, [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, + [NFTA_SET_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2482,6 +2484,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata)) + goto nla_put_failure; + desc = nla_nest_start(skb, NFTA_SET_DESC); if (desc == NULL) goto nla_put_failure; @@ -2691,6 +2696,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, u64 timeout; u32 ktype, dtype, flags, policy, gc_int; struct nft_set_desc desc; + unsigned char *udata; + u16 udlen; int err; if (nla[NFTA_SET_TABLE] == NULL || @@ -2803,12 +2810,16 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(ops)) return PTR_ERR(ops); + udlen = 0; + if (nla[NFTA_SET_USERDATA]) + udlen = nla_len(nla[NFTA_SET_USERDATA]); + size = 0; if (ops->privsize != NULL) size = ops->privsize(nla); err = -ENOMEM; - set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (set == NULL) goto err1; @@ -2817,6 +2828,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (err < 0) goto err2; + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + INIT_LIST_HEAD(&set->bindings); write_pnet(&set->pnet, net); set->ops = ops; @@ -2827,6 +2844,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->flags = flags; set->size = desc.size; set->policy = policy; + set->udlen = udlen; + set->udata = udata; set->timeout = timeout; set->gc_int = gc_int; -- cgit v1.1 From ce1e7989d989e36ee3b032d46aab28b7d5e30428 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 8 Jan 2016 10:29:12 +0100 Subject: netfilter: nft_byteorder: provide 64bit le/be conversion Needed to convert the (64bit) conntrack counters to BE ordering. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_byteorder.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index fde5145..383c171 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include #include #include #include @@ -39,6 +40,27 @@ static void nft_byteorder_eval(const struct nft_expr *expr, d = (void *)dst; switch (priv->size) { + case 8: { + u64 src64; + + switch (priv->op) { + case NFT_BYTEORDER_NTOH: + for (i = 0; i < priv->len / 8; i++) { + src64 = get_unaligned_be64(&src[i]); + src64 = be64_to_cpu((__force __be64)src64); + put_unaligned_be64(src64, &dst[i]); + } + break; + case NFT_BYTEORDER_HTON: + for (i = 0; i < priv->len / 8; i++) { + src64 = get_unaligned_be64(&src[i]); + src64 = (__force u64)cpu_to_be64(src64); + put_unaligned_be64(src64, &dst[i]); + } + break; + } + break; + } case 4: switch (priv->op) { case NFT_BYTEORDER_NTOH: @@ -101,6 +123,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, switch (priv->size) { case 2: case 4: + case 8: break; default: return -EINVAL; -- cgit v1.1 From 48f66c905a976bf0ff092fc24f08d9addd82a245 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Jan 2016 21:34:24 +0100 Subject: netfilter: nft_ct: add byte/packet counter support If the accounting extension isn't present, we'll return a counter value of 0. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 8cbca34..6f74109 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,18 @@ struct nft_ct { }; }; +static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c, + enum nft_ct_keys k, + enum ip_conntrack_dir d) +{ + if (d < IP_CT_DIR_MAX) + return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) : + atomic64_read(&c[d].packets); + + return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) + + nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY); +} + static void nft_ct_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -114,6 +127,17 @@ static void nft_ct_get_eval(const struct nft_expr *expr, NF_CT_LABELS_MAX_SIZE - size); return; } + case NFT_CT_BYTES: /* fallthrough */ + case NFT_CT_PKTS: { + const struct nf_conn_acct *acct = nf_conn_acct_find(ct); + u64 count = 0; + + if (acct) + count = nft_ct_get_eval_counter(acct->counter, + priv->key, priv->dir); + memcpy(dest, &count, sizeof(count)); + return; + } #endif default: break; @@ -291,6 +315,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, return -EINVAL; len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all); break; + case NFT_CT_BYTES: + case NFT_CT_PKTS: + /* no direction? return sum of original + reply */ + if (tb[NFTA_CT_DIRECTION] == NULL) + priv->dir = IP_CT_DIR_MAX; + len = sizeof(u64); + break; default: return -EOPNOTSUPP; } @@ -373,6 +404,13 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) case NFT_CT_PROTO_DST: if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) goto nla_put_failure; + break; + case NFT_CT_BYTES: + case NFT_CT_PKTS: + if (priv->dir < IP_CT_DIR_MAX && + nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + break; default: break; } -- cgit v1.1 From 30d3d83a7dcef7a26c5c48e548bdd6a76754cbcd Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 6 Jan 2016 17:22:45 -0500 Subject: ipv4: fix endianness warnings in ip_tunnel_core.c Eliminate endianness mismatch warnings (reported by sparse) in this file by using appropriate nla_put_*()/nla_get_*() calls. Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index eb52ce9..859d415 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -251,7 +251,7 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info = lwt_tun_info(new_state); if (tb[LWTUNNEL_IP_ID]) - tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP_ID]); + tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]); if (tb[LWTUNNEL_IP_DST]) tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]); @@ -266,7 +266,7 @@ static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]); if (tb[LWTUNNEL_IP_FLAGS]) - tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP_FLAGS]); + tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX; tun_info->options_len = 0; @@ -281,12 +281,12 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_u64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) || nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) || nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) || nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) || - nla_put_u16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) + nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; return 0; @@ -346,7 +346,7 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info = lwt_tun_info(new_state); if (tb[LWTUNNEL_IP6_ID]) - tun_info->key.tun_id = nla_get_u64(tb[LWTUNNEL_IP6_ID]); + tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]); if (tb[LWTUNNEL_IP6_DST]) tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]); @@ -361,7 +361,7 @@ static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr, tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]); if (tb[LWTUNNEL_IP6_FLAGS]) - tun_info->key.tun_flags = nla_get_u16(tb[LWTUNNEL_IP6_FLAGS]); + tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]); tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6; tun_info->options_len = 0; @@ -376,12 +376,12 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb, { struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate); - if (nla_put_u64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || + if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) || nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) || nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) || nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.tos) || nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.ttl) || - nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) + nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags)) return -ENOMEM; return 0; -- cgit v1.1 From ad64b8be71e3a37ea43745aa69817c4bcd489987 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 6 Jan 2016 17:22:47 -0500 Subject: ipv4: eliminate lock count warnings in ping.c Add lock release/acquire annotations to ping_seq_start() and ping_seq_stop() to satisfy sparse. Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e89094a..c117b21 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1063,6 +1063,7 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) + __acquires(ping_table.lock) { struct ping_iter_state *state = seq->private; state->bucket = 0; @@ -1094,6 +1095,7 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) + __releases(ping_table.lock) { read_unlock_bh(&ping_table.lock); } -- cgit v1.1 From 4a4d045eb2c174472b68f366108bf76f1802f803 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 23 Nov 2015 20:30:15 +0100 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ebd8af0..da9f16c 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2015.2" +#define BATADV_SOURCE_VERSION "2016.0" #endif /* B.A.T.M.A.N. parameters */ -- cgit v1.1 From 008a374487070a391c12aa39288fd8511f822cab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 3 Nov 2015 19:20:34 +0100 Subject: batman-adv: Fix lockdep annotation of batadv_tlv_container_remove The function handles tlv containers and not tlv handlers. Thus the lockdep_assert_held has to check for the container_list lock. Fixes: 2c72d655b044 ("batman-adv: Annotate deleting functions with external lock via lockdep") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5dbcb2e..95fd418 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -747,7 +747,7 @@ static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, struct batadv_tvlv_container *tvlv) { - lockdep_assert_held(&bat_priv->tvlv.handler_list_lock); + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); if (!tvlv) return; -- cgit v1.1 From 143d157c9ecfa09ed777bf33635eb27fabce3e0a Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 9 Aug 2015 23:56:50 +0800 Subject: batman-adv: remove leftovers of unused BATADV_PRIMARIES_FIRST_HOP flag Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 3 --- net/batman-adv/packet.h | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5677169..2467024 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -361,7 +361,6 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; batadv_ogm_packet->ttl = BATADV_TTL; } @@ -842,8 +841,6 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, "Forwarding packet: tq: %i, ttl: %i\n", batadv_ogm_packet->tq, batadv_ogm_packet->ttl); - /* switch of primaries first hop flag when forwarding */ - batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP; if (is_single_hop_neigh) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 11f996b..0558e32 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -72,8 +72,7 @@ enum batadv_subtype { * enum batadv_iv_flags - flags used in B.A.T.M.A.N. IV OGM packets * @BATADV_NOT_BEST_NEXT_HOP: flag is set when ogm packet is forwarded and was * previously received from someone else than the best neighbor. - * @BATADV_PRIMARIES_FIRST_HOP: flag is set when the primary interface address - * is used, and the packet travels its first hop. + * @BATADV_PRIMARIES_FIRST_HOP: flag unused. * @BATADV_DIRECTLINK: flag is for the first hop or if rebroadcasted from a * one hop neighbor on the interface where it was originally received. */ -- cgit v1.1 From d68081a24081f9a1910a41778a8411d924255471 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 9 Nov 2015 16:20:52 +0100 Subject: batman-adv: purge bridge loop avoidance when its disabled When bridge loop avoidance is disabled through sysfs, the internal datastructures are not disabled, but only BLA operations are disabled. To be sure that they are removed, purge the data immediately. That is especially useful if a firmwares network state is changed, and the BLA wait periods should restart on the new network. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 20 ++++++++++++++++++++ net/batman-adv/bridge_loop_avoidance.h | 2 ++ net/batman-adv/sysfs.c | 4 +++- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 99dcae3..d5d71ac 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1168,6 +1168,26 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, } } +/** + * batadv_bla_status_update - purge bla interfaces if necessary + * @net_dev: the soft interface net device + */ +void batadv_bla_status_update(struct net_device *net_dev) +{ + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + return; + + /* this function already purges everything when bla is disabled, + * so just call that one. + */ + batadv_bla_update_orig_address(bat_priv, primary_if, primary_if); + batadv_hardif_free_ref(primary_if); +} + /* periodic work to do: * * purge structures when they are too old * * send announcements diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 025152b..7ea199b 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -22,6 +22,7 @@ #include +struct net_device; struct seq_file; struct sk_buff; @@ -42,6 +43,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct batadv_hard_iface *oldif); +void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 9de3c88..48e2aad 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -40,6 +40,7 @@ #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "network-coding.h" #include "packet.h" @@ -549,7 +550,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); #ifdef CONFIG_BATMAN_ADV_BLA -BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL); +BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, + batadv_bla_status_update); #endif #ifdef CONFIG_BATMAN_ADV_DAT BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, -- cgit v1.1 From e1544f3c87778ab4af9689d571570d6abfd2f6c2 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 9 Nov 2015 16:20:53 +0100 Subject: batman-adv: increase BLA wait periods to 6 If networks take a long time to come up, e.g. due to lossy links, then the bridge loop avoidance wait time to suppress broadcasts may not wait long enough and detect a backbone before the mesh is brought up. Increasing the wait period further to 60 seconds makes this scenario less likely. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index da9f16c..9dbd910 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -109,7 +109,7 @@ #define BATADV_MAX_AGGREGATION_MS 100 #define BATADV_BLA_PERIOD_LENGTH 10000 /* 10 seconds */ -#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 3) +#define BATADV_BLA_BACKBONE_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 6) #define BATADV_BLA_CLAIM_TIMEOUT (BATADV_BLA_PERIOD_LENGTH * 10) #define BATADV_BLA_WAIT_PERIODS 3 -- cgit v1.1 From 9e728e84389ba8317d1444bdf256e34ad467f3da Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 17 Nov 2015 14:11:26 +0100 Subject: batman-adv: only call post function if something changed Currently, the post function is also called on errors or if there were no changes, which is redundant for the functions currently using these facilities. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/sysfs.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 48e2aad..fe87777 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -242,10 +242,13 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \ static int batadv_store_bool_attr(char *buff, size_t count, struct net_device *net_dev, - const char *attr_name, atomic_t *attr) + const char *attr_name, atomic_t *attr, + bool *changed) { int enabled = -1; + *changed = false; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -272,6 +275,8 @@ static int batadv_store_bool_attr(char *buff, size_t count, atomic_read(attr) == 1 ? "enabled" : "disabled", enabled == 1 ? "enabled" : "disabled"); + *changed = true; + atomic_set(attr, (unsigned int)enabled); return count; } @@ -282,11 +287,12 @@ __batadv_store_bool_attr(char *buff, size_t count, struct attribute *attr, atomic_t *attr_store, struct net_device *net_dev) { + bool changed; int ret; ret = batadv_store_bool_attr(buff, count, net_dev, attr->name, - attr_store); - if (post_func && ret) + attr_store, &changed); + if (post_func && changed) post_func(net_dev); return ret; -- cgit v1.1 From d737ccbed3e62dd45d631cf69183de005144d05b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 13 Sep 2015 09:44:45 +0200 Subject: batman-adv: Add function to convert string to batadv throughput The code to convert the throughput information from a string to the batman-adv internal (100Kibit/s) representation is duplicated in batadv_parse_gw_bandwidth. Move this functionality to its own function batadv_parse_throughput to reduce the code complexity. Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/gateway_common.c | 117 +++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 0cb5e6b..b51bfac 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -31,27 +31,23 @@ #include "packet.h" /** - * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download - * and upload bandwidth information + * batadv_parse_throughput - parse supplied string buffer to extract throughput + * information * @net_dev: the soft interface net device * @buff: string buffer to parse - * @down: pointer holding the returned download bandwidth information - * @up: pointer holding the returned upload bandwidth information + * @description: text shown when throughput string cannot be parsed + * @throughput: pointer holding the returned throughput information * * Returns false on parse error and true otherwise. */ -static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, - u32 *down, u32 *up) +static bool batadv_parse_throughput(struct net_device *net_dev, char *buff, + const char *description, u32 *throughput) { enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT; - char *slash_ptr, *tmp_ptr; - u64 ldown, lup; + u64 lthroughput; + char *tmp_ptr; int ret; - slash_ptr = strchr(buff, '/'); - if (slash_ptr) - *slash_ptr = 0; - if (strlen(buff) > 4) { tmp_ptr = buff + strlen(buff) - 4; @@ -63,90 +59,75 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = kstrtou64(buff, 10, &ldown); + ret = kstrtou64(buff, 10, <hroughput); if (ret) { batadv_err(net_dev, - "Download speed of gateway mode invalid: %s\n", - buff); + "Invalid throughput speed for %s: %s\n", + description, buff); return false; } switch (bw_unit_type) { case BATADV_BW_UNIT_MBIT: /* prevent overflow */ - if (U64_MAX / 10 < ldown) { + if (U64_MAX / 10 < lthroughput) { batadv_err(net_dev, - "Download speed of gateway mode too large: %s\n", - buff); + "Throughput speed for %s too large: %s\n", + description, buff); return false; } - ldown *= 10; + lthroughput *= 10; break; case BATADV_BW_UNIT_KBIT: default: - ldown = div_u64(ldown, 100); + lthroughput = div_u64(lthroughput, 100); break; } - if (U32_MAX < ldown) { + if (lthroughput > U32_MAX) { batadv_err(net_dev, - "Download speed of gateway mode too large: %s\n", - buff); + "Throughput speed for %s too large: %s\n", + description, buff); return false; } - *down = ldown; - - /* we also got some upload info */ - if (slash_ptr) { - bw_unit_type = BATADV_BW_UNIT_KBIT; - - if (strlen(slash_ptr + 1) > 4) { - tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1); + *throughput = lthroughput; - if (strncasecmp(tmp_ptr, "mbit", 4) == 0) - bw_unit_type = BATADV_BW_UNIT_MBIT; + return true; +} - if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || - (bw_unit_type == BATADV_BW_UNIT_MBIT)) - *tmp_ptr = '\0'; - } +/** + * batadv_parse_gw_bandwidth - parse supplied string buffer to extract download + * and upload bandwidth information + * @net_dev: the soft interface net device + * @buff: string buffer to parse + * @down: pointer holding the returned download bandwidth information + * @up: pointer holding the returned upload bandwidth information + * + * Return: false on parse error and true otherwise. + */ +static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, + u32 *down, u32 *up) +{ + char *slash_ptr; + bool ret; - ret = kstrtou64(slash_ptr + 1, 10, &lup); - if (ret) { - batadv_err(net_dev, - "Upload speed of gateway mode invalid: %s\n", - slash_ptr + 1); - return false; - } + slash_ptr = strchr(buff, '/'); + if (slash_ptr) + *slash_ptr = 0; - switch (bw_unit_type) { - case BATADV_BW_UNIT_MBIT: - /* prevent overflow */ - if (U64_MAX / 10 < lup) { - batadv_err(net_dev, - "Upload speed of gateway mode too large: %s\n", - slash_ptr + 1); - return false; - } - - lup *= 10; - break; - case BATADV_BW_UNIT_KBIT: - default: - lup = div_u64(lup, 100); - break; - } + ret = batadv_parse_throughput(net_dev, buff, "download gateway speed", + down); + if (!ret) + return false; - if (U32_MAX < lup) { - batadv_err(net_dev, - "Upload speed of gateway mode too large: %s\n", - slash_ptr + 1); + /* we also got some upload info */ + if (slash_ptr) { + ret = batadv_parse_throughput(net_dev, slash_ptr + 1, + "upload gateway speed", up); + if (!ret) return false; - } - - *up = lup; } return true; -- cgit v1.1 From c799443ee13ef37221732839f1cca6f11c798b7a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 08:04:43 +0100 Subject: batman-adv: Delete unnecessary checks before the function call "kfree_skb" The kfree_skb() function tests whether its argument is NULL and then returns immediately. Thus the test around the calls is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 2 +- net/batman-adv/network-coding.c | 4 +--- net/batman-adv/send.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 95fd418..5b678f3 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1184,7 +1184,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, ret = true; out: - if (skb && !ret) + if (!ret) kfree_skb(skb); if (orig_node) batadv_orig_node_free_ref(orig_node); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index f5276be..c98b0ab 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -244,9 +244,7 @@ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) */ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) { - if (nc_packet->skb) - kfree_skb(nc_packet->skb); - + kfree_skb(nc_packet->skb); batadv_nc_path_free_ref(nc_packet->nc_path); kfree(nc_packet); } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f664324..782fa33 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -407,8 +407,7 @@ void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { - if (forw_packet->skb) - kfree_skb(forw_packet->skb); + kfree_skb(forw_packet->skb); if (forw_packet->if_incoming) batadv_hardif_free_ref(forw_packet->if_incoming); if (forw_packet->if_outgoing) -- cgit v1.1 From 8bbb7cb2324d6a5fb7ccdc4ab0099dc18b91b690 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 09:00:42 +0100 Subject: batman-adv: Less checks in batadv_tvlv_unicast_send() * Let us return directly if a call of the batadv_orig_hash_find() function returned a null pointer. * Omit the initialisation for the variable "skb" at the beginning. * Replace an assignment by a call of the kfree_skb() function and delete the affected variable "ret" then. Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/main.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5b678f3..4b5d61f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1143,15 +1143,14 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; struct batadv_tvlv_hdr *tvlv_hdr; struct batadv_orig_node *orig_node; - struct sk_buff *skb = NULL; + struct sk_buff *skb; unsigned char *tvlv_buff; unsigned int tvlv_len; ssize_t hdr_len = sizeof(*unicast_tvlv_packet); - bool ret = false; orig_node = batadv_orig_hash_find(bat_priv, dst); if (!orig_node) - goto out; + return; tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len; @@ -1180,14 +1179,10 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, tvlv_buff += sizeof(*tvlv_hdr); memcpy(tvlv_buff, tvlv_value, tvlv_value_len); - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) - ret = true; - -out: - if (!ret) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); - if (orig_node) - batadv_orig_node_free_ref(orig_node); +out: + batadv_orig_node_free_ref(orig_node); } /** -- cgit v1.1 From f75a33aeed0776f52da05276c2ef98e16d680a6b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 3 Nov 2015 19:20:34 +0100 Subject: batman-adv: Delete an unnecessary check before the function call "batadv_softif_vlan_free_ref" The batadv_softif_vlan_free_ref() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index ec67def..5852fda 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3356,8 +3356,7 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, ret = true; out: - if (vlan) - batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); if (tt_local_entry) -- cgit v1.1 From e087f34f28d8597f7c82f079337939367ba96537 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 3 Nov 2015 19:20:34 +0100 Subject: batman-adv: Split a condition check Let us split a check for a condition at the beginning of the batadv_is_ap_isolated() function so that a direct return can be performed in this function if the variable "vlan" contained a null pointer. Signed-off-by: Markus Elfring Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 5852fda..a22080c 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3339,7 +3339,10 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, bool ret = false; vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan || !atomic_read(&vlan->ap_isolation)) + if (!vlan) + return false; + + if (!atomic_read(&vlan->ap_isolation)) goto out; tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid); -- cgit v1.1 From 426fc6c8119820164dd44e99862dda85159eef93 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:45 +0200 Subject: batman-adv: Fix kernel-doc parsing of main structs kernel-doc is not able to skip an #ifdef between the kernel documentation block and the start of the struct. Moving the #ifdef before the kernel doc block avoids this problem Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7c386db..876ac33 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -423,13 +423,14 @@ struct batadv_neigh_ifinfo { struct rcu_head rcu; }; +#ifdef CONFIG_BATMAN_ADV_BLA + /** * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast * @crc: crc32 checksum of broadcast payload * @entrytime: time when the broadcast packet was received */ -#ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bcast_duplist_entry { u8 orig[ETH_ALEN]; __be32 crc; @@ -571,6 +572,8 @@ struct batadv_priv_tt { struct delayed_work work; }; +#ifdef CONFIG_BATMAN_ADV_BLA + /** * struct batadv_priv_bla - per mesh interface bridge loope avoidance data * @num_requests; number of bla requests in flight @@ -583,7 +586,6 @@ struct batadv_priv_tt { * @claim_dest: local claim data (e.g. claim group) * @work: work queue callback item for cleanups & bla announcements */ -#ifdef CONFIG_BATMAN_ADV_BLA struct batadv_priv_bla { atomic_t num_requests; struct batadv_hashtable *claim_hash; @@ -597,6 +599,8 @@ struct batadv_priv_bla { }; #endif +#ifdef CONFIG_BATMAN_ADV_DEBUG + /** * struct batadv_priv_debug_log - debug logging data * @log_buff: buffer holding the logs (ring bufer) @@ -605,7 +609,6 @@ struct batadv_priv_bla { * @lock: lock protecting log_buff, log_start & log_end * @queue_wait: log reader's wait queue */ -#ifdef CONFIG_BATMAN_ADV_DEBUG struct batadv_priv_debug_log { char log_buff[BATADV_LOG_BUF_LEN]; unsigned long log_start; @@ -647,13 +650,14 @@ struct batadv_priv_tvlv { spinlock_t handler_list_lock; /* protects handler_list */ }; +#ifdef CONFIG_BATMAN_ADV_DAT + /** * struct batadv_priv_dat - per mesh interface DAT private data * @addr: node DAT address * @hash: hashtable representing the local ARP cache * @work: work queue callback item for cache purging */ -#ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat { batadv_dat_addr_t addr; struct batadv_hashtable *hash; @@ -893,6 +897,8 @@ struct batadv_socket_packet { u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; }; +#ifdef CONFIG_BATMAN_ADV_BLA + /** * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN * @orig: originator address of backbone node (mac address of primary iface) @@ -910,7 +916,6 @@ struct batadv_socket_packet { * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ -#ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { u8 orig[ETH_ALEN]; unsigned short vid; -- cgit v1.1 From 006a199d5d1d4e1666b0d8b4f51b5a978ddc6aab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:46 +0200 Subject: batman-adv: Fix kerneldoc member names in for main structs Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 876ac33..d93501ed 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -223,12 +223,12 @@ struct batadv_orig_bat_iv { * @orig: originator ethernet address * @ifinfo_list: list for routers per outgoing interface * @last_bonding_candidate: pointer to last ifinfo of last used router - * @batadv_dat_addr_t: address of the orig node in the distributed hash + * @dat_addr: address of the orig node in the distributed hash * @last_seen: time when last packet from this node was received * @bcast_seqno_reset: time when the broadcast seqno window was reset * @mcast_handler_lock: synchronizes mcast-capability and -flag changes * @mcast_flags: multicast flags announced by the orig node - * @mcast_want_all_unsnoop_node: a list node for the + * @mcast_want_all_unsnoopables_node: a list node for the * mcast.want_all_unsnoopables list * @mcast_want_all_ipv4_node: a list node for the mcast.want_all_ipv4 list * @mcast_want_all_ipv6_node: a list node for the mcast.want_all_ipv6 list @@ -427,7 +427,7 @@ struct batadv_neigh_ifinfo { /** * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression - * @orig[ETH_ALEN]: mac address of orig node orginating the broadcast + * @orig: mac address of orig node orginating the broadcast * @crc: crc32 checksum of broadcast payload * @entrytime: time when the broadcast packet was received */ @@ -576,7 +576,7 @@ struct batadv_priv_tt { /** * struct batadv_priv_bla - per mesh interface bridge loope avoidance data - * @num_requests; number of bla requests in flight + * @num_requests: number of bla requests in flight * @claim_hash: hash table containing mesh nodes this host has claimed * @backbone_hash: hash table containing all detected backbone gateways * @bcast_duplist: recently received broadcast packets array (for broadcast @@ -799,7 +799,7 @@ struct batadv_softif_vlan { * @dat: distributed arp table data * @mcast: multicast data * @network_coding: bool indicating whether network coding is enabled - * @batadv_priv_nc: network coding data + * @nc: network coding data */ struct batadv_priv { atomic_t mesh_state; @@ -934,7 +934,7 @@ struct batadv_bla_backbone_gw { * struct batadv_bla_claim - claimed non-mesh client structure * @addr: mac address of claimed non-mesh client * @vid: vlan id this client was detected on - * @batadv_bla_backbone_gw: pointer to backbone gw claiming this client + * @backbone_gw: pointer to backbone gw claiming this client * @lasttime: last time we heard of claim (locals only) * @hash_entry: hlist node for batadv_priv_bla::claim_hash * @refcount: number of contexts the object is used -- cgit v1.1 From 8a3719a184cfd122eba9212c8d4a2fab5c9fb628 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:47 +0200 Subject: batman-adv: Remove kerneldoc for missing struct members Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d93501ed..1a67a1a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -371,9 +371,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard interface * @last_seen: when last packet via this neighbor was received - * @last_ttl: last received ttl from this neigh node * @rcu: struct used for freeing in an RCU-safe manner - * @bat_iv: B.A.T.M.A.N. IV private structure */ struct batadv_neigh_node { struct hlist_node list; @@ -1257,8 +1255,6 @@ struct batadv_dat_candidate { * struct batadv_tvlv_container - container for tvlv appended to OGMs * @list: hlist node for batadv_priv_tvlv::container_list * @tvlv_hdr: tvlv header information needed to construct the tvlv - * @value_len: length of the buffer following this struct which contains - * the actual tvlv payload * @refcount: number of contexts the object is used */ struct batadv_tvlv_container { -- cgit v1.1 From ed21d170e878b6b067a3216040b7b935c8007196 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 6 Sep 2015 21:38:48 +0200 Subject: batman-adv: Add kerneldoc for batadv_neigh_node::refcount Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 1a67a1a..3437b66 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -371,6 +371,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard interface * @last_seen: when last packet via this neighbor was received + * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ struct batadv_neigh_node { -- cgit v1.1 From 4d41e12593a9a6c4aaf113d44c8c619067b2b0aa Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Sun, 10 Jan 2016 21:06:22 +0100 Subject: switchdev: Adding MDB entry offload Define HW multicast entry: MAC and VID. Using a MAC address simplifies support for both IPV4 and IPv6. Signed-off-by: Elad Raz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index df790d3..ebc661d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); + case SWITCHDEV_OBJ_ID_PORT_MDB: + return sizeof(struct switchdev_obj_port_mdb); default: BUG(); } -- cgit v1.1 From f1fecb1d10ecc2f94d19e67827b9f678b36bfc61 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Sun, 10 Jan 2016 21:06:23 +0100 Subject: bridge: Reflect MDB entries to hardware Offload MDB changes per port to hardware Signed-off-by: Elad Raz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index cd8deea..30e105f 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -7,6 +7,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #include @@ -210,10 +211,32 @@ static inline size_t rtnl_mdb_nlmsg_size(void) static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, int type) { + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_PORT_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = entry->vid, + }; + struct net_device *port_dev; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + port_dev = __dev_get_by_index(net, entry->ifindex); + if (entry->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); +#endif + + mdb.obj.orig_dev = port_dev; + if (port_dev && type == RTM_NEWMDB) + switchdev_port_obj_add(port_dev, &mdb.obj); + else if (port_dev && type == RTM_DELMDB) + switchdev_port_obj_del(port_dev, &mdb.obj); + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; -- cgit v1.1 From 13b287e8d1cad951634389f85b8c9b816bd3bb1e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:43 +0200 Subject: ipv4: Namespaceify tcp_keepalive_time sysctl knob Different net namespaces might have different requirements as to the keepalive time of tcp sockets. This might be required in cases where different firewall rules are in place which require tcp timeout sockets to be increased/decreased independently of the host. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_timer.c | 1 - 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 41ff1f8..1886cc8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -337,13 +337,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_keepalive_time", - .data = &sysctl_tcp_keepalive_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { .procname = "tcp_keepalive_probes", .data = &sysctl_tcp_keepalive_probes, .maxlen = sizeof(int), @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_keepalive_time", + .data = &init_net.ipv4.sysctl_tcp_keepalive_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc4f726..6e14ff9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2385,6 +2385,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 193ba1f..166f27b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; -- cgit v1.1 From 9bd6861bd4326e3afd3f14a9ec8a723771fb20bb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:44 +0200 Subject: ipv4: Namespecify tcp_keepalive_probes sysctl knob This is required to have full tcp keepalive mechanism namespace support. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 - 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1886cc8..e99fbb7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -337,13 +337,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_keepalive_probes", - .data = &sysctl_tcp_keepalive_probes, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_keepalive_intvl", .data = &sysctl_tcp_keepalive_intvl, .maxlen = sizeof(int), @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_keepalive_probes", + .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6e14ff9..ed98de8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2386,6 +2386,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 166f27b..0ccb120 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; -- cgit v1.1 From b840d15d39128d08ed4486085e5507d2617b9ae1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:45 +0200 Subject: ipv4: Namespecify the tcp_keepalive_intvl sysctl knob This is the final part required to namespaceify the tcp keep alive mechanism. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 - 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e99fbb7..46ce410 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -337,13 +337,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_keepalive_intvl", - .data = &sysctl_tcp_keepalive_intvl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { .procname = "tcp_retries1", .data = &sysctl_tcp_retries1, .maxlen = sizeof(int), @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_keepalive_intvl", + .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ed98de8..65947c1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2387,6 +2387,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; + net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0ccb120..a4730a2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; -- cgit v1.1 From fdc5432a7b44ab7de17141beec19d946b9344e91 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 15:50:22 +0100 Subject: net, sched: add skb_at_tc_ingress helper Add a skb_at_tc_ingress() as this will be needed elsewhere as well and can hide the ugly ifdef. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5faaa54..b3c8bb4 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -79,12 +79,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_bpf_head *head = rcu_dereference_bh(tp->root); + bool at_ingress = skb_at_tc_ingress(skb); struct cls_bpf_prog *prog; -#ifdef CONFIG_NET_CLS_ACT - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; -#else - bool at_ingress = false; -#endif int ret = -1; if (unlikely(!skb_mac_header_was_set(skb))) -- cgit v1.1 From f8ffad69c9f8b8dfb0b633425d4ef4d2493ba61a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 15:50:23 +0100 Subject: bpf: add skb_postpush_rcsum and fix dev_forward_skb occasions Add a small helper skb_postpush_rcsum() and fix up redirect locations that need CHECKSUM_COMPLETE fixups on ingress. dev_forward_skb() expects a proper csum that covers also Ethernet header, f.e. since 2c26d34bbcc0 ("net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding"), we also do skb_postpull_rcsum() after pulling Ethernet header off via eth_type_trans(). When using eBPF in a netns setup f.e. with vxlan in collect metadata mode, I can trigger the following csum issue with an IPv6 setup: [ 505.144065] dummy1: hw csum failure [...] [ 505.144108] Call Trace: [ 505.144112] [] dump_stack+0x44/0x5c [ 505.144134] [] netdev_rx_csum_fault+0x3a/0x40 [ 505.144142] [] __skb_checksum_complete+0xcf/0xe0 [ 505.144149] [] nf_ip6_checksum+0xb2/0x120 [ 505.144161] [] icmpv6_error+0x17e/0x328 [nf_conntrack_ipv6] [ 505.144170] [] ? ip6t_do_table+0x2fa/0x645 [ip6_tables] [ 505.144177] [] ? ipv6_get_l4proto+0x65/0xd0 [nf_conntrack_ipv6] [ 505.144189] [] nf_conntrack_in+0xc2/0x5a0 [nf_conntrack] [ 505.144196] [] ipv6_conntrack_in+0x1c/0x20 [nf_conntrack_ipv6] [ 505.144204] [] nf_iterate+0x5d/0x70 [ 505.144210] [] nf_hook_slow+0x66/0xc0 [ 505.144218] [] ipv6_rcv+0x3f2/0x4f0 [ 505.144225] [] ? ip6_make_skb+0x1b0/0x1b0 [ 505.144232] [] __netif_receive_skb_core+0x36b/0x9a0 [ 505.144239] [] ? __netif_receive_skb+0x18/0x60 [ 505.144245] [] __netif_receive_skb+0x18/0x60 [ 505.144252] [] process_backlog+0x9f/0x140 [ 505.144259] [] net_rx_action+0x145/0x320 [...] What happens is that on ingress, we push Ethernet header back in, either from cls_bpf or right before skb_do_redirect(), but without updating csum. The "hw csum failure" can be fixed by using the new skb_postpush_rcsum() helper for the dev_forward_skb() case to correct the csum diff again. Thanks to Hannes Frederic Sowa for the csum_partial() idea! Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 35e6fed..0db92b5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1368,8 +1368,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); + if (BPF_RECOMPUTE_CSUM(flags)) + skb_postpush_rcsum(skb, ptr, len); + return 0; } @@ -1525,8 +1526,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb2)) return -ENOMEM; - if (BPF_IS_REDIRECT_INGRESS(flags)) + if (BPF_IS_REDIRECT_INGRESS(flags)) { + if (skb_at_tc_ingress(skb2)) + skb_postpush_rcsum(skb2, skb_mac_header(skb2), + skb2->mac_len); return dev_forward_skb(dev, skb2); + } skb2->dev = dev; skb_sender_cpu_clear(skb2); @@ -1569,8 +1574,12 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { + if (skb_at_tc_ingress(skb)) + skb_postpush_rcsum(skb, skb_mac_header(skb), + skb->mac_len); return dev_forward_skb(dev, skb); + } skb->dev = dev; skb_sender_cpu_clear(skb); -- cgit v1.1 From 1f211a1b929c804100e138c5d3d656992cfd5622 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 22:29:47 +0100 Subject: net, sched: add clsact qdisc This work adds a generalization of the ingress qdisc as a qdisc holding only classifiers. The clsact qdisc works on ingress, but also on egress. In both cases, it's execution happens without taking the qdisc lock, and the main difference for the egress part compared to prior version of [1] is that this can be applied with _any_ underlying real egress qdisc (also classless ones). Besides solving the use-case of [1], that is, allowing for more programmability on assigning skb->priority for the mqprio case that is supported by most popular 10G+ NICs, it also opens up a lot more flexibility for other tc applications. The main work on classification can already be done at clsact egress time if the use-case allows and state stored for later retrieval f.e. again in skb->priority with major/minors (which is checked by most classful qdiscs before consulting tc_classify()) and/or in other skb fields like skb->tc_index for some light-weight post-processing to get to the eventual classid in case of a classful qdisc. Another use case is that the clsact egress part allows to have a central egress counterpart to the ingress classifiers, so that classifiers can easily share state (e.g. in cls_bpf via eBPF maps) for ingress and egress. Currently, default setups like mq + pfifo_fast would require for this to use, for example, prio qdisc instead (to get a tc_classify() run) and to duplicate the egress classifier for each queue. With clsact, it allows for leaving the setup as is, it can additionally assign skb->priority to put the skb in one of pfifo_fast's bands and it can share state with maps. Moreover, we can access the skb's dst entry (f.e. to retrieve tclassid) w/o the need to perform a skb_dst_force() to hold on to it any longer. In lwt case, we can also use this facility to setup dst metadata via cls_bpf (bpf_skb_set_tunnel_key()) without needing a real egress qdisc just for that (case of IFF_NO_QUEUE devices, for example). The realization can be done without any changes to the scheduler core framework. All it takes is that we have two a-priori defined minors/child classes, where we can mux between ingress and egress classifier list (dev->ingress_cl_list and dev->egress_cl_list, latter stored close to dev->_tx to avoid extra cacheline miss for moderate loads). The egress part is a bit similar modelled to handle_ing() and patched to a noop in case the functionality is not used. Both handlers are now called sch_handle_ingress() and sch_handle_egress(), code sharing among the two doesn't seem practical as there are various minor differences in both paths, so that making them conditional in a single handler would rather slow things down. Full compatibility to ingress qdisc is provided as well. Since both piggyback on TC_H_CLSACT, only one of them (ingress/clsact) can exist per netdevice, and thus ingress qdisc specific behaviour can be retained for user space. This means, either a user does 'tc qdisc add dev foo ingress' and configures ingress qdisc as usual, or the 'tc qdisc add dev foo clsact' alternative, where both, ingress and egress classifier can be configured as in the below example. ingress qdisc supports attaching classifier to any minor number whereas clsact has two fixed minors for muxing between the lists, therefore to not break user space setups, they are better done as two separate qdiscs. I decided to extend the sch_ingress module with clsact functionality so that commonly used code can be reused, the module is being aliased with sch_clsact so that it can be auto-loaded properly. Alternative would have been to add a flag when initializing ingress to alter its behaviour plus aliasing to a different name (as it's more than just ingress). However, the first would end up, based on the flag, choosing the new/old behaviour by calling different function implementations to handle each anyway, the latter would require to register ingress qdisc once again under different alias. So, this really begs to provide a minimal, cleaner approach to have Qdisc_ops and Qdisc_class_ops by its own that share callbacks used by both. Example, adding qdisc: # tc qdisc add dev foo clsact # tc qdisc show dev foo qdisc mq 0: root qdisc pfifo_fast 0: parent :1 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :2 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :3 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc pfifo_fast 0: parent :4 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 qdisc clsact ffff: parent ffff:fff1 Adding filters (deleting, etc works analogous by specifying ingress/egress): # tc filter add dev foo ingress bpf da obj bar.o sec ingress # tc filter add dev foo egress bpf da obj bar.o sec egress # tc filter show dev foo ingress filter protocol all pref 49152 bpf filter protocol all pref 49152 bpf handle 0x1 bar.o:[ingress] direct-action # tc filter show dev foo egress filter protocol all pref 49152 bpf filter protocol all pref 49152 bpf handle 0x1 bar.o:[egress] direct-action A 'tc filter show dev foo' or 'tc filter show dev foo parent ffff:' will show an empty list for clsact. Either using the parent names (ingress/egress) or specifying the full major/minor will then show the related filter lists. Prior work on a mqprio prequeue() facility [1] was done mainly by John Fastabend. [1] http://patchwork.ozlabs.org/patch/512949/ Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/Kconfig | 3 ++ net/core/dev.c | 82 ++++++++++++++++++++++++++++++++++++++++----- net/sched/Kconfig | 14 +++++--- net/sched/cls_bpf.c | 2 +- net/sched/sch_ingress.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 174 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 11f8c22..1743546 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -48,6 +48,9 @@ config COMPAT_NETLINK_MESSAGES config NET_INGRESS bool +config NET_EGRESS + bool + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index 914b4a2..0ca95d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1676,6 +1676,22 @@ void net_dec_ingress_queue(void) EXPORT_SYMBOL_GPL(net_dec_ingress_queue); #endif +#ifdef CONFIG_NET_EGRESS +static struct static_key egress_needed __read_mostly; + +void net_inc_egress_queue(void) +{ + static_key_slow_inc(&egress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_egress_queue); + +void net_dec_egress_queue(void) +{ + static_key_slow_dec(&egress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_egress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -3007,7 +3023,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, bool contended; int rc; - qdisc_pkt_len_init(skb); qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a @@ -3100,6 +3115,49 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dev_loopback_xmit); +#ifdef CONFIG_NET_EGRESS +static struct sk_buff * +sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) +{ + struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct tcf_result cl_res; + + if (!cl) + return skb; + + /* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set + * earlier by the caller. + */ + qdisc_bstats_cpu_update(cl->q, skb); + + switch (tc_classify(skb, cl, &cl_res, false)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + qdisc_qstats_cpu_drop(cl->q); + *ret = NET_XMIT_DROP; + goto drop; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *ret = NET_XMIT_SUCCESS; +drop: + kfree_skb(skb); + return NULL; + case TC_ACT_REDIRECT: + /* No need to push/pop skb's mac_header here on egress! */ + skb_do_redirect(skb); + *ret = NET_XMIT_SUCCESS; + return NULL; + default: + break; + } + + return skb; +} +#endif /* CONFIG_NET_EGRESS */ + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS @@ -3226,6 +3284,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_update_prio(skb); + qdisc_pkt_len_init(skb); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); +# ifdef CONFIG_NET_EGRESS + if (static_key_false(&egress_needed)) { + skb = sch_handle_egress(skb, &rc, dev); + if (!skb) + goto out; + } +# endif +#endif /* If device/qdisc don't need skb->dst, release it right now while * its hot in this cpu cache. */ @@ -3247,9 +3316,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); -#endif trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); @@ -3806,9 +3872,9 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev, EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -static inline struct sk_buff *handle_ing(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, struct net_device *orig_dev) +static inline struct sk_buff * +sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); @@ -4002,7 +4068,7 @@ another_round: skip_taps: #ifdef CONFIG_NET_INGRESS if (static_key_false(&ingress_needed)) { - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index daa3343..8283082 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -310,15 +310,21 @@ config NET_SCH_PIE If unsure, say N. config NET_SCH_INGRESS - tristate "Ingress Qdisc" + tristate "Ingress/classifier-action Qdisc" depends on NET_CLS_ACT select NET_INGRESS + select NET_EGRESS ---help--- - Say Y here if you want to use classifiers for incoming packets. + Say Y here if you want to use classifiers for incoming and/or outgoing + packets. This qdisc doesn't do anything else besides running classifiers, + which can also have actions attached to them. In case of outgoing packets, + classifiers that this qdisc holds are executed in the transmit path + before real enqueuing to an egress qdisc happens. + If unsure, say Y. - To compile this code as a module, choose M here: the - module will be called sch_ingress. + To compile this code as a module, choose M here: the module will be + called sch_ingress with alias of sch_clsact. config NET_SCH_PLUG tristate "Plug network traffic until release (PLUG)" diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b3c8bb4..8dc8430 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -291,7 +291,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, prog->bpf_name = name; prog->filter = fp; - if (fp->dst_needed) + if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS)) netif_keep_dst(qdisc_dev(tp->q)); return 0; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index e7c648f..10adbc6 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -1,4 +1,5 @@ -/* net/sched/sch_ingress.c - Ingress qdisc +/* net/sched/sch_ingress.c - Ingress and clsact qdisc + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -98,17 +99,100 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static unsigned long clsact_get(struct Qdisc *sch, u32 classid) +{ + switch (TC_H_MIN(classid)) { + case TC_H_MIN(TC_H_MIN_INGRESS): + case TC_H_MIN(TC_H_MIN_EGRESS): + return TC_H_MIN(classid); + default: + return 0; + } +} + +static unsigned long clsact_bind_filter(struct Qdisc *sch, + unsigned long parent, u32 classid) +{ + return clsact_get(sch, classid); +} + +static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch, + unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + + switch (cl) { + case TC_H_MIN(TC_H_MIN_INGRESS): + return &dev->ingress_cl_list; + case TC_H_MIN(TC_H_MIN_EGRESS): + return &dev->egress_cl_list; + default: + return NULL; + } +} + +static int clsact_init(struct Qdisc *sch, struct nlattr *opt) +{ + net_inc_ingress_queue(); + net_inc_egress_queue(); + + sch->flags |= TCQ_F_CPUSTATS; + + return 0; +} + +static void clsact_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + + tcf_destroy_chain(&dev->ingress_cl_list); + tcf_destroy_chain(&dev->egress_cl_list); + + net_dec_ingress_queue(); + net_dec_egress_queue(); +} + +static const struct Qdisc_class_ops clsact_class_ops = { + .leaf = ingress_leaf, + .get = clsact_get, + .put = ingress_put, + .walk = ingress_walk, + .tcf_chain = clsact_find_tcf, + .bind_tcf = clsact_bind_filter, + .unbind_tcf = ingress_put, +}; + +static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { + .cl_ops = &clsact_class_ops, + .id = "clsact", + .init = clsact_init, + .destroy = clsact_destroy, + .dump = ingress_dump, + .owner = THIS_MODULE, +}; + static int __init ingress_module_init(void) { - return register_qdisc(&ingress_qdisc_ops); + int ret; + + ret = register_qdisc(&ingress_qdisc_ops); + if (!ret) { + ret = register_qdisc(&clsact_qdisc_ops); + if (ret) + unregister_qdisc(&ingress_qdisc_ops); + } + + return ret; } static void __exit ingress_module_exit(void) { unregister_qdisc(&ingress_qdisc_ops); + unregister_qdisc(&clsact_qdisc_ops); } module_init(ingress_module_init); module_exit(ingress_module_exit); +MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); -- cgit v1.1 From 3d171f3907329d4b1ce31d5ec9c852c5f0269578 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Fri, 8 Jan 2016 13:47:23 +0100 Subject: ipv6: always add flag an address that failed DAD with DADFAILED The userspace needs to know why is the address being removed so that it can perhaps obtain a new address. Without the DADFAILED flag it's impossible to distinguish removal of a temporary and tentative address due to DAD failure from other reasons (device removed, manual address removal). Signed-off-by: Lubomir Rintel Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8697551b..38eedde 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1772,12 +1772,13 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { + if (dad_failed) + ifp->flags |= IFA_F_DADFAILED; + if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; - if (dad_failed) - ifp->flags |= IFA_F_DADFAILED; spin_unlock_bh(&ifp->lock); if (dad_failed) ipv6_ifa_notify(0, ifp); -- cgit v1.1 From a78cb84c62c427807d917c5aa8797740f00b0bbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jan 2016 08:37:20 -0800 Subject: net: add scheduling point in recvmmsg/sendmmsg Applications often have to reduce number of datagrams they receive or send per system call to avoid starvation problems. Really the kernel should take care of this by using cond_resched(), so that applications can experiment bigger batch sizes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index d730ef9..91c2de6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2041,6 +2041,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (err) break; ++datagrams; + cond_resched(); } fput_light(sock->file, fput_needed); @@ -2236,6 +2237,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, /* Out of band data, return right away */ if (msg_sys.msg_flags & MSG_OOB) break; + cond_resched(); } out_put: -- cgit v1.1 From 5ea030429fed07ea47e45152202d6ecb24133374 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 9 Jan 2016 16:07:09 -0700 Subject: openvswitch: clean up unused function commit 6b001e682e90 ("openvswitch: Use Geneve device.") The commit above deleted the only call site of ovs_tunnel_route_lookup() and now that function is not used any more. So let's delete the function definition as well. Signed-off-by: Jean Sacren Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/vport.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 8ea3a96..f00bb15 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -204,26 +204,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops); }) void ovs_vport_ops_unregister(struct vport_ops *ops); - -static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, - const struct ip_tunnel_key *key, - u32 mark, - struct flowi4 *fl, - u8 protocol) -{ - struct rtable *rt; - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = mark; - fl->flowi4_proto = protocol; - - rt = ip_route_output_key(net, fl); - return rt; -} - void ovs_vport_send(struct vport *vport, struct sk_buff *skb); #endif /* vport.h */ -- cgit v1.1 From 2f7066ada15c865eeab5a3f6c69dcf58d196e349 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 9 Jan 2016 16:07:10 -0700 Subject: openvswitch: fix struct geneve_port member name commit 6b001e682e90 ("openvswitch: Use Geneve device.") The commit above introduced 'port_no' as the name for the member of struct geneve_port. The correct name should be 'dst_port' as described in the kernel doc. Let's fix that member name and all the pertinent instances so that both doc and code would be consistent. Signed-off-by: Jean Sacren Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/vport-geneve.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index e41cd12..30ab8e1 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -34,7 +34,7 @@ static struct vport_ops ovs_geneve_vport_ops; * @dst_port: destination port. */ struct geneve_port { - u16 port_no; + u16 dst_port; }; static inline struct geneve_port *geneve_vport(const struct vport *vport) @@ -47,7 +47,7 @@ static int geneve_get_options(const struct vport *vport, { struct geneve_port *geneve_port = geneve_vport(vport); - if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no)) + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->dst_port)) return -EMSGSIZE; return 0; } @@ -83,7 +83,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return vport; geneve_port = geneve_vport(vport); - geneve_port->port_no = dst_port; + geneve_port->dst_port = dst_port; rtnl_lock(); dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port); -- cgit v1.1 From c5420eb12f8e26dd2951c5acc954ca4848f488cb Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 9 Jan 2016 16:07:11 -0700 Subject: openvswitch: update kernel doc for struct vport commit be4ace6e6b1b ("openvswitch: Move dev pointer into vport itself") The commit above added @dev and moved @rcu to the bottom of struct vport, but the change was not reflected in the kernel doc. So let's update the kernel doc as well. Signed-off-by: Jean Sacren Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/vport.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f00bb15..c10899cb 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,7 +70,7 @@ struct vport_portids { /** * struct vport - one port within a datapath - * @rcu: RCU callback head for deferred destruction. + * @dev: Pointer to net_device. * @dp: Datapath to which this port belongs. * @upcall_portids: RCU protected 'struct vport_portids'. * @port_no: Index into @dp's @ports array. @@ -78,6 +78,7 @@ struct vport_portids { * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. * @detach_list: list used for detaching vport in net-exit call. + * @rcu: RCU callback head for deferred destruction. */ struct vport { struct net_device *dev; -- cgit v1.1 From 617cfc753049a4e1e161ae5e5e00e92d56be2b90 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Sun, 10 Jan 2016 21:26:57 +0600 Subject: net/rtnetlink: remove unused sz_idx variable The sz_idx variable is defined in the rtnetlink_rcv_msg(), but not used anywhere. Let's remove it. Signed-off-by: Alexander Kuleshov Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index baf49cb..d735e85 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3351,7 +3351,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); rtnl_doit_func doit; - int sz_idx, kind; + int kind; int family; int type; int err; @@ -3367,7 +3367,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return 0; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; - sz_idx = type>>2; kind = type&3; if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) -- cgit v1.1 From 781c53bc5d5628065a46c70f02f5a0450f5842f4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jan 2016 01:16:38 +0100 Subject: bpf: export helper function flags and reject invalid ones Export flags used by eBPF helper functions through UAPI, so they can be used by programs (instead of them redefining all flags each time or just using the hard-coded values). It also gives a better overview what flags are used where and we can further get rid of the extra macros defined in filter.c. Moreover, reject invalid flags. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 0db92b5..7c55cad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1328,8 +1328,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return 0; } -#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) -#define BPF_LDST_LEN 16U +#define BPF_LDST_LEN 16U static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { @@ -1340,6 +1339,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) char buf[BPF_LDST_LEN]; void *ptr; + if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) + return -EINVAL; + /* bpf verifier guarantees that: * 'from' pointer points to bpf program stack * 'len' bytes of it were initialized @@ -1359,7 +1361,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) if (unlikely(!ptr)) return -EFAULT; - if (BPF_RECOMPUTE_CSUM(flags)) + if (flags & BPF_F_RECOMPUTE_CSUM) skb_postpull_rcsum(skb, ptr, len); memcpy(ptr, from, len); @@ -1368,7 +1370,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (BPF_RECOMPUTE_CSUM(flags)) + if (flags & BPF_F_RECOMPUTE_CSUM) skb_postpush_rcsum(skb, ptr, len); return 0; @@ -1415,15 +1417,14 @@ const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; -#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) -#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) - static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; int offset = (int) r2; __sum16 sum, *ptr; + if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) + return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; @@ -1435,7 +1436,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely(!ptr)) return -EFAULT; - switch (BPF_HEADER_FIELD_SIZE(flags)) { + switch (flags & BPF_F_HDR_FIELD_MASK) { case 2: csum_replace2(ptr, from, to); break; @@ -1467,10 +1468,12 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); + bool is_pseudo = flags & BPF_F_PSEUDO_HDR; int offset = (int) r2; __sum16 sum, *ptr; + if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; @@ -1482,7 +1485,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely(!ptr)) return -EFAULT; - switch (BPF_HEADER_FIELD_SIZE(flags)) { + switch (flags & BPF_F_HDR_FIELD_MASK) { case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); break; @@ -1511,13 +1514,14 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) - static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; struct net_device *dev; + if (unlikely(flags & ~(BPF_F_INGRESS))) + return -EINVAL; + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); if (unlikely(!dev)) return -EINVAL; @@ -1526,7 +1530,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb2)) return -ENOMEM; - if (BPF_IS_REDIRECT_INGRESS(flags)) { + if (flags & BPF_F_INGRESS) { if (skb_at_tc_ingress(skb2)) skb_postpush_rcsum(skb2, skb_mac_header(skb2), skb2->mac_len); @@ -1553,12 +1557,17 @@ struct redirect_info { }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); + static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + if (unlikely(flags & ~(BPF_F_INGRESS))) + return TC_ACT_SHOT; + ri->ifindex = ifindex; ri->flags = flags; + return TC_ACT_REDIRECT; } @@ -1574,7 +1583,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { + if (ri->flags & BPF_F_INGRESS) { if (skb_at_tc_ingress(skb)) skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); -- cgit v1.1 From c6c33454072fc9fe961e2b25f22a619e4fa98838 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jan 2016 01:16:39 +0100 Subject: bpf: support ipv6 for bpf_skb_{set,get}_tunnel_key After IPv6 support has recently been added to metadata dst and related encaps, add support for populating/reading it from an eBPF program. Commit d3aa45ce6b ("bpf: add helpers to access tunnel metadata") started with initial IPv4-only support back then (due to IPv6 metadata support not being available yet). To stay compatible with older programs, we need to test for the passed structure size. Also TOS and TTL support from the ip_tunnel_info key has been added. Tested with vxlan devs in collect meta data mode with IPv4, IPv6 and in compat mode over different network namespaces. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 7c55cad..77cdfb4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1680,19 +1680,49 @@ bool bpf_helper_changes_skb_data(void *func) return false; } +static unsigned short bpf_tunnel_key_af(u64 flags) +{ + return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; +} + static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; - struct ip_tunnel_info *info = skb_tunnel_info(skb); + const struct ip_tunnel_info *info = skb_tunnel_info(skb); + u8 compat[sizeof(struct bpf_tunnel_key)]; - if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info)) - return -EINVAL; - if (ip_tunnel_info_af(info) != AF_INET) + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) return -EINVAL; + if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) + return -EPROTO; + if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + switch (size) { + case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): + /* Fixup deprecated structure layouts here, so we have + * a common path later on. + */ + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + to = (struct bpf_tunnel_key *)compat; + break; + default: + return -EINVAL; + } + } to->tunnel_id = be64_to_cpu(info->key.tun_id); - to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + to->tunnel_tos = info->key.tos; + to->tunnel_ttl = info->key.ttl; + + if (flags & BPF_F_TUNINFO_IPV6) + memcpy(to->remote_ipv6, &info->key.u.ipv6.src, + sizeof(to->remote_ipv6)); + else + to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + + if (unlikely(size != sizeof(struct bpf_tunnel_key))) + memcpy((void *)(long) r2, to, size); return 0; } @@ -1714,10 +1744,25 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; struct metadata_dst *md = this_cpu_ptr(md_dst); + u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; - if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags)) + if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6))) return -EINVAL; + if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + switch (size) { + case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): + /* Fixup deprecated structure layouts here, so we have + * a common path later on. + */ + memcpy(compat, from, size); + memset(compat + size, 0, sizeof(compat) - size); + from = (struct bpf_tunnel_key *)compat; + break; + default: + return -EINVAL; + } + } skb_dst_drop(skb); dst_hold((struct dst_entry *) md); @@ -1725,9 +1770,19 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) info = &md->u.tun_info; info->mode = IP_TUNNEL_INFO_TX; + info->key.tun_flags = TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); - info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + info->key.tos = from->tunnel_tos; + info->key.ttl = from->tunnel_ttl; + + if (flags & BPF_F_TUNINFO_IPV6) { + info->mode |= IP_TUNNEL_INFO_IPV6; + memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, + sizeof(from->remote_ipv6)); + } else { + info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + } return 0; } -- cgit v1.1 From 7aaed57c5c2890634cfadf725173c7c68ea4cb4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jan 2016 08:58:00 -0800 Subject: phonet: properly unshare skbs in phonet_rcv() Ivaylo Dimitrov reported a regression caused by commit 7866a621043f ("dev: add per net_device packet type chains"). skb->dev becomes NULL and we crash in __netif_receive_skb_core(). Before above commit, different kind of bugs or corruptions could happen without major crash. But the root cause is that phonet_rcv() can queue skb without checking if skb is shared or not. Many thanks to Ivaylo Dimitrov for his help, diagnosis and tests. Reported-by: Ivaylo Dimitrov Tested-by: Ivaylo Dimitrov Signed-off-by: Eric Dumazet Cc: Remi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 10d42f3..f925753 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; -- cgit v1.1 From 229394e8e62a4191d592842cf67e80c62a492937 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 12 Jan 2016 20:17:08 +0100 Subject: net: bpf: reject invalid shifts On ARM64, a BUG() is triggered in the eBPF JIT if a filter with a constant shift that can't be encoded in the immediate field of the UBFM/SBFM instructions is passed to the JIT. Since these shifts amounts, which are negative or >= regsize, are invalid, reject them in the eBPF verifier and the classic BPF filter checker, for all architectures. Signed-off-by: Rabin Vincent Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 77cdfb4..94d2620 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -785,6 +785,11 @@ static int bpf_check_classic(const struct sock_filter *filter, if (ftest->k == 0) return -EINVAL; break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + if (ftest->k >= 32) + return -EINVAL; + break; case BPF_LD | BPF_MEM: case BPF_LDX | BPF_MEM: case BPF_ST: -- cgit v1.1