From 827da44c61419f29ae3be198c342e2147f1a10cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:51:58 -0700 Subject: net: Explicitly initialize u64_stats_sync structures for lockdep In order to enable lockdep on seqcount/seqlock structures, we must explicitly initialize any locks. The u64_stats_sync structure, uses a seqcount, and thus we need to introduce a u64_stats_init() function and use it to initialize the structure. This unfortunately adds a lot of fairly trivial initialization code to a number of drivers. But the benefit of ensuring correctness makes this worth while. Because these changes are required for lockdep to be enabled, and the changes are quite trivial, I've not yet split this patch out into 30-some separate patches, as I figured it would be better to get the various maintainers thoughts on how to best merge this change along with the seqcount lockdep enablement. Feedback would be appreciated! Signed-off-by: John Stultz Acked-by: Julian Anastasov Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Eric Dumazet Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Jesse Gross Cc: Mathieu Desnoyers Cc: "Michael S. Tsirkin" Cc: Mirko Lindner Cc: Patrick McHardy Cc: Roger Luethi Cc: Rusty Russell Cc: Simon Horman Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Petazzoni Cc: Wensong Zhang Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- net/ipv6/addrconf.c | 14 ++++++++++++++ net/ipv6/af_inet6.c | 14 ++++++++++++++ net/ipv6/ip6_gre.c | 15 +++++++++++++++ net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/sit.c | 15 +++++++++++++++ 5 files changed, 65 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb30..d62d589 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -281,10 +281,24 @@ static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { + int i; + if (snmp_mib_init((void __percpu **)idev->stats.ipv6, sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip; + + for_each_possible_cpu(i) { + struct ipstats_mib *addrconf_stats; + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + u64_stats_init(&addrconf_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); + u64_stats_init(&addrconf_stats->syncp); +#endif + } + + idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device), GFP_KERNEL); if (!idev->stats.icmpv6dev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100..a8f8559 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -719,6 +719,8 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { + int i; + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, sizeof(struct udp_mib), __alignof__(struct udp_mib)) < 0) @@ -731,6 +733,18 @@ static int __net_init ipv6_init_mibs(struct net *net) sizeof(struct ipstats_mib), __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; + + for_each_possible_cpu(i) { + struct ipstats_mib *af_inet6_stats; + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + u64_stats_init(&af_inet6_stats->syncp); +#if SNMP_ARRAY_SZ == 2 + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); + u64_stats_init(&af_inet6_stats->syncp); +#endif + } + + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib), __alignof__(struct icmpv6_mib)) < 0) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bf4a9a0..8acb286 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1252,6 +1252,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1269,6 +1270,13 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tunnel_stats; + ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tunnel_stats->syncp); + } + + return 0; } @@ -1449,6 +1457,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; + int i; tunnel = netdev_priv(dev); @@ -1462,6 +1471,12 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6gre_tap_stats; + ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6gre_tap_stats->syncp); + } + return 0; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 583b77e..df1fa58 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1494,12 +1494,19 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int i; t->dev = dev; t->net = dev_net(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ip6_tnl_stats; + ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ip6_tnl_stats->syncp); + } return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1926945..365dc54 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1310,6 +1310,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1322,6 +1323,12 @@ static int ipip6_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_tunnel_stats; + ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_tunnel_stats->syncp); + } + return 0; } @@ -1331,6 +1338,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); + int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1344,6 +1352,13 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) dev->tstats = alloc_percpu(struct pcpu_tstats); if (!dev->tstats) return -ENOMEM; + + for_each_possible_cpu(i) { + struct pcpu_tstats *ipip6_fb_stats; + ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); + u64_stats_init(&ipip6_fb_stats->syncp); + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; -- cgit v1.1 From 5ac68e7c34a4797aa4ca9615e5a6603bda1abe9b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Oct 2013 15:52:01 -0700 Subject: ipv6: Fix possible ipv6 seqlock deadlock While enabling lockdep on seqlocks, I ran across the warning below caused by the ipv6 stats being updated in both irq and non-irq context. This patch changes from IP6_INC_STATS_BH to IP6_INC_STATS (suggested by Eric Dumazet) to resolve this problem. [ 11.120383] ================================= [ 11.121024] [ INFO: inconsistent lock state ] [ 11.121663] 3.12.0-rc1+ #68 Not tainted [ 11.122229] --------------------------------- [ 11.122867] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 11.123741] init/4483 [HC0[0]:SC1[3]:HE1:SE0] takes: [ 11.124505] (&stats->syncp.seq#6){+.?...}, at: [] ndisc_send_ns+0xe2/0x130 [ 11.125736] {SOFTIRQ-ON-W} state was registered at: [ 11.126447] [] __lock_acquire+0x5c7/0x1af0 [ 11.127222] [] lock_acquire+0x96/0xd0 [ 11.127925] [] write_seqcount_begin+0x33/0x40 [ 11.128766] [] ip6_dst_lookup_tail+0x3a3/0x460 [ 11.129582] [] ip6_dst_lookup_flow+0x2e/0x80 [ 11.130014] [] ip6_datagram_connect+0x150/0x4e0 [ 11.130014] [] inet_dgram_connect+0x25/0x70 [ 11.130014] [] SYSC_connect+0xa1/0xc0 [ 11.130014] [] SyS_connect+0x11/0x20 [ 11.130014] [] SyS_socketcall+0x12b/0x300 [ 11.130014] [] syscall_call+0x7/0xb [ 11.130014] irq event stamp: 1184 [ 11.130014] hardirqs last enabled at (1184): [] local_bh_enable+0x71/0x110 [ 11.130014] hardirqs last disabled at (1183): [] local_bh_enable+0x3d/0x110 [ 11.130014] softirqs last enabled at (0): [] copy_process.part.42+0x45d/0x11a0 [ 11.130014] softirqs last disabled at (1147): [] irq_exit+0xa5/0xb0 [ 11.130014] [ 11.130014] other info that might help us debug this: [ 11.130014] Possible unsafe locking scenario: [ 11.130014] [ 11.130014] CPU0 [ 11.130014] ---- [ 11.130014] lock(&stats->syncp.seq#6); [ 11.130014] [ 11.130014] lock(&stats->syncp.seq#6); [ 11.130014] [ 11.130014] *** DEADLOCK *** [ 11.130014] [ 11.130014] 3 locks held by init/4483: [ 11.130014] #0: (rcu_read_lock){.+.+..}, at: [] SyS_setpriority+0x4c/0x620 [ 11.130014] #1: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x0/0xf0 [ 11.130014] #2: (rcu_read_lock){.+.+..}, at: [] ndisc_send_skb+0x54/0x5d0 [ 11.130014] [ 11.130014] stack backtrace: [ 11.130014] CPU: 0 PID: 4483 Comm: init Not tainted 3.12.0-rc1+ #68 [ 11.130014] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 11.130014] 00000000 00000000 c55e5c10 c1bb0e71 c57128b0 c55e5c4c c1badf79 c1ec1123 [ 11.130014] c1ec1484 00001183 00000000 00000000 00000001 00000003 00000001 00000000 [ 11.130014] c1ec1484 00000004 c5712dcc 00000000 c55e5c84 c10de492 00000004 c10755f2 [ 11.130014] Call Trace: [ 11.130014] [] dump_stack+0x4b/0x66 [ 11.130014] [] print_usage_bug+0x1d3/0x1dd [ 11.130014] [] mark_lock+0x282/0x2f0 [ 11.130014] [] ? kvm_clock_read+0x22/0x30 [ 11.130014] [] ? check_usage_backwards+0x150/0x150 [ 11.130014] [] __lock_acquire+0x584/0x1af0 [ 11.130014] [] ? sched_clock_cpu+0xef/0x190 [ 11.130014] [] ? mark_held_locks+0x8c/0xf0 [ 11.130014] [] lock_acquire+0x96/0xd0 [ 11.130014] [] ? ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ndisc_send_skb+0x293/0x5d0 [ 11.130014] [] ? ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ndisc_send_ns+0xe2/0x130 [ 11.130014] [] ? mod_timer+0xf2/0x160 [ 11.130014] [] ? addrconf_dad_timer+0xce/0x150 [ 11.130014] [] addrconf_dad_timer+0x10a/0x150 [ 11.130014] [] ? addrconf_dad_completed+0x1c0/0x1c0 [ 11.130014] [] call_timer_fn+0x73/0xf0 [ 11.130014] [] ? __internal_add_timer+0xb0/0xb0 [ 11.130014] [] ? addrconf_dad_completed+0x1c0/0x1c0 [ 11.130014] [] run_timer_softirq+0x141/0x1e0 [ 11.130014] [] ? __do_softirq+0x70/0x1b0 [ 11.130014] [] __do_softirq+0xc0/0x1b0 [ 11.130014] [] irq_exit+0xa5/0xb0 [ 11.130014] [] smp_apic_timer_interrupt+0x35/0x50 [ 11.130014] [] apic_timer_interrupt+0x32/0x38 [ 11.130014] [] ? SyS_setpriority+0xfd/0x620 [ 11.130014] [] ? lock_release+0x9/0x240 [ 11.130014] [] ? SyS_setpriority+0xe7/0x620 [ 11.130014] [] ? _raw_read_unlock+0x1d/0x30 [ 11.130014] [] SyS_setpriority+0x111/0x620 [ 11.130014] [] ? SyS_setpriority+0x4c/0x620 [ 11.130014] [] syscall_call+0x7/0xb Signed-off-by: John Stultz Acked-by: Eric Dumazet Signed-off-by: Peter Zijlstra Cc: Alexey Kuznetsov Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Mathieu Desnoyers Cc: Patrick McHardy Cc: Steven Rostedt Cc: netdev@vger.kernel.org Link: http://lkml.kernel.org/r/1381186321-4906-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 91fb4e8..ad138b1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -909,7 +909,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; -- cgit v1.1