From a7f1884554b81bd68cd435d72f09a3527629ac43 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 May 2016 14:43:54 +0200 Subject: netfilter: nfnetlink_queue: fix timestamp attribute Since 4.4 we erronously use timestamp of the netlink skb (which is zero). Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1066 Fixes: b28b1e826f818c30ea7 ("netfilter: nfnetlink_queue: use y2038 safe timestamp") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cb5b630..e34256a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -499,7 +499,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); -- cgit v1.1 From 275ae411e56f8f900fa364da29c4706f9af4e1f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Tue, 24 May 2016 16:53:49 +0200 Subject: perf/x86/intel/rapl: Fix pmus free during cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On rapl cleanup path, kfree() is given by mistake the address of the pointer of the structure to free (rapl_pmus->pmus + i). Pass the pointer instead (rapl_pmus->pmus[i]). Fixes: 9de8d686955b "perf/x86/intel/rapl: Convert it to a per package facility" Signed-off-by: Vincent Stehlé Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464101629-14905-1-git-send-email-vincent.stehle@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 99c4bab..e30eef4 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void) int i; for (i = 0; i < rapl_pmus->maxpkg; i++) - kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } -- cgit v1.1 From 720b287d8382616a29524dd98264ac1e3a069e9f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 May 2016 22:48:51 +0200 Subject: netfilter: conntrack: remove leftover binary sysctl define Users got removed in f8572d8f2a2ba ("sysctl net: Remove unused binary sysctl code"). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0f1a45b..2933db3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -489,8 +489,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { { } }; -#define NET_NF_CONNTRACK_MAX 2089 - static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", -- cgit v1.1 From dc3ee32e96d74dd6c80eed63af5065cb75899299 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 May 2016 21:18:52 -0500 Subject: netfilter: nf_queue: Make the queue_handler pernet Florian Weber reported: > Under full load (unshare() in loop -> OOM conditions) we can > get kernel panic: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > IP: [] nfqnl_nf_hook_drop+0x35/0x70 > [..] > task: ffff88012dfa3840 ti: ffff88012dffc000 task.ti: ffff88012dffc000 > RIP: 0010:[] [] nfqnl_nf_hook_drop+0x35/0x70 > RSP: 0000:ffff88012dfffd80 EFLAGS: 00010206 > RAX: 0000000000000008 RBX: ffffffff81add0c0 RCX: ffff88013fd80000 > [..] > Call Trace: > [] nf_queue_nf_hook_drop+0x18/0x20 > [] nf_unregister_net_hook+0xdb/0x150 > [] netfilter_net_exit+0x2f/0x60 > [] ops_exit_list.isra.4+0x38/0x60 > [] setup_net+0xc2/0x120 > [] copy_net_ns+0x79/0x120 > [] create_new_namespaces+0x11b/0x1e0 > [] unshare_nsproxy_namespaces+0x57/0xa0 > [] SyS_unshare+0x1b2/0x340 > [] entry_SYSCALL_64_fastpath+0x1e/0xa8 > Code: 65 00 48 89 e5 41 56 41 55 41 54 53 83 e8 01 48 8b 97 70 12 00 00 48 98 49 89 f4 4c 8b 74 c2 18 4d 8d 6e 08 49 81 c6 88 00 00 00 <49> 8b 5d 00 48 85 db 74 1a 48 89 df 4c 89 e2 48 c7 c6 90 68 47 > The simple fix for this requires a new pernet variable for struct nf_queue that indicates when it is safe to use the dynamically allocated nf_queue state. As we need a variable anyway make nf_register_queue_handler and nf_unregister_queue_handler pernet. This allows the existing logic of when it is safe to use the state from the nfnetlink_queue module to be reused with no changes except for making it per net. The syncrhonize_rcu from nf_unregister_queue_handler is moved to a new function nfnl_queue_net_exit_batch so that the worst case of having a syncrhonize_rcu in the pernet exit path is not experienced in batch mode. Reported-by: Florian Westphal Signed-off-by: "Eric W. Biederman" Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ++-- include/net/netns/netfilter.h | 2 ++ net/netfilter/nf_queue.c | 17 ++++++++--------- net/netfilter/nfnetlink_queue.c | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9c5638a..0dbce55 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -28,8 +28,8 @@ struct nf_queue_handler { struct nf_hook_ops *ops); }; -void nf_register_queue_handler(const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(void); +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 38aa498..36d7235 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -5,11 +5,13 @@ struct proc_dir_entry; struct nf_logger; +struct nf_queue_handler; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif + const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5baa8e2..b19ad20 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -26,23 +26,21 @@ * Once the queue is registered it must reinject all packets it * receives, no matter what. */ -static const struct nf_queue_handler __rcu *queue_handler __read_mostly; /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -void nf_register_queue_handler(const struct nf_queue_handler *qh) +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ - WARN_ON(rcu_access_pointer(queue_handler)); - rcu_assign_pointer(queue_handler, qh); + WARN_ON(rcu_access_pointer(net->nf.queue_handler)); + rcu_assign_pointer(net->nf.queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -void nf_unregister_queue_handler(void) +void nf_unregister_queue_handler(struct net *net) { - RCU_INIT_POINTER(queue_handler, NULL); - synchronize_rcu(); + RCU_INIT_POINTER(net->nf.queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); @@ -103,7 +101,7 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) const struct nf_queue_handler *qh; rcu_read_lock(); - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (qh) qh->nf_hook_drop(net, ops); rcu_read_unlock(); @@ -122,9 +120,10 @@ int nf_queue(struct sk_buff *skb, struct nf_queue_entry *entry = NULL; const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; + struct net *net = state->net; /* QUEUE == DROP if no one is waiting, to be safe. */ - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (!qh) { status = -ESRCH; goto err; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e34256a..309ac02 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1377,21 +1377,29 @@ static int __net_init nfnl_queue_net_init(struct net *net) net->nf.proc_netfilter, &nfqnl_file_ops)) return -ENOMEM; #endif + nf_register_queue_handler(net, &nfqh); return 0; } static void __net_exit nfnl_queue_net_exit(struct net *net) { + nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif } +static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) +{ + synchronize_rcu(); +} + static struct pernet_operations nfnl_queue_net_ops = { - .init = nfnl_queue_net_init, - .exit = nfnl_queue_net_exit, - .id = &nfnl_queue_net_id, - .size = sizeof(struct nfnl_queue_net), + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .exit_batch = nfnl_queue_net_exit_batch, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), }; static int __init nfnetlink_queue_init(void) @@ -1412,7 +1420,6 @@ static int __init nfnetlink_queue_init(void) } register_netdevice_notifier(&nfqnl_dev_notifier); - nf_register_queue_handler(&nfqh); return status; cleanup_netlink_notifier: @@ -1424,7 +1431,6 @@ out: static void __exit nfnetlink_queue_fini(void) { - nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); -- cgit v1.1 From 38272dc4f1b17437871b786d567e1242d0904f5a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 24 May 2016 09:21:27 +0000 Subject: perf symbols: Check kptr_restrict for root If kptr_restrict is set to 2, even root is not allowed to see pointers. This patch checks kptr_restrict even if euid == 0. For root, report error if kptr_restrict is 2. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464081688-167940-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 20f9cb3..54c4ff2 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1933,17 +1933,17 @@ int setup_intlist(struct intlist **list, const char *list_str, static bool symbol__read_kptr_restrict(void) { bool value = false; + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (geteuid() != 0) { - FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (fp != NULL) { - char line[8]; + if (fp != NULL) { + char line[8]; - if (fgets(line, sizeof(line), fp) != NULL) - value = atoi(line) != 0; + if (fgets(line, sizeof(line), fp) != NULL) + value = (geteuid() != 0) ? + (atoi(line) != 0) : + (atoi(line) == 2); - fclose(fp); - } + fclose(fp); } return value; -- cgit v1.1 From 3dc6c1d54ff4cc9ce7e8513c286c970304cde20b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 24 May 2016 09:21:28 +0000 Subject: perf record: Fix crash when kptr is restricted Before this patch, a simple 'perf record' could fail if kptr_restrict is set to 1 (for normal user) or 2 (for root): # perf record ls WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Segmentation fault (core dumped) This patch skips perf_event__synthesize_kernel_mmap() when kptr is not available. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Fixes: 45e90056904b ("perf machine: Do not bail out if not managing to read ref reloc symbol") Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464081688-167940-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f6fcc68..9b141f1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -673,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; + if (symbol_conf.kptr_restrict) + return -1; if (map == NULL) return -1; -- cgit v1.1 From 5ea5888b2fbf5b230da62b2a21c8247bebb6c9cf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 27 May 2016 11:35:51 +0000 Subject: perf ctf: Convert invalid chars in a string before set value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We observed some crazy apps on Android set their comm to unprintable string. For example: # cat /proc/10607/task/*/comm tencent.qqmusic ... Binder_2 日志输出线 <-- Chinese word 'log output thread' WifiManager ... 'perf data convert' fails to convert perf.data with such string to CTF format. For example: # cat << EOF > ./badguy.c #include int main(int argc, char *argv[]) { prctl(PR_SET_NAME, "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf"); while(1) sleep(1); return 0; } EOF # gcc ./badguy.c # perf record -e sched:* ./a.out # perf data convert --to-ctf ./bad.ctf CTF stream 4 flush failed [ perf data convert: Converted 'perf.data' into CTF data './bad.ctf' ] [ perf data convert: Converted and wrote 0.008 MB (78 samples) ] # babeltrace ./bad.ctf/ [error] Packet size (18446744073709551615 bits) is larger than remaining file size (262144 bits). [error] Stream index creation error. [error] Open file stream error. [warning] [Context] Cannot open_trace of format ctf at path ./bad.ctf. [warning] [Context] cannot open trace "./bad.ctf" from ./bad.ctf/ for reading. [error] Cannot open any trace for reading. [error] opening trace "./bad.ctf/" for reading. [error] none of the specified trace paths could be opened. This patch converts unprintable characters to hexadecimal word. After applying this patch the above test works correctly: # ~/perf data convert --to-ctf ./good.ctf [ perf data convert: Converted 'perf.data' into CTF data './good.ctf' ] [ perf data convert: Converted and wrote 0.008 MB (78 samples) ] # babeltrace ./good.ctf .. [23:14:35.491665268] (+0.000001100) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5123, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 } [23:14:35.491666230] (+0.000000962) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5122, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 } .. Committer note: To build perf with libabeltrace, use: $ mkdir -p /tmp/build/perf $ make LIBBABELTRACE=1 LIBBABELTRACE_DIR=/usr/local O=/tmp/build/perf -C tools/perf install-bin Or equivalent (no O=, fixup LIBBABELTRACE_DIR, etc). Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464348951-179595-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 41 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index bbf69d2..9f53020 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -204,6 +204,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si return (value_int & value_mask) | ~value_mask; } +static int string_set_value(struct bt_ctf_field *field, const char *string) +{ + char *buffer = NULL; + size_t len = strlen(string), i, p; + int err; + + for (i = p = 0; i < len; i++, p++) { + if (isprint(string[i])) { + if (!buffer) + continue; + buffer[p] = string[i]; + } else { + char numstr[5]; + + snprintf(numstr, sizeof(numstr), "\\x%02x", + (unsigned int)(string[i]) & 0xff); + + if (!buffer) { + buffer = zalloc(i + (len - i) * 4 + 2); + if (!buffer) { + pr_err("failed to set unprintable string '%s'\n", string); + return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING"); + } + if (i > 0) + strncpy(buffer, string, i); + } + strncat(buffer + p, numstr, 4); + p += 3; + } + } + + if (!buffer) + return bt_ctf_field_string_set_value(field, string); + err = bt_ctf_field_string_set_value(field, buffer); + free(buffer); + return err; +} + static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, @@ -270,8 +308,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } if (flags & FIELD_IS_STRING) - ret = bt_ctf_field_string_set_value(field, - data + offset + i * len); + ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; -- cgit v1.1 From 94abd778a7bb00ed5d00f56d9fbfcbf5b7c02a5c Mon Sep 17 00:00:00 2001 From: Jaap Jan Meijer Date: Thu, 12 May 2016 18:25:08 +0200 Subject: brcmfmac: add fallback for devices that do not report per-chain values If brcmf_cfg80211_get_station fails to determine the RSSI from the per-chain values get the value individually as a fallback. Fixes: 1f0dc59a6de9 ("brcmfmac: rework .get_station() callback") Signed-off-by: Jaap Jan Meijer Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index d0631b6..62f475e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2540,12 +2540,14 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, const u8 *mac, struct station_info *sinfo) { struct brcmf_if *ifp = netdev_priv(ndev); + struct brcmf_scb_val_le scb_val; s32 err = 0; struct brcmf_sta_info_le sta_info_le; u32 sta_flags; u32 is_tdls_peer; s32 total_rssi; s32 count_rssi; + int rssi; u32 i; brcmf_dbg(TRACE, "Enter, MAC %pM\n", mac); @@ -2629,6 +2631,20 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); total_rssi /= count_rssi; sinfo->signal = total_rssi; + } else if (test_bit(BRCMF_VIF_STATUS_CONNECTED, + &ifp->vif->sme_state)) { + memset(&scb_val, 0, sizeof(scb_val)); + err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_RSSI, + &scb_val, sizeof(scb_val)); + if (err) { + brcmf_err("Could not get rssi (%d)\n", err); + goto done; + } else { + rssi = le32_to_cpu(scb_val.val); + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); + sinfo->signal = rssi; + brcmf_dbg(CONN, "RSSI %d dBm\n", rssi); + } } } done: -- cgit v1.1 From de26859dcf363d520cc44e59f6dcaf20ebe0aadf Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 21 May 2016 11:50:35 -0500 Subject: rtlwifi: Fix scheduling while atomic error from commit 49f86ec21c01 Commit 49f86ec21c01 ("rtlwifi: Change long delays to sleeps") was correct for most cases; however, driver rtl8192ce calls the affected routines while in atomic context. The kernel bug output is as follows: BUG: scheduling while atomic: wpa_supplicant/627/0x00000002 [...] [] __schedule+0x899/0xad0 [] schedule+0x3c/0x90 [] schedule_hrtimeout_range_clock+0xa2/0x120 [] ? hrtimer_init+0x120/0x120 [] ? schedule_hrtimeout_range_clock+0x96/0x120 [] schedule_hrtimeout_range+0x13/0x20 [] usleep_range+0x4f/0x70 [] rtl_rfreg_delay+0x38/0x50 [rtlwifi] [] rtl92c_phy_config_rf_with_headerfile+0xc7/0xe0 [rtl8192ce] To fix this bug, three of the changes from delay to sleep are reverted. Unfortunately, one of the changes involves a delay of 50 msec. The calling code will be modified so that this long delay can be avoided; however, this change is being pushed now to fix the problem in kernel 4.6.0. Fixes: 49f86ec21c01 ("rtlwifi: Change long delays to sleeps") Reported-by: James Feeney Signed-off-by: Larry Finger Cc: James Feeney Cc: Stable [4.6+] Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 0f48048..3a0faa8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL(channel5g_80m); void rtl_addr_delay(u32 addr) { if (addr == 0xfe) - msleep(50); + mdelay(50); else if (addr == 0xfd) msleep(5); else if (addr == 0xfc) @@ -75,7 +75,7 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr, rtl_addr_delay(addr); } else { rtl_set_rfreg(hw, rfpath, addr, mask, data); - usleep_range(1, 2); + udelay(1); } } EXPORT_SYMBOL(rtl_rfreg_delay); @@ -86,7 +86,7 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data) rtl_addr_delay(addr); } else { rtl_set_bbreg(hw, addr, MASKDWORD, data); - usleep_range(1, 2); + udelay(1); } } EXPORT_SYMBOL(rtl_bb_delay); -- cgit v1.1 From 2547476a5e4061f6addb88d5fc837d3a950f54c4 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:45:35 +0200 Subject: Fix typos Signed-off-by: Andrea Gelmini Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- arch/arc/include/asm/entry-compact.h | 4 ++-- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- arch/arc/include/asm/processor.h | 2 +- arch/arc/include/asm/smp.h | 2 +- arch/arc/include/asm/thread_info.h | 2 +- arch/arc/include/asm/uaccess.h | 2 +- arch/arc/include/uapi/asm/swab.h | 2 +- arch/arc/kernel/perf_event.c | 2 +- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/signal.c | 2 +- arch/arc/kernel/troubleshoot.c | 2 +- arch/arc/mm/cache.c | 6 +++--- arch/arc/mm/dma.c | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 02fabef..d4df6be 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -127,7 +127,7 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguements. +#default target for make without any arguments. KBUILD_IMAGE := bootpImage all: $(KBUILD_IMAGE) diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index e0e1faf0..14c310f 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -76,8 +76,8 @@ * We need to be a bit more cautious here. What if a kernel bug in * L1 ISR, caused SP to go whaco (some small value which looks like * USER stk) and then we take L2 ISR. - * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound + * Above brlo alone would treat it as a valid L1-L2 scenario + * instead of shouting around * The only feasible way is to make sure this L2 happened in * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in * L1 ISR before it switches stack diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 1fd467e..b0b87f2 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -83,7 +83,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) local_flush_tlb_all(); /* - * Above checke for rollover of 8 bit ASID in 32 bit container. + * Above check for rollover of 8 bit ASID in 32 bit container. * If the container itself wrapped around, set it to a non zero * "generation" to distinguish from no context */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 034bbdc..858f98e 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -47,7 +47,7 @@ * Page Tables are purely for Linux VM's consumption and the bits below are * suited to that (uniqueness). Hence some are not implemented in the TLB and * some have different value in TLB. - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible becoz they live in + * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in * seperate PD0 and PD1, which combined forms a translation entry) * while for PTE perspective, they are 8 and 9 respectively * with MMU v3: Most bits (except SHARED) represent the exact hardware pos diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index f9048994..16b630f 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -78,7 +78,7 @@ struct task_struct; #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) /* - * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode. + * Where about of Task's sp, fp, blink when it was last seen in kernel mode. * Look in process.c for details of kernel stack layout */ #define TSK_K_ESP(tsk) (tsk->thread.ksp) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 9913804..89fdd1b 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) * (1) These insn were introduced only in 4.10 release. So for older released * support needed. * - * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be + * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be * gaurantted by the platform (not something which core handles). * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ * disabling for atomicity. diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 3af6745..2d79e52 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -103,7 +103,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) /* * _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it. - * SYSCALL_TRACE is anways seperately/unconditionally tested right after a + * SYSCALL_TRACE is anyway seperately/unconditionally tested right after a * syscall, so all that reamins to be tested is _TIF_WORK_MASK */ diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index d1da603..a78d567 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) /* - * Algorthmically, for __user_ok() we want do: + * Algorithmically, for __user_ok() we want do: * (start < TASK_SIZE) && (start+len < TASK_SIZE) * where TASK_SIZE could either be retrieved from thread_info->addr_limit or * emitted directly in code. diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h index 095599a..71f3918 100644 --- a/arch/arc/include/uapi/asm/swab.h +++ b/arch/arc/include/uapi/asm/swab.h @@ -74,7 +74,7 @@ __tmp ^ __in; \ }) -#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bwap instruction */ +#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bswap instruction */ #define __arch_swab32(x) \ ({ \ diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 6fd4802..08f03d9 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -108,7 +108,7 @@ static void arc_perf_event_update(struct perf_event *event, int64_t delta = new_raw_count - prev_raw_count; /* - * We don't afaraid of hwc->prev_count changing beneath our feet + * We aren't afraid of hwc->prev_count changing beneath our feet * because there's no way for us to re-enter this function anytime. */ local64_set(&hwc->prev_count, new_raw_count); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index f63b8bf..2ee7a4d 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -392,7 +392,7 @@ void __init setup_arch(char **cmdline_p) /* * If we are here, it is established that @uboot_arg didn't * point to DT blob. Instead if u-boot says it is cmdline, - * Appent to embedded DT cmdline. + * append to embedded DT cmdline. * setup_machine_fdt() would have populated @boot_command_line */ if (uboot_tag == 1) { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 004b7f0..6cb3736 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -34,7 +34,7 @@ * -ViXS were still seeing crashes when using insmod to load drivers. * It turned out that the code to change Execute permssions for TLB entries * of user was not guarded for interrupts (mod_tlb_permission) - * This was cauing TLB entries to be overwritten on unrelated indexes + * This was causing TLB entries to be overwritten on unrelated indexes * * Vineetg: July 15th 2008: Bug #94183 * -Exception happens in Delay slot of a JMP, and before user space resumes, diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index a6f91e8..934150e 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -276,7 +276,7 @@ static int tlb_stats_open(struct inode *inode, struct file *file) return 0; } -/* called on user read(): display the couters */ +/* called on user read(): display the counters */ static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ char __user *user_buf, /* user buffer */ size_t len, /* length of buffer */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9e5eddb..5a294b2 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -215,7 +215,7 @@ slc_chk: * ------------------ * This ver of MMU supports variable page sizes (1k-16k): although Linux will * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing + * However from hardware perspective, smaller page sizes aggravate aliasing * meaning more vaddr bits needed to disambiguate the cache-line-op ; * the existing scheme of piggybacking won't work for certain configurations. * Two new registers IC_PTAG and DC_PTAG inttoduced. @@ -302,7 +302,7 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, /* * This is technically for MMU v4, using the MMU v3 programming model - * Special work for HS38 aliasing I-cache configuratino with PAE40 + * Special work for HS38 aliasing I-cache configuration with PAE40 * - upper 8 bits of paddr need to be written into PTAG_HI * - (and needs to be written before the lower 32 bits) * Note that PTAG_HI is hoisted outside the line loop @@ -936,7 +936,7 @@ void arc_cache_init(void) ic->ver, CONFIG_ARC_MMU_VER); /* - * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG * pair to provide vaddr/paddr respectively, just as in MMU v3 */ if (is_isa_arcv2() && ic->alias) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c8e36f..73d7e4c 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -10,7 +10,7 @@ * DMA Coherent API Notes * * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessintg it using a kernel virtual address, with + * implemented by accessing it using a kernel virtual address, with * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. -- cgit v1.1 From 49acadff2a0cb4f7ff4efe0fb6c23f5fad81a3b3 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 25 May 2016 15:11:29 +0300 Subject: arc: Get rid of root core-frequency property Now when we switched to usage of real clk devices for CPU core frequency those root properties make no sense any longer. Se we're just getting rid of them here to not confuse readers of our .dts files. Signed-off-by: Alexey Brodkin Cc: Christian Ruppert Cc: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/abilis_tb100.dtsi | 2 -- arch/arc/boot/dts/abilis_tb101.dtsi | 2 -- arch/arc/boot/dts/axc001.dtsi | 1 - arch/arc/boot/dts/axc003.dtsi | 1 - arch/arc/boot/dts/axc003_idu.dtsi | 1 - arch/arc/boot/dts/eznps.dts | 1 - arch/arc/boot/dts/nsim_700.dts | 1 - arch/arc/boot/dts/nsimosci.dts | 1 - arch/arc/boot/dts/nsimosci_hs.dts | 1 - arch/arc/boot/dts/nsimosci_hs_idu.dts | 1 - arch/arc/boot/dts/skeleton.dtsi | 1 - arch/arc/boot/dts/skeleton_hs.dtsi | 1 - arch/arc/boot/dts/skeleton_hs_idu.dtsi | 1 - arch/arc/boot/dts/vdk_axc003.dtsi | 1 - arch/arc/boot/dts/vdk_axc003_idu.dtsi | 1 - 15 files changed, 17 deletions(-) diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi index 3942634..02410b2 100644 --- a/arch/arc/boot/dts/abilis_tb100.dtsi +++ b/arch/arc/boot/dts/abilis_tb100.dtsi @@ -23,8 +23,6 @@ / { - clock-frequency = <500000000>; /* 500 MHZ */ - soc100 { bus-frequency = <166666666>; diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi index b046722..f9e7686 100644 --- a/arch/arc/boot/dts/abilis_tb101.dtsi +++ b/arch/arc/boot/dts/abilis_tb101.dtsi @@ -23,8 +23,6 @@ / { - clock-frequency = <500000000>; /* 500 MHZ */ - soc100 { bus-frequency = <166666666>; diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 3e02f15..6ae2c47 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -15,7 +15,6 @@ / { compatible = "snps,arc"; - clock-frequency = <750000000>; /* 750 MHZ */ #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 378e455..14df46f 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 64c94b2..3d6cfa3 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/eznps.dts b/arch/arc/boot/dts/eznps.dts index b89f6c3..1e0d225 100644 --- a/arch/arc/boot/dts/eznps.dts +++ b/arch/arc/boot/dts/eznps.dts @@ -18,7 +18,6 @@ / { compatible = "ezchip,arc-nps"; - clock-frequency = <83333333>; /* 83.333333 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&intc>; diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 5d5e373..6397051 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsim"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index b5b060a..763d66c 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci"; - clock-frequency = <20000000>; /* 20 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 325e730..4eb97c5 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci_hs"; - clock-frequency = <20000000>; /* 20 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index ee03d71..853f897 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -11,7 +11,6 @@ / { compatible = "snps,nsimosci_hs"; - clock-frequency = <5000000>; /* 5 MHZ */ #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&core_intc>; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index 3a10cc6..65808fe 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -13,7 +13,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi index 71fd308..2dfe803 100644 --- a/arch/arc/boot/dts/skeleton_hs.dtsi +++ b/arch/arc/boot/dts/skeleton_hs.dtsi @@ -8,7 +8,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index d1cb25a..4c11079 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -8,7 +8,6 @@ / { compatible = "snps,arc"; - clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; chosen { }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index ad4ee43..0fd6ba9 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -14,7 +14,6 @@ / { compatible = "snps,arc"; - clock-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index a3cb626..82214cd 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -15,7 +15,6 @@ / { compatible = "snps,arc"; - clock-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; -- cgit v1.1 From b7a8daa9f3d1688e994f5557577d3252c94ec282 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 14 May 2016 22:19:53 +0900 Subject: netfilter: nf_ct_helper: Fix helper unregister count. helpers should unregister the only registered ports. but, helper cannot have correct registered ports value when failed to register. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 1 + net/netfilter/nf_conntrack_irc.c | 1 + net/netfilter/nf_conntrack_sane.c | 1 + net/netfilter/nf_conntrack_sip.c | 1 + net/netfilter/nf_conntrack_tftp.c | 1 + 5 files changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 883c691..19efeba 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -632,6 +632,7 @@ static int __init nf_conntrack_ftp_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_ftp_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8b6da27..f97ac61 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -271,6 +271,7 @@ static int __init nf_conntrack_irc_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_irc_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 7523a57..3fcbaab 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -223,6 +223,7 @@ static int __init nf_conntrack_sane_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sane_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3e06402..f72ba55 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1669,6 +1669,7 @@ static int __init nf_conntrack_sip_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sip_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 36f9640..2e65b543 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -142,6 +142,7 @@ static int __init nf_conntrack_tftp_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", tftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_tftp_fini(); return ret; } -- cgit v1.1 From 83170f3beccccd7ceb4f9a0ac0c4dc736afde90c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 26 May 2016 19:08:10 +0200 Subject: netfilter: nf_dup_ipv6: set again FLOWI_FLAG_KNOWN_NH at flowi6_flags With the commit 48e8aa6e3137 ("ipv6: Set FLOWI_FLAG_KNOWN_NH at flowi6_flags") ip6_pol_route() callers were asked to to set the FLOWI_FLAG_KNOWN_NH properly and xt_TEE was updated accordingly, but with the later refactor in commit bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") the flowi6_flags update was lost. This commit re-add it just before the routing decision. Fixes: bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_dup_ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 6989c70..4a84b5a 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, fl6.daddr = *gw; fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]); + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); -- cgit v1.1 From eaa2bcd6d1d410a52df8c7b05e76d86c0319b7b0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 27 May 2016 13:34:04 -0400 Subject: netfilter: nf_tables: validate NFTA_SET_TABLE parameter If the NFTA_SET_TABLE parameter is missing and the NLM_F_DUMP flag is not set, then a NULL pointer dereference is triggered in nf_tables_set_lookup because ctx.table is NULL. Signed-off-by: Phil Turnbull Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2011977..6947e25 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2641,6 +2641,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, /* Only accept unspec with dump */ if (nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; + if (!nla[NFTA_SET_TABLE]) + return -EINVAL; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) -- cgit v1.1 From e69e7e03ed225abf3e1c43545aa3bcb68dc81d5f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 May 2016 15:58:28 +0800 Subject: ALSA: hda/realtek - ALC256 speaker noise issue That is some different register for ALC255 and ALC256. ALC256 can't fit with some ALC255 register. This issue is cause from LDO output voltage control. This patch is updated the right LDO register value. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 52 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d53c25e..daf4f64f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3618,13 +3618,20 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, static void alc_headset_mode_unplugged(struct hda_codec *codec) { static struct coef_fw coef0255[] = { - WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */ UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/ WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */ WRITE_COEFEX(0x57, 0x03, 0x8aa6), /* Direct Drive HP Amp control */ {} }; + static struct coef_fw coef0255_1[] = { + WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ + {} + }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x1b, 0x0c4b), /* LDO and MISC control */ + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x1b, 0x0c0b), WRITE_COEF(0x45, 0xc429), @@ -3677,7 +3684,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: + alc_process_coef_fw(codec, coef0255_1); + alc_process_coef_fw(codec, coef0255); + break; case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3896,6 +3907,12 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */ + WRITE_COEF(0x1b, 0x0c6b), + WRITE_COEFEX(0x57, 0x03, 0x8ea6), + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xd429), WRITE_COEF(0x1b, 0x0c2b), @@ -3936,9 +3953,11 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: - case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); + break; case 0x10ec0233: case 0x10ec0283: alc_process_coef_fw(codec, coef0233); @@ -3978,6 +3997,12 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; + static struct coef_fw coef0256[] = { + WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */ + WRITE_COEF(0x1b, 0x0c6b), + WRITE_COEFEX(0x57, 0x03, 0x8ea6), + {} + }; static struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xe429), WRITE_COEF(0x1b, 0x0c2b), @@ -4018,9 +4043,11 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0255: - case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0256: + alc_process_coef_fw(codec, coef0256); + break; case 0x10ec0233: case 0x10ec0283: alc_process_coef_fw(codec, coef0233); @@ -4266,7 +4293,7 @@ static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, static void alc255_set_default_jack_type(struct hda_codec *codec) { /* Set to iphone type */ - static struct coef_fw fw[] = { + static struct coef_fw alc255fw[] = { WRITE_COEF(0x1b, 0x880b), WRITE_COEF(0x45, 0xd089), WRITE_COEF(0x1b, 0x080b), @@ -4274,7 +4301,22 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) WRITE_COEF(0x1b, 0x0c0b), {} }; - alc_process_coef_fw(codec, fw); + static struct coef_fw alc256fw[] = { + WRITE_COEF(0x1b, 0x884b), + WRITE_COEF(0x45, 0xd089), + WRITE_COEF(0x1b, 0x084b), + WRITE_COEF(0x46, 0x0004), + WRITE_COEF(0x1b, 0x0c4b), + {} + }; + switch (codec->core.vendor_id) { + case 0x10ec0255: + alc_process_coef_fw(codec, alc255fw); + break; + case 0x10ec0256: + alc_process_coef_fw(codec, alc256fw); + break; + } msleep(30); } -- cgit v1.1 From 6fbae35a3170c3e2b1b9d7b9cc943cbe48771362 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 May 2016 16:44:20 +0800 Subject: ALSA: hda/realtek - Add support for new codecs ALC700/ALC701/ALC703 Support new codecs for ALC700/ALC701/ALC703. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index daf4f64f..7960316 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -346,6 +346,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0234: case 0x10ec0274: case 0x10ec0294: + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: alc_update_coef_idx(codec, 0x10, 1<<15, 0); break; case 0x10ec0662: @@ -2655,6 +2658,7 @@ enum { ALC269_TYPE_ALC256, ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, + ALC269_TYPE_ALC700, }; /* @@ -2686,6 +2690,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC256: case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: + case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; default: @@ -6095,6 +6100,14 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0294: spec->codec_variant = ALC269_TYPE_ALC294; break; + case 0x10ec0700: + case 0x10ec0701: + case 0x10ec0703: + spec->codec_variant = ALC269_TYPE_ALC700; + spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ + alc_update_coef_idx(codec, 0x4a, 0, 1 << 15); /* Combo jack auto trigger control */ + break; + } if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) { @@ -7050,6 +7063,9 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0670, "ALC670", patch_alc662), HDA_CODEC_ENTRY(0x10ec0671, "ALC671", patch_alc662), HDA_CODEC_ENTRY(0x10ec0680, "ALC680", patch_alc680), + HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269), HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc882), HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880), HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882), -- cgit v1.1 From 60f2b4b8af548150cc56bf6fd213e47897964794 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 30 May 2016 19:21:22 +0530 Subject: ARC: [intc-compact] simplify code for 2 priority levels ARC700 support for 2 interrupt priorities historically allowed even slow perpherals such as emac and uart to setup high priority interrupts which was wrong from the beginning as they could possibly delay the more critical timer interrupt. The hardware support for 2 level interrupts in ARCompact is less than ideal anyways (judging from the "hacks" in low level entry code and thus is not used in productions systems I know of. So reduce the scope of this to timer only, thereby reducing a bunch of complexity. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 22 ++-------------------- arch/arc/kernel/entry-compact.S | 18 ++---------------- arch/arc/kernel/intc-compact.c | 6 ++---- 3 files changed, 6 insertions(+), 40 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 0dcbacf..b14826a 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -186,9 +186,6 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_HAS_REENTRANT_IRQ_LV2 - def_bool n - config ARC_MCIP bool "ARConnect Multicore IP (MCIP) Support " depends on ISA_ARCV2 @@ -366,25 +363,10 @@ config NODES_SHIFT if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS - bool "ARCompact IRQ Priorities: High(2)/Low(1)" + bool "Setup Timer IRQ as high Priority" default n - # Timer HAS to be high priority, for any other high priority config - select ARC_IRQ3_LV2 # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy - depends on !SMP || ARC_HAS_REENTRANT_IRQ_LV2 - -if ARC_COMPACT_IRQ_LEVELS - -config ARC_IRQ3_LV2 - bool - -config ARC_IRQ5_LV2 - bool - -config ARC_IRQ6_LV2 - bool - -endif #ARC_COMPACT_IRQ_LEVELS + depends on !SMP config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 0cb0aba..98812c1 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -91,27 +91,13 @@ VECTOR mem_service ; 0x8, Mem exception (0x1) VECTOR instr_service ; 0x10, Instrn Error (0x2) ; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_IRQ3_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -VECTOR handle_interrupt_level1 - -#ifdef CONFIG_ARC_IRQ5_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -#ifdef CONFIG_ARC_IRQ6_LV2 +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS VECTOR handle_interrupt_level2 #else VECTOR handle_interrupt_level1 #endif -.rept 25 +.rept 28 VECTOR handle_interrupt_level1 ; Other devices .endr diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index c5cceca..ce9deb9 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -28,10 +28,8 @@ void arc_init_IRQ(void) { int level_mask = 0; - /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; + /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ + level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; /* * Write to register, even if no LV2 IRQs configured to reset it -- cgit v1.1 From d140b9bfcad9e53f1da67ad09dd5092b44d55c7b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 11:46:47 +0530 Subject: ARC: don't enable DISCONTIGMEM unconditionally Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b14826a..be9d0b5 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -61,7 +61,7 @@ config RWSEM_GENERIC_SPINLOCK def_bool y config ARCH_DISCONTIGMEM_ENABLE - def_bool y + def_bool n config ARCH_FLATMEM_ENABLE def_bool y @@ -453,7 +453,7 @@ config LINUX_LINK_BASE config HIGHMEM bool "High Memory Support" - select DISCONTIGMEM + select ARCH_DISCONTIGMEM_ENABLE help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE -- cgit v1.1 From 893e093c786c4256d52809eed697e9d70a6f6643 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 May 2016 11:23:51 +0200 Subject: netfilter: nf_ct_helper: bail out on duplicated helpers Don't allow registration of helpers using the same tuple: { l3proto, l4proto, src-port } We lookup for the helper from the packet path using this tuple through __nf_ct_helper_find(). Therefore, we have to avoid having two helpers with the same tuple to ensure predictible behaviour. Don't compare the helper string names anymore since it is valid to register two helpers with the same name, but using different tuples. This is also implicitly fixing up duplicated helper registration via ports= modparam since the name comparison was defeating the tuple duplication validation. Reported-by: Feng Gao Reported-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3b40ec5..48de9be 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -361,9 +361,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { - int ret = 0; - struct nf_conntrack_helper *cur; + struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); + struct nf_conntrack_helper *cur; + int ret = 0; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); @@ -371,9 +372,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) mutex_lock(&nf_ct_helper_mutex); hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { - if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && - cur->tuple.src.l3num == me->tuple.src.l3num && - cur->tuple.dst.protonum == me->tuple.dst.protonum) { + if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EEXIST; goto out; } -- cgit v1.1 From 62397da50bb20a6b812c949ef465d7e69fe54bb6 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Fri, 13 May 2016 12:41:48 +0200 Subject: mac80211_hwsim: Add missing check for HWSIM_ATTR_SIGNAL A wmediumd that does not send this attribute causes a NULL pointer dereference, as the attribute is accessed even if it does not exist. The attribute was required but never checked ever since userspace frame forwarding has been introduced. The issue gets more problematic once we allow wmediumd registration from user namespaces. Cc: stable@vger.kernel.org Fixes: 7882513bacb1 ("mac80211_hwsim driver support userspace frame tx/rx") Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 9ed0ed1..4dd5adc 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2776,6 +2776,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || !info->attrs[HWSIM_ATTR_FLAGS] || !info->attrs[HWSIM_ATTR_COOKIE] || + !info->attrs[HWSIM_ATTR_SIGNAL] || !info->attrs[HWSIM_ATTR_TX_INFO]) goto out; -- cgit v1.1 From fe7a7c57629e8dcbc0e297363a9b2366d67a6dc5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 15 May 2016 13:19:16 -0400 Subject: mac80211: mesh: flush mesh paths unconditionally Currently, the mesh paths associated with a nexthop station are cleaned up in the following code path: __sta_info_destroy_part1 synchronize_net() __sta_info_destroy_part2 -> cleanup_single_sta -> mesh_sta_cleanup -> mesh_plink_deactivate -> mesh_path_flush_by_nexthop However, there are a couple of problems here: 1) the paths aren't flushed at all if the MPM is running in userspace (e.g. when using wpa_supplicant or authsae) 2) there is no synchronize_rcu between removing the path and readers accessing the nexthop, which means the following race is possible: CPU0 CPU1 ~~~~ ~~~~ sta_info_destroy_part1() synchronize_net() rcu_read_lock() mesh_nexthop_resolve() mpath = mesh_path_lookup() [...] -> mesh_path_flush_by_nexthop() sta = rcu_dereference( mpath->next_hop) kfree(sta) access sta <-- CRASH Fix both of these by unconditionally flushing paths before destroying the sta, and by adding a synchronize_net() after path flush to ensure no active readers can still dereference the sta. Fixes this crash: [ 348.529295] BUG: unable to handle kernel paging request at 00020040 [ 348.530014] IP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] *pde = 00000000 [ 348.530014] Oops: 0000 [#1] PREEMPT [ 348.530014] Modules linked in: drbg ansi_cprng ctr ccm ppp_generic slhc ipt_MASQUERADE nf_nat_masquerade_ipv4 8021q ] [ 348.530014] CPU: 0 PID: 20597 Comm: wget Tainted: G O 4.6.0-rc5-wt=V1 #1 [ 348.530014] Hardware name: To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080016 11/07/2014 [ 348.530014] task: f64fa280 ti: f4f9c000 task.ti: f4f9c000 [ 348.530014] EIP: 0060:[] EFLAGS: 00010246 CPU: 0 [ 348.530014] EIP is at ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] EAX: f4ce63e0 EBX: 00000088 ECX: f3788416 EDX: 00020008 [ 348.530014] ESI: 00000000 EDI: 00000088 EBP: f6409a4c ESP: f6409a40 [ 348.530014] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 348.530014] CR0: 80050033 CR2: 00020040 CR3: 33190000 CR4: 00000690 [ 348.530014] Stack: [ 348.530014] 00000000 f4ce63e0 f5f9bd80 f6409a64 f9291d80 0000ce67 f5d51e00 f4ce63e0 [ 348.530014] f3788416 f6409a80 f9291dc1 f4ce8320 f4ce63e0 f5d51e00 f4ce63e0 f4ce8320 [ 348.530014] f6409a98 f9277f6f 00000000 00000000 0000007c 00000000 f6409b2c f9278dd1 [ 348.530014] Call Trace: [ 348.530014] [] mesh_nexthop_lookup+0xbb/0xc8 [mac80211] [ 348.530014] [] mesh_nexthop_resolve+0x34/0xd8 [mac80211] [ 348.530014] [] ieee80211_xmit+0x92/0xc1 [mac80211] [ 348.530014] [] __ieee80211_subif_start_xmit+0x807/0x83c [mac80211] [ 348.530014] [] ? sch_direct_xmit+0xd7/0x1b3 [ 348.530014] [] ? __local_bh_enable_ip+0x5d/0x7b [ 348.530014] [] ? nf_nat_ipv4_out+0x4c/0xd0 [nf_nat_ipv4] [ 348.530014] [] ? iptable_nat_ipv4_fn+0xf/0xf [iptable_nat] [ 348.530014] [] ? netif_skb_features+0x14d/0x30a [ 348.530014] [] ieee80211_subif_start_xmit+0xa/0xe [mac80211] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] batadv_send_skb_packet+0xd6/0xec [batman_adv] [ 348.530014] [] batadv_send_unicast_skb+0x15/0x4a [batman_adv] [ 348.530014] [] batadv_dat_send_data+0x27e/0x310 [batman_adv] [ 348.530014] [] ? batadv_tt_global_hash_find.isra.11+0x8/0xa [batman_adv] [ 348.530014] [] batadv_dat_snoop_outgoing_arp_request+0x208/0x23d [batman_adv] [ 348.530014] [] batadv_interface_tx+0x206/0x385 [batman_adv] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] ? igb_xmit_frame+0x57/0x72 [igb] [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] br_dev_queue_push_xmit+0xeb/0xfb [bridge] [ 348.530014] [] br_forward_finish+0x29/0x74 [bridge] [ 348.530014] [] ? deliver_clone+0x3b/0x3b [bridge] [ 348.530014] [] __br_forward+0x89/0xe7 [bridge] [ 348.530014] [] ? br_dev_queue_push_xmit+0xfb/0xfb [bridge] [ 348.530014] [] deliver_clone+0x34/0x3b [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_flood+0x77/0x95 [bridge] [ 348.530014] [] br_flood_forward+0x13/0x1a [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_handle_frame_finish+0x392/0x3db [bridge] [ 348.530014] [] ? nf_iterate+0x2b/0x6b [ 348.530014] [] br_handle_frame+0x1e6/0x240 [bridge] [ 348.530014] [] ? br_handle_local_finish+0x6a/0x6a [bridge] [ 348.530014] [] __netif_receive_skb_core+0x43a/0x66b [ 348.530014] [] ? br_handle_frame_finish+0x3db/0x3db [bridge] [ 348.530014] [] ? resched_curr+0x19/0x37 [ 348.530014] [] ? check_preempt_wakeup+0xbf/0xfe [ 348.530014] [] ? ktime_get_with_offset+0x5c/0xfc [ 348.530014] [] __netif_receive_skb+0x47/0x55 [ 348.530014] [] netif_receive_skb_internal+0x40/0x5a [ 348.530014] [] napi_gro_receive+0x3a/0x94 [ 348.530014] [] igb_poll+0x6fd/0x9ad [igb] [ 348.530014] [] ? swake_up_locked+0x14/0x26 [ 348.530014] [] net_rx_action+0xde/0x250 [ 348.530014] [] __do_softirq+0x8a/0x163 [ 348.530014] [] ? __hrtimer_tasklet_trampoline+0x19/0x19 [ 348.530014] [] do_softirq_own_stack+0x26/0x2c [ 348.530014] [ 348.530014] [] irq_exit+0x31/0x6f [ 348.530014] [] do_IRQ+0x8d/0xa0 [ 348.530014] [] common_interrupt+0x2c/0x40 [ 348.530014] Code: e7 8c 00 66 81 ff 88 00 75 12 85 d2 75 0e b2 c3 b8 83 e9 29 f9 e8 a7 5f f9 c6 eb 74 66 81 e3 8c 005 [ 348.530014] EIP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] SS:ESP 0068:f6409a40 [ 348.530014] CR2: 0000000000020040 [ 348.530014] ---[ end trace 48556ac26779732e ]--- [ 348.530014] Kernel panic - not syncing: Fatal exception in interrupt [ 348.530014] Kernel Offset: disabled Cc: stable@vger.kernel.org Reported-by: Fred Veldini Tested-by: Fred Veldini Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4c6404e..21b1fdf 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->mesh->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } -- cgit v1.1 From 6fe04128f158c5ad27e7504bfdf1b12e63331bc9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 May 2016 17:34:38 +0200 Subject: mac80211: fix fast_tx header alignment The header field is defined as u8[] but also accessed as struct ieee80211_hdr. Enforce an alignment of 2 to prevent unnecessary unaligned accesses, which can be very harmful for performance on many platforms. Fixes: e495c24731a2 ("mac80211: extend fast-xmit for more ciphers") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c8b8ccc..78b0ef3 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -280,7 +280,7 @@ struct ieee80211_fast_tx { u8 sa_offs, da_offs, pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + - sizeof(rfc1042_header)]; + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; -- cgit v1.1 From 0358ccc8ffd8d9b76992b8deab58fb9a721fb18a Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Tue, 31 May 2016 09:12:04 -0400 Subject: ALSA: uapi: Add three missing header files to Kbuild file include/uapi/sound/Kbuild was missing the inclusion of three header files in that directory. Signed-off-by: Robert P. J. Day Signed-off-by: Takashi Iwai --- include/uapi/sound/Kbuild | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild index a7f2770..691984c 100644 --- a/include/uapi/sound/Kbuild +++ b/include/uapi/sound/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list header-y += asequencer.h +header-y += asoc.h header-y += asound.h header-y += asound_fm.h header-y += compress_offload.h @@ -10,3 +11,5 @@ header-y += hdsp.h header-y += hdspm.h header-y += sb16_csp.h header-y += sfnt_info.h +header-y += tlv.h +header-y += usb_stream.h -- cgit v1.1 From 792293cfd516a173bbd687b4b26da0f97f97abd2 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 6 May 2016 19:26:05 +0200 Subject: drm/vc4: Fix get_vblank_counter with proper no-op for Linux 4.4+ get_vblank_counter hooked up to drm_vblank_count() which alway was non-sensical but didn't hurt in the past. Since Linux 4.4 it triggers a WARN_ON_ONCE in drm_update_vblank_count on first vblank irq disable, so fix it by hooking to drm_vblank_no_hw_counter(). Tested against Raspian kernel 4.4.8 tree on RPi 2B. Signed-off-by: Mario Kleiner Reviewed-by: Eric Anholt Fixes: c8b75bca92cb ("drm/vc4: Add KMS support for Raspberry Pi.") --- drivers/gpu/drm/vc4/vc4_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 3446ece..ef7de8e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -91,7 +91,7 @@ static struct drm_driver vc4_drm_driver = { .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, - .get_vblank_counter = drm_vblank_count, + .get_vblank_counter = drm_vblank_no_hw_counter, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, -- cgit v1.1 From ee7c10e10b632e2319ed1d2d49d63df51a611e62 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 6 May 2016 19:26:06 +0200 Subject: drm/vc4: Fix drm_vblank_put/get imbalance in page flip path. The async page flip path was missing drm_crtc_vblank_get/put completely. The sync flip path was missing a vblank put, so async flips only reported proper pageflip completion events by chance, and vblank irq's never turned off after a first vsync'ed page flip until system reboot. Tested against Raspian kernel 4.4.8 tree on RPi 2B. Signed-off-by: Mario Kleiner Reviewed-by: Eric Anholt Fixes: b501bacc6060 ("drm/vc4: Add support for async pageflips.") --- drivers/gpu/drm/vc4/vc4_crtc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 904d075..e9befb6 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -506,6 +506,7 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) if (vc4_crtc->event) { drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; + drm_crtc_vblank_put(crtc); } spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -556,6 +557,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) spin_unlock_irqrestore(&dev->event_lock, flags); } + drm_crtc_vblank_put(crtc); drm_framebuffer_unreference(flip_state->fb); kfree(flip_state); @@ -598,6 +600,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, return ret; } + WARN_ON(drm_crtc_vblank_get(crtc) != 0); + /* Immediately update the plane's legacy fb pointer, so that later * modeset prep sees the state that will be present when the semaphore * is released. -- cgit v1.1 From e7c31f6f25b84fed961dc0dce6248878527693ae Mon Sep 17 00:00:00 2001 From: Robert Foss Date: Tue, 3 May 2016 13:48:20 -0400 Subject: drm/vc4: Return -EBUSY if there's already a pending flip event. As per the documentation in drm_crtc.h, atomic_commit should return -EBUSY if an asynchronous update is requested and there is an earlier update pending. v2: Rebase on the s/async/nonblock/ change. Signed-off-by: Robert Foss Reviewed-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_kms.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index cb37751..861a623 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -117,10 +117,18 @@ static int vc4_atomic_commit(struct drm_device *dev, return -ENOMEM; /* Make sure that any outstanding modesets have finished. */ - ret = down_interruptible(&vc4->async_modeset); - if (ret) { - kfree(c); - return ret; + if (nonblock) { + ret = down_trylock(&vc4->async_modeset); + if (ret) { + kfree(c); + return -EBUSY; + } + } else { + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(c); + return ret; + } } ret = drm_atomic_helper_prepare_planes(dev, state); -- cgit v1.1 From b52fc2183f40170489692852fd66d8750cbaa324 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 19 Apr 2016 18:36:55 -0700 Subject: vexpress/spc: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Acked-by: Sudeep Holla Signed-off-by: Stephen Boyd --- arch/arm/mach-vexpress/spc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 5766ce2..8409cab 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -547,7 +547,7 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) init.name = dev_name(cpu_dev); init.ops = &clk_spc_ops; - init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.flags = CLK_GET_RATE_NOCACHE; init.num_parents = 0; return devm_clk_register(cpu_dev, &spc->hw); -- cgit v1.1 From 3fb9c41286e594d52d23ca58404c69a7e2c39c41 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 19 Apr 2016 18:31:12 -0700 Subject: powerpc/512x: clk: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Cc: Gerhard Sittig Signed-off-by: Stephen Boyd --- arch/powerpc/platforms/512x/clock-commonclk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index c50ea76..6081fbd 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -221,7 +221,7 @@ static bool soc_has_mclk_mux0_canin(void) /* convenience wrappers around the common clk API */ static inline struct clk *mpc512x_clk_fixed(const char *name, int rate) { - return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); + return clk_register_fixed_rate(NULL, name, NULL, 0, rate); } static inline struct clk *mpc512x_clk_factor( -- cgit v1.1 From 3c7f4f54578b983c964eb992229a7dd153ce5ee0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 1 Jun 2016 14:52:54 -0700 Subject: clk: microchip: Remove CLK_IS_ROOT This flag is a no-op now (see commit 47b0eeb3dc8a "clk: Deprecate CLK_IS_ROOT", 2016-02-02) so remove it. Cc: Purna Chandra Mandal Cc: Ralf Baechle Cc: Signed-off-by: Stephen Boyd --- drivers/clk/microchip/clk-pic32mzda.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c index 020a29a..51f5438 100644 --- a/drivers/clk/microchip/clk-pic32mzda.c +++ b/drivers/clk/microchip/clk-pic32mzda.c @@ -180,15 +180,15 @@ static int pic32mzda_clk_probe(struct platform_device *pdev) /* register fixed rate clocks */ clks[POSCCLK] = clk_register_fixed_rate(&pdev->dev, "posc_clk", NULL, - CLK_IS_ROOT, 24000000); + 0, 24000000); clks[FRCCLK] = clk_register_fixed_rate(&pdev->dev, "frc_clk", NULL, - CLK_IS_ROOT, 8000000); + 0, 8000000); clks[BFRCCLK] = clk_register_fixed_rate(&pdev->dev, "bfrc_clk", NULL, - CLK_IS_ROOT, 8000000); + 0, 8000000); clks[LPRCCLK] = clk_register_fixed_rate(&pdev->dev, "lprc_clk", NULL, - CLK_IS_ROOT, 32000); + 0, 32000); clks[UPLLCLK] = clk_register_fixed_rate(&pdev->dev, "usbphy_clk", NULL, - CLK_IS_ROOT, 24000000); + 0, 24000000); /* fixed rate (optional) clock */ if (of_find_property(np, "microchip,pic32mzda-sosc", NULL)) { pr_info("pic32-clk: dt requests SOSC.\n"); -- cgit v1.1 From b9610e74586fd183b2d1c7fe5316bce8b6cc534f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 1 Jun 2016 14:56:57 -0700 Subject: clk: Remove CLK_IS_ROOT flag Now that we've gotten rid of all the users of this flag we can retire the number, leaving a slot open for a future flag user. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0c72204..fb39d5a 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -25,7 +25,7 @@ #define CLK_SET_PARENT_GATE BIT(1) /* must be gated across re-parent */ #define CLK_SET_RATE_PARENT BIT(2) /* propagate rate change up one level */ #define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */ -#define CLK_IS_ROOT BIT(4) /* Deprecated: Don't use */ + /* unused */ #define CLK_IS_BASIC BIT(5) /* Basic clk, can't do a to_clk_foo() */ #define CLK_GET_RATE_NOCACHE BIT(6) /* do not use the cached clk rate */ #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */ -- cgit v1.1 From ae4185cd13a3017259d30149e606f8c83857acf2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:05:48 +0200 Subject: MAINTAINERS: Add file patterns for clock device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Michael Turquette Cc: Stephen Boyd Cc: linux-clk@vger.kernel.org Signed-off-by: Stephen Boyd --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7304d2e..2334995 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3086,6 +3086,7 @@ M: Stephen Boyd L: linux-clk@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git S: Maintained +F: Documentation/devicetree/bindings/clock/ F: drivers/clk/ X: drivers/clk/clkdev.c F: include/linux/clk-pr* -- cgit v1.1 From 72ad679aa7182d23d269cbe4d655f7e129d3b057 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 16 May 2016 12:45:36 -0300 Subject: clk: nxp: Select MFD_SYSCON for creg driver Commit 378523d15003 ("clk: add lpc18xx creg clk driver") added a new clock driver but missed the proper MFD_SYSCON select. Fix it. Fixes: 378523d15003 ("clk: add lpc18xx creg clk driver") Signed-off-by: Ezequiel Garcia Acked-by: Joachim Eastwood Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 53ddba2..98efbfc 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -175,6 +175,7 @@ config COMMON_CLK_KEYSTONE config COMMON_CLK_NXP def_bool COMMON_CLK && (ARCH_LPC18XX || ARCH_LPC32XX) select REGMAP_MMIO if ARCH_LPC32XX + select MFD_SYSCON if ARCH_LPC18XX ---help--- Support for clock providers on NXP platforms. -- cgit v1.1 From bc9139d23f6b038e32bcd2dffdee70a8d76b3976 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 25 May 2016 10:56:24 +1000 Subject: drm/nouveau/bios/disp: fix handling of "match any protocol" entries As it turns out, a value of 0xff means "any protocol" and not "VGA". Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h | 5 +++-- drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c | 13 +++++-------- drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 12 ++++-------- drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c | 8 +++++--- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h index db10c11..c5a6ebd 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h @@ -25,7 +25,8 @@ u16 nvbios_outp_match(struct nvkm_bios *, u16 type, u16 mask, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_outp *); struct nvbios_ocfg { - u16 match; + u8 proto; + u8 flags; u16 clkcmp[2]; }; @@ -33,7 +34,7 @@ u16 nvbios_ocfg_entry(struct nvkm_bios *, u16 outp, u8 idx, u8 *ver, u8 *hdr, u8 *cnt, u8 *len); u16 nvbios_ocfg_parse(struct nvkm_bios *, u16 outp, u8 idx, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *); -u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u16 type, +u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u8 proto, u8 flags, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *); u16 nvbios_oclk_match(struct nvkm_bios *, u16 cmp, u32 khz); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c index f031466..5dd3438 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c @@ -76,6 +76,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl, mask |= 0x0001 << or; mask |= 0x0100 << head; + list_for_each_entry(outp, &disp->base.outp, head) { if ((outp->info.hasht & 0xff) == type && (outp->info.hashm & mask) == mask) { @@ -155,25 +156,21 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) if (!outp) return NULL; + *conf = (ctrl & 0x00000f00) >> 8; switch (outp->info.type) { case DCB_OUTPUT_TMDS: - *conf = (ctrl & 0x00000f00) >> 8; if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: - *conf = disp->sor.lvdsconf; - break; - case DCB_OUTPUT_DP: - *conf = (ctrl & 0x00000f00) >> 8; + *conf |= disp->sor.lvdsconf; break; - case DCB_OUTPUT_ANALOG: default: - *conf = 0x00ff; break; } - data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2); + data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8, + &ver, &hdr, &cnt, &len, &info2); if (data && id < 0xff) { data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk); if (data) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index 4226d21..fcb1b0c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -387,22 +387,17 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) if (!outp) return NULL; + *conf = (ctrl & 0x00000f00) >> 8; if (outp->info.location == 0) { switch (outp->info.type) { case DCB_OUTPUT_TMDS: - *conf = (ctrl & 0x00000f00) >> 8; if (*conf == 5) *conf |= 0x0100; break; case DCB_OUTPUT_LVDS: - *conf = disp->sor.lvdsconf; + *conf |= disp->sor.lvdsconf; break; - case DCB_OUTPUT_DP: - *conf = (ctrl & 0x00000f00) >> 8; - break; - case DCB_OUTPUT_ANALOG: default: - *conf = 0x00ff; break; } } else { @@ -410,7 +405,8 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf) pclk = pclk / 2; } - data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2); + data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8, + &ver, &hdr, &cnt, &len, &info2); if (data && id < 0xff) { data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk); if (data) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c index a5e9213..9efb1b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c @@ -141,7 +141,8 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, { u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len); if (data) { - info->match = nvbios_rd16(bios, data + 0x00); + info->proto = nvbios_rd08(bios, data + 0x00); + info->flags = nvbios_rd16(bios, data + 0x01); info->clkcmp[0] = nvbios_rd16(bios, data + 0x02); info->clkcmp[1] = nvbios_rd16(bios, data + 0x04); } @@ -149,12 +150,13 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, } u16 -nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u16 type, +nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u8 proto, u8 flags, u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *info) { u16 data, idx = 0; while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) { - if (info->match == type) + if ((info->proto == proto || info->proto == 0xff) && + (info->flags == flags)) break; } return data; -- cgit v1.1 From 9057c8d75018f05bbc769d7b4602de3b8b20f8aa Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 27 May 2016 12:01:27 +1000 Subject: drm/nouveau/ltc/gm107-: fix typo in the address of NV_PLTCG_LTC0_LTS0_INTR Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c index e292f56..389fb13 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c @@ -69,11 +69,11 @@ gm107_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth) } static void -gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s) +gm107_ltc_intr_lts(struct nvkm_ltc *ltc, int c, int s) { struct nvkm_subdev *subdev = <c->subdev; struct nvkm_device *device = subdev->device; - u32 base = 0x140000 + (c * 0x2000) + (s * 0x200); + u32 base = 0x140400 + (c * 0x2000) + (s * 0x200); u32 stat = nvkm_rd32(device, base + 0x00c); if (stat) { @@ -92,7 +92,7 @@ gm107_ltc_intr(struct nvkm_ltc *ltc) while (mask) { u32 s, c = __ffs(mask); for (s = 0; s < ltc->lts_nr; s++) - gm107_ltc_lts_isr(ltc, c, s); + gm107_ltc_intr_lts(ltc, c, s); mask &= ~(1 << c); } } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c index 2a29bfd..e18e0dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c @@ -46,7 +46,7 @@ static const struct nvkm_ltc_func gm200_ltc = { .oneinit = gm200_ltc_oneinit, .init = gm200_ltc_init, - .intr = gm107_ltc_intr, /*XXX: not validated */ + .intr = gm107_ltc_intr, .cbc_clear = gm107_ltc_cbc_clear, .cbc_wait = gm107_ltc_cbc_wait, .zbc = 16, -- cgit v1.1 From 383d0a419f8e63e3d65e706c3c515fa9505ce364 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Jun 2016 16:20:10 +1000 Subject: drm/nouveau/gr/gf100-: update sm error decoding from gk20a nvgpu headers Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 37 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 9513bad..ae9ab5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -949,22 +949,41 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc) } static const struct nvkm_enum gf100_mp_warp_error[] = { - { 0x00, "NO_ERROR" }, - { 0x01, "STACK_MISMATCH" }, + { 0x01, "STACK_ERROR" }, + { 0x02, "API_STACK_ERROR" }, + { 0x03, "RET_EMPTY_STACK_ERROR" }, + { 0x04, "PC_WRAP" }, { 0x05, "MISALIGNED_PC" }, - { 0x08, "MISALIGNED_GPR" }, - { 0x09, "INVALID_OPCODE" }, - { 0x0d, "GPR_OUT_OF_BOUNDS" }, - { 0x0e, "MEM_OUT_OF_BOUNDS" }, - { 0x0f, "UNALIGNED_MEM_ACCESS" }, + { 0x06, "PC_OVERFLOW" }, + { 0x07, "MISALIGNED_IMMC_ADDR" }, + { 0x08, "MISALIGNED_REG" }, + { 0x09, "ILLEGAL_INSTR_ENCODING" }, + { 0x0a, "ILLEGAL_SPH_INSTR_COMBO" }, + { 0x0b, "ILLEGAL_INSTR_PARAM" }, + { 0x0c, "INVALID_CONST_ADDR" }, + { 0x0d, "OOR_REG" }, + { 0x0e, "OOR_ADDR" }, + { 0x0f, "MISALIGNED_ADDR" }, { 0x10, "INVALID_ADDR_SPACE" }, - { 0x11, "INVALID_PARAM" }, + { 0x11, "ILLEGAL_INSTR_PARAM2" }, + { 0x12, "INVALID_CONST_ADDR_LDC" }, + { 0x13, "GEOMETRY_SM_ERROR" }, + { 0x14, "DIVERGENT" }, + { 0x15, "WARP_EXIT" }, {} }; static const struct nvkm_bitfield gf100_mp_global_error[] = { + { 0x00000001, "SM_TO_SM_FAULT" }, + { 0x00000002, "L1_ERROR" }, { 0x00000004, "MULTIPLE_WARP_ERRORS" }, - { 0x00000008, "OUT_OF_STACK_SPACE" }, + { 0x00000008, "PHYSICAL_STACK_OVERFLOW" }, + { 0x00000010, "BPT_INT" }, + { 0x00000020, "BPT_PAUSE" }, + { 0x00000040, "SINGLE_STEP_COMPLETE" }, + { 0x20000000, "ECC_SEC_ERROR" }, + { 0x40000000, "ECC_DED_ERROR" }, + { 0x80000000, "TIMEOUT" }, {} }; -- cgit v1.1 From f045f459d925138fe7d6193a8c86406bda7e49da Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 2 Jun 2016 12:23:31 +1000 Subject: drm/nouveau/fbcon: fix out-of-bounds memory accesses Reported by KASAN. Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 + drivers/gpu/drm/nouveau/nv04_fbcon.c | 7 ++----- drivers/gpu/drm/nouveau/nv50_fbcon.c | 6 ++---- drivers/gpu/drm/nouveau/nvc0_fbcon.c | 6 ++---- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 57aaf98..300ea03 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -552,6 +552,7 @@ nouveau_fbcon_init(struct drm_device *dev) if (ret) goto fini; + fbcon->helper.fbdev->pixmap.buf_align = 4; return 0; fini: diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index 0f3e4bb..7d9248b 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -82,7 +82,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) uint32_t fg; uint32_t bg; uint32_t dsize; - uint32_t width; uint32_t *data = (uint32_t *)image->data; int ret; @@ -93,9 +92,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret) return ret; - width = ALIGN(image->width, 8); - dsize = ALIGN(width * image->height, 32) >> 5; - if (info->fix.visual == FB_VISUAL_TRUECOLOR || info->fix.visual == FB_VISUAL_DIRECTCOLOR) { fg = ((uint32_t *) info->pseudo_palette)[image->fg_color]; @@ -111,10 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) ((image->dx + image->width) & 0xffff)); OUT_RING(chan, bg); OUT_RING(chan, fg); - OUT_RING(chan, (image->height << 16) | width); + OUT_RING(chan, (image->height << 16) | image->width); OUT_RING(chan, (image->height << 16) | image->width); OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff)); + dsize = ALIGN(image->width * image->height, 32) >> 5; while (dsize) { int iter_len = dsize > 128 ? 128 : dsize; diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index 33d9ee0..1aeb698 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -95,7 +95,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) struct nouveau_fbdev *nfbdev = info->par; struct nouveau_drm *drm = nouveau_drm(nfbdev->dev); struct nouveau_channel *chan = drm->channel; - uint32_t width, dwords, *data = (uint32_t *)image->data; + uint32_t dwords, *data = (uint32_t *)image->data; uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel)); uint32_t *palette = info->pseudo_palette; int ret; @@ -107,9 +107,6 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret) return ret; - width = ALIGN(image->width, 32); - dwords = (width * image->height) >> 5; - BEGIN_NV04(chan, NvSub2D, 0x0814, 2); if (info->fix.visual == FB_VISUAL_TRUECOLOR || info->fix.visual == FB_VISUAL_DIRECTCOLOR) { @@ -128,6 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) OUT_RING(chan, 0); OUT_RING(chan, image->dy); + dwords = ALIGN(image->width * image->height, 32) >> 5; while (dwords) { int push = dwords > 2047 ? 2047 : dwords; diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c index a091335..839f4c8 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c +++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c @@ -95,7 +95,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) struct nouveau_fbdev *nfbdev = info->par; struct nouveau_drm *drm = nouveau_drm(nfbdev->dev); struct nouveau_channel *chan = drm->channel; - uint32_t width, dwords, *data = (uint32_t *)image->data; + uint32_t dwords, *data = (uint32_t *)image->data; uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel)); uint32_t *palette = info->pseudo_palette; int ret; @@ -107,9 +107,6 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) if (ret) return ret; - width = ALIGN(image->width, 32); - dwords = (width * image->height) >> 5; - BEGIN_NVC0(chan, NvSub2D, 0x0814, 2); if (info->fix.visual == FB_VISUAL_TRUECOLOR || info->fix.visual == FB_VISUAL_DIRECTCOLOR) { @@ -128,6 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) OUT_RING (chan, 0); OUT_RING (chan, image->dy); + dwords = ALIGN(image->width * image->height, 32) >> 5; while (dwords) { int push = dwords > 2047 ? 2047 : dwords; -- cgit v1.1 From 77154fd969df749f5bf83f639af19fca199de033 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 2 Jun 2016 12:42:32 +1000 Subject: drm/nouveau/core: swap the order of imem/fb Fixes a use-after-free reported by valgrind and KASAN. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index c612dc1..126a85c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -16,9 +16,9 @@ enum nvkm_devidx { NVKM_SUBDEV_MC, NVKM_SUBDEV_BUS, NVKM_SUBDEV_TIMER, + NVKM_SUBDEV_INSTMEM, NVKM_SUBDEV_FB, NVKM_SUBDEV_LTC, - NVKM_SUBDEV_INSTMEM, NVKM_SUBDEV_MMU, NVKM_SUBDEV_BAR, NVKM_SUBDEV_PMU, -- cgit v1.1 From bfa49cfc526201119623de6593d284c96563bede Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jun 2016 14:16:50 +0100 Subject: net/ethoc: fix null dereference on error exit path priv is assigned to NULL however some of the early error exit paths to label 'free' dereference priv, causing a null pointer dereference. Move the label 'free' to just the free_netdev statement, and add a new exit path 'free2' for the error cases were clk_disable_unprepare needs calling before the final free. Fixes issue found by CoverityScan, CID#113260 Signed-off-by: Colin Ian King Reviewed-by: Max Filippov Signed-off-by: David S. Miller --- drivers/net/ethernet/ethoc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 41b0106..4edb98c 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1195,7 +1195,7 @@ static int ethoc_probe(struct platform_device *pdev) priv->mdio = mdiobus_alloc(); if (!priv->mdio) { ret = -ENOMEM; - goto free; + goto free2; } priv->mdio->name = "ethoc-mdio"; @@ -1208,7 +1208,7 @@ static int ethoc_probe(struct platform_device *pdev) ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free; + goto free2; } ret = ethoc_mdio_probe(netdev); @@ -1241,9 +1241,10 @@ error2: error: mdiobus_unregister(priv->mdio); mdiobus_free(priv->mdio); -free: +free2: if (priv->clk) clk_disable_unprepare(priv->clk); +free: free_netdev(netdev); out: return ret; -- cgit v1.1 From 14b84e8654c89ed59f433654e6bb64c886d095cd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Jun 2016 15:29:13 +0200 Subject: qed: fix qed_fill_link() error handling gcc warns about qed_fill_link possibly accessing uninitialized data: drivers/net/ethernet/qlogic/qed/qed_main.c: In function 'qed_fill_link': drivers/net/ethernet/qlogic/qed/qed_main.c:1170:35: error: 'link_caps' may be used uninitialized in this function [-Werror=maybe-uninitialized] While this warning is only about the specific case of CONFIG_QED_SRIOV being disabled but the function getting called for a VF (which should never happen), another possibility is that qed_mcp_get_*() fails without returning data. This rearranges the code so we bail out in either of the two cases and print a warning instead of accessing the uninitialized data. The qed_link_output structure remains untouched in this case, but all callers first call memset() on it, so at least we are not leaking stack data then. As discussed, we also use a compile-time check to ensure we never use any of the VF code if CONFIG_QED_SRIOV is disabled, and the PCI device table is updated to no longer bind to virtual functions in that configuration. Signed-off-by: Arnd Bergmann Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 45 ++++++++++++++++++++++------ drivers/net/ethernet/qlogic/qed/qed_sriov.h | 4 ++- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 ++ 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 7530646..61cc686 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1105,6 +1105,39 @@ static int qed_get_port_type(u32 media_type) return port_type; } +static int qed_get_link_data(struct qed_hwfn *hwfn, + struct qed_mcp_link_params *params, + struct qed_mcp_link_state *link, + struct qed_mcp_link_capabilities *link_caps) +{ + void *p; + + if (!IS_PF(hwfn->cdev)) { + qed_vf_get_link_params(hwfn, params); + qed_vf_get_link_state(hwfn, link); + qed_vf_get_link_caps(hwfn, link_caps); + + return 0; + } + + p = qed_mcp_get_link_params(hwfn); + if (!p) + return -ENXIO; + memcpy(params, p, sizeof(*params)); + + p = qed_mcp_get_link_state(hwfn); + if (!p) + return -ENXIO; + memcpy(link, p, sizeof(*link)); + + p = qed_mcp_get_link_capabilities(hwfn); + if (!p) + return -ENXIO; + memcpy(link_caps, p, sizeof(*link_caps)); + + return 0; +} + static void qed_fill_link(struct qed_hwfn *hwfn, struct qed_link_output *if_link) { @@ -1116,15 +1149,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn, memset(if_link, 0, sizeof(*if_link)); /* Prepare source inputs */ - if (IS_PF(hwfn->cdev)) { - memcpy(¶ms, qed_mcp_get_link_params(hwfn), sizeof(params)); - memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link)); - memcpy(&link_caps, qed_mcp_get_link_capabilities(hwfn), - sizeof(link_caps)); - } else { - qed_vf_get_link_params(hwfn, ¶ms); - qed_vf_get_link_state(hwfn, &link); - qed_vf_get_link_caps(hwfn, &link_caps); + if (qed_get_link_data(hwfn, ¶ms, &link, &link_caps)) { + dev_warn(&hwfn->cdev->pdev->dev, "no link data available\n"); + return; } /* Set the link parameters to pass to protocol driver */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h index c8667c6..c90b2b6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h @@ -12,11 +12,13 @@ #include "qed_vf.h" #define QED_VF_ARRAY_LENGTH (3) +#ifdef CONFIG_QED_SRIOV #define IS_VF(cdev) ((cdev)->b_is_vf) #define IS_PF(cdev) (!((cdev)->b_is_vf)) -#ifdef CONFIG_QED_SRIOV #define IS_PF_SRIOV(p_hwfn) (!!((p_hwfn)->cdev->p_iov_info)) #else +#define IS_VF(cdev) (0) +#define IS_PF(cdev) (1) #define IS_PF_SRIOV(p_hwfn) (0) #endif #define IS_PF_SRIOV_ALLOC(p_hwfn) (!!((p_hwfn)->pf_iov_info)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5d00d14..5733d18 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -87,7 +87,9 @@ static const struct pci_device_id qede_pci_tbl[] = { {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_100), QEDE_PRIVATE_PF}, {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_50), QEDE_PRIVATE_PF}, {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF}, +#ifdef CONFIG_QED_SRIOV {PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF}, +#endif { 0 } }; -- cgit v1.1 From 42316a201a60be38b07db1ebc3a1633107ed7209 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:31:33 +0530 Subject: Revert "ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff" This reverts commit 10971638701dedadb58c88ce4d31c9375b224ed6. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL - so revert thw whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 3 ++- arch/arc/include/asm/spinlock.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 5f3dcbb..75c8226 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -36,7 +36,8 @@ " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ "4: ; --- success --- \n" \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 800e7c4..a86cb84 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -260,7 +260,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) " mov %[tmp], %[delay] \n" /* tmp = delay */ \ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " rol %[delay], %[delay] \n" /* delay *= 2 */ \ + " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ + " mov.z %[delay], 1 \n" /* handle overflow */ \ " b 1b \n" /* start over */ \ " \n" \ "4: ; --- done --- \n" \ -- cgit v1.1 From 819f3602dcbd6b021cd50e18f5d05da30bca5b07 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:33:29 +0530 Subject: Revert "ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle" This reverts commit b89aa12c177477e34caa722818536fb5d0bffd76. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/include/asm/spinlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index a86cb84..5e01bdf 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -279,7 +279,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */ + " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ " scond %[LOCKED], [%[slock]] \n" /* acquire */ " bz 4f \n" /* done */ " \n" @@ -358,7 +358,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */ + " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ " sub %[val], %[val], 1 \n" /* reader lock */ " scond %[val], [%[rwlock]] \n" " bz 4f \n" /* done */ @@ -427,7 +427,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) __asm__ __volatile__( "0: mov %[delay], 1 \n" "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */ + " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ " mov %[val], %[WR_LOCKED] \n" " scond %[val], [%[rwlock]] \n" " bz 4f \n" -- cgit v1.1 From ed6aefed726a305bd36344e230d2a9e9301226fc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 31 May 2016 16:35:09 +0530 Subject: Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff" This reverts commit e78fdfef84be13a5c2b8276e12203cdf24778596. The issue was fixed in hardware in HS2.1C release and there are no known external users of affected RTL so revert the whole delayed retry series ! Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 5 - arch/arc/include/asm/atomic.h | 46 +------ arch/arc/include/asm/spinlock.h | 293 ---------------------------------------- 3 files changed, 4 insertions(+), 340 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index be9d0b5..0d3e59f 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -389,11 +389,6 @@ config ARC_HAS_LLSC default y depends on !ARC_CANT_LLSC -config ARC_STAR_9000923308 - bool "Workaround for llock/scond livelock" - default n - depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC - config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 75c8226..dd68399 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -25,51 +25,17 @@ #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#ifdef CONFIG_ARC_STAR_9000923308 - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay = 1, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " bz 4f \n" \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - "4: ; --- success --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \ - -#else /* !CONFIG_ARC_STAR_9000923308 */ - -#define SCOND_FAIL_RETRY_VAR_DEF - -#define SCOND_FAIL_RETRY_ASM \ - " bnz 1b \n" \ - -#define SCOND_FAIL_RETRY_VARS - -#endif - #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ [i] "ir" (i) \ : "cc"); \ @@ -78,8 +44,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ - unsigned int val; \ - SCOND_FAIL_RETRY_VAR_DEF \ + unsigned int val; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -91,11 +56,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ + " bnz 1b \n" \ : [val] "=&r" (val) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 5e01bdf..cded4a9 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -20,11 +20,6 @@ #ifdef CONFIG_ARC_HAS_LLSC -/* - * A normal LLOCK/SCOND based system, w/o need for livelock workaround - */ -#ifndef CONFIG_ARC_STAR_9000923308 - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int val; @@ -238,294 +233,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) smp_mb(); } -#else /* CONFIG_ARC_STAR_9000923308 */ - -/* - * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping - * coherency transactions in the SCU. The exclusive line state keeps rotating - * among contenting cores leading to a never ending cycle. So break the cycle - * by deferring the retry of failed exclusive access (SCOND). The actual delay - * needed is function of number of contending cores as well as the unrelated - * coherency traffic from other cores. To keep the code simple, start off with - * small delay of 1 which would suffice most cases and in case of contention - * double the delay. Eventually the delay is sufficient such that the coherency - * pipeline is drained, thus a subsequent exclusive access would succeed. - */ - -#define SCOND_FAIL_RETRY_VAR_DEF \ - unsigned int delay, tmp; \ - -#define SCOND_FAIL_RETRY_ASM \ - " ; --- scond fail delay --- \n" \ - " mov %[tmp], %[delay] \n" /* tmp = delay */ \ - "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \ - " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \ - " asl.f %[delay], %[delay], 1 \n" /* delay *= 2 */ \ - " mov.z %[delay], 1 \n" /* handle overflow */ \ - " b 1b \n" /* start over */ \ - " \n" \ - "4: ; --- done --- \n" \ - -#define SCOND_FAIL_RETRY_VARS \ - ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \ - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[slock]] \n" - " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ - " scond %[LOCKED], [%[slock]] \n" /* acquire */ - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [slock] "r" (&(lock->slock)), - [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - smp_mb(); - - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; - - smp_mb(); -} - -/* - * Read-write spinlocks, allowing multiple readers but only one writer. - * Unfair locking as Writers could be starved indefinitely by Reader(s) - */ - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * zero means writer holds the lock exclusively, deny Reader. - * Otherwise grant lock to first/subseq reader - * - * if (rw->counter > 0) { - * rw->counter--; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */ - " sub %[val], %[val], 1 \n" /* reader lock */ - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" /* done */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ - " sub %[val], %[val], 1 \n" /* counter-- */ - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - unsigned int val; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - /* - * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), - * deny writer. Otherwise if unlocked grant to writer - * Hence the claim that Linux rwlocks are unfair to writers. - * (can be starved for an indefinite time by readers). - * - * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) { - * rw->counter = 0; - * ret = 1; - * } - */ - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz 4f \n" - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); -} - -/* 1 - lock taken successfully */ -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - unsigned int val, got_it = 0; - SCOND_FAIL_RETRY_VAR_DEF; - - smp_mb(); - - __asm__ __volatile__( - "0: mov %[delay], 1 \n" - "1: llock %[val], [%[rwlock]] \n" - " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ - " mov %[val], %[WR_LOCKED] \n" - " scond %[val], [%[rwlock]] \n" - " bz.d 4f \n" - " mov.z %[got_it], 1 \n" /* got it */ - " \n" - SCOND_FAIL_RETRY_ASM - - : [val] "=&r" (val), - [got_it] "+&r" (got_it) - SCOND_FAIL_RETRY_VARS - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__), - [WR_LOCKED] "ir" (0) - : "memory", "cc"); - - smp_mb(); - - return got_it; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter++; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " add %[val], %[val], 1 \n" - " scond %[val], [%[rwlock]] \n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)) - : "memory", "cc"); - - smp_mb(); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - unsigned int val; - - smp_mb(); - - /* - * rw->counter = __ARCH_RW_LOCK_UNLOCKED__; - */ - __asm__ __volatile__( - "1: llock %[val], [%[rwlock]] \n" - " scond %[UNLOCKED], [%[rwlock]]\n" - " bnz 1b \n" - " \n" - : [val] "=&r" (val) - : [rwlock] "r" (&(rw->counter)), - [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__) - : "memory", "cc"); - - smp_mb(); -} - -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - -#endif /* CONFIG_ARC_STAR_9000923308 */ - #else /* !CONFIG_ARC_HAS_LLSC */ static inline void arch_spin_lock(arch_spinlock_t *lock) -- cgit v1.1 From 7b7eba0f3515fca3296b8881d583f7c1042f5226 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Jun 2016 02:04:44 +0200 Subject: netfilter: x_tables: don't reject valid target size on some architectures Quoting John Stultz: In updating a 32bit arm device from 4.6 to Linus' current HEAD, I noticed I was having some trouble with networking, and realized that /proc/net/ip_tables_names was suddenly empty. Digging through the registration process, it seems we're catching on the: if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; Where next_offset seems to be 4 bytes larger then the offset + standard_target struct size. next_offset needs to be aligned via XT_ALIGN (so we can access all members of ip(6)t_entry struct). This problem didn't show up on i686 as it only needs 4-byte alignment for u64, but iptables userspace on other 32bit arches does insert extra padding. Reported-by: John Stultz Tested-by: John Stultz Fixes: 7ed2abddd20cf ("netfilter: x_tables: check standard target size too") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c69c892..2675d58 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -612,7 +612,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; /* compat_xt_entry match has less strict aligment requirements, @@ -694,7 +694,7 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct xt_standard_target) != next_offset) + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) return -EINVAL; return xt_check_entry_match(elems, base + target_offset, -- cgit v1.1 From f55d84b07c4e7340473a25dc82b462607578402c Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Wed, 1 Jun 2016 08:53:48 -0700 Subject: stmmac: do not sleep in atomic context for mdio_reset stmmac_mdio_reset() has been updated to use msleep rather udelay (as some PHY requires a one second delay there). It called from stmmac_resume() within the spin_lock_irqsave block atomic context triggering 'scheduling while atomic'. The stmmac_priv lock usage is not fully documented, but it seems to protect the access to the MAC registers / DMA structures rather than the MDIO bus or the PHY (which have separate locking), so we can push the spin_lock after the stmmac_mdio_reset call. Signed-off-by: Vincent Palatin Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index eac45d0..a473c18 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3450,8 +3450,6 @@ int stmmac_resume(struct device *dev) if (!netif_running(ndev)) return 0; - spin_lock_irqsave(&priv->lock, flags); - /* Power Down bit, into the PM register, is cleared * automatically as soon as a magic packet or a Wake-up frame * is received. Anyway, it's better to manually clear @@ -3459,7 +3457,9 @@ int stmmac_resume(struct device *dev) * from another devices (e.g. serial console). */ if (device_may_wakeup(priv->device)) { + spin_lock_irqsave(&priv->lock, flags); priv->hw->mac->pmt(priv->hw, 0); + spin_unlock_irqrestore(&priv->lock, flags); priv->irq_wake = 0; } else { pinctrl_pm_select_default_state(priv->device); @@ -3473,6 +3473,8 @@ int stmmac_resume(struct device *dev) netif_device_attach(ndev); + spin_lock_irqsave(&priv->lock, flags); + priv->cur_rx = 0; priv->dirty_rx = 0; priv->dirty_tx = 0; -- cgit v1.1 From 9bd616e3dbedfc103f158197c8ad93678849b1ed Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 Jun 2016 18:52:16 +0100 Subject: cpuidle: Do not access cpuidle_devices when !CONFIG_CPU_IDLE The cpuidle_devices per-CPU variable is only defined when CPU_IDLE is enabled. Commit c8cc7d4de7a4 ("sched/idle: Reorganize the idle loop") removed the #ifdef CONFIG_CPU_IDLE around cpuidle_idle_call() with the compiler optimising away __this_cpu_read(cpuidle_devices). However, with CONFIG_UBSAN && !CONFIG_CPU_IDLE, this optimisation no longer happens and the kernel fails to link since cpuidle_devices is not defined. This patch introduces an accessor function for the current CPU cpuidle device (returning NULL when !CONFIG_CPU_IDLE) and uses it in cpuidle_idle_call(). Signed-off-by: Catalin Marinas Cc: 4.5+ # 4.5+ Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 3 +++ kernel/sched/idle.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 786ad32..07b83d3 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -152,6 +152,8 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +static inline struct cpuidle_device *cpuidle_get_device(void) +{return __this_cpu_read(cpuidle_devices); } #else static inline void disable_cpuidle(void) { } static inline bool cpuidle_not_available(struct cpuidle_driver *drv, @@ -187,6 +189,7 @@ static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } +static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #endif #if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index bd12c6c..c5aeedf 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ static void cpuidle_idle_call(void) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; -- cgit v1.1 From ce25d66ad5f8d921bac5fe2d32d62fa30c0f9a70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2016 14:52:43 -0700 Subject: Possible problem with e6afc8ac ("udp: remove headers from UDP packets before queueing") Paul Moore tracked a regression caused by a recent commit, which mistakenly assumed that sk_filter() could be avoided if socket had no current BPF filter. The intent was to avoid udp_lib_checksum_complete() overhead. But sk_filter() also checks skb_pfmemalloc() and security_sock_rcv_skb(), so better call it. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Reported-by: Paul Moore Tested-by: Paul Moore Tested-by: Stephen Smalley Cc: samanthakumar Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++++----- net/ipv6/udp.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d56c055..0ff31d9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1618,12 +1618,12 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2da1896..f421c9f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -653,12 +653,12 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto csum_error; + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { -- cgit v1.1 From 5d2be1422e02ccd697ccfcd45c85b4a26e6178e2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:04:56 -0400 Subject: tipc: fix an infoleak in tipc_nl_compat_link_dump link_info.str is a char array of size 60. Memory after the NULL byte is not initialized. Sending the whole object out can cause a leak. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f795b1d..3ad9fab 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); -- cgit v1.1 From 4116def2337991b39919f3b448326e21c40e0dbb Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:11:20 -0400 Subject: rds: fix an infoleak in rds_inc_info_copy The last field "flags" of object "minfo" is not initialized. Copying this object out may leak kernel stack data. Assign 0 to it to avoid leak. Signed-off-by: Kangjie Lu Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/recv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rds/recv.c b/net/rds/recv.c index c0be1ec..8413f6c 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -561,5 +561,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); } -- cgit v1.1 From 55eed755c6e30a89be3a791a6b0ad208aadd9bdc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 May 2016 13:11:17 +0200 Subject: locking/seqcount: Re-fix raw_read_seqcount_latch() Commit 50755bc1c305 ("seqlock: fix raw_read_seqcount_latch()") broke raw_read_seqcount_latch(). If you look at the comment that was modified; the thing that changes is the seq count, not the latch pointer. * void latch_modify(struct latch_struct *latch, ...) * { * smp_wmb(); <- Ensure that the last data[1] update is visible * latch->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible * * modify(latch->data[0], ...); * * smp_wmb(); <- Ensure that the data[0] update is visible * latch->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible * * modify(latch->data[1], ...); * } * * The query will have a form like: * * struct entry *latch_query(struct latch_struct *latch, ...) * { * struct entry *entry; * unsigned seq, idx; * * do { * seq = lockless_dereference(latch->seq); So here we have: seq = READ_ONCE(latch->seq); smp_read_barrier_depends(); Which is exactly what we want; the new code: seq = ({ p = READ_ONCE(latch); smp_read_barrier_depends(); p })->seq; is just wrong; because it looses the volatile read on seq, which can now be torn or worse 'optimized'. And the read_depend barrier is also placed wrong, we want it after the load of seq, to match the above data[] up-to-date wmb()s. Such that when we dereference latch->data[] below, we're guaranteed to observe the right data. * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * smp_rmb(); * } while (seq != latch->seq); * * return entry; * } So yes, not passing a pointer is not pretty, but the code was correct, and isn't anymore now. Change to explicit READ_ONCE()+smp_read_barrier_depends() to avoid confusion and allow strict lockless_dereference() checking. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 50755bc1c305 ("seqlock: fix raw_read_seqcount_latch()") Link: http://lkml.kernel.org/r/20160527111117.GL3192@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/seqlock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7973a82..ead9765 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -277,7 +277,10 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - return lockless_dereference(s)->sequence; + int seq = READ_ONCE(s->sequence); + /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ + smp_read_barrier_depends(); + return seq; } /** @@ -331,7 +334,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = lockless_dereference(latch)->seq; + * seq = raw_read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); -- cgit v1.1 From 0422e83d84ae24b933e4b0d4c1e0f0b4ae8a0a3b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 May 2016 21:08:17 +0100 Subject: locking/ww_mutex: Report recursive ww_mutex locking early Recursive locking for ww_mutexes was originally conceived as an exception. However, it is heavily used by the DRM atomic modesetting code. Currently, the recursive deadlock is checked after we have queued up for a busy-spin and as we never release the lock, we spin until kicked, whereupon the deadlock is discovered and reported. A simple solution for the now common problem is to move the recursive deadlock discovery to the first action when taking the ww_mutex. Suggested-by: Maarten Lankhorst Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Maarten Lankhorst Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464293297-19777-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index e364b42..79d2d76 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) if (!hold_ctx) return 0; - if (unlikely(ctx == hold_ctx)) - return -EALREADY; - if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { #ifdef CONFIG_DEBUG_MUTEXES @@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, unsigned long flags; int ret; + if (use_ww_ctx) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) + return -EALREADY; + } + preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); -- cgit v1.1 From 3b94a891667c30fb4624221497d77fc65d950345 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 27 May 2016 04:12:20 -0700 Subject: perf/x86/intel/uncore: Remove SBOX support for Broadwell server There was a report that on certain Broadwell-EP systems writing any bit of the SBOX PMU initialization MSR would #GP at boot. This did not happen on all systems. My test systems booted fine. Considering both DE and EP may have such issues, this patch removes SBOX support for all Broadwell platforms for now. Reported-and-tested-by: Mark van Dijk Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1464347540-5763-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b262586..874e8bd 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2868,27 +2868,10 @@ static struct intel_uncore_type bdx_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; -static struct intel_uncore_type bdx_uncore_sbox = { - .name = "sbox", - .num_counters = 4, - .num_boxes = 4, - .perf_ctr_bits = 48, - .event_ctl = HSWEP_S0_MSR_PMON_CTL0, - .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, - .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, - .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, - .msr_offset = HSWEP_SBOX_MSR_OFFSET, - .ops = &hswep_uncore_sbox_msr_ops, - .format_group = &hswep_uncore_sbox_format_group, -}; - -#define BDX_MSR_UNCORE_SBOX 3 - static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, &hswep_uncore_pcu, - &bdx_uncore_sbox, NULL, }; @@ -2897,10 +2880,6 @@ void bdx_uncore_cpu_init(void) if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; } static struct intel_uncore_type bdx_uncore_ha = { -- cgit v1.1 From f90d83b301701026b2e4c437a3613f377f63290e Mon Sep 17 00:00:00 2001 From: AceLan Kao Date: Fri, 3 Jun 2016 14:45:25 +0800 Subject: ALSA: hda - Fix headset mic detection problem for Dell machine Add the pin configuration value of this machine into the pin_quirk table to make DELL1_MIC_NO_PRESENCE apply to this machine. Signed-off-by: AceLan Kao Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7960316..49d581a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5822,6 +5822,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60180}, {0x14, 0x90170130}, {0x21, 0x02211040}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell Inspiron 5565", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60180}, + {0x14, 0x90170120}, + {0x21, 0x02211030}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, -- cgit v1.1 From 55f1ea15216a5a14c96738bd5284100a00ffa9dc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 31 May 2016 11:23:43 +0100 Subject: efi: Fix for_each_efi_memory_desc_in_map() for empty memmaps Commit: 78ce248faa3c ("efi: Iterate over efi.memmap in for_each_efi_memory_desc()") introduced a regression for systems booted with the 'noefi' kernel option. In particular, I observed an early kernel hang in efi_find_mirror()'s for_each_efi_memory_desc() call. As we don't have efi memmap on this system we enter this iterator with the following parameters: efi.memmap.map = 0, efi.memmap.map_end = 0, efi.memmap.desc_size = 28 ... then for_each_efi_memory_desc_in_map() does the following comparison: (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); ... where md = 0, (m)->map_end = 0 and (m)->desc_size = 28 but when we subtract something from a NULL pointer wrap around happens and we end up returning invalid pointer and crash. Fix it by using the correct pointer arithmetics. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Mark Salter Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 78ce248faa3c ("efi: Iterate over efi.memmap in for_each_efi_memory_desc()") Link: http://lkml.kernel.org/r/1464690224-4503-2-git-send-email-matt@codeblueprint.co.uk [ Made the changelog more readable. ] Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/efi.h b/include/linux/efi.h index c2db3ca..f196dd0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1005,7 +1005,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ - (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ + ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ (md) = (void *)(md) + (m)->desc_size) /** -- cgit v1.1 From c75343972b79ef5bd44c498a63b326e37470bbfc Mon Sep 17 00:00:00 2001 From: Dennis Chen Date: Tue, 31 May 2016 11:23:44 +0100 Subject: efi/arm: Fix the format of EFI debug messages When both EFI and memblock debugging is enabled on the kernel command line: 'efi=debug memblock=debug' .. the debug messages for early_con look the following way: [ 0.000000] efi: 0x0000e1050000-0x0000e105ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e1300000-0x0000e1300fff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e8200000-0x0000e827ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x008000000000-0x008001e7ffff [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC] [ 0.000000] memblock_add: [0x00008000000000-0x00008001e7ffff] flags 0x0 early_init_dt_add_memory_arch+0x54/0x5c [ 0.000000] * ... Note the misplaced '*' line, which happened because the memblock debug message was printed while the EFI debug message was still being constructed.. This patch fixes the output to be the expected: [ 0.000000] efi: 0x0000e1050000-0x0000e105ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e1300000-0x0000e1300fff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x0000e8200000-0x0000e827ffff [Memory Mapped I/O |RUN| | | | | | | | | | |UC] [ 0.000000] efi: 0x008000000000-0x008001e7ffff [Runtime Data |RUN| | | | | | | |WB|WT|WC|UC]* [ 0.000000] memblock_add: [0x00008000000000-0x00008001e7ffff] flags 0x0 early_init_dt_add_memory_arch+0x54/0x5c ... Note how the '*' is now in the proper EFI debug message line. Signed-off-by: Dennis Chen Signed-off-by: Matt Fleming Acked-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dan Williams Cc: Linus Torvalds Cc: Mark Salter Cc: Peter Zijlstra Cc: Steve Capper Cc: Steve McIntyre Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1464690224-4503-3-git-send-email-matt@codeblueprint.co.uk [ Made the changelog more readable. ] Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-init.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index a850cbc..c49d50e 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -174,6 +174,7 @@ static __init void reserve_regions(void) { efi_memory_desc_t *md; u64 paddr, npages, size; + int resv; if (efi_enabled(EFI_DBG)) pr_info("Processing EFI memory map:\n"); @@ -190,12 +191,14 @@ static __init void reserve_regions(void) paddr = md->phys_addr; npages = md->num_pages; + resv = is_reserve_region(md); if (efi_enabled(EFI_DBG)) { char buf[64]; - pr_info(" 0x%012llx-0x%012llx %s", + pr_info(" 0x%012llx-0x%012llx %s%s\n", paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md)); + efi_md_typeattr_format(buf, sizeof(buf), md), + resv ? "*" : ""); } memrange_efi_to_native(&paddr, &npages); @@ -204,14 +207,9 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md)) { + if (resv) memblock_mark_nomap(paddr, size); - if (efi_enabled(EFI_DBG)) - pr_cont("*"); - } - if (efi_enabled(EFI_DBG)) - pr_cont("\n"); } set_bit(EFI_MEMMAP, &efi.flags); -- cgit v1.1 From c7103f650a11328f28b9fa1c95027db331b7774b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 31 May 2016 11:50:28 -0700 Subject: EDAC, sb_edac: Fix rank lookup on Broadwell Broadwell made a small change to the rank target register moving the target rank ID field up from bits 16:19 to bits 20:23. Also found that the offset field grew by one bit in the IVY_BRIDGE to HASWELL transition, so fix the RIR_OFFSET() macro too. Signed-off-by: Tony Luck Cc: stable@vger.kernel.org # v3.19+ Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/2943fb819b1f7e396681165db9c12bb3df0e0b16.1464735623.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index b4d0bf6..ace662e 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -239,8 +239,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, }; -#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) -#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) +#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \ + GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19)) + +#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? \ + GET_BITFIELD(reg, 2, 15) : GET_BITFIELD(reg, 2, 14)) /* Device 16, functions 2-7 */ @@ -1894,14 +1897,14 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_tad[i], rir_offset[j][k], ®); - tmp_mb = RIR_OFFSET(reg) << 6; + tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6; gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", i, j, k, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)RIR_RNK_TGT(reg), + (u32)RIR_RNK_TGT(pvt->info.type, reg), reg); } } @@ -2234,7 +2237,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], rir_offset[n_rir][idx], ®); - *rank = RIR_RNK_TGT(reg); + *rank = RIR_RNK_TGT(pvt->info.type, reg); edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, -- cgit v1.1 From 31143e2933d1675c4c1ba6ce125cdd95870edd85 Mon Sep 17 00:00:00 2001 From: Franky Lin Date: Thu, 2 Jun 2016 02:00:27 -0700 Subject: brcmfmac: add eth_type_trans back for PCIe full dongle A regression was introduced in commit 9c349892ccc9 ("brcmfmac: revise handling events in receive path") which moves eth_type_trans() call to brcmf_rx_frame(). Msgbuf layer doesn't use brcmf_rx_frame() but invokes brcmf_netif_rx() directly. In such case the Ethernet header was not stripped out resulting in null pointer dereference in the networking stack. BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] enqueue_to_backlog+0x56/0x260 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: fuse ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype [...] rtsx_pci scsi_mod usbcore usb_common i8042 serio nvme nvme_core CPU: 7 PID: 1340 Comm: irq/136-brcmf_p Not tainted 4.7.0-rc1-mainline #1 Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 01.02.00 04/07/2016 task: ffff8804a0c5bd00 ti: ffff88049e124000 task.ti: ffff88049e124000 RIP: 0010:[] [] enqueue_to_backlog+0x56/0x260 RSP: 0018:ffff88049e127ca0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8804bddd7c40 RCX: 000000000000002f RDX: 0000000000000000 RSI: 0000000000000007 RDI: ffff8804bddd7d4c RBP: ffff88049e127ce8 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8804bddd12c0 R11: 000000000000149e R12: 0000000000017c40 R13: ffff88049e127d08 R14: ffff8804a9bd6d00 R15: ffff8804bddd7d4c FS: 0000000000000000(0000) GS:ffff8804bddc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000048 CR3: 0000000001806000 CR4: 00000000003406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: ffff8804bdddad00 ffff8804ad089e00 0000000000000000 0000000000000282 0000000000000000 ffff8804a9bd6d00 ffff8804a1b27e00 ffff8804a9bd6d00 ffff88002ee88000 ffff88049e127d28 ffffffff814c3f3b ffffffff81311fc3 Call Trace: [] netif_rx_internal+0x4b/0x170 [] ? swiotlb_tbl_unmap_single+0xf3/0x120 [] netif_rx_ni+0x27/0xc0 [] brcmf_netif_rx+0x49/0x70 [brcmfmac] [] brcmf_msgbuf_process_rx+0x2b4/0x570 [brcmfmac] [] ? __xen_set_pgd_hyper+0x57/0xd0 [] ? irq_forced_thread_fn+0x70/0x70 [] brcmf_proto_msgbuf_rx_trigger+0x31/0xe0 [brcmfmac] [] brcmf_pcie_isr_thread+0x7f/0x110 [brcmfmac] [] irq_thread_fn+0x20/0x50 [] irq_thread+0x12d/0x1c0 [] ? __schedule+0x2f5/0x7a0 [] ? wake_threads_waitq+0x30/0x30 [] ? irq_thread_dtor+0xb0/0xb0 [] kthread+0xd8/0xf0 [] ret_from_fork+0x1f/0x40 [] ? kthread_worker_fn+0x170/0x170 Code: 1c f5 60 9a 8e 81 9c 58 0f 1f 44 00 00 48 89 45 d0 fa 66 0f 1f 44 00 00 4c 8d bb 0c 01 00 00 4c 89 ff e8 5e 08 11 00 49 8b 56 20 <48> 8b 52 48 83 e2 01 74 10 8b 8b 08 01 00 00 8b 15 59 c5 42 00 RIP [] enqueue_to_backlog+0x56/0x260 RSP CR2: 0000000000000048 Fixes: 9c349892ccc9 ("brcmfmac: revise handling events in receive path") Reported-by: Rafal Milecki Reported-by: Grey Christoforo Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Arend Van Spriel Reviewed-by: Hante Meuleman Signed-off-by: Franky Lin [arend@broadcom.com: rephrased the commit message] Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c index 68f1ce0..2b9a2bc 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c @@ -1157,6 +1157,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf) brcmu_pkt_buf_free_skb(skb); return; } + + skb->protocol = eth_type_trans(skb, ifp->ndev); brcmf_netif_rx(ifp, skb); } -- cgit v1.1 From fbedcaf43fba35677c01a4ae51e6f79edf4049ba Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Thu, 19 May 2016 18:45:58 -0400 Subject: EDAC: Fix workqueues poll period resetting After the workqueue cleanup, we're registering workqueues based on the presence of an ->edac_check function. When that is the case, we're setting OP_RUNNING_POLL. But we forgot to check that in edac_mc_reset_delay_period(), leading to: BUG: unable to handle kernel paging request at 0000000000015d10 IP: [ .. ] queued_spin_lock_slowpath PGD 3ffcc8067 PUD 3ffc56067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: ... CPU: 1 PID: 2792 Comm: edactest Not tainted 4.6.0-dirty #1 Hardware name: HP ProLiant MicroServer, BIOS O41 10/01/2013 Stack: Call Trace: ? _raw_spin_lock_irqsave ? lock_timer_base.isra.34 ? del_timer ? try_to_grab_pending ? mod_delayed_work_on ? edac_mc_reset_delay_period ? edac_set_poll_msec ? param_attr_store ? module_attr_store ? kernfs_fop_write ? __vfs_write ? __vfs_read ? __alloc_fd ? vfs_write ? SyS_write ? entry_SYSCALL_64_fastpath Code: RIP [ .. ] queued_spin_lock_slowpath RSP <> CR2: 0000000000015d10 ---[ end trace 3f286bc71cca15d1 ]--- Kernel panic - not syncing: Fatal exception Fix it. Signed-off-by: Nicholas Krause Cc: # 4.5 Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/1463697958-13406-1-git-send-email-xerofoify@gmail.com [ Rewrite commit message. ] Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 6aa256b0..c3ee3ad 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -565,7 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value) list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mod_work(&mci->work, value); + if (mci->op_state == OP_RUNNING_POLL) + edac_mod_work(&mci->work, value); } mutex_unlock(&mem_ctls_mutex); } -- cgit v1.1 From 665f05e0b836d46b22f0b712eb76d8b7f69a5ea0 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 2 Jun 2016 10:58:08 -0700 Subject: EDAC, sb_edac: Readd accidentally dropped Broadwell-D support In commit 2c1ea4c700af ("EDAC, sb_edac: Use cpu family/model in driver detection") we switched from using PCI ids to determine which platform we are running on to using CPU model instead. I forgot that Broadwell-DE has its own distinct model number different from Broadwell-EP or -EX. Fixing this isn't just adding a line to the array of cpuids - the exising code assumed a 1:1 mapping between entries in that array and the "enum type" values. Added the type to pci_id_table structure to remove this dependency and allows two Broadwell cpu models. Signed-off-by: Tony Luck Cc: Aristeu Rozanski Cc: Dave Hansen Cc: Mauro Carvalho Chehab Cc: linux-edac Fixes: 2c1ea4c700af ("EDAC, sb_edac: Use cpu family/model in driver detection") Link: http://lkml.kernel.org/r/b3cffe40dec6dfe0235a5d52a504f0ba86a07ce7.1464902605.git.tony.luck@intel.com Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index ace662e..6744d88 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -329,6 +329,7 @@ struct pci_id_descr { struct pci_id_table { const struct pci_id_descr *descr; int n_devs; + enum type type; }; struct sbridge_dev { @@ -397,9 +398,14 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = { { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, }; -#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } +#define PCI_ID_TABLE_ENTRY(A, T) { \ + .descr = A, \ + .n_devs = ARRAY_SIZE(A), \ + .type = T \ +} + static const struct pci_id_table pci_dev_descr_sbridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), + PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE), {0,} /* 0 terminated list. */ }; @@ -466,7 +472,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = { }; static const struct pci_id_table pci_dev_descr_ibridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), + PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE), {0,} /* 0 terminated list. */ }; @@ -539,7 +545,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = { }; static const struct pci_id_table pci_dev_descr_haswell_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell), + PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL), {0,} /* 0 terminated list. */ }; @@ -583,7 +589,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = { }; static const struct pci_id_table pci_dev_descr_knl_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_knl), + PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING), {0,} }; @@ -651,7 +657,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = { }; static const struct pci_id_table pci_dev_descr_broadwell_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell), + PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL), {0,} /* 0 terminated list. */ }; @@ -3360,12 +3366,12 @@ fail0: #define ICPU(model, table) \ { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } -/* Order here must match "enum type" */ static const struct x86_cpu_id sbridge_cpuids[] = { ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */ ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */ ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */ ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */ + ICPU(0x56, pci_dev_descr_broadwell_table), /* BROADWELL-DE */ ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */ { } }; @@ -3401,7 +3407,7 @@ static int sbridge_probe(const struct x86_cpu_id *id) mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids); + rc = sbridge_register_mci(sbridge_dev, ptable->type); if (unlikely(rc < 0)) goto fail1; } -- cgit v1.1 From 357cc9b4a8a7a0cd0e662537b76e6fa4670b6798 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:15 -0700 Subject: sch_hfsc: always keep backlog updated hfsc updates backlog lazily, that is only when we dump the stats. This is problematic after we begin to update backlog in qdisc_tree_reduce_backlog(). Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d783d7c..1ac9f9f 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1529,6 +1529,7 @@ hfsc_reset_qdisc(struct Qdisc *sch) q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -1559,14 +1560,6 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; - struct hfsc_class *cl; - unsigned int i; - - sch->qstats.backlog = 0; - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - sch->qstats.backlog += cl->qdisc->qstats.backlog; - } qopt.defcls = q->defcls; if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) @@ -1604,6 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl->qdisc->q.qlen == 1) set_active(cl, qdisc_pkt_len(skb)); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1672,6 +1666,7 @@ hfsc_dequeue(struct Qdisc *sch) qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; @@ -1695,6 +1690,7 @@ hfsc_drop(struct Qdisc *sch) } cl->qstats.drops++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } -- cgit v1.1 From 6529d75ad9228f4d8a8f6c5c5244ceb945ac9bc2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:16 -0700 Subject: sch_prio: update backlog as well We need to update backlog too when we update qlen. Joint work with Stas. Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fee1b15..4b0a821 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -85,6 +85,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -117,6 +118,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -135,6 +137,7 @@ static unsigned int prio_drop(struct Qdisc *sch) for (prio = q->bands-1; prio >= 0; prio--) { qdisc = q->queues[prio]; if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -151,6 +154,7 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); + sch->qstats.backlog = 0; sch->q.qlen = 0; } -- cgit v1.1 From 6a73b571b63075ef408c83f07c2565b5652f93cc Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:17 -0700 Subject: sch_drr: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_drr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a63e879..bf8af2c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -375,6 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = cl->quantum; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return err; } @@ -407,6 +408,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -428,6 +430,7 @@ static unsigned int drr_drop(struct Qdisc *sch) if (cl->qdisc->ops->drop) { len = cl->qdisc->ops->drop(cl->qdisc); if (len > 0) { + sch->qstats.backlog -= len; sch->q.qlen--; if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); @@ -463,6 +466,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } -- cgit v1.1 From d7f4f332f082c4d4ba53582f902ed6b44fd6f45e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:18 -0700 Subject: sch_red: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8c0508c..91578bd 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,6 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -118,6 +119,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -143,6 +145,7 @@ static unsigned int red_drop(struct Qdisc *sch) if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -158,6 +161,7 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } -- cgit v1.1 From 8d5958f424b62060a8696b12c17dad198d5d386f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:19 -0700 Subject: sch_tbf: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 83b90b5..3161e49 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -207,6 +207,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -217,6 +218,7 @@ static unsigned int tbf_drop(struct Qdisc *sch) unsigned int len = 0; if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; qdisc_qstats_drop(sch); } @@ -263,6 +265,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->t_c = now; q->tokens = toks; q->ptokens = ptoks; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); @@ -294,6 +297,7 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; -- cgit v1.1 From 9c8b0778e48e4bbdb77121c6c1b7dd48e5182e67 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Thu, 2 Jun 2016 17:36:28 +0800 Subject: gianfar: fix the last transmit buffer descriptor When the transmit hardware timestamping is enabled, an additional TxBD would be added and would be set as the last TxBD with TXBD_LAST and TXBD_INTERRUPT. However this has been broken by a patch recently. This made the software couldn't get transmit hardware timestamps and resulted in call trace. So, this patch is to fix this issue. Fixes: 48963b4492e9 ("gianfar: Remove redundant ops for do_tstamp from xmit()") Signed-off-by: Yangbo Lu Reviewed-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 7615e06..2e6785b 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2440,7 +2440,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_queue->tx_ring_size); if (likely(!nr_frags)) { - lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT); + if (likely(!do_tstamp)) + lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT); } else { u32 lstatus_start = lstatus; -- cgit v1.1 From 8478b6cdc10e8a7735deeb9d9e46ad5b157c84d0 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 2 Jun 2016 16:14:52 +0300 Subject: net: ethernet: ti: cpsw: fix rx-usecs interrupt pacing consistency The rx-usecs shouldn't be changed while interface down/up. Currently, for instance, if it's set to 100us, after interface down/up it's 500us. It's a hidden bug that can lead to lavish interrupt pacing time increasing while "down/up" up to max value. Steps to reproduce: - set rx-usecs to be 100us - down/up interface - read new unexpected rx-usecs Signed-off-by: Ivan Khoronzhuk Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4b08a2f..e6bb0ec 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1339,7 +1339,7 @@ static int cpsw_ndo_open(struct net_device *ndev) if (priv->coal_intvl != 0) { struct ethtool_coalesce coal; - coal.rx_coalesce_usecs = (priv->coal_intvl << 4); + coal.rx_coalesce_usecs = priv->coal_intvl; cpsw_set_coalesce(ndev, &coal); } -- cgit v1.1 From 207bdf1844286d2ba94523d30528f82aaf33d52b Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:06 +0200 Subject: net-next: mediatek: use mdiobus_free() in favour of kfree() The driver currently uses kfree() to clear the mii_bus. This is not the correct way to clear the memory and mdiobus_free() should be used instead. This patch fixes the two instances where this happens in the driver. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c984462..28f169f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -280,7 +280,7 @@ static int mtk_mdio_init(struct mtk_eth *eth) return 0; err_free_bus: - kfree(eth->mii_bus); + mdiobus_free(eth->mii_bus); err_put_node: of_node_put(mii_np); @@ -295,7 +295,7 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) mdiobus_unregister(eth->mii_bus); of_node_put(eth->mii_bus->dev.of_node); - kfree(eth->mii_bus); + mdiobus_free(eth->mii_bus); } static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) -- cgit v1.1 From 08ef55c6f257acf3bdc6940813f80e8f0f5d90ec Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:07 +0200 Subject: net-next: mediatek: fix gigabit and flow control advertisement The current code will not setup the PHYs advertisement features correctly. Fix this and properly advertise Gigabit features and properly handle asymmetric pause frames. Signed-off-by: Sean Wang Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 30 +++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 28f169f..b0652f6 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -133,6 +133,8 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); + u16 lcl_adv = 0, rmt_adv = 0; + u8 flowctrl; u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | @@ -150,11 +152,30 @@ static void mtk_phy_link_adjust(struct net_device *dev) if (mac->phy_dev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) + if (mac->phy_dev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) - mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC; + if (mac->phy_dev->pause) + rmt_adv = LPA_PAUSE_CAP; + if (mac->phy_dev->asym_pause) + rmt_adv |= LPA_PAUSE_ASYM; + + if (mac->phy_dev->advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + + if (flowctrl & FLOW_CTRL_TX) + mcr |= MAC_MCR_FORCE_TX_FC; + if (flowctrl & FLOW_CTRL_RX) + mcr |= MAC_MCR_FORCE_RX_FC; + + netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n", + flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled", + flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled"); + } mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); @@ -236,7 +257,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) mac->phy_dev->autoneg = AUTONEG_ENABLE; mac->phy_dev->speed = 0; mac->phy_dev->duplex = 0; - mac->phy_dev->supported &= PHY_BASIC_FEATURES; + mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause; mac->phy_dev->advertising = mac->phy_dev->supported | ADVERTISED_Autoneg; phy_start_aneg(mac->phy_dev); -- cgit v1.1 From 0c72c50f6f93b0c3daa9ea35d89ab3a933c7b5a0 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:08 +0200 Subject: net-next: mediatek: add fixed-phy support The MT7623 SoC has a builtin gigabit switch. If we want to use it, GMAC1 needs to be configured using a fixed link speed and flow control settings. The easiest way to do this is to used the fixed-phy driver, allowing us to reuse the existing mdio polling code to setup the MAC. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b0652f6..231d284 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -229,6 +229,9 @@ static int mtk_phy_connect(struct mtk_mac *mac) u32 val, ge_mode; np = of_parse_phandle(mac->of_node, "phy-handle", 0); + if (!np && of_phy_is_fixed_link(mac->of_node)) + if (!of_phy_register_fixed_link(mac->of_node)) + np = of_node_get(mac->of_node); if (!np) return -ENODEV; -- cgit v1.1 From 37920fce0fc10264410eb880d411968b7934b61d Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 3 Jun 2016 10:17:09 +0200 Subject: net-next: mediatek: properly handle RGMII modes If an external Gigabit PHY is connected to either of the MACs we need to be able to tell the PHY to use a delay. Not doing so will result in heavy packet loss and/or data corruption when using PHYs such as the IC+ IP1001. We tell the PHY which MII delay mode to use via the devictree. The ethernet driver needs to be adapted to handle all 3 rgmii-*id modes in the same way as normal rgmii when setting up the MAC. Reviewed-by: Andrew Lunn Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 231d284..4763252 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -236,6 +236,9 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: ge_mode = 0; break; -- cgit v1.1 From 182fd9eecb287e696c82b30d06c6150d80a49c5b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 22 May 2016 11:06:10 +0200 Subject: MAINTAINERS: Add file patterns for wireless device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Kalle Valo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d39715..7e47346 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7851,6 +7851,7 @@ Q: http://patchwork.kernel.org/project/linux-wireless/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git S: Maintained +F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ NETXEN (1/10) GbE SUPPORT -- cgit v1.1 From fac7d1917dfddfa53e98524f0abfbe60252740fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Jun 2016 11:41:49 -0400 Subject: fix EOPENSTALE bug in do_last() EOPENSTALE occuring at the last component of a trailing symlink ends up with do_last() retrying its lookup. After the symlink body has been discarded. The thing is, all this retry_lookup logics in there is not needed at all - the upper layers will do the right thing if we simply return that -EOPENSTALE as we would with any other error. Trying to microoptimize in do_last() is a lot of headache for no good reason. Cc: stable@vger.kernel.org # v4.2+ Tested-by: Oleg Drokin Reviewed-and-Tested-by: Jeff Layton Signed-off-by: Al Viro --- fs/namei.c | 43 ++++--------------------------------------- 1 file changed, 4 insertions(+), 39 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 4c4f95a..3d9511e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3166,9 +3166,7 @@ static int do_last(struct nameidata *nd, int acc_mode = op->acc_mode; unsigned seq; struct inode *inode; - struct path save_parent = { .dentry = NULL, .mnt = NULL }; struct path path; - bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -3211,7 +3209,6 @@ static int do_last(struct nameidata *nd, return -EISDIR; } -retry_lookup: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); if (!error) @@ -3292,23 +3289,14 @@ finish_lookup: if (unlikely(error)) return error; - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { - path_to_nameidata(&path, nd); - } else { - save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path.mnt); - nd->path.dentry = path.dentry; - - } + path_to_nameidata(&path, nd); nd->inode = inode; nd->seq = seq; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ finish_open: error = complete_walk(nd); - if (error) { - path_put(&save_parent); + if (error) return error; - } audit_inode(nd->name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) @@ -3331,13 +3319,9 @@ finish_open_created: goto out; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); - if (!error) { - *opened |= FILE_OPENED; - } else { - if (error == -EOPENSTALE) - goto stale_open; + if (error) goto out; - } + *opened |= FILE_OPENED; opened: error = open_check_o_direct(file); if (!error) @@ -3353,26 +3337,7 @@ out: } if (got_write) mnt_drop_write(nd->path.mnt); - path_put(&save_parent); return error; - -stale_open: - /* If no saved parent or already retried then can't retry */ - if (!save_parent.dentry || retried) - goto out; - - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (got_write) { - mnt_drop_write(nd->path.mnt); - got_write = false; - } - retried = true; - goto retry_lookup; } static int do_tmpfile(struct nameidata *nd, unsigned flags, -- cgit v1.1 From 69a834c28fb514403eb91a4f0120da214a52e056 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 24 May 2016 18:29:38 -0400 Subject: drm/msm: deal with exhausted vmap space better Some, but not all, callers of obj->vmap() would check if return IS_ERR(). So let's actually return an error if vmap() fails. And fixup the call-sites that were not handling this properly. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +- drivers/gpu/drm/msm/msm_fbdev.c | 4 ++++ drivers/gpu/drm/msm/msm_gem.c | 2 ++ drivers/gpu/drm/msm/msm_rd.c | 3 +++ drivers/gpu/drm/msm/msm_ringbuffer.c | 4 ++++ 5 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index fbe304e..2aec27d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -408,7 +408,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, } adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo); - if (!adreno_gpu->memptrs) { + if (IS_ERR(adreno_gpu->memptrs)) { dev_err(drm->dev, "could not vmap memptrs\n"); return -ENOMEM; } diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index d9759bf..c6cf837 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -159,6 +159,10 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, dev->mode_config.fb_base = paddr; fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo); + if (IS_ERR(fbi->screen_base)) { + ret = PTR_ERR(fbi->screen_base); + goto fail_unlock; + } fbi->screen_size = fbdev->bo->size; fbi->fix.smem_start = paddr; fbi->fix.smem_len = fbdev->bo->size; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 7daf4054..69836f56 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -398,6 +398,8 @@ void *msm_gem_vaddr_locked(struct drm_gem_object *obj) return ERR_CAST(pages); msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (msm_obj->vaddr == NULL) + return ERR_PTR(-ENOMEM); } return msm_obj->vaddr; } diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index b48f73a..0857710 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -312,6 +312,9 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) struct msm_gem_object *obj = submit->bos[idx].obj; const char *buf = msm_gem_vaddr_locked(&obj->base); + if (IS_ERR(buf)) + continue; + buf += iova - submit->bos[idx].iova; rd_write_section(rd, RD_GPUADDR, diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 1f14b90..42f5359 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -40,6 +40,10 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->start = msm_gem_vaddr_locked(ring->bo); + if (IS_ERR(ring->start)) { + ret = PTR_ERR(ring->start); + goto fail; + } ring->end = ring->start + (size / 4); ring->cur = ring->start; -- cgit v1.1 From ba344afd667a6f1c6bf7c53e08fc16bd84fa4df7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 24 May 2016 18:43:26 -0400 Subject: drm/msm: fix some crashes in submit fail path If submit fails, before fence is created or before submit is added to submit-list, then unitialized fields cause problems in the clean-up path. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index b89ca51..635eff1 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -40,12 +40,14 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->dev = dev; submit->gpu = gpu; + submit->fence = NULL; submit->pid = get_pid(task_pid(current)); /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; submit->nr_cmds = 0; + INIT_LIST_HEAD(&submit->node); INIT_LIST_HEAD(&submit->bo_list); ww_acquire_init(&submit->ticket, &reservation_ww_class); -- cgit v1.1 From a9e26cab40ecfd4a0d718f22fa30db4dd1edbf60 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 1 Jun 2016 14:02:51 -0400 Subject: drm/msm: fix potential submit error path issue Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 635eff1..eb4bb8b 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -77,6 +77,11 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, void __user *userptr = u64_to_user_ptr(args->bos + (i * sizeof(submit_bo))); + /* make sure we don't have garbage flags, in case we hit + * error path before flags is initialized: + */ + submit->bos[i].flags = 0; + ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); if (ret) { ret = -EFAULT; -- cgit v1.1 From e6ec03a25f12b312b7e0c037fe4a6471c4ee5665 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2016 00:23:09 -0400 Subject: autofs braino fix for do_last() It's an analogue of commit 7500c38a (fix the braino in "namei: massage lookup_slow() to be usable by lookup_one_len_unlocked()"). The same problem (->lookup()-returned unhashed negative dentry just might be an autofs one with ->d_manage() that would wait until the daemon makes it positive) applies in do_last() - we need to do follow_managed() first. Fortunately, remaining callers of follow_managed() are OK - only autofs has that weirdness (negative dentry that does not mean an instant -ENOENT)) and autofs never has its negative dentries hashed, so we can't pick one from a dcache lookup. ->d_manage() is a bloody mess ;-/ Cc: stable@vger.kernel.org # v4.6 Spotted-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 3d9511e..d7c0cac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3260,6 +3260,10 @@ static int do_last(struct nameidata *nd, got_write = false; } + error = follow_managed(&path, nd); + if (unlikely(error < 0)) + return error; + if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; @@ -3275,10 +3279,6 @@ static int do_last(struct nameidata *nd, return -EEXIST; } - error = follow_managed(&path, nd); - if (unlikely(error < 0)) - return error; - seq = 0; /* out of RCU mode, so the value doesn't matter */ inode = d_backing_inode(path.dentry); finish_lookup: -- cgit v1.1 From 3ec10d3a2ba591c87da94219c1e46b02ae97757a Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Mon, 16 May 2016 19:18:09 +0200 Subject: ipvs: update real-server binding of outgoing connections in SIP-pe Previous patch that introduced handling of outgoing packets in SIP persistent-engine did not call ip_vs_check_template() in case packet was matching a connection template. Assumption was that real-server was healthy, since it was sending a packet just in that moment. There are however real-server fault conditions requiring that association between call-id and real-server (represented by connection template) gets updated. Here is an example of the sequence of events: 1) RS1 is a back2back user agent that handled call-id1 and call-id2 2) RS1 is down and was marked as unavailable 3) new message from outside comes to IPVS with call-id1 4) IPVS reschedules the message to RS2, which becomes new call handler 5) RS2 forwards the message outside, translating call-id1 to call-id2 6) inside pe->conn_out() IPVS matches call-id2 with existing template 7) IPVS does not change association call-id2 <-> RS1 8) new message comes from client with call-id2 9) IPVS reschedules the message to a real-server potentially different from RS2, which is now the correct destination This patch introduces ip_vs_check_template() call in the handling of outgoing packets for SIP-pe. And also introduces a second optional argument for ip_vs_check_template() that allows to check if dest associated to a connection template is the same dest that was identified as the source of the packet. This is to change the real-server bound to a particular call-id independently from its availability status: the idea is that it's more reliable, for in->out direction (where internal network can be considered trusted), to always associate a call-id with the last real-server that used it in one of its messages. Think about above sequence of events where, just after step 5, RS1 returns instead to be available. Comparison of dests is done by simply comparing pointers to struct ip_vs_dest; there should be no cases where struct ip_vs_dest keeps its memory address, but represent a different real-server in terms of ip-address / port. Fixes: 39b972231536 ("ipvs: handle connections started by real-servers") Signed-off-by: Marco Angaroni Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af4c10e..cd6018a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1232,7 +1232,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp); const char *ip_vs_state_name(__u16 proto, int state); void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); -int ip_vs_check_template(struct ip_vs_conn *ct); +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); void ip_vs_random_dropentry(struct netns_ipvs *ipvs); int ip_vs_conn_init(void); void ip_vs_conn_cleanup(void); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2cb3c62..096a451 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -762,7 +762,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, * If available, return 1, otherwise invalidate this connection * template and return 0. */ -int ip_vs_check_template(struct ip_vs_conn *ct) +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest) { struct ip_vs_dest *dest = ct->dest; struct netns_ipvs *ipvs = ct->ipvs; @@ -772,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - expire_quiescent_template(ipvs, dest)) { + expire_quiescent_template(ipvs, dest) || + (cdest && (dest != cdest))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1207f20..2c1b498 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -321,7 +321,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); - if (!ct || !ip_vs_check_template(ct)) { + if (!ct || !ip_vs_check_template(ct, NULL)) { struct ip_vs_scheduler *sched; /* @@ -1154,7 +1154,8 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, vport, ¶m) < 0) return NULL; ct = ip_vs_ct_in_get(¶m); - if (!ct) { + /* check if template exists and points to the same dest */ + if (!ct || !ip_vs_check_template(ct, dest)) { ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0); if (!ct) { -- cgit v1.1 From a02cc9d3cc9f98905df214d4a57e5918473260ea Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Jun 2016 15:32:18 +0200 Subject: bnx2x: allow adding VLANs while interface is down Since implementing VLAN filtering in commit 05cc5a39ddb74 ("bnx2x: add vlan filtering offload") bnx2x refuses to add a VLAN while the interface is down: # ip link add link enp3s0f0 enp3s0f0_10 type vlan id 10 RTNETLINK answers: Bad address and in dmesg (with bnx2x.debug=0x20): bnx2x: [bnx2x_vlan_rx_add_vid:12941(enp3s0f0)]Ignoring VLAN configuration the interface is down Other drivers have no problem with this. Fix this peculiar behavior in the following way: - Accept requests to add/kill VID regardless of the device state. Maintain the requested list of VIDs in the bp->vlan_reg list. - If the device is up, try to configure the VID list into the hardware. If we run out of VLAN credits or encounter a failure configuring an entry, fall back to accepting all VLANs. If we successfully configure all entries from the list, turn the fallback off. - Use the same code for reconfiguring VLANs during NIC load. Signed-off-by: Michal Schmidt Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 151 ++++++++++------------- 1 file changed, 62 insertions(+), 89 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c5fe9158..a59d55e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12895,52 +12895,71 @@ static int __bnx2x_vlan_configure_vid(struct bnx2x *bp, u16 vid, bool add) return rc; } -int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) +static int bnx2x_vlan_configure_vid_list(struct bnx2x *bp) { struct bnx2x_vlan_entry *vlan; int rc = 0; - if (!bp->vlan_cnt) { - DP(NETIF_MSG_IFUP, "No need to re-configure vlan filters\n"); - return 0; - } - + /* Configure all non-configured entries */ list_for_each_entry(vlan, &bp->vlan_reg, link) { - /* Prepare for cleanup in case of errors */ - if (rc) { - vlan->hw = false; - continue; - } - - if (!vlan->hw) + if (vlan->hw) continue; - DP(NETIF_MSG_IFUP, "Re-configuring vlan 0x%04x\n", vlan->vid); + if (bp->vlan_cnt >= bp->vlan_credit) + return -ENOBUFS; rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true); if (rc) { - BNX2X_ERR("Unable to configure VLAN %d\n", vlan->vid); - vlan->hw = false; - rc = -EINVAL; - continue; + BNX2X_ERR("Unable to config VLAN %d\n", vlan->vid); + return rc; } + + DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", vlan->vid); + vlan->hw = true; + bp->vlan_cnt++; } - return rc; + return 0; +} + +static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode) +{ + bool need_accept_any_vlan; + + need_accept_any_vlan = !!bnx2x_vlan_configure_vid_list(bp); + + if (bp->accept_any_vlan != need_accept_any_vlan) { + bp->accept_any_vlan = need_accept_any_vlan; + DP(NETIF_MSG_IFUP, "Accept all VLAN %s\n", + bp->accept_any_vlan ? "raised" : "cleared"); + if (set_rx_mode) { + if (IS_PF(bp)) + bnx2x_set_rx_mode_inner(bp); + else + bnx2x_vfpf_storm_rx_mode(bp); + } + } +} + +int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp) +{ + struct bnx2x_vlan_entry *vlan; + + /* The hw forgot all entries after reload */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + bp->vlan_cnt = 0; + + /* Don't set rx mode here. Our caller will do it. */ + bnx2x_vlan_configure(bp, false); + + return 0; } static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_vlan_entry *vlan; - bool hw = false; - int rc = 0; - - if (!netif_running(bp->dev)) { - DP(NETIF_MSG_IFUP, - "Ignoring VLAN configuration the interface is down\n"); - return -EFAULT; - } DP(NETIF_MSG_IFUP, "Adding VLAN %d\n", vid); @@ -12948,93 +12967,47 @@ static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) if (!vlan) return -ENOMEM; - bp->vlan_cnt++; - if (bp->vlan_cnt > bp->vlan_credit && !bp->accept_any_vlan) { - DP(NETIF_MSG_IFUP, "Accept all VLAN raised\n"); - bp->accept_any_vlan = true; - if (IS_PF(bp)) - bnx2x_set_rx_mode_inner(bp); - else - bnx2x_vfpf_storm_rx_mode(bp); - } else if (bp->vlan_cnt <= bp->vlan_credit) { - rc = __bnx2x_vlan_configure_vid(bp, vid, true); - hw = true; - } - vlan->vid = vid; - vlan->hw = hw; + vlan->hw = false; + list_add_tail(&vlan->link, &bp->vlan_reg); - if (!rc) { - list_add(&vlan->link, &bp->vlan_reg); - } else { - bp->vlan_cnt--; - kfree(vlan); - } - - DP(NETIF_MSG_IFUP, "Adding VLAN result %d\n", rc); + if (netif_running(dev)) + bnx2x_vlan_configure(bp, true); - return rc; + return 0; } static int bnx2x_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_vlan_entry *vlan; + bool found = false; int rc = 0; - if (!netif_running(bp->dev)) { - DP(NETIF_MSG_IFUP, - "Ignoring VLAN configuration the interface is down\n"); - return -EFAULT; - } - DP(NETIF_MSG_IFUP, "Removing VLAN %d\n", vid); - if (!bp->vlan_cnt) { - BNX2X_ERR("Unable to kill VLAN %d\n", vid); - return -EINVAL; - } - list_for_each_entry(vlan, &bp->vlan_reg, link) - if (vlan->vid == vid) + if (vlan->vid == vid) { + found = true; break; + } - if (vlan->vid != vid) { + if (!found) { BNX2X_ERR("Unable to kill VLAN %d - not found\n", vid); return -EINVAL; } - if (vlan->hw) + if (netif_running(dev) && vlan->hw) { rc = __bnx2x_vlan_configure_vid(bp, vid, false); + DP(NETIF_MSG_IFUP, "HW deconfigured for VLAN %d\n", vid); + bp->vlan_cnt--; + } list_del(&vlan->link); kfree(vlan); - bp->vlan_cnt--; - - if (bp->vlan_cnt <= bp->vlan_credit && bp->accept_any_vlan) { - /* Configure all non-configured entries */ - list_for_each_entry(vlan, &bp->vlan_reg, link) { - if (vlan->hw) - continue; - - rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true); - if (rc) { - BNX2X_ERR("Unable to config VLAN %d\n", - vlan->vid); - continue; - } - DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", - vlan->vid); - vlan->hw = true; - } - DP(NETIF_MSG_IFUP, "Accept all VLAN Removed\n"); - bp->accept_any_vlan = false; - if (IS_PF(bp)) - bnx2x_set_rx_mode_inner(bp); - else - bnx2x_vfpf_storm_rx_mode(bp); - } + if (netif_running(dev)) + bnx2x_vlan_configure(bp, true); DP(NETIF_MSG_IFUP, "Removing VLAN result %d\n", rc); -- cgit v1.1 From b10c22e5f9902a329450c2027e9291b71e9f1602 Mon Sep 17 00:00:00 2001 From: Herve Jourdain Date: Wed, 1 Jun 2016 02:24:46 +0800 Subject: drm/vc4: Fix ioctl permissions for render nodes. Contrary to other flags to DRM_IOCTL_DEF_DRV(), which restrict usage, the flag for render node is an enabler (the IOCTL can't be used from render node if it's not present). So DRM_RENDER_ALLOW needs to be added to all the flags that were previously 0. Signed-off-by: Herve Jourdain Reviewed-by: Eric Anholt Fixes: 0cd3e2747662 ("drm/vc4: Add missing render node support") --- drivers/gpu/drm/vc4/vc4_drv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index ef7de8e..250ed7e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -66,12 +66,12 @@ static const struct file_operations vc4_drm_fops = { }; static const struct drm_ioctl_desc vc4_drm_ioctls[] = { - DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), - DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), }; -- cgit v1.1 From 56d1fe0979dc9b73c1c12ee07722ac380d42a0c4 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 18 May 2016 14:02:46 +0200 Subject: drm/vc4: Make pageflip completion handling more robust. Protect both the setup of the pageflip event and the latching of the new requested displaylist head pointer by the event lock, so we can't get into a situation where vc4_atomic_flush latches the new display list via HVS_WRITE, then immediately gets preempted before queueing the pageflip event, then the page-flip completes in hw and the vc4_crtc_handle_page_flip() runs and no-ops due to lack of a pending pageflip event, then vc4_atomic_flush continues and only then queues the pageflip event - after the page flip handling already no-oped. This would cause flip completion handling only at the next vblank - one frame too late. In vc4_crtc_handle_page_flip() check the actual DL head pointer in SCALER_DISPLACTX against the requested pointer for page flip to make sure that the flip actually really completed in the current vblank and doesn't get deferred to the next one because the DL head pointer was written a bit too late into SCALER_DISPLISTX, after start of vblank, and missed the boat. This avoids handling a pageflip completion too early - one frame too early. According to Eric, DL head pointer updates which were written into the HVS DISPLISTX reg get committed to hardware at the last pixel of active scanout. Our vblank interrupt handler, as triggered by PV_INT_VFP_START irq, gets to run earliest at the first pixel of HBLANK at the end of the last scanline of active scanout, ie. vblank irq handling runs at least 1 pixel duration after a potential pageflip completion happened in hardware. This ordering of events in the hardware, together with the lock protection and SCALER_DISPLACTX sampling of this patch, guarantees that pageflip completion handling only runs at exactly the vblank irq of actual pageflip completion in all cases. Background info from Eric about the relative timing of HVS, PV's and trigger points for interrupts, DL updates: https://lists.freedesktop.org/archives/dri-devel/2016-May/107510.html Tested on RPi 2B with hardware timing measurement equipment and shown to no longer complete flips too early or too late. Signed-off-by: Mario Kleiner Reviewed-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 28 ++++++++++++++++++---------- drivers/gpu/drm/vc4/vc4_regs.h | 4 ++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index e9befb6..0f18b76 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -456,14 +456,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - vc4_state->mm.start); - - if (debug_dump_regs) { - DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); - vc4_hvs_dump_state(dev); - } - if (crtc->state->event) { unsigned long flags; @@ -473,8 +465,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); vc4_crtc->event = crtc->state->event; - spin_unlock_irqrestore(&dev->event_lock, flags); crtc->state->event = NULL; + + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + + spin_unlock_irqrestore(&dev->event_lock, flags); + } else { + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + } + + if (debug_dump_regs) { + DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); + vc4_hvs_dump_state(dev); } } @@ -500,10 +504,14 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) { struct drm_crtc *crtc = &vc4_crtc->base; struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + u32 chan = vc4_crtc->channel; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); - if (vc4_crtc->event) { + if (vc4_crtc->event && + (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)))) { drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; drm_crtc_vblank_put(crtc); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 6163b95..f99eece 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -341,6 +341,10 @@ #define SCALER_DISPLACT0 0x00000030 #define SCALER_DISPLACT1 0x00000034 #define SCALER_DISPLACT2 0x00000038 +#define SCALER_DISPLACTX(x) (SCALER_DISPLACT0 + \ + (x) * (SCALER_DISPLACT1 - \ + SCALER_DISPLACT0)) + #define SCALER_DISPCTRL0 0x00000040 # define SCALER_DISPCTRLX_ENABLE BIT(31) # define SCALER_DISPCTRLX_RESET BIT(30) -- cgit v1.1 From a8953c52b95167b5d21a66f0859751570271d834 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 3 Jun 2016 14:37:40 +1000 Subject: drm/nouveau/disp/sor/gf119: both links use the same training register It appears that, for whatever reason, both link A and B use the same register to control the training pattern. It's a little odd, as the GPUs before this (Tesla/Fermi1) have per-link registers, as do newer GPUs (Maxwell). Fixes the third DP output on NVS 510 (GK107). Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index b4b41b1..5111560 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -40,8 +40,7 @@ static int gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) { struct nvkm_device *device = outp->base.disp->engine.subdev.device; - const u32 loff = gf119_sor_loff(outp); - nvkm_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern); + nvkm_mask(device, 0x61c110, 0x0f0f0f0f, 0x01010101 * pattern); return 0; } -- cgit v1.1 From 4691409b3e2250ed66aa8dcefa23fe765daf7add Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 3 Jun 2016 15:05:52 +1000 Subject: drm/nouveau/disp/sor/gm107: training pattern registers are like gm200 Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h | 9 +++- .../gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c | 53 ++++++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c | 15 +----- 6 files changed, 64 insertions(+), 18 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index a74c5dd..e2a64ed 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -18,6 +18,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o nvkm-y += nvkm/engine/disp/sornv50.o nvkm-y += nvkm/engine/disp/sorg94.o nvkm-y += nvkm/engine/disp/sorgf119.o +nvkm-y += nvkm/engine/disp/sorgm107.o nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/dport.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c index b694414..f4b9cf8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c @@ -36,7 +36,7 @@ gm107_disp = { .outp.internal.crt = nv50_dac_output_new, .outp.internal.tmds = nv50_sor_output_new, .outp.internal.lvds = nv50_sor_output_new, - .outp.internal.dp = gf119_sor_dp_new, + .outp.internal.dp = gm107_sor_dp_new, .dac.nr = 3, .dac.power = nv50_dac_power, .dac.sense = nv50_dac_sense, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h index e9067ba..4e983f6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h @@ -62,7 +62,12 @@ int g94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int); int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, struct nvkm_output **); int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool); +int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int); -int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, - struct nvkm_output **); +int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, + struct nvkm_output **); +int gm107_sor_dp_pattern(struct nvkm_output_dp *, int); + +int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *, + struct nvkm_output **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index 5111560..22706c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -63,7 +63,7 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef) return 0; } -static int +int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp, int ln, int vs, int pe, int pc) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c new file mode 100644 index 0000000..37790b2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c @@ -0,0 +1,53 @@ +/* + * Copyright 2016 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "nv50.h" +#include "outpdp.h" + +int +gm107_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) +{ + struct nvkm_device *device = outp->base.disp->engine.subdev.device; + const u32 soff = outp->base.or * 0x800; + const u32 data = 0x01010101 * pattern; + if (outp->base.info.sorconf.link & 1) + nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data); + else + nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data); + return 0; +} + +static const struct nvkm_output_dp_func +gm107_sor_dp_func = { + .pattern = gm107_sor_dp_pattern, + .lnk_pwr = g94_sor_dp_lnk_pwr, + .lnk_ctl = gf119_sor_dp_lnk_ctl, + .drv_ctl = gf119_sor_dp_drv_ctl, +}; + +int +gm107_sor_dp_new(struct nvkm_disp *disp, int index, + struct dcb_output *dcbE, struct nvkm_output **poutp) +{ + return nvkm_output_dp_new_(&gm107_sor_dp_func, disp, index, dcbE, poutp); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c index 2cfbef9..c44fa7e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c @@ -57,19 +57,6 @@ gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane) } static int -gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) -{ - struct nvkm_device *device = outp->base.disp->engine.subdev.device; - const u32 soff = gm200_sor_soff(outp); - const u32 data = 0x01010101 * pattern; - if (outp->base.info.sorconf.link & 1) - nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data); - else - nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data); - return 0; -} - -static int gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr) { struct nvkm_device *device = outp->base.disp->engine.subdev.device; @@ -129,7 +116,7 @@ gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp, static const struct nvkm_output_dp_func gm200_sor_dp_func = { - .pattern = gm200_sor_dp_pattern, + .pattern = gm107_sor_dp_pattern, .lnk_pwr = gm200_sor_dp_lnk_pwr, .lnk_ctl = gf119_sor_dp_lnk_ctl, .drv_ctl = gm200_sor_dp_drv_ctl, -- cgit v1.1 From fc100a7f89da85da8edd9c2e6f6e8b2490d74ae1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 19:19:20 +0200 Subject: soreuseport: Fix reuseport_bpf testcase on 32bit architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following compiler warnings when compiling the reuseport_bpf testcase on a 32 bit platform: reuseport_bpf.c: In function ‘attach_ebpf’: reuseport_bpf.c:114:15: warning: cast from pointer to integer of ifferent size [-Wpointer-to-int-cast] Signed-off-by: Helge Deller Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 96ba386..4a82174 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -111,9 +111,9 @@ static void attach_ebpf(int fd, uint16_t mod) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insn_cnt = ARRAY_SIZE(prog); - attr.insns = (uint64_t)prog; - attr.license = (uint64_t)bpf_license; - attr.log_buf = (uint64_t)bpf_log_buf; + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; attr.log_size = sizeof(bpf_log_buf); attr.log_level = 1; attr.kern_version = 0; @@ -351,8 +351,8 @@ static void test_filter_no_reuseport(const struct test_params p) memset(&eprog, 0, sizeof(eprog)); eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; eprog.insn_cnt = ARRAY_SIZE(ecode); - eprog.insns = (uint64_t)ecode; - eprog.license = (uint64_t)bpf_license; + eprog.insns = (unsigned long) &ecode; + eprog.license = (unsigned long) &bpf_license; eprog.kern_version = 0; memset(&cprog, 0, sizeof(cprog)); -- cgit v1.1 From 1957598840f47d42bb0b7f8a871717a780708686 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 23:49:17 +0200 Subject: soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF Commit 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") missed to add the compat case for the SO_ATTACH_REUSEPORT_CBPF option. Signed-off-by: Helge Deller Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/compat.c b/net/compat.c index 5cfd26a..1373947 100644 --- a/net/compat.c +++ b/net/compat.c @@ -354,7 +354,8 @@ static int do_set_sock_timeout(struct socket *sock, int level, static int compat_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - if (optname == SO_ATTACH_FILTER) + if (optname == SO_ATTACH_FILTER || + optname == SO_ATTACH_REUSEPORT_CBPF) return do_set_attach_filter(sock, level, optname, optval, optlen); if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) -- cgit v1.1 From 943f44d94aa26bfdcaafc40d3701e24eeb58edce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Mar 2016 08:33:16 -0700 Subject: IB/cm: Fix a recently introduced locking bug ib_cm_notify() can be called from interrupt context. Hence do not reenable interrupts unconditionally in cm_establish(). This patch avoids that lockdep reports the following warning: WARNING: CPU: 0 PID: 23317 at kernel/locking/lockdep.c:2624 trace _hardirqs_on_caller+0x112/0x1b0 DEBUG_LOCKS_WARN_ON(current->hardirq_context) Call Trace: [] dump_stack+0x67/0x92 [] __warn+0xc1/0xe0 [] warn_slowpath_fmt+0x4a/0x50 [] trace_hardirqs_on_caller+0x112/0x1b0 [] trace_hardirqs_on+0xd/0x10 [] _raw_spin_unlock_irq+0x27/0x40 [] ib_cm_notify+0x25c/0x290 [ib_cm] [] srpt_qp_event+0xa1/0xf0 [ib_srpt] [] mlx4_ib_qp_event+0x67/0xd0 [mlx4_ib] [] mlx4_qp_event+0x5a/0xc0 [mlx4_core] [] mlx4_eq_int+0x3d8/0xcf0 [mlx4_core] [] mlx4_msi_x_interrupt+0xc/0x20 [mlx4_core] [] handle_irq_event_percpu+0x64/0x100 [] handle_irq_event+0x34/0x60 [] handle_edge_irq+0x6a/0x150 [] handle_irq+0x15/0x20 [] do_IRQ+0x5c/0x110 [] common_interrupt+0x89/0x89 [] blk_run_queue_async+0x37/0x40 [] rq_completed+0x43/0x70 [dm_mod] [] dm_softirq_done+0x176/0x280 [dm_mod] [] blk_done_softirq+0x52/0x90 [] __do_softirq+0x10f/0x230 [] irq_exit+0xa8/0xb0 [] smp_trace_call_function_single_interrupt+0x2e/0x30 [] smp_call_function_single_interrupt+0x9/0x10 [] call_function_single_interrupt+0x89/0x90 Fixes: commit be4b499323bf (IB/cm: Do not queue work to a device that's going away) Signed-off-by: Bart Van Assche Cc: Erez Shitrit Cc: Sean Hefty Cc: Nikolay Borisov Cc: stable # v4.2+ Acked-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1d92e09..c995255 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3452,14 +3452,14 @@ static int cm_establish(struct ib_cm_id *cm_id) work->cm_event.event = IB_CM_USER_ESTABLISHED; /* Check if the device started its remove_one */ - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) { queue_delayed_work(cm.wq, &work->work, 0); } else { kfree(work); ret = -ENODEV; } - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); out: return ret; -- cgit v1.1 From a8b7da58ec81e74a3a6982e2dba24f899b56c915 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 28 May 2016 08:01:20 +0300 Subject: IB/hfi1: fix some indenting That extra tabs are misleading. Signed-off-by: Dan Carpenter Reviewed-by: Bart Van Assche Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 5cc492e..0d28a5a 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1337,7 +1337,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, dd->rcvhdrtail_dummy_physaddr); - dd->rcvhdrtail_dummy_kvaddr = NULL; + dd->rcvhdrtail_dummy_kvaddr = NULL; } for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) { -- cgit v1.1 From f242d93ae92032f78840471e5c2bfc2d04ae324c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 31 May 2016 10:54:36 +0300 Subject: IB/hfi1: Avoid large frame size warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_FRAME_WARN is set to 1024 bytes, which is useful to find stack consumers, we get a warning in hfi1 driver. drivers/infiniband/hw/hfi1/affinity.c: In function ‘hfi1_get_proc_affinity’: drivers/infiniband/hw/hfi1/affinity.c:415:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=] This change removes unneeded buf[1024] declaration and usage. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Signed-off-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 6e7050a..14d7eeb 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -300,16 +300,15 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) const struct cpumask *node_mask, *proc_mask = tsk_cpus_allowed(current); struct cpu_mask_set *set = &dd->affinity->proc; - char buf[1024]; /* * check whether process/context affinity has already * been set */ if (cpumask_weight(proc_mask) == 1) { - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); - hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s", - current->pid, current->comm, buf); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", + current->pid, current->comm, + cpumask_pr_args(proc_mask)); /* * Mark the pre-set CPU as used. This is atomic so we don't * need the lock @@ -318,9 +317,9 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_set_cpu(cpu, &set->used); goto done; } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); - hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s", - current->pid, current->comm, buf); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", + current->pid, current->comm, + cpumask_pr_args(proc_mask)); goto done; } @@ -356,8 +355,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? &dd->affinity->rcv_intr.mask : &dd->affinity->rcv_intr.used)); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs)); - hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf); + hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", + cpumask_pr_args(intrs)); /* * If we don't have a NUMA node requested, preference is towards @@ -366,18 +365,16 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) if (node == -1) node = dd->node; node_mask = cpumask_of_node(node); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask)); - hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf); + hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, + cpumask_pr_args(node_mask)); /* diff will hold all unused cpus */ cpumask_andnot(diff, &set->mask, &set->used); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff)); - hfi1_cdbg(PROC, "unused CPUs (all) %s", buf); + hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); /* get cpumask of available CPUs on preferred NUMA */ cpumask_and(mask, diff, node_mask); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); - hfi1_cdbg(PROC, "available cpus on NUMA %s", buf); + hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); /* * At first, we don't want to place processes on the same @@ -395,8 +392,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_andnot(diff, &set->mask, &set->used); cpumask_andnot(mask, diff, node_mask); } - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); - hfi1_cdbg(PROC, "possible CPUs for process %s", buf); + hfi1_cdbg(PROC, "possible CPUs for process %*pbl", + cpumask_pr_args(mask)); cpu = cpumask_first(mask); if (cpu >= nr_cpu_ids) /* empty */ -- cgit v1.1 From da1f857be62ca0472024da37eab068b3b8ce0a15 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 May 2016 19:05:56 +0300 Subject: IB/core: fix an error code in ib_core_init() We should return the error code if ib_add_ibnl_clients() fails. The current code returns success. Fixes: 735c631ae99d ('IB/core: Register SA ibnl client during ib_core initialization') Signed-off-by: Dan Carpenter Reviewed-by: Mark Bloch Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5516fb0..8b8a8d9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1024,7 +1024,8 @@ static int __init ib_core_init(void) goto err_mad; } - if (ib_add_ibnl_clients()) { + ret = ib_add_ibnl_clients(); + if (ret) { pr_warn("Couldn't register ibnl clients\n"); goto err_sa; } -- cgit v1.1 From 0147ebcf8927f09e1923114092f6b14c1de75a95 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jun 2016 19:06:36 +0100 Subject: IB/core: fix null pointer deref and mem leak in error handling The current error handling in setup_hw_stats has a couple of issues. It is possible to generate a null pointer deference on the kfree of hsag->attrs[i] because two of the early error exit paths jump to the kfree when hsags NULL and not allocated. Fix this by moving the kfree on stats and jumping to that, avoiding the hsag freeing. Secondly, there is a memory leak of stats if the hsag allocation fails; instead of returning, jump to the kfree on stats. Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 5e573bb..ed04a7b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -899,14 +899,14 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, return; if (!stats->names || stats->num_counters <= 0) - goto err; + goto err_free_stats; hsag = kzalloc(sizeof(*hsag) + // 1 extra for the lifespan config entry sizeof(void *) * (stats->num_counters + 1), GFP_KERNEL); if (!hsag) - return; + goto err_free_stats; ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); @@ -946,10 +946,11 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, return; err: - kfree(stats); for (; i >= 0; i--) kfree(hsag->attrs[i]); kfree(hsag); +err_free_stats: + kfree(stats); return; } -- cgit v1.1 From ce67fef68de552b14ae417511a323013b478f78e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Jun 2016 11:45:01 +0200 Subject: IB/usnic: Remove unused DMA attributes The DMA attributes are set but never used. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_uiom.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 7209fbc..a0b6ebe 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -112,10 +111,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int i; int flags; dma_addr_t pa; - DEFINE_DMA_ATTRS(attrs); - - if (dmasync) - dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); if (!can_do_mlock()) return -EPERM; -- cgit v1.1 From ca920f5b67f1b798a1f86c675ea441b295bf8464 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 07:58:32 -0700 Subject: IB/mlx4: Fix device managed flow steering support test Perform the test for device managed flow steering support even if memory windows are not supported. I noticed this because smatch reported inconsistent indentation for the device managed flow steering support test. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Yishai Hadas Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b01ef6e..0eb09e1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -505,9 +505,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; - if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) - props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } + if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; -- cgit v1.1 From 249f06561fc333581e48e6d388a56e3d100d23b6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 11:39:35 -0700 Subject: IB/srp: Always initialize use_fast_reg and use_fmr Avoid that mapping fails due to use_fast_reg != 0 or use_fmr != 0 if both member variables should be zero (if never_register == 1 or if neither FMR nor FR is supported). Remove an initialization that became superfluous due to changing a kmalloc() into a kzalloc() call. Fixes: 509c5f33f4f6 ("IB/srp: Prevent mapping failures") Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Laurence Oberman Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 646de17..bc24b8d 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3526,7 +3526,7 @@ static void srp_add_one(struct ib_device *device) int mr_page_shift, p; u64 max_pages_per_mr; - srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL); + srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) return; @@ -3586,8 +3586,6 @@ static void srp_add_one(struct ib_device *device) IB_ACCESS_REMOTE_WRITE); if (IS_ERR(srp_dev->global_mr)) goto err_pd; - } else { - srp_dev->global_mr = NULL; } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { -- cgit v1.1 From 9edba790fc52322051fd7b6589421021f0726483 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 11:40:24 -0700 Subject: IB/srp: Fix srp_map_sg_dma() Because patch "IB/srp: Move common code into the caller" was applied partially srp_map_sg_dma() doesn't work properly. Fix this by applying the remainder of that patch. See also http://thread.gmane.org/gmane.linux.drivers.rdma/35803/focus=35811. Fixes: 3849e44d1c4b ("IB/srp: Move common code into the caller") Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Laurence Oberman Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc24b8d..3322ed7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1457,7 +1457,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, { unsigned int sg_offset = 0; - state->desc = req->indirect_desc; state->fr.next = req->fr_list; state->fr.end = req->fr_list + ch->target->mr_per_cmd; state->sg = scat; @@ -1489,7 +1488,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, struct scatterlist *sg; int i; - state->desc = req->indirect_desc; for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), @@ -1655,6 +1653,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); + state.desc = req->indirect_desc; if (dev->use_fast_reg) ret = srp_map_sg_fr(&state, ch, req, scat, count); else if (dev->use_fmr) -- cgit v1.1 From 0270be78da8d27cc5588d2472694aa7ad2c680b3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:08:00 -0700 Subject: RDMA/core: Fix indentation Make indentation consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Tatyana Nikolova Cc: Steve Wise Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 43e3fa2..1c41b95 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -506,7 +506,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, if (!nlmsg_request) { pr_info("%s: Could not find a matching request (seq = %u)\n", __func__, msg_seq); - return -EINVAL; + return -EINVAL; } pm_msg = nlmsg_request->req_buffer; local_sockaddr = (struct sockaddr_storage *) -- cgit v1.1 From 2190d10de58101448b0ac360facc0d5166e3d30d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:08:44 -0700 Subject: IB/mad: Fix indentation Make indentation consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Hal Rosenstock Cc: Ira Weiny Reviewed-By: Ira Weiny Reviewed-by: Hal Rosenstock Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 82fb511..2d49228 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1638,9 +1638,9 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) /* Now, check to see if there are any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ - kfree(method); - class->method_table[mgmt_class] = NULL; - /* Any management classes left ? */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ if (!check_class_table(class)) { /* If not, release management class table */ kfree(class); -- cgit v1.1 From c0a67f6ba356521a8266694ffffa4998264d0cb0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:09:16 -0700 Subject: IB/rdmavt: Annotate rvt_reset_qp() This patch avoids that sparse reports the following warning: rdmavt/qp.c:507:17: warning: context imbalance in 'rvt_reset_qp' - unexpected unlock Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5fa4d4d..7de5134 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -502,6 +502,12 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) */ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) + __releases(&qp->s_lock) + __releases(&qp->s_hlock) + __releases(&qp->r_lock) + __acquires(&qp->r_lock) + __acquires(&qp->s_hlock) + __acquires(&qp->s_lock) { if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; -- cgit v1.1 From 48a0cc139fb89590fd66eea11b626b9b9f6b8e9d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:09:56 -0700 Subject: IB/hfi1: Fix indentation Make the indentation of the source code consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3b876da..81619fb 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7832,8 +7832,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * save first 2 flits in the packet that caused * the error */ - dd->err_info_rcvport.packet_flit1 = hdr0; - dd->err_info_rcvport.packet_flit2 = hdr1; + dd->err_info_rcvport.packet_flit1 = hdr0; + dd->err_info_rcvport.packet_flit2 = hdr1; } switch (info) { case 1: @@ -11906,7 +11906,7 @@ static void update_synth_timer(unsigned long opaque) hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); } -mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); + mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } #define C_MAX_NAME 13 /* 12 chars + one for /0 */ -- cgit v1.1 From d55215c50e4eaa4b906a42ef45884d8fcbadc777 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:10:37 -0700 Subject: IB/hfi1: Use bit 0 instead of bit 1 The first argument of test_bit() and clear_bit() is a bit number and not a bitmask. Hence change that first argument from (1 << 0) into 0. This patch avoids that smatch reports the following warnings: user_sdma.c:1059: sdma_cache_evict() warn: test_bit() takes a bit number user_sdma.c:1590: sdma_rb_remove() warn: test_bit() takes a bit number Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 29f4795..2eca5b7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -183,7 +183,7 @@ struct user_sdma_iovec { struct sdma_mmu_node *node; }; -#define SDMA_CACHE_NODE_EVICT BIT(0) +#define SDMA_CACHE_NODE_EVICT 0 struct sdma_mmu_node { struct mmu_rb_node rb; -- cgit v1.1 From 55c40648d31d0d97608f266955b8afae74e2b686 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:11:16 -0700 Subject: IB/hfi1: Suppress sparse warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid that sparse reports the following warnings for the hfi1 driver: trace.c:217:13: warning: no previous prototype for ‘print_u64_array’ [-Wmissing-prototypes] user_sdma.c:1361:17: warning: dubious: !x & y Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace.c | 13 ------------- drivers/infiniband/hw/hfi1/user_sdma.c | 4 ++-- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 79b2952..4cfb137 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -214,19 +214,6 @@ const char *print_u32_array( return ret; } -const char *print_u64_array( - struct trace_seq *p, - u64 *arr, int len) -{ - int i; - const char *ret = trace_seq_buffer_ptr(p); - - for (i = 0; i < len; i++) - trace_seq_printf(p, "%s0x%016llx", i == 0 ? "" : " ", arr[i]); - trace_seq_putc(p, 0); - return ret; -} - __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 2eca5b7..47ffd27 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1355,11 +1355,11 @@ static int set_txreq_header(struct user_sdma_request *req, */ SDMA_DBG(req, "TID offset %ubytes %uunits om%u", req->tidoffset, req->tidoffset / req->omfactor, - !!(req->omfactor - KDETH_OM_SMALL)); + req->omfactor != KDETH_OM_SMALL); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, req->tidoffset / req->omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, - !!(req->omfactor - KDETH_OM_SMALL)); + req->omfactor != KDETH_OM_SMALL); } done: trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt, -- cgit v1.1 From dcf15cbded656a12335bc4151f3f75f10080a375 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 3 Jun 2016 10:26:12 +0800 Subject: ACPI / EC: Fix a boot EC regresion by restoring boot EC support for the DSDT EC According to the Windows probing result, during the table loading, the EC device described in the ECDT should be used. And the ECDT EC is also effective during the period the namespace objects are initialized (we can see a separate process executing _STA/_INI on Windows before executing other device specific control methods, for example, EC._REG). During the device enumration, the EC device described in the DSDT should be used. But there are differences between Linux and Windows around the device probing order. Thus in Linux, we should enable the DSDT EC as early as possible before enumerating devices in order not to trigger issues related to the device enumeration order differences. This patch thus converts acpi_boot_ec_enable() into acpi_ec_dsdt_probe() to fix the gap. This also fixes a user reported regression triggered after we switched the "table loading"/"ECDT support" to be ACPI spec 2.0 compliant. Fixes: 59f0aa9480cf (ACPI 2.0 / ECDT: Remove early namespace reference from EC) Link: https://bugzilla.kernel.org/show_bug.cgi?id=119261 Reported-and-tested-by: Gabriele Mazzotta Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 2 +- drivers/acpi/ec.c | 29 ++++++++++++++++++++++------- drivers/acpi/internal.h | 2 +- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 31e8da6..262ca31 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1051,7 +1051,7 @@ static int __init acpi_bus_init(void) * Maybe EC region is required at bus_scan/acpi_get_devices. So it * is necessary to enable it as early as possible. */ - acpi_boot_ec_enable(); + acpi_ec_dsdt_probe(); printk(KERN_INFO PREFIX "Interpreter enabled\n"); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 0e70181..73c76d6 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1446,10 +1446,30 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context) return AE_OK; } -int __init acpi_boot_ec_enable(void) +static const struct acpi_device_id ec_device_ids[] = { + {"PNP0C09", 0}, + {"", 0}, +}; + +int __init acpi_ec_dsdt_probe(void) { - if (!boot_ec) + acpi_status status; + + if (boot_ec) return 0; + + /* + * Finding EC from DSDT if there is no ECDT EC available. When this + * function is invoked, ACPI tables have been fully loaded, we can + * walk namespace now. + */ + boot_ec = make_acpi_ec(); + if (!boot_ec) + return -ENOMEM; + status = acpi_get_devices(ec_device_ids[0].id, + ec_parse_device, boot_ec, NULL); + if (ACPI_FAILURE(status) || !boot_ec->handle) + return -ENODEV; if (!ec_install_handlers(boot_ec)) { first_ec = boot_ec; return 0; @@ -1457,11 +1477,6 @@ int __init acpi_boot_ec_enable(void) return -EFAULT; } -static const struct acpi_device_id ec_device_ids[] = { - {"PNP0C09", 0}, - {"", 0}, -}; - #if 0 /* * Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 7c18847..b4733b5 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -180,7 +180,7 @@ typedef int (*acpi_ec_query_func) (void *data); int acpi_ec_init(void); int acpi_ec_ecdt_probe(void); -int acpi_boot_ec_enable(void); +int acpi_ec_dsdt_probe(void); void acpi_ec_block_transactions(void); void acpi_ec_unblock_transactions(void); void acpi_ec_unblock_transactions_early(void); -- cgit v1.1 From a27758ffaf96f89002129eedb2cc172d254099f8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Jun 2016 15:05:57 -0700 Subject: net_sched: keep backlog updated with qlen For gso_skb we only update qlen, backlog should be updated too. Note, it is correct to just update these stats at one layer, because the gso_skb is cached there. Reported-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- net/sched/sch_generic.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c..6803af1 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -691,9 +691,11 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) + if (sch->gso_skb) { /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, sch->gso_skb); sch->q.qlen++; + } } return sch->gso_skb; @@ -706,6 +708,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { sch->gso_skb = NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { skb = sch->dequeue(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 269dd71..f9e0e9c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -49,6 +49,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); @@ -92,6 +93,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; + qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } else skb = NULL; -- cgit v1.1 From 97c1df3e54e811aed484a036a798b4b25d002ecf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Jun 2016 15:36:07 -0500 Subject: mnt: If fs_fully_visible fails call put_filesystem. Add this trivial missing error handling. Cc: stable@vger.kernel.org Fixes: 1b852bceb0d1 ("mnt: Refactor the logic for mounting sysfs and proc in a user namespace") Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691..9d45c8a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2409,8 +2409,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } -- cgit v1.1 From d71ed6c930ac7d8f88f3cef6624a7e826392d61f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 27 May 2016 14:50:05 -0500 Subject: mnt: fs_fully_visible test the proper mount for MNT_LOCKED MNT_LOCKED implies on a child mount implies the child is locked to the parent. So while looping through the children the children should be tested (not their parent). Typically an unshare of a mount namespace locks all mounts together making both the parent and the slave as locked but there are a few corner cases where other things work. Cc: stable@vger.kernel.org Fixes: ceeb0e5d39fc ("vfs: Ignore unlocked mounts in fs_fully_visible") Reported-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9d45c8a..a7ec92c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3273,7 +3273,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? */ if (!is_empty_dir_inode(inode)) -- cgit v1.1 From dab38e43b298501a4e8807b56117c029e2e98383 Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Tue, 7 Jun 2016 13:14:21 +0200 Subject: ALSA: hda/realtek: Add T560 docking unit fixup Tested with Lenovo Ultradock. Fixes the non-working headphone jack on the docking unit. Signed-off-by: Torsten Hilbrich Tested-by: Torsten Hilbrich Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 49d581a..0fe18ed 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5634,6 +5634,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), -- cgit v1.1 From 41aaa99fab6ceaa4b533c2b6ad4913987ddb3ddc Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 6 Jun 2016 19:52:55 -0400 Subject: IB/core: Fix array length allocation The new sysfs hw_counters code had an off by one in its array allocation length. Fix that and the comment along with it. Reported-by: Mark Bloch Fixes: b40f4757daa1 (IB/core: Make device counter infrastructure dynamic) Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ed04a7b..2bc4344 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -901,9 +901,12 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, if (!stats->names || stats->num_counters <= 0) goto err_free_stats; + /* + * Two extra attribue elements here, one for the lifespan entry and + * one to NULL terminate the list for the sysfs core code + */ hsag = kzalloc(sizeof(*hsag) + - // 1 extra for the lifespan config entry - sizeof(void *) * (stats->num_counters + 1), + sizeof(void *) * (stats->num_counters + 2), GFP_KERNEL); if (!hsag) goto err_free_stats; -- cgit v1.1 From 495fbae6e2c115099921ba33b1e1bea1190b5280 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Tue, 7 Jun 2016 07:43:46 -0400 Subject: IB/core: fix error unwind in sysfs hw counters code Between the initial and final versions of the function setup_hw_stats, the order of variable initialization was changed. However, the unwind flow on error did not properly keep up with the flow changes. Make the unwind flow match a proper unwind of the allocation flow, then remove no longer needed variable initializations. Fixes: b40f4757daa1 (IB/core: Make device counter infrastructure dynamic) Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 2bc4344..35d0d47 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -889,9 +889,9 @@ static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) static void setup_hw_stats(struct ib_device *device, struct ib_port *port, u8 port_num) { - struct attribute_group *hsag = NULL; + struct attribute_group *hsag; struct rdma_hw_stats *stats; - int i = 0, ret; + int i, ret; stats = device->alloc_hw_stats(device, port_num); @@ -914,7 +914,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); if (ret != stats->num_counters) - goto err; + goto err_free_hsag; stats->timestamp = jiffies; @@ -951,6 +951,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, err: for (; i >= 0; i--) kfree(hsag->attrs[i]); +err_free_hsag: kfree(hsag); err_free_stats: kfree(stats); -- cgit v1.1 From d7012467a95b767b4d3beb2e027aa24a83f12f0f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sat, 4 Jun 2016 15:15:18 +0300 Subject: IB/core: Fix query port failure in RoCE Currently ib_query_port always attempts to to read the subnet prefix by calling ib_query_gid(). For RoCE/iWARP there is no subnet manager and no subnet prefix. Fix this by querying GID[0] only for IB networks. Fixes: fad61ad4e755 ('IB/core: Add subnet prefix to port info') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8b8a8d9..5c155fa 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -661,6 +661,9 @@ int ib_query_port(struct ib_device *device, if (err || port_attr->subnet_prefix) return err; + if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) + return 0; + err = ib_query_gid(device, port_num, 0, &gid, NULL); if (err) return err; -- cgit v1.1 From 198b12f77084244d310888dd5d643083cb5c2aa1 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:20 +0300 Subject: IB/IPoIB: Fix race between ipoib_remove_one to sysfs functions In ipoib_remove_one the driver holds the rtnl_lock and tries to do some operation like dev_change_flags or unregister_netdev, while sysfs callback like ipoib_vlan_delete holds sysfs mutex and tries to hold the rtnl_lock via rtnl_trylock() and restart_syscall() if the lock is not free, meanwhile ipoib_remove_one tries to get the sysfs lock in order to free its sysfs directory, and we will get a->b, b->a deadlock. Trace like the following: schedule+0x37/0x80 schedule_preempt_disabled+0xe/0x10 __mutex_lock_slowpath+0xb5/0x120 mutex_lock+0x23/0x40 rtnl_lock+0x15/0x20 netdev_run_todo+0x17c/0x320 rtnl_unlock+0xe/0x10 ipoib_vlan_delete+0x11b/0x1b0 [ib_ipoib] delete_child+0x54/0x80 [ib_ipoib] dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 mutex_lock+0x16/0x40 SyS_write+0x55/0xc0 entry_SYSCALL_64_fastpath+0x16/0x75 And schedule+0x37/0x80 __kernfs_remove+0x1a8/0x260 ? wake_atomic_t_function+0x60/0x60 kernfs_remove+0x25/0x40 sysfs_remove_dir+0x50/0x80 kobject_del+0x18/0x50 device_del+0x19f/0x260 netdev_unregister_kobject+0x6a/0x80 rollback_registered_many+0x1fd/0x340 rollback_registered+0x3c/0x70 unregister_netdevice_queue+0x55/0xc0 unregister_netdev+0x20/0x30 ipoib_remove_one+0x114/0x1b0 [ib_ipoib] ib_unregister_client+0x4a/0x170 [ib_core] ? find_module_all+0x71/0xa0 ipoib_cleanup_module+0x10/0x94 [ib_ipoib] SyS_delete_module+0x1b5/0x210 entry_SYSCALL_64_fastpath+0x16/0x75 The fix is by checking the flag IPOIB_FLAG_INTF_ON_DESTROY in order to get out from the sysfs function. Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks") Fixes: 9baa0b036410 ("IB/ipoib: Add rtnl_link_ops support") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 6 ++++++ 4 files changed, 14 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bab7db6..4f7d9b4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -94,6 +94,7 @@ enum { IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, + IPOIB_FLAG_GOING_DOWN = 15, IPOIB_MAX_BACKOFF_SECONDS = 16, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index b2f4283..951d9ab 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1486,6 +1486,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) + return -EPERM; if (!rtnl_trylock()) return restart_syscall(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2d7c163..0aa52c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2141,6 +2141,9 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) ib_unregister_event_handler(&priv->event_handler); flush_workqueue(ipoib_workqueue); + /* mark interface in the middle of destruction */ + set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags); + rtnl_lock(); dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); rtnl_unlock(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 64a3559..a2f9f29 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -131,6 +131,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); + if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) + return -EPERM; + snprintf(intf_name, sizeof intf_name, "%s.%04x", ppriv->dev->name, pkey); priv = ipoib_intf_alloc(intf_name); @@ -183,6 +186,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); + if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) + return -EPERM; + if (!rtnl_trylock()) return restart_syscall(); -- cgit v1.1 From 8e787646fbce895c20c4433973e90af90e1c6a28 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sat, 4 Jun 2016 15:15:21 +0300 Subject: IB/core: Fix removal of default GID cache entry When deleting a default GID from the cache, its gid_type field is set to 0. This could set the gid_type to RoCE v1 for a RoCE v2 default GID, essentially making it inaccessible to future modifications, since it is no longer found by find_gid(). This fix preserves the gid_type value for default gids during cache operations. Fixes: b39ffa1df505 ('IB/core: Add gid_type to gid attribute') Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c2e257d..0409667 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -178,6 +178,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, { int ret = 0; struct net_device *old_net_dev; + enum ib_gid_type old_gid_type; /* in rdma_cap_roce_gid_table, this funciton should be protected by a * sleep-able lock. @@ -199,6 +200,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, } old_net_dev = table->data_vec[ix].attr.ndev; + old_gid_type = table->data_vec[ix].attr.gid_type; if (old_net_dev && old_net_dev != attr->ndev) dev_put(old_net_dev); /* if modify_gid failed, just delete the old gid */ @@ -207,10 +209,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port, attr = &zattr; table->data_vec[ix].context = NULL; } - if (default_gid) - table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); + if (default_gid) { + table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + if (action == GID_TABLE_WRITE_ACTION_DEL) + table->data_vec[ix].attr.gid_type = old_gid_type; + } if (table->data_vec[ix].attr.ndev && table->data_vec[ix].attr.ndev != old_net_dev) dev_hold(table->data_vec[ix].attr.ndev); -- cgit v1.1 From 9b29953bf8ca23944c5e00dcc15ad7bd9fecdd4e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sat, 4 Jun 2016 15:15:22 +0300 Subject: IB/IPoIB: Disable bottom half when dealing with device address Align locking usage when touching device address with rest of the kernel. Lock the bottom half when doing so using netif_addr_lock_bh. This also solves the following case as reported by lockdep: CPU0 CPU1 ---- ---- lock(_xmit_INFINIBAND); local_irq_disable(); lock(&(&mc->mca_lock)->rlock); lock(_xmit_INFINIBAND); lock(&(&mc->mca_lock)->rlock); *** DEADLOCK *** Fixes: 492a7e67ff83 ("IB/IPoIB: Allow setting the device address") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 45c40a1..dc6d241 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1015,7 +1015,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) return false; - netif_addr_lock(priv->dev); + netif_addr_lock_bh(priv->dev); /* The subnet prefix may have changed, update it now so we won't have * to do it later @@ -1026,12 +1026,12 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) search_gid.global.interface_id = priv->local_gid.global.interface_id; - netif_addr_unlock(priv->dev); + netif_addr_unlock_bh(priv->dev); err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB, priv->dev, &port, &index); - netif_addr_lock(priv->dev); + netif_addr_lock_bh(priv->dev); if (search_gid.global.interface_id != priv->local_gid.global.interface_id) @@ -1092,7 +1092,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) } out: - netif_addr_unlock(priv->dev); + netif_addr_unlock_bh(priv->dev); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0aa52c2..248da50 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1851,7 +1851,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) struct ipoib_dev_priv *child_priv; struct net_device *netdev = priv->dev; - netif_addr_lock(netdev); + netif_addr_lock_bh(netdev); memcpy(&priv->local_gid.global.interface_id, &gid->global.interface_id, @@ -1859,7 +1859,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - netif_addr_unlock(netdev); + netif_addr_unlock_bh(netdev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { down_read(&priv->vlan_rwsem); @@ -1875,7 +1875,7 @@ static int ipoib_check_lladdr(struct net_device *dev, union ib_gid *gid = (union ib_gid *)(ss->__data + 4); int ret = 0; - netif_addr_lock(dev); + netif_addr_lock_bh(dev); /* Make sure the QPN, reserved and subnet prefix match the current * lladdr, it also makes sure the lladdr is unicast. @@ -1885,7 +1885,7 @@ static int ipoib_check_lladdr(struct net_device *dev, gid->global.interface_id == 0) ret = -EINVAL; - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 82fbc94..d3394b6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -582,13 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } priv->local_lid = port_attr.lid; - netif_addr_lock(dev); + netif_addr_lock_bh(dev); if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); return; } - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); spin_lock_irq(&priv->lock); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) -- cgit v1.1 From 8aec013afe6d9665eb478396026ebd4384dbe934 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sat, 4 Jun 2016 15:15:24 +0300 Subject: IB/core: Initialize sysfs attributes before sysfs create group For dynamically allocated sysfs attributes there is a need to call sysfs_attr_init in order to comply with lockdep, not calling it will result in error complaining key is not in .data section. Fixes: b40f4757daa1 ("IB/core: Make device counter infrastructure dynamic") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 35d0d47..a5793c8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -925,10 +925,13 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); if (!hsag->attrs[i]) goto err; + sysfs_attr_init(hsag->attrs[i]); } /* treat an error here as non-fatal */ hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); + if (hsag->attrs[i]) + sysfs_attr_init(hsag->attrs[i]); if (port) { struct kobject *kobj = &port->kobj; -- cgit v1.1 From 47355b3cd7d3c9c5226bff7c449b9d269fb17fa6 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 6 Jun 2016 19:34:39 +0300 Subject: IB/core: Fix bit curruption in ib_device_cap_flags structure ib_device_cap_flags 64-bit expansion caused caps overlapping and made consumers read wrong device capabilities. For example IB_DEVICE_SG_GAPS_REG was falsely read by the iser driver causing it to use a non-existing capability. This happened because signed int becomes sign extended when converted it to u64. Fix this by casting IB_DEVICE_ON_DEMAND_PAGING enumeration to ULL. Fixes: f5aa9159a418 ('IB/core: Add arbitrary sg_list support') Reported-by: Robert LeBlanc Cc: Stable #[v4.6+] Acked-by: Sagi Grimberg Signed-off-by: Max Gurtovoy Signed-off-by: Matan Barak Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 432bed5..c97357b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -217,7 +217,7 @@ enum ib_device_cap_flags { IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), - IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), + IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34), -- cgit v1.1 From c7e162a417488f3c79eb09f3c4f1d36f1e042463 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 6 Jun 2016 19:34:40 +0300 Subject: IB/core: Make all casts in ib_device_cap_flags enum consistent Replace the few u64 casts with ULL to match the rest of the casts. Signed-off-by: Max Gurtovoy Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c97357b..7e440d4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -219,8 +219,8 @@ enum ib_device_cap_flags { IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), - IB_DEVICE_VIRTUAL_FUNCTION = ((u64)1 << 33), - IB_DEVICE_RAW_SCATTER_FCS = ((u64)1 << 34), + IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), + IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), }; enum ib_signature_prot_cap { -- cgit v1.1 From da6d6ba3c6f085abf82723612efd746a97f8e414 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sat, 4 Jun 2016 15:15:28 +0300 Subject: IB/mlx5: Set flow steering capability bit Flow steering is supported by mlx5 device when the following features are supported by firmware: 1. NIC RX flow table. 2. Device has enough flow steering levels. 3. Atomic modification of flow table entry. 4. Flow tables chaining. To check if flow steering is supported it's enough to check if the driver opened the mlx5 bypass namespace. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c72797c..83047ef 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -524,6 +524,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_ETH(dev->mdev, scatter_fcs)) props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; -- cgit v1.1 From 2788cf3bd90af3791c3195c52391bcf34fa67b40 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:29 +0300 Subject: IB/mlx5: Return PORT_ERR in Active to Initializing tranisition FW port-change events are fired on Active <-> non Active port state transitions only. When the port state changes from Active to Initializing (Active -> Down -> Initializing), a single event is fired. The HCA transitions from Down to Initializing unless prevented from doing so, hence the driver should also propagate events when the port state is Initializing to consumers so they'll be aware that the port is no longer Active and act accordingly. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB...') Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 83047ef..94a8f91 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1869,14 +1869,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, break; case MLX5_DEV_EVENT_PORT_DOWN: + case MLX5_DEV_EVENT_PORT_INITIALIZED: ibev.event = IB_EVENT_PORT_ERR; port = (u8)param; break; - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* not used by ULPs */ - return; - case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; port = (u8)param; -- cgit v1.1 From c0fcebf55289c48148992eee002a7caf853a5358 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sat, 4 Jun 2016 15:15:30 +0300 Subject: IB/mlx5: Fix FW version diaplay in sysfs Add a 4-digit padding to show FW version in proper format. Fixes: 9603b61de1eee ('mlx5: Move pci device handling from...') Signed-off-by: Eran Ben Elisha Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94a8f91..8845f4b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1801,7 +1801,7 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev), + return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } -- cgit v1.1 From bc5c6eed0510f19b033ce7a7d3976e695e96785b Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:31 +0300 Subject: IB/mlx5: Limit query HCA clock When PAGE_SIZE is larger than 4K, the user shouldn't be able to query the HCA core clock. This counter is within 4KB boundary and the user-space shall not read information that's after this boundary. Fixes: b368d7cb8ceb7 ('IB/mlx5: Add hca_core_clock_offset to...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8845f4b..05036db 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -991,7 +991,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); - if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096 && + field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; resp.hca_core_clock_offset = -- cgit v1.1 From 0540d8148d419bf769e5aa99c77027febd8922f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:32 +0300 Subject: IB/mlx5: Fix returned values of query QP Some variables were not initialized properly: max_recv_wr, max_recv_sge, max_send_wr, qp_context and max_inline_data. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB...') Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5041176..43c1441 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -235,6 +235,8 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, qp->rq.max_gs = 0; qp->rq.wqe_cnt = 0; qp->rq.wqe_shift = 0; + cap->max_recv_wr = 0; + cap->max_recv_sge = 0; } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; @@ -4079,17 +4081,19 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->cap.max_recv_sge = qp->rq.max_gs; if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_wr = qp->sq.max_post; qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_init_attr->qp_context = ibqp->qp_context; } else { qp_attr->cap.max_send_wr = 0; qp_attr->cap.max_send_sge = 0; } - /* We don't support inline sends for kernel QPs (yet), and we - * don't know what userspace's value should be. - */ - qp_attr->cap.max_inline_data = 0; + qp_init_attr->qp_type = ibqp->qp_type; + qp_init_attr->recv_cq = ibqp->recv_cq; + qp_init_attr->send_cq = ibqp->send_cq; + qp_init_attr->srq = ibqp->srq; + qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; -- cgit v1.1 From 2cc6ad5f2130733e561f97dba7a2052f8ea024aa Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:33 +0300 Subject: IB/mlx5: Check BlueFlame HCA support BlueFlame support is reported only for PFs when the HCA capability is on. Fixes: 938fe83c8dcbb ('net/mlx5_core: New device capabilities...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 05036db..b48ad85 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -918,7 +918,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = L1_CACHE_BYTES; resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); -- cgit v1.1 From 9ea578528656e191c1097798a771ff08bab6f323 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:34 +0300 Subject: IB/mlx5: Fix entries checks in mlx5_ib_create_cq Number of entries shouldn't be greater than the device's max capability. This should be checked before rounding the entries number to power of two. Fixes: 51ee86a4af639 ('IB/mlx5: Fix check of number of entries...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dabcc65..3984c68 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -822,7 +822,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (entries < 0) + if (entries < 0 || + (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) return ERR_PTR(-EINVAL); if (check_cq_create_flags(attr->flags)) -- cgit v1.1 From 3c4c37746c919c983e439ac6a7328cd2d48c10ed Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:35 +0300 Subject: IB/mlx5: Fix entries check in mlx5_ib_resize_cq Verify that number of entries is less than device capability. Add an appropriate warning message for error flow. Fixes: bde51583f49b ('IB/mlx5: Add support for resize CQ') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 3984c68..9c0e67b 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1169,11 +1169,16 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -ENOSYS; } - if (entries < 1) + if (entries < 1 || + entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) { + mlx5_ib_warn(dev, "wrong entries number %d, max %d\n", + entries, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)); return -EINVAL; + } entries = roundup_pow_of_two(entries + 1); - if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) -- cgit v1.1 From d3ae2bdeba9bad8cb95301451aeaf03ce31e82f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:36 +0300 Subject: IB/mlx5: Fix pkey_index length in the QP path record Pkey index fields in the QP context path record are extended to 16 bits, as required by IB spec (version 1.3). This change affects all QP commands which include path records. To enable this change, moved the free adaptive routing flag bit (free_ar) to the most significant byte of the QP path record. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB ...') Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 15 ++++++++------- include/linux/mlx5/qp.h | 5 ++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 43c1441..6b90bfd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1859,7 +1859,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = attr->pkey_index; + path->pkey_index = cpu_to_be16(attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1879,9 +1879,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ah->grh.sgid_index); path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; } else { - path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : - 0; + path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->fl_free_ar |= + (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0; path->rlid = cpu_to_be16(ah->dlid); path->grh_mlid = ah->src_path_bits & 0x7f; if (ah->ah_flags & IB_AH_GRH) @@ -2266,7 +2266,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PKEY_INDEX) - context->pri_path.pkey_index = attr->pkey_index; + context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index); /* todo implement counter_index functionality */ @@ -4015,11 +4015,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); - qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_pkey_index = + be16_to_cpu(context->alt_path.pkey_index); qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index); qp_attr->port_num = context->pri_path.port; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6422102..e4e2988 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -460,10 +460,9 @@ struct mlx5_core_qp { }; struct mlx5_qp_path { - u8 fl; + u8 fl_free_ar; u8 rsvd3; - u8 free_ar; - u8 pkey_index; + __be16 pkey_index; u8 rsvd0; u8 grh_mlid; __be16 rlid; -- cgit v1.1 From f879ee8d900fc78b5bc5d840edd9ecb57d02ab7e Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Sat, 4 Jun 2016 15:15:37 +0300 Subject: IB/mlx5: Fix alternate path code Userspace flag IBV_QP_ALT_PATH is supposed to set the alternate path including fields alt_pkey_index and alt_timeout. Added IB_QP_PKEY_INDEX and IB_QP_TIMEOUT to the attribute mask when calling mlx5_set_path for the alternate path to force setting the alt_pkey_index and alt_timeout values. Fixes: bf24481a3a7c4 ('IB/mlx5: Consider alternate path in pkey ...') Signed-off-by: Achiad Shochat Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6b90bfd..ce43422 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1853,13 +1853,15 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, - u32 path_flags, const struct ib_qp_attr *attr) + u32 path_flags, const struct ib_qp_attr *attr, + bool alt) { enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = cpu_to_be16(attr->pkey_index); + path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index : + attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1905,7 +1907,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, path->port = port; if (attr_mask & IB_QP_TIMEOUT) - path->ackto_lt = attr->timeout << 3; + path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3; if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, @@ -2279,7 +2281,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_AV) { err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, - attr_mask, 0, attr); + attr_mask, 0, attr, false); if (err) goto out; } @@ -2290,7 +2292,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_ALT_PATH) { err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, &context->alt_path, - attr->alt_port_num, attr_mask, 0, attr); + attr->alt_port_num, + attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT, + 0, attr, true); if (err) goto out; } -- cgit v1.1 From 61c78eea9516a921799c17b4c20558e2aa780fd3 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:19 +0300 Subject: IB/IPoIB: Don't update neigh validity for unresolved entries ipoib_neigh_get unconditionally updates the "alive" variable member on any packet send. This prevents the neighbor garbage collection from cleaning out a dead neighbor entry if we are still queueing packets for it. If the queue for this neighbor is full, then don't update the alive timestamp. That way the neighbor can time out even if packets are still being queued as long as none of them are being sent. Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 248da50..5f58c41 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1206,7 +1206,9 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) neigh = NULL; goto out_unlock; } - neigh->alive = jiffies; + + if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)) + neigh->alive = jiffies; goto out_unlock; } } -- cgit v1.1 From 5b6c1b4d46b0dae4edea636a776d09f2064f4cd7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 4 Jun 2016 20:50:59 +0200 Subject: bpf, trace: use READ_ONCE for retrieving file ptr In bpf_perf_event_read() and bpf_perf_event_output(), we must use READ_ONCE() for fetching the struct file pointer, which could get updated concurrently, so we must prevent the compiler from potential refetching. We already do this with tail calls for fetching the related bpf_prog, but not so on stored perf events. Semantics for both are the same with regards to updates. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 780bcbe..720b7bb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -198,7 +198,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - file = (struct file *)array->ptrs[index]; + file = READ_ONCE(array->ptrs[index]); if (unlikely(!file)) return -ENOENT; @@ -247,7 +247,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) if (unlikely(index >= array->map.max_entries)) return -E2BIG; - file = (struct file *)array->ptrs[index]; + file = READ_ONCE(array->ptrs[index]); if (unlikely(!file)) return -ENOENT; -- cgit v1.1 From 80e509db54c81247b32fcb75bb1730fc789b893d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Jun 2016 12:55:13 -0700 Subject: fq_codel: fix NET_XMIT_CN behavior My prior attempt to fix the backlogs of parents failed. If we return NET_XMIT_CN, our parents wont increase their backlog, so our qdisc_tree_reduce_backlog() should take this into account. v2: Florian Westphal pointed out that we could drop the packet, so we need to save qdisc_pkt_len(skb) in a temp variable before calling fq_codel_drop() Fixes: 9d18562a2278 ("fq_codel: add batch ability to fq_codel_drop()") Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Reported-by: Stas Nichiporovich Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6883a89..fff7867 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -199,6 +199,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); + unsigned int pkt_len; bool memory_limited; idx = fq_codel_classify(skb, sch, &ret); @@ -230,6 +231,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) prev_backlog = sch->qstats.backlog; prev_qlen = sch->q.qlen; + /* save this packet length as it might be dropped by fq_codel_drop() */ + pkt_len = qdisc_pkt_len(skb); /* fq_codel_drop() is quite expensive, as it performs a linear search * in q->backlogs[] to find a fat flow. * So instead of dropping a single packet, drop half of its backlog @@ -237,14 +240,23 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ ret = fq_codel_drop(sch, q->drop_batch_size); - q->drop_overlimit += prev_qlen - sch->q.qlen; + prev_qlen -= sch->q.qlen; + prev_backlog -= sch->qstats.backlog; + q->drop_overlimit += prev_qlen; if (memory_limited) - q->drop_overmemory += prev_qlen - sch->q.qlen; - /* As we dropped packet(s), better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + q->drop_overmemory += prev_qlen; - return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; + /* As we dropped packet(s), better let upper stack know this. + * If we dropped a packet for this flow, return NET_XMIT_CN, + * but in this case, our parents wont increase their backlogs. + */ + if (ret == idx) { + qdisc_tree_reduce_backlog(sch, prev_qlen - 1, + prev_backlog - pkt_len); + return NET_XMIT_CN; + } + qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog); + return NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() -- cgit v1.1 From 335b48d980f631fbc5b233cbb3625ac0c86d67cb Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:58 -0700 Subject: RDS: TCP: Add/use rds_tcp_reset_callbacks to reset tcp socket safely When rds_tcp_accept_one() has to replace the existing tcp socket with a newer tcp socket (duelling-syn resolution), it must lock_sock() to suppress the rds_tcp_data_recv() path while callbacks are being changed. Also, existing RDS datagram reassembly state must be reset, so that the next datagram on the new socket does not have corrupted state. Similarly when resetting the newly accepted socket, appropriate locks and synchronization is needed. This commit ensures correct synchronization by invoking kernel_sock_shutdown to reset a newly accepted sock, and by taking appropriate lock_sock()s (for old and new sockets) when resetting existing callbacks. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++--- net/rds/tcp.h | 1 + net/rds/tcp_listen.c | 13 ++++------- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 86187da..8faa0b1 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -126,9 +126,68 @@ void rds_tcp_restore_callbacks(struct socket *sock, } /* - * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_UP bit protects those paths from being - * called while it isn't set. + * rds_tcp_reset_callbacks() switches the to the new sock and + * returns the existing tc->t_sock. + * + * The only functions that set tc->t_sock are rds_tcp_set_callbacks + * and rds_tcp_reset_callbacks. Send and receive trust that + * it is set. The absence of RDS_CONN_UP bit protects those paths + * from being called while it isn't set. + */ +void rds_tcp_reset_callbacks(struct socket *sock, + struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *osock = tc->t_sock; + + if (!osock) + goto newsock; + + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then + * waiting for any existing threads in rds_send_xmit to + * complete release_in_xmit(). (Subsequent threads entering + * rds_send_xmit() will bail on !rds_conn_up(). + */ + lock_sock(osock->sk); + /* reset receive side state for rds_tcp_data_recv() for osock */ + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_sock = NULL; + + write_lock_bh(&osock->sk->sk_callback_lock); + + osock->sk->sk_user_data = NULL; + osock->sk->sk_data_ready = tc->t_orig_data_ready; + osock->sk->sk_write_space = tc->t_orig_write_space; + osock->sk->sk_state_change = tc->t_orig_state_change; + write_unlock_bh(&osock->sk->sk_callback_lock); + release_sock(osock->sk); + sock_release(osock); +newsock: + lock_sock(sock->sk); + write_lock_bh(&sock->sk->sk_callback_lock); + tc->t_sock = sock; + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); + release_sock(sock->sk); +} + +/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments + * above rds_tcp_reset_callbacks for notes about synchronization + * with data path */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) { diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 41c2283..ec0602b 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,6 +50,7 @@ struct rds_tcp_statistics { void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4bf4bef..d9fe536 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; - struct sock *nsk; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -139,23 +138,19 @@ int rds_tcp_accept_one(struct socket *sock) atomic_set(&conn->c_state, RDS_CONN_CONNECTING); wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; } + } else { + rds_tcp_set_callbacks(new_sock, conn); } - rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; rst_nsk: /* reset the newly returned accept sock and bail */ - nsk = new_sock->sk; - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; + kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: if (rs_tcp) -- cgit v1.1 From 0b6f760cff04a7cdfafc3ec6915e91fed0533d8d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:59 -0700 Subject: RDS: TCP: Retransmit half-sent datagrams when switching sockets in rds_tcp_reset_callbacks When we switch a connection's sockets in rds_tcp_rest_callbacks, any partially sent datagram must be retransmitted on the new socket so that the receiver can correctly reassmble the RDS datagram. Use rds_send_reset() which is designed for this purpose. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/send.c | 1 + net/rds/tcp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/rds/send.c b/net/rds/send.c index c9cdb35..b1962f8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn) list_splice_init(&conn->c_retrans, &conn->c_send_queue); spin_unlock_irqrestore(&conn->c_lock, flags); } +EXPORT_SYMBOL_GPL(rds_send_reset); static int acquire_in_xmit(struct rds_connection *conn) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 8faa0b1..7ab1b41 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -173,6 +173,7 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: + rds_send_reset(conn); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; -- cgit v1.1 From 9c79440e2c5e2518879f1599270f64c3ddda3baf Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 14:00:00 -0700 Subject: RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one() The send path needs to be quiesced before resetting callbacks from rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves this using the c_state and RDS_IN_XMIT bit following the pattern used by rds_conn_shutdown(). However this leaves the possibility of a race window as shown in the sequence below take t_conn_lock in rds_tcp_conn_connect send outgoing syn to peer drop t_conn_lock in rds_tcp_conn_connect incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads call rds_tcp_reset_callbacks [.. race-window where incoming syn-ack can cause the conn to be marked UP from rds_tcp_state_change ..] lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to null As soon as the conn is marked UP in the race-window above, rds_send_xmit() threads will proceed to rds_tcp_xmit and may encounter a null-pointer deref on the t_sock. Given that rds_tcp_state_change() is invoked in softirq context, whereas rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT after lock_sock could result in a deadlock with tcp_sendmsg, this commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which will prevent a transition to RDS_CONN_UP from rds_tcp_state_change(). Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rds.h | 2 ++ net/rds/tcp.c | 14 +++++++++++++- net/rds/tcp_connect.c | 2 +- net/rds/tcp_listen.c | 7 +++---- net/rds/threads.c | 10 ++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/net/rds/rds.h b/net/rds/rds.h index 80256b0..387df5f 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -74,6 +74,7 @@ enum { RDS_CONN_CONNECTING, RDS_CONN_DISCONNECTING, RDS_CONN_UP, + RDS_CONN_RESETTING, RDS_CONN_ERROR, }; @@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); void rds_recv_worker(struct work_struct *); +void rds_connect_path_complete(struct rds_connection *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 7ab1b41..74ee126 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock, * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting * c_transport_data. We quiesce these threads by setting - * cp_state to something other than RDS_CONN_UP, and then + * c_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). + * + * However an incoming syn-ack at this point would end up + * marking the conn as RDS_CONN_UP, and would again permit + * rds_send_xmi() threads through, so ideally we would + * synchronize on RDS_CONN_UP after lock_sock(), but cannot + * do that: waiting on !RDS_IN_XMIT after lock_sock() may + * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT + * would not get set. As a result, we set c_state to + * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change + * cannot mark rds_conn_path_up() in the window before lock_sock() */ + atomic_set(&conn->c_state, RDS_CONN_RESETTING); + wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fb82e0a..fba13d0 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_complete(conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d9fe536..686b1d0 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock) !conn->c_outgoing) { goto rst_nsk; } else { - atomic_set(&conn->c_state, RDS_CONN_CONNECTING); - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); } } else { rds_tcp_set_callbacks(new_sock, conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); } - rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; diff --git a/net/rds/threads.c b/net/rds/threads.c index 454aa6d..4a32304 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -71,9 +71,9 @@ struct workqueue_struct *rds_wq; EXPORT_SYMBOL_GPL(rds_wq); -void rds_connect_complete(struct rds_connection *conn) +void rds_connect_path_complete(struct rds_connection *conn, int curr) { - if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { + if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) { printk(KERN_WARNING "%s: Cannot transition to state UP, " "current state is %d\n", __func__, @@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn) queue_delayed_work(rds_wq, &conn->c_send_w, 0); queue_delayed_work(rds_wq, &conn->c_recv_w, 0); } +EXPORT_SYMBOL_GPL(rds_connect_path_complete); + +void rds_connect_complete(struct rds_connection *conn) +{ + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); +} EXPORT_SYMBOL_GPL(rds_connect_complete); /* -- cgit v1.1 From fa54cc70ed2eddd96773ffdf4d96d23ea7483e56 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 5 Jun 2016 07:17:19 +0800 Subject: rxrpc: fix ptr_ret.cocci warnings net/rxrpc/rxkad.c:1165:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci CC: David Howells Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/rxrpc/rxkad.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6b726a0..bab56ed 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1162,9 +1162,7 @@ static int rxkad_init(void) /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(rxkad_ci)) - return PTR_ERR(rxkad_ci); - return 0; + return PTR_ERR_OR_ZERO(rxkad_ci); } /* -- cgit v1.1 From 9c77679cadb118c0aa99e6f88533d91765a131ba Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 5 Apr 2016 17:01:33 -0700 Subject: x86, build: copy ldlinux.c32 to image.iso For newer versions of Syslinux, we need ldlinux.c32 in addition to isolinux.bin to reside on the boot disk, so if the latter is found, copy it, too, to the isoimage tree. Signed-off-by: H. Peter Anvin Cc: Linux Stable Tree --- arch/x86/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 700a9c6..be8e688 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -162,6 +162,9 @@ isoimage: $(obj)/bzImage for i in lib lib64 share end ; do \ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + if [ -f /usr/$$i/syslinux/ldlinux.c32 ]; then \ + cp /usr/$$i/syslinux/ldlinux.c32 $(obj)/isoimage ; \ + fi ; \ break ; \ fi ; \ if [ $$i = end ] ; then exit 1 ; fi ; \ -- cgit v1.1 From b9a8460a08a1e0150073cda3e7a0dd23cb888052 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:14 -0400 Subject: bnxt_en: Fix tx push race condition. Set the is_push flag in the software BD before the tx data is pushed to the chip. It is possible to get the tx interrupt as soon as the tx data is pushed. The tx handler will not handle the event properly if the is_push flag is not set and it will crash. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 72a2eff..4615ed4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -286,7 +286,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); txr->tx_prod = prod; + tx_buf->is_push = 1; netdev_tx_sent_queue(txq, skb->len); + wmb(); /* Sync is_push and byte queue before pushing data */ push_len = (length + sizeof(*tx_push) + 7) / 8; if (push_len > 16) { @@ -298,7 +300,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) push_len); } - tx_buf->is_push = 1; goto tx_done; } -- cgit v1.1 From 5a9f6b238e59bc05afb4cdeaf3672990bf2a5309 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:15 -0400 Subject: bnxt_en: Enable and disable RX CTAG and RX STAG VLAN acceleration together. The hardware can only be set to strip or not strip both the VLAN CTAG and STAG. It cannot strip one and not strip the other. Add logic to bnxt_fix_features() to toggle both feature flags when the user is toggling one of them. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4615ed4..ae2b264 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5467,6 +5467,20 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, if (!bnxt_rfs_capable(bp)) features &= ~NETIF_F_NTUPLE; + + /* Both CTAG and STAG VLAN accelaration on the RX side have to be + * turned on or off together. + */ + if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) != + (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + features &= ~(NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX); + else + features |= NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX; + } + return features; } -- cgit v1.1 From 8852ddb4dcdfe6f877a02f79bf2bca9ae63c039a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 6 Jun 2016 02:37:16 -0400 Subject: bnxt_en: Simplify VLAN receive logic. Since both CTAG and STAG rx acceleration must be enabled together, we only need to check one feature flag (NETIF_F_HW_VLAN_CTAG_RX) before calling __vlan_hwaccel_put_tag(). Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ae2b264..c777cde 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1113,19 +1113,13 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (tpa_info->hash_type != PKT_HASH_TYPE_NONE) skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type); - if (tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) { - netdev_features_t features = skb->dev->features; + if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) && + (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vlan_proto = tpa_info->metadata >> RX_CMP_FLAGS2_METADATA_TPID_SFT; + u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK; - if (((features & NETIF_F_HW_VLAN_CTAG_RX) && - vlan_proto == ETH_P_8021Q) || - ((features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_proto == ETH_P_8021AD)) { - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), - tpa_info->metadata & - RX_CMP_FLAGS2_METADATA_VID_MASK); - } + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } skb_checksum_none_assert(skb); @@ -1278,19 +1272,14 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, skb->protocol = eth_type_trans(skb, dev); - if (rxcmp1->rx_cmp_flags2 & - cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) { - netdev_features_t features = skb->dev->features; + if ((rxcmp1->rx_cmp_flags2 & + cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && + (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data); + u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK; u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT; - if (((features & NETIF_F_HW_VLAN_CTAG_RX) && - vlan_proto == ETH_P_8021Q) || - ((features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_proto == ETH_P_8021AD)) - __vlan_hwaccel_put_tag(skb, htons(vlan_proto), - meta_data & - RX_CMP_FLAGS2_METADATA_VID_MASK); + __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag); } skb_checksum_none_assert(skb); -- cgit v1.1 From 9f647a6de9926f4844feb8c5e21c78d4e61c55ab Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jun 2016 09:21:44 +0100 Subject: net: fec: fix spelling mistakes and add missing newline trivial fix to spelling mistakes and add missing newline in pr_err messages Signed-off-by: Colin Ian King Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3c0255e..fea0f33 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2416,24 +2416,24 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames > 255) { - pr_err("Rx coalesced frames exceed hardware limiation"); + pr_err("Rx coalesced frames exceed hardware limitation\n"); return -EINVAL; } if (ec->tx_max_coalesced_frames > 255) { - pr_err("Tx coalesced frame exceed hardware limiation"); + pr_err("Tx coalesced frame exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesed usec exceeed hardware limiation"); + pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesed usec exceeed hardware limiation"); + pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } -- cgit v1.1 From 7b01b8e847d00cf9cf0c2c3aa8fdfc4126dca024 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Jun 2016 16:08:41 +0100 Subject: gtp: #define _UAPI_LINUX_GTP_H_ and not _UAPI_LINUX_GTP_H__ Fix clang build warning: ./include/uapi/linux/gtp.h:1:9: warning: '_UAPI_LINUX_GTP_H_' is used as a header guard here, followed by #define of a different macro [-Wheader-guard] fix by defining _UAPI_LINUX_GTP_H_ and not _UAPI_LINUX_GTP_H__ Signed-off-by: Colin Ian King Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/uapi/linux/gtp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index ca1054d..72a04a0 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -1,5 +1,5 @@ #ifndef _UAPI_LINUX_GTP_H_ -#define _UAPI_LINUX_GTP_H__ +#define _UAPI_LINUX_GTP_H_ enum gtp_genl_cmds { GTP_CMD_NEWPDP, -- cgit v1.1 From 1a0f7d2984f3864e64a43714b4a0999b5a27cff5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:47 +0100 Subject: net: cls_u32: fix error code for invalid flags 'err' variable is not set in this test, we would return whatever previous test set 'err' to. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 079b43b..b17e090 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -863,7 +863,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); if (!tc_flags_valid(flags)) - return err; + return -EINVAL; } n = (struct tc_u_knode *)*arg; -- cgit v1.1 From d47a0f387fe907bdb0430a398850c1cb80eb7def Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:48 +0100 Subject: net: cls_u32: be more strict about skip-sw flag Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b17e090..fe05449 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -457,20 +457,21 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; + if (!tc_should_offload(dev, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } -- cgit v1.1 From aafddbf0cffeb790f919436285328c762279b5d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:12:39 -0700 Subject: fq_codel: return non zero qlen in class dumps We properly scan the flow list to count number of packets, but John passed 0 to gnet_stats_copy_queue() so we report a zero value to user space instead of the result. Fixes: 640158536632 ("net: sched: restrict use of qstats qlen") Signed-off-by: Eric Dumazet Cc: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index fff7867..da250b2 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -661,7 +661,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, qs.backlog = q->backlogs[idx]; qs.drops = flow->dropped; } - if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0) + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (idx < q->flows_cnt) return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); -- cgit v1.1 From a03e6fe569713fb3ff0714f8fd7c8785c0ca9e22 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 6 Jun 2016 09:54:30 -0700 Subject: act_police: fix a crash during removal The police action is using its own code to initialize tcf hash info, which makes us to forgot to initialize a->hinfo correctly. Fix this by calling the helper function tcf_hash_create() directly. This patch fixed the following crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] __lock_acquire+0xd3/0xf91 PGD d3c34067 PUD d3e18067 PMD 0 Oops: 0000 [#1] SMP CPU: 2 PID: 853 Comm: tc Not tainted 4.6.0+ #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800d3e28040 ti: ffff8800d3f6c000 task.ti: ffff8800d3f6c000 RIP: 0010:[] [] __lock_acquire+0xd3/0xf91 RSP: 0000:ffff88011b203c80 EFLAGS: 00010002 RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000028 RBP: ffff88011b203d40 R08: 0000000000000001 R09: 0000000000000000 R10: ffff88011b203d58 R11: ffff88011b208000 R12: 0000000000000001 R13: ffff8800d3e28040 R14: 0000000000000028 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88011b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 00000000d4be1000 CR4: 00000000000006e0 Stack: ffff8800d3e289c0 0000000000000046 000000001b203d60 ffffffff00000000 0000000000000000 ffff880000000000 0000000000000000 ffffffff00000000 ffffffff8187142c ffff88011b203ce8 ffff88011b203ce8 ffffffff8101dbfc Call Trace: [] ? __tcf_hash_release+0x77/0xd1 [] ? native_sched_clock+0x1a/0x35 [] ? native_sched_clock+0x1a/0x35 [] ? sched_clock_local+0x11/0x78 [] ? mark_lock+0x24/0x201 [] lock_acquire+0x120/0x1b4 [] ? lock_acquire+0x120/0x1b4 [] ? __tcf_hash_release+0x77/0xd1 [] _raw_spin_lock_bh+0x3c/0x72 [] ? __tcf_hash_release+0x77/0xd1 [] __tcf_hash_release+0x77/0xd1 [] tcf_action_destroy+0x49/0x7c [] tcf_exts_destroy+0x20/0x2d [] u32_destroy_key+0x1b/0x4d [] u32_delete_key_freepf_rcu+0x1b/0x1d [] rcu_process_callbacks+0x610/0x82e [] ? u32_destroy_key+0x4d/0x4d [] __do_softirq+0x191/0x3f4 Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_police.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b884dae..c557789 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -38,7 +38,7 @@ struct tcf_police { bool peak_present; }; #define to_police(pc) \ - container_of(pc, struct tcf_police, common) + container_of(pc->priv, struct tcf_police, common) #define POL_TAB_MASK 15 @@ -119,14 +119,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { - unsigned int h; int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; int size; if (nla == NULL) @@ -145,7 +143,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (parm->index) { if (tcf_hash_search(tn, a, parm->index)) { - police = to_police(a->priv); + police = to_police(a); if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; @@ -156,16 +154,15 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, /* not replacing */ return -EEXIST; } + } else { + ret = tcf_hash_create(tn, parm->index, NULL, a, + sizeof(*police), bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (police == NULL) - return -ENOMEM; - ret = ACT_P_CREATED; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (bind) - police->tcf_bindcnt = 1; + police = to_police(a); override: if (parm->rate.rate) { err = -ENOMEM; @@ -237,16 +234,8 @@ override: return ret; police->tcfp_t_c = ktime_get_ns(); - police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(tn); - police->tcf_tm.install = jiffies; - police->tcf_tm.lastuse = jiffies; - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - spin_lock_bh(&hinfo->lock); - hlist_add_head(&police->tcf_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + tcf_hash_insert(tn, a); - a->priv = police; return ret; failure_unlock: @@ -255,7 +244,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - kfree(police); + tcf_hash_cleanup(a, est); return err; } -- cgit v1.1 From 92c075dbdeed02bdf293cb0f513bad70aa714b8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jun 2016 22:50:39 +0200 Subject: net: sched: fix tc_should_offload for specific clsact classes When offloading classifiers such as u32 or flower to hardware, and the qdisc is clsact (TC_H_CLSACT), then we need to differentiate its classes, since not all of them handle ingress, therefore we must leave those in software path. Add a .tcf_cl_offload() callback, so we can generically handle them, tested on ixgbe. Fixes: 10cbc6843446 ("net/sched: cls_flower: Hardware offloaded filters statistics support") Fixes: 5b33f48842fa ("net/flower: Introduce hardware offload support") Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 10 +++++++--- include/net/sch_generic.h | 1 + net/sched/cls_flower.c | 6 +++--- net/sched/cls_u32.c | 8 ++++---- net/sched/sch_ingress.c | 12 ++++++++++++ 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f7efa8..3722dda 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -392,16 +392,20 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(struct net_device *dev, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) { + const struct Qdisc *sch = tp->q; + const struct Qdisc_class_ops *cops = sch->ops->cl_ops; + if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) return false; - if (!dev->netdev_ops->ndo_setup_tc) return false; + if (cops && cops->tcf_cl_offload) + return cops->tcf_cl_offload(tp->classid); return true; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6803af1..62d5531 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -168,6 +168,7 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 730aaca..b3b7978 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -171,7 +171,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_DESTROY; @@ -194,7 +194,7 @@ static void fl_hw_replace_filter(struct tcf_proto *tp, struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return; offload.command = TC_CLSFLOWER_REPLACE; @@ -216,7 +216,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_STATS; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index fe05449..27b99fd 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -440,7 +440,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, @@ -457,7 +457,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; offload.type = TC_SETUP_CLSU32; @@ -485,7 +485,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; @@ -508,7 +508,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { + if (tc_should_offload(dev, tp, flags)) { offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; offload.cls_u32->knode.handle = n->handle; offload.cls_u32->knode.fshift = n->fshift; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 10adbc6..8fe6999 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -27,6 +27,11 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid) return TC_H_MIN(classid) + 1; } +static bool ingress_cl_offload(u32 classid) +{ + return true; +} + static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -86,6 +91,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = ingress_find_tcf, + .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, }; @@ -110,6 +116,11 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } +static bool clsact_cl_offload(u32 classid) +{ + return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); +} + static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -158,6 +169,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = clsact_find_tcf, + .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, }; -- cgit v1.1 From ce3cf4ec0305919fc69a972f6c2b2efd35d36abc Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 6 Jun 2016 15:07:18 -0700 Subject: tcp: record TLP and ER timer stats in v6 stats The v6 tcp stats scan do not provide TLP and ER timer information correctly like the v4 version . This patch fixes that. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 79e33e0..f36c2d0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1721,7 +1721,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { -- cgit v1.1 From 2c2c1af4497514da1be2b571066859701dd79231 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Jun 2016 17:38:52 -0700 Subject: cpufreq: intel_pstate: Fix code ordering in intel_pstate_set_policy() The limits->max_perf is rounded_up but immediately overwritten by another assignment to limits->max_perf. Move that operation to the correct location. While here also added a pr_debug() call in ->set_policy to aid in debugging. Fixes: 785ee2788141 (cpufreq: intel_pstate: Fix limits->max_perf rounding error) Signed-off-by: Srinivas Pandruvada [ rjw : Subject & changelog ] Cc: 4.4+ # 4.4+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0d159b5..724b905 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1460,6 +1460,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_clear_update_util_hook(policy->cpu); + pr_debug("set_policy cpuinfo.max %u policy->max %u\n", + policy->cpuinfo.max_freq, policy->max); + cpu = all_cpu_data[0]; if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && @@ -1495,13 +1498,13 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_sysfs_pct); limits->max_perf_pct = max(limits->min_policy_pct, limits->max_perf_pct); - limits->max_perf = round_up(limits->max_perf, FRAC_BITS); /* Make sure min_perf_pct <= max_perf_pct */ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); limits->min_perf = div_fp(limits->min_perf_pct, 100); limits->max_perf = div_fp(limits->max_perf_pct, 100); + limits->max_perf = round_up(limits->max_perf, FRAC_BITS); out: intel_pstate_set_update_util_hook(policy->cpu); -- cgit v1.1 From 983e600e88835f0321d1a0ea06f52d48b7b5a544 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 7 Jun 2016 17:38:53 -0700 Subject: cpufreq: intel_pstate: Fix ->set_policy() interface for no_turbo When turbo is disabled, the ->set_policy() interface is broken. For example, when turbo is disabled and cpuinfo.max = 2900000 (full max turbo frequency), setting the limits results in frequency less than the requested one: Set 1000000 KHz results in 0700000 KHz Set 1500000 KHz results in 1100000 KHz Set 2000000 KHz results in 1500000 KHz This is because the limits->max_perf fraction is calculated using the max turbo frequency as the reference, but when the max P-State is capped in intel_pstate_get_min_max(), the reference is not the max turbo P-State. This results in reducing max P-State. One option is to always use max turbo as reference for calculating limits. But this will not be correct. By definition the intel_pstate sysfs limits, shows percentage of available performance. So when BIOS has disabled turbo, the available performance is max non turbo. So the max_perf_pct should still show 100%. Signed-off-by: Srinivas Pandruvada [ rjw : Subject & changelog, rewrite in fewer lines of code ] Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 724b905..ee367e9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1561,8 +1561,11 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; - policy->cpuinfo.max_freq = - cpu->pstate.turbo_pstate * cpu->pstate.scaling; + update_turbo_state(); + policy->cpuinfo.max_freq = limits->turbo_disabled ? + cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; + policy->cpuinfo.max_freq *= cpu->pstate.scaling; + intel_pstate_init_acpi_perf_limits(policy); policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); -- cgit v1.1 From 3d56c25e3bb0726a5c5e16fc2d9e38f8ed763085 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2016 21:26:55 -0400 Subject: fix d_walk()/non-delayed __d_free() race Ascend-to-parent logics in d_walk() depends on all encountered child dentries not getting freed without an RCU delay. Unfortunately, in quite a few cases it is not true, with hard-to-hit oopsable race as the result. Fortunately, the fix is simiple; right now the rule is "if it ever been hashed, freeing must be delayed" and changing it to "if it ever had a parent, freeing must be delayed" closes that hole and covers all cases the old rule used to cover. Moreover, pipes and sockets remain _not_ covered, so we do not introduce RCU delay in the cases which are the reason for having that delay conditional in the first place. Cc: stable@vger.kernel.org # v3.2+ (and watch out for __d_materialise_dentry()) Signed-off-by: Al Viro --- fs/dcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index ad4a542..817c243 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1636,7 +1636,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -2358,7 +2358,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2843,6 +2842,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); -- cgit v1.1 From a01e718f7241c53f564402f7acff373eed5bd166 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Jun 2016 21:53:51 -0400 Subject: fix a regression in atomic_open() open("/foo/no_such_file", O_RDONLY | O_CREAT) on should fail with EACCES when /foo is not writable; failing with ENOENT is obviously wrong. That got broken by a braino introduced when moving the creat_error logics from atomic_open() to lookup_open(). Easy to fix, fortunately. Spotted-by: "Yan, Zheng" Tested-by: "Yan, Zheng" Signed-off-by: Al Viro --- fs/namei.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index d7c0cac..28cb1cd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2995,9 +2995,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; + if (unlikely(d_is_negative(dentry))) { + error = -ENOENT; + } else { + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; + } } } dput(dentry); -- cgit v1.1 From 1607f09c226d1378439c411baaaa020042750338 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Jun 2016 23:14:14 +0200 Subject: coredump: fix dumping through pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset in the core file used to be tracked with ->written field of the coredump_params structure. The field was retired in favour of file->f_pos. However, ->f_pos is not maintained for pipes which leads to breakage. Restore explicit tracking of the offset in coredump_params. Introduce ->pos field for this purpose since ->written was already reused. Fixes: a00839395103 ("get rid of coredump_params->written"). Reported-by: Zbigniew Jędrzejewski-Szmek Signed-off-by: Mateusz Guzik Reviewed-by: Omar Sandoval Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/coredump.c | 4 +++- include/linux/binfmts.h | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 84fb984..85c85eb 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -172,7 +172,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (rc < 0) goto out; - skip = roundup(cprm->file->f_pos - total + sz, 4) - cprm->file->f_pos; + skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; if (!dump_skip(cprm, skip)) goto Eio; out: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e158b22..a7a28110 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e..2035893 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab8..281b768 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 576e463..314b3ca 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -65,6 +65,7 @@ struct coredump_params { unsigned long limit; unsigned long mm_flags; loff_t written; + loff_t pos; }; /* -- cgit v1.1 From 0b148def403153a4d1565f1640356cb78ce5109f Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 7 Jun 2016 19:14:17 +0900 Subject: bridge: Don't insert unnecessary local fdb entry on changing mac address The missing br_vlan_should_use() test caused creation of an unneeded local fdb entry on changing mac address of a bridge device when there is a vlan which is configured on a bridge port but not on the bridge device. Fixes: 2594e9064a57 ("bridge: vlan: add per-vlan struct and move to rhashtables") Signed-off-by: Toshiaki Makita Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index dcea4f4..c18080a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -279,6 +279,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); -- cgit v1.1 From 88832a22d6bb50e3b5f9d5ecc6cf26707c35f322 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 19:27:51 +0100 Subject: net-sysfs: fix missing The of_find_net_device_by_node() function is defined in but not included in the .c file that implements it. Fix the following warning by including the header: net/core/net-sysfs.c:1494:19: warning: symbol 'of_find_net_device_by_node' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2b3f76f..7a0b616 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "net-sysfs.h" -- cgit v1.1 From 970442c599b22ccd644ebfe94d1d303bf6f87c05 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:27 -0700 Subject: x86/cpu/intel: Introduce macros for Intel family numbers Problem: We have a boatload of open-coded family-6 model numbers. Half of them have these model numbers in hex and the other half in decimal. This makes grepping for them tons of fun, if you were to try. Solution: Consolidate all the magic numbers. Put all the definitions in one header. The names here are closely derived from the comments describing the models from arch/x86/events/intel/core.c. We could easily make them shorter by doing things like s/SANDYBRIDGE/SNB/, but they seemed fine even with the longer versions to me. Do not take any of these names too literally, like "DESKTOP" or "MOBILE". These are all colloquial names and not precise descriptions of everywhere a given model will show up. Signed-off-by: Dave Hansen Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Darren Hart Cc: Dave Hansen Cc: Denys Vlasenko Cc: Doug Thompson Cc: Eduardo Valentin Cc: H. Peter Anvin Cc: Jacob Pan Cc: Kan Liang Cc: Len Brown Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rajneesh Bhardwaj Cc: Souvik Kumar Chakravarty Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tony Luck Cc: Ulf Hansson Cc: Viresh Kumar Cc: Vishwanath Somayaji Cc: Zhang Rui Cc: jacob.jun.pan@intel.com Cc: linux-acpi@vger.kernel.org Cc: linux-edac@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20160603001927.F2A7D828@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-family.h | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/x86/include/asm/intel-family.h diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h new file mode 100644 index 0000000..6999f7d --- /dev/null +++ b/arch/x86/include/asm/intel-family.h @@ -0,0 +1,68 @@ +#ifndef _ASM_X86_INTEL_FAMILY_H +#define _ASM_X86_INTEL_FAMILY_H + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * The "_X" parts are generally the EP and EX Xeons, or the + * "Extreme" ones, like Broadwell-E. + * + * Things ending in "2" are usually because we have no better + * name for them. There's no processor called "WESTMERE2". + */ + +#define INTEL_FAM6_CORE_YONAH 0x0E +#define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D + +#define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_FAM6_WESTMERE2 0x1F +#define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_FAM6_WESTMERE_EX 0x2F + +#define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_FAM6_IVYBRIDGE_X 0x3E + +#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_FAM6_HASWELL_ULT 0x45 +#define INTEL_FAM6_HASWELL_GT3E 0x46 + +#define INTEL_FAM6_BROADWELL_CORE 0x3D +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 +#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL_X 0x4F + +#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E +#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_X 0x55 +#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E +#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E + +/* "Small Core" Processors (Atom) */ + +#define INTEL_FAM6_ATOM_PINEVIEW 0x1C +#define INTEL_FAM6_ATOM_LINCROFT 0x26 +#define INTEL_FAM6_ATOM_PENWELL 0x27 +#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 +#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 +#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ +#define INTEL_FAM6_ATOM_MERRIFIELD1 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MERRIFIELD2 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ + +/* Xeon Phi */ + +#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ + +#endif /* _ASM_X86_INTEL_FAMILY_H */ -- cgit v1.1 From 96685a55a82c383cbba7ef1d4a636acf708cf17f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 1 Jun 2016 12:04:28 +0200 Subject: x86/cpu/AMD: Extend X86_FEATURE_TOPOEXT workaround to newer models We need to reenable the topology extensions CPUID leafs on newer models too, if BIOS has disabled them, as we rely on them to get proper compute unit topology. Make the printk a once thing, while at it. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rui Huang Cc: Sherry Hurwitz Cc: Thomas Gleixner Cc: linux-hwmon@vger.kernel.org Link: http://lkml.kernel.org/r/1464775468-23355-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c343a54..f5c69d8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -674,14 +674,14 @@ static void init_amd_bd(struct cpuinfo_x86 *c) u64 value; /* re-enable TopologyExtensions if switched off by BIOS */ - if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + if ((c->x86_model >= 0x10) && (c->x86_model <= 0x6f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { if (msr_set_bit(0xc0011005, 54) > 0) { rdmsrl(0xc0011005, value); if (value & BIT_64(54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); } } } -- cgit v1.1 From 2c610022711675ee908b903d242f0b90e1db661f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Jun 2016 10:19:51 +0200 Subject: locking/qspinlock: Fix spin_unlock_wait() some more While this prior commit: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()") ... fixes spin_is_locked() and spin_unlock_wait() for the usage in ipc/sem and netfilter, it does not in fact work right for the usage in task_work and futex. So while the 2 locks crossed problem: spin_lock(A) spin_lock(B) if (!spin_is_locked(B)) spin_unlock_wait(A) foo() foo(); ... works with the smp_mb() injected by both spin_is_locked() and spin_unlock_wait(), this is not sufficient for: flag = 1; smp_mb(); spin_lock() spin_unlock_wait() if (!flag) // add to lockless list // iterate lockless list ... because in this scenario, the store from spin_lock() can be delayed past the load of flag, uncrossing the variables and loosing the guarantee. This patch reworks spin_is_locked() and spin_unlock_wait() to work in both cases by exploiting the observation that while the lock byte store can be delayed, the contender must have registered itself visibly in other state contained in the word. It also allows for architectures to override both functions, as PPC and ARM64 have an additional issue for which we currently have no generic solution. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Giovanni Gherdovich Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: stable@vger.kernel.org # v4.2 and later Fixes: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()") Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock.h | 53 ++++++++++++------------------------ kernel/locking/qspinlock.c | 60 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 6bd0570..05f05f1 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -22,37 +22,33 @@ #include /** + * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock + * @lock : Pointer to queued spinlock structure + * + * There is a very slight possibility of live-lock if the lockers keep coming + * and the waiter is just unfortunate enough to not see any unlock state. + */ +#ifndef queued_spin_unlock_wait +extern void queued_spin_unlock_wait(struct qspinlock *lock); +#endif + +/** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure * Return: 1 if it is locked, 0 otherwise */ +#ifndef queued_spin_is_locked static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* - * queued_spin_lock_slowpath() can ACQUIRE the lock before - * issuing the unordered store that sets _Q_LOCKED_VAL. - * - * See both smp_cond_acquire() sites for more detail. - * - * This however means that in code like: - * - * spin_lock(A) spin_lock(B) - * spin_unlock_wait(B) spin_is_locked(A) - * do_something() do_something() - * - * Both CPUs can end up running do_something() because the store - * setting _Q_LOCKED_VAL will pass through the loads in - * spin_unlock_wait() and/or spin_is_locked(). + * See queued_spin_unlock_wait(). * - * Avoid this by issuing a full memory barrier between the spin_lock() - * and the loads in spin_unlock_wait() and spin_is_locked(). - * - * Note that regular mutual exclusion doesn't care about this - * delayed store. + * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL + * isn't immediately observable. */ - smp_mb(); - return atomic_read(&lock->val) & _Q_LOCKED_MASK; + return atomic_read(&lock->val); } +#endif /** * queued_spin_value_unlocked - is the spinlock structure unlocked? @@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock) } #endif -/** - * queued_spin_unlock_wait - wait until current lock holder releases the lock - * @lock : Pointer to queued spinlock structure - * - * There is a very slight possibility of live-lock if the lockers keep coming - * and the waiter is just unfortunate enough to not see any unlock state. - */ -static inline void queued_spin_unlock_wait(struct qspinlock *lock) -{ - /* See queued_spin_is_locked() */ - smp_mb(); - while (atomic_read(&lock->val) & _Q_LOCKED_MASK) - cpu_relax(); -} - #ifndef virt_spin_lock static __always_inline bool virt_spin_lock(struct qspinlock *lock) { diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce2f75e..5fc8c31 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif +/* + * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before + * issuing an _unordered_ store to set _Q_LOCKED_VAL. + * + * This means that the store can be delayed, but no later than the + * store-release from the unlock. This means that simply observing + * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired. + * + * There are two paths that can issue the unordered store: + * + * (1) clear_pending_set_locked(): *,1,0 -> *,0,1 + * + * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0 + * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1 + * + * However, in both cases we have other !0 state we've set before to queue + * ourseves: + * + * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our + * load is constrained by that ACQUIRE to not pass before that, and thus must + * observe the store. + * + * For (2) we have a more intersting scenario. We enqueue ourselves using + * xchg_tail(), which ends up being a RELEASE. This in itself is not + * sufficient, however that is followed by an smp_cond_acquire() on the same + * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and + * guarantees we must observe that store. + * + * Therefore both cases have other !0 state that is observable before the + * unordered locked byte store comes through. This means we can use that to + * wait for the lock store, and then wait for an unlock. + */ +#ifndef queued_spin_unlock_wait +void queued_spin_unlock_wait(struct qspinlock *lock) +{ + u32 val; + + for (;;) { + val = atomic_read(&lock->val); + + if (!val) /* not locked, we're done */ + goto done; + + if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */ + break; + + /* not locked, but pending, wait until we observe the lock */ + cpu_relax(); + } + + /* any unlock is good */ + while (atomic_read(&lock->val) & _Q_LOCKED_MASK) + cpu_relax(); + +done: + smp_rmb(); /* CTRL + RMB -> ACQUIRE */ +} +EXPORT_SYMBOL(queued_spin_unlock_wait); +#endif + #endif /* _GEN_PV_LOCK_SLOWPATH */ /** -- cgit v1.1 From 62a92c8f553e49270a0ee391b8733da71ab0aebc Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 7 Jun 2016 15:44:15 +0300 Subject: perf/core: Remove a redundant check There is no way to end up in _free_event() with event::pmu being NULL. The latter is initialized in event allocation path and remains set forever. In case of allocation failure, the error path doesn't use _free_event(). Having the check, however, suggests that it is possible to have a event::pmu==NULL situation in _free_event() and confuses the robots. This patch gets rid of the check. Reported-by: Dan Carpenter Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: eranian@google.com Cc: vince@deater.net Link: http://lkml.kernel.org/r/1465303455-26032-1-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 050a290..87e945d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event) if (event->ctx) put_ctx(event->ctx); - if (event->pmu) { - exclusive_event_destroy(event); - module_put(event->pmu->module); - } + exclusive_event_destroy(event); + module_put(event->pmu->module); call_rcu(&event->rcu_head, free_event_rcu); } -- cgit v1.1 From 9c57259117b9c25472a3fa6d5a14d6bb3b647e87 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 3 Jun 2016 17:58:40 -0500 Subject: sched/debug: Fix /proc/sched_debug regression Commit: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") ... introduced a bug when CONFIG_SCHEDSTATS is enabled and the runtime tunable is disabled (which is the default). The wait-time, sum-exec, and sum-sleep fields are missing from the /proc/sched_debug file in the runnable_tasks section. Fix it with a new schedstat_val() macro which returns the field value when schedstats is enabled and zero otherwise. The macro works with both SCHEDSTATS and !SCHEDSTATS. I put the macro in stats.h since it might end up being useful in other places. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mel Gorman Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") Link: http://lkml.kernel.org/r/bcda7c2790cf2ccbe586a28c02dd7b6fe7749a2b.1464994423.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 15 ++++----------- kernel/sched/stats.h | 3 +++ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index cf905f6..0368c39 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); -#ifdef CONFIG_SCHEDSTATS - if (schedstat_enabled()) { - SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", - SPLIT_NS(p->se.statistics.wait_sum), - SPLIT_NS(p->se.sum_exec_runtime), - SPLIT_NS(p->se.statistics.sum_sleep_runtime)); - } -#else + SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", - 0LL, 0L, + SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)), SPLIT_NS(p->se.sum_exec_runtime), - 0LL, 0L); -#endif + SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime))); + #ifdef CONFIG_NUMA_BALANCING SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 70b3b6a..78955cb 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) # define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0) # define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0) # define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) +# define schedstat_val(rq, field) ((schedstat_enabled()) ? (rq)->field : 0) + #else /* !CONFIG_SCHEDSTATS */ static inline void rq_sched_info_arrive(struct rq *rq, unsigned long long delta) @@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) # define schedstat_inc(rq, field) do { } while (0) # define schedstat_add(rq, field, amt) do { } while (0) # define schedstat_set(var, val) do { } while (0) +# define schedstat_val(rq, field) 0 #endif #ifdef CONFIG_SCHED_INFO -- cgit v1.1 From 4698f88c06b893f2acc0b443004a53bf490fde7c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 7 Jun 2016 14:43:16 -0500 Subject: sched/debug: Fix 'schedstats=enable' cmdline option The 'schedstats=enable' option doesn't work, and also produces the following warning during boot: WARNING: CPU: 0 PID: 0 at /home/jpoimboe/git/linux/kernel/jump_label.c:61 static_key_slow_inc+0x8c/0xa0 static_key_slow_inc used before call to jump_label_init Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0-rc1+ #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 0000000000000086 3ae3475a4bea95d4 ffffffff81e03da8 ffffffff8143fc83 ffffffff81e03df8 0000000000000000 ffffffff81e03de8 ffffffff810b1ffb 0000003d00000096 ffffffff823514d0 ffff88007ff197c8 0000000000000000 Call Trace: [] dump_stack+0x85/0xc2 [] __warn+0xcb/0xf0 [] warn_slowpath_fmt+0x5f/0x80 [] static_key_slow_inc+0x8c/0xa0 [] static_key_enable+0x16/0x40 [] setup_schedstats+0x29/0x94 [] unknown_bootoption+0x89/0x191 [] parse_args+0x297/0x4b0 [] start_kernel+0x1d8/0x4a9 [] ? set_init_arg+0x55/0x55 [] ? early_idt_handler_array+0x120/0x120 [] x86_64_start_reservations+0x2f/0x31 [] x86_64_start_kernel+0x14a/0x16d The problem is that it tries to update the 'sched_schedstats' static key before jump labels have been initialized. Changing jump_label_init() to be called earlier before parse_early_param() wouldn't fix it: it would still fail trying to poke_text() because mm isn't yet initialized. Instead, just create a temporary '__sched_schedstats' variable which can be copied to the static key later during sched_init() after jump labels have been initialized. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Matt Fleming Cc: Mel Gorman Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default") Link: http://lkml.kernel.org/r/453775fe3433bed65731a583e228ccea806d18cd.1465322027.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f2cae4..385c947 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, #endif #endif +#ifdef CONFIG_SCHEDSTATS + DEFINE_STATIC_KEY_FALSE(sched_schedstats); +static bool __initdata __sched_schedstats = false; -#ifdef CONFIG_SCHEDSTATS static void set_schedstats(bool enabled) { if (enabled) @@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str) if (!str) goto out; + /* + * This code is called before jump labels have been set up, so we can't + * change the static branch directly just yet. Instead set a temporary + * variable so init_schedstats() can do it later. + */ if (!strcmp(str, "enable")) { - set_schedstats(true); + __sched_schedstats = true; ret = 1; } else if (!strcmp(str, "disable")) { - set_schedstats(false); + __sched_schedstats = false; ret = 1; } out: @@ -2293,6 +2300,11 @@ out: } __setup("schedstats=", setup_schedstats); +static void __init init_schedstats(void) +{ + set_schedstats(__sched_schedstats); +} + #ifdef CONFIG_PROC_SYSCTL int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write, set_schedstats(state); return err; } -#endif -#endif +#endif /* CONFIG_PROC_SYSCTL */ +#else /* !CONFIG_SCHEDSTATS */ +static inline void init_schedstats(void) {} +#endif /* CONFIG_SCHEDSTATS */ /* * fork()/clone()-time setup: @@ -7487,6 +7501,8 @@ void __init sched_init(void) #endif init_sched_fair_class(); + init_schedstats(); + scheduler_running = 1; } -- cgit v1.1 From 0b0d81e3b7334897da9b2e3ffee860c2046f7bc0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 26 May 2016 13:43:43 -0500 Subject: objtool, drm/vmwgfx: Fix "duplicate frame pointer save" warning objtool reports the following warnings: drivers/gpu/drm/vmwgfx/vmwgfx_msg.o: warning: objtool: vmw_send_msg()+0x107: duplicate frame pointer save drivers/gpu/drm/vmwgfx/vmwgfx_msg.o: warning: objtool: vmw_host_get_guestinfo()+0x252: duplicate frame pointer save To quote Linus: "The reason is that VMW_PORT_HB_OUT() uses a magic instruction sequence (a "rep outsb") to communicate with the hypervisor (it's a virtual GPU driver for vmware), and %rbp is part of the communication. So the inline asm does a save-and-restore of the frame pointer around the instruction sequence. I actually find the objtool warning to be quite reasonable, so it's not exactly a false positive, since in this case it actually does point out that the frame pointer won't be reliable over that instruction sequence. But in this particular case it just ends up being the wrong thing - the code is what it is, and %rbp just can't have the frame information due to annoying magic calling conventions." Silence the warnings by telling objtool to ignore the two functions which use the VMW_PORT_HB_{IN,OUT} macros. Reported-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Acked-by: Linus Torvalds Cc: DRI Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160526184343.fdtjjjg67smmeekt@treble Signed-off-by: Ingo Molnar --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 6de283c..f0374f9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "drmP.h" #include "vmwgfx_msg.h" @@ -194,7 +195,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg) return -EINVAL; } - +STACK_FRAME_NON_STANDARD(vmw_send_msg); /** @@ -304,6 +305,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, return 0; } +STACK_FRAME_NON_STANDARD(vmw_recv_msg); /** -- cgit v1.1 From c0530dd3ef1d3997d885e1345e34a284db2b9cfa Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 8 Jun 2016 14:57:28 +0530 Subject: cxgb4: Add device id of T540-BT adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index a2cdfc1..50812a1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -144,6 +144,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */ CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */ CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */ + CH_PCI_ID_TABLE_FENTRY(0x5018), /* T540-BT */ CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */ CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */ CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */ -- cgit v1.1 From 077fa7aed17de5022e44bf07dbaf732078b7b5b2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 8 Jun 2016 14:25:22 +0100 Subject: futex: Calculate the futex key based on a tail page for file-based futexes Mike Galbraith reported that the LTP test case futex_wake04 was broken by commit 65d8fc777f6d ("futex: Remove requirement for lock_page() in get_futex_key()"). This test case uses futexes backed by hugetlbfs pages and so there is an associated inode with a futex stored on such pages. The problem is that the key is being calculated based on the head page index of the hugetlbfs page and not the tail page. Prior to the optimisation, the page lock was used to stabilise mappings and pin the inode is file-backed which is overkill. If the page was a compound page, the head page was automatically looked up as part of the page lock operation but the tail page index was used to calculate the futex key. After the optimisation, the compound head is looked up early and the page lock is only relied upon to identify truncated pages, special pages or a shmem page moving to swapcache. The head page is looked up because without the page lock, special care has to be taken to pin the inode correctly. However, the tail page is still required to calculate the futex key so this patch records the tail page. On vanilla 4.6, the output of the test case is; futex_wake04 0 TINFO : Hugepagesize 2097152 futex_wake04 1 TFAIL : futex_wake04.c:126: Bug: wait_thread2 did not wake after 30 secs. With the patch applied futex_wake04 0 TINFO : Hugepagesize 2097152 futex_wake04 1 TPASS : Hi hydra, thread2 awake! Fixes: 65d8fc777f6d "futex: Remove requirement for lock_page() in get_futex_key()" Reported-and-tested-by: Mike Galbraith Signed-off-by: Mel Gorman Acked-by: Peter Zijlstra (Intel) Reviewed-by: Davidlohr Bueso Cc: Sebastian Andrzej Siewior Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160608132522.GM2469@suse.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index ee25f5b..33664f7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; - struct page *page; + struct page *page, *tail; struct address_space *mapping; int err, ro = 0; @@ -530,7 +530,15 @@ again: * considered here and page lock forces unnecessarily serialization * From this point on, mapping will be re-verified if necessary and * page lock will be acquired only if it is unavoidable - */ + * + * Mapping checks require the head page for any compound page so the + * head page and mapping is looked up now. For anonymous pages, it + * does not matter if the page splits in the future as the key is + * based on the address. For filesystem-backed pages, the tail is + * required as the index of the page determines the key. For + * base pages, there is no tail page and tail == page. + */ + tail = page; page = compound_head(page); mapping = READ_ONCE(page->mapping); @@ -654,7 +662,7 @@ again: key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.inode = inode; - key->shared.pgoff = basepage_index(page); + key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } -- cgit v1.1 From a5c5e2da8551eb69e5d5d09d51d526140b5db9fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Jun 2016 12:59:17 +0200 Subject: l2tp: fix configuration passed to setup_udp_tunnel_sock() Unused fields of udp_cfg must be all zeros. Otherwise setup_udp_tunnel_sock() fills ->gro_receive and ->gro_complete callbacks with garbage, eventually resulting in panic when used by udp_gro_receive(). [ 72.694123] BUG: unable to handle kernel paging request at ffff880033f87d78 [ 72.695518] IP: [] 0xffff880033f87d78 [ 72.696530] PGD 26e2067 PUD 26e3067 PMD 342ed063 PTE 8000000033f87163 [ 72.696530] Oops: 0011 [#1] SMP KASAN [ 72.696530] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pptp gre pppox ppp_generic slhc crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel evdev aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper serio_raw acpi_cpufreq button proc\ essor ext4 crc16 jbd2 mbcache virtio_blk virtio_net virtio_pci virtio_ring virtio [ 72.696530] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.7.0-rc1 #1 [ 72.696530] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 72.696530] task: ffff880035b59700 ti: ffff880035b70000 task.ti: ffff880035b70000 [ 72.696530] RIP: 0010:[] [] 0xffff880033f87d78 [ 72.696530] RSP: 0018:ffff880035f87bc0 EFLAGS: 00010246 [ 72.696530] RAX: ffffed000698f996 RBX: ffff88003326b840 RCX: ffffffff814cc823 [ 72.696530] RDX: ffff88003326b840 RSI: ffff880033e48038 RDI: ffff880034c7c780 [ 72.696530] RBP: ffff880035f87c18 R08: 000000000000a506 R09: 0000000000000000 [ 72.696530] R10: ffff880035f87b38 R11: ffff880034b9344d R12: 00000000ebfea715 [ 72.696530] R13: 0000000000000000 R14: ffff880034c7c780 R15: 0000000000000000 [ 72.696530] FS: 0000000000000000(0000) GS:ffff880035f80000(0000) knlGS:0000000000000000 [ 72.696530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.696530] CR2: ffff880033f87d78 CR3: 0000000033c98000 CR4: 00000000000406a0 [ 72.696530] Stack: [ 72.696530] ffffffff814cc834 ffff880034b93468 0000001481416818 ffff88003326b874 [ 72.696530] ffff880034c7ccb0 ffff880033e48038 ffff88003326b840 ffff880034b93462 [ 72.696530] ffff88003326b88a ffff88003326b88c ffff880034b93468 ffff880035f87c70 [ 72.696530] Call Trace: [ 72.696530] [ 72.696530] [] ? udp_gro_receive+0x1c6/0x1f9 [ 72.696530] [] udp4_gro_receive+0x2b5/0x310 [ 72.696530] [] inet_gro_receive+0x4a3/0x4cd [ 72.696530] [] dev_gro_receive+0x584/0x7a3 [ 72.696530] [] ? __lock_is_held+0x29/0x64 [ 72.696530] [] napi_gro_receive+0x124/0x21d [ 72.696530] [] virtnet_receive+0x8df/0x8f6 [virtio_net] [ 72.696530] [] virtnet_poll+0x1d/0x8d [virtio_net] [ 72.696530] [] net_rx_action+0x15b/0x3b9 [ 72.696530] [] __do_softirq+0x216/0x546 [ 72.696530] [] irq_exit+0x49/0xb6 [ 72.696530] [] do_IRQ+0xe2/0xfa [ 72.696530] [] common_interrupt+0x89/0x89 [ 72.696530] [ 72.696530] [] ? trace_hardirqs_on_caller+0x229/0x270 [ 72.696530] [] ? default_idle+0x1c/0x2d [ 72.696530] [] ? default_idle+0x1a/0x2d [ 72.696530] [] arch_cpu_idle+0xa/0xc [ 72.696530] [] default_idle_call+0x1a/0x1c [ 72.696530] [] cpu_startup_entry+0x15b/0x20f [ 72.696530] [] start_secondary+0x12c/0x133 [ 72.696530] Code: ff ff ff ff ff ff ff ff ff ff 7f ff ff ff ff ff ff ff 7f 00 7e f8 33 00 88 ff ff 6d 61 58 81 ff ff ff ff 5e de 0a 81 ff ff ff ff <00> 5c e2 34 00 88 ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 72.696530] RIP [] 0xffff880033f87d78 [ 72.696530] RSP [ 72.696530] CR2: ffff880033f87d78 [ 72.696530] ---[ end trace ad7758b9a1dccf99 ]--- [ 72.696530] Kernel panic - not syncing: Fatal exception in interrupt [ 72.696530] Kernel Offset: disabled [ 72.696530] ---[ end Kernel panic - not syncing: Fatal exception in interrupt v2: use empty initialiser instead of "{ NULL }" to avoid relying on first field's type. Fixes: 38fd2af24fcf ("udp: Add socket based GRO and config") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6edfa99..1e40dac 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; -- cgit v1.1 From 00bc0ef5880dc7b82f9c320dead4afaad48e47be Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 8 Jun 2016 15:13:34 +0200 Subject: ipv6: Skip XFRM lookup if dst_entry in socket cache is valid At present we perform an xfrm_lookup() for each UDPv6 message we send. The lookup involves querying the flow cache (flow_cache_lookup) and, in case of a cache miss, creating an XFRM bundle. If we miss the flow cache, we can end up creating a new bundle and deriving the path MTU (xfrm_init_pmtu) from on an already transformed dst_entry, which we pass from the socket cache (sk->sk_dst_cache) down to xfrm_lookup(). This can happen only if we're caching the dst_entry in the socket, that is when we're using a connected UDP socket. To put it another way, the path MTU shrinks each time we miss the flow cache, which later on leads to incorrectly fragmented payload. It can be observed with ESPv6 in transport mode: 1) Set up a transformation and lower the MTU to trigger fragmentation # ip xfrm policy add dir out src ::1 dst ::1 \ tmpl src ::1 dst ::1 proto esp spi 1 # ip xfrm state add src ::1 dst ::1 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b # ip link set dev lo mtu 1500 2) Monitor the packet flow and set up an UDP sink # tcpdump -ni lo -ttt & # socat udp6-listen:12345,fork /dev/null & 3) Send a datagram that needs fragmentation with a connected socket # perl -e 'print "@" x 1470 | socat - udp6:[::1]:12345 2016/06/07 18:52:52 socat[724] E read(3, 0x555bb3d5ba00, 8192): Protocol error 00:00:00.000000 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x2), length 1448 00:00:00.000014 IP6 ::1 > ::1: frag (1448|32) 00:00:00.000050 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x3), length 1272 (^ ICMPv6 Parameter Problem) 00:00:00.000022 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x5), length 136 4) Compare it to a non-connected socket # perl -e 'print "@" x 1500' | socat - udp6-sendto:[::1]:12345 00:00:40.535488 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x6), length 1448 00:00:00.000010 IP6 ::1 > ::1: frag (1448|64) What happens in step (3) is: 1) when connecting the socket in __ip6_datagram_connect(), we perform an XFRM lookup, miss the flow cache, create an XFRM bundle, and cache the destination, 2) afterwards, when sending the datagram, we perform an XFRM lookup, again, miss the flow cache (due to mismatch of flowi6_iif and flowi6_oif, which is an issue of its own), and recreate an XFRM bundle based on the cached (and already transformed) destination. To prevent the recreation of an XFRM bundle, avoid an XFRM lookup altogether whenever we already have a destination entry cached in the socket. This prevents the path MTU shrinkage and brings us on par with UDPv4. The fix also benefits connected PINGv6 sockets, another user of ip6_sk_dst_lookup_flow(), who also suffer messages being transformed twice. Joint work with Hannes Frederic Sowa. Reported-by: Jan Tluka Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cbf127a..635b8d3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1071,17 +1071,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -- cgit v1.1 From e0d194adfa9f5f473068cc546bee60fb84ab77ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 06:19:45 -0700 Subject: net_sched: add missing paddattr description "make htmldocs" complains otherwise: .//net/core/gen_stats.c:65: warning: No description found for parameter 'padattr' .//net/core/gen_stats.c:101: warning: No description found for parameter 'padattr' Fixes: 9854518ea04d ("sched: align nlattr properly when needed") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/gen_stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index f96ee8b..be873e4 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -47,6 +47,7 @@ nla_put_failure: * @xstats_type: TLV type for backward compatibility xstats TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all @@ -87,6 +88,7 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat); * @type: TLV type for top level statistic TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all -- cgit v1.1 From 3497ed8c852a5a3d48957ca91baaa443d9bfcd4d Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Mon, 6 Jun 2016 17:29:30 +0100 Subject: sfc: report supported link speeds on SFP connections 7000-series SFC NICs connected with an SFP+ module currently fail to report any supported link speeds. Reported-by: Jarod Wilson Signed-off-by: Bert Kenward Reviewed-by: Jarod Wilson Tested-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index 7f295c4..2a9228a 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -189,11 +189,12 @@ static u32 mcdi_to_ethtool_cap(u32 media, u32 cap) case MC_CMD_MEDIA_XFP: case MC_CMD_MEDIA_SFP_PLUS: - result |= SUPPORTED_FIBRE; - break; - case MC_CMD_MEDIA_QSFP_PLUS: result |= SUPPORTED_FIBRE; + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + result |= SUPPORTED_1000baseT_Full; + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + result |= SUPPORTED_10000baseT_Full; if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) result |= SUPPORTED_40000baseCR4_Full; break; -- cgit v1.1 From ce492b3b8f99cf9d2f807ec22d8805c996a09503 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Fri, 3 Jun 2016 14:21:34 -0700 Subject: drm/fsl-dcu: use flat regmap cache Using flat regmap cache instead of RB-tree to avoid the following lockdep warning on driver load: WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:2755 lockdep_trace_alloc+0x15c/0x160() DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) The RB-tree regmap cache needs to allocate new space on first writes. However, allocations in an atomic context (e.g. when a spinlock is held) are not allowed. The function regmap_write calls map->lock, which acquires a spinlock in the fast_io case. Since the FSL DCU driver uses MMIO, the regmap bus of type regmap_mmio is being used which has fast_io set to true. Use flat regmap cache and specify max register to be large enouth to cover all registers available in LS1021a and Vybrids register space. Signed-off-by: Stefan Agner Cc: Mark Brown Cc: stable@vger.kernel.org --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 0ec1ad9..dc723f7 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -42,9 +42,10 @@ static const struct regmap_config fsl_dcu_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .cache_type = REGCACHE_RBTREE, + .cache_type = REGCACHE_FLAT, .volatile_reg = fsl_dcu_drm_is_volatile_reg, + .max_register = 0x11fc, }; static int fsl_dcu_drm_irq_init(struct drm_device *dev) -- cgit v1.1 From 85332739628fe4beafecdb713438c7cb1454c2f5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 3 Jun 2016 14:27:03 +0300 Subject: drm/omap: fix unused variable warning in dsi & hdmi Signed-off-by: Tomi Valkeinen Signed-off-by: Dave Airlie --- drivers/gpu/drm/omapdrm/dss/dsi.c | 1 - drivers/gpu/drm/omapdrm/dss/hdmi5.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 9ed8272..56c43f3 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -1167,7 +1167,6 @@ static int dsi_regulator_init(struct platform_device *dsidev) { struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); struct regulator *vdds_dsi; - int r; if (dsi->vdds_dsi_reg != NULL) return 0; diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index e129245..9255c0e 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -120,7 +120,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data) static int hdmi_init_regulator(void) { - int r; struct regulator *reg; if (hdmi.vdda_reg != NULL) -- cgit v1.1 From 6eef3801e719e4ea9c15c01b1d77706f47331166 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:03 +0100 Subject: net: cls_u32: catch all hardware offload errors Errors reported by u32_replace_hw_hnode() were not propagated. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 27b99fd..54ab32a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -922,11 +922,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + + err = u32_replace_hw_hnode(tp, ht, flags); + if (err) { + kfree(ht); + return err; + } + RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; - u32_replace_hw_hnode(tp, ht, flags); return 0; } -- cgit v1.1 From 201c44bd8ffa899f07b7b322a73e19baf0ada1e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:04 +0100 Subject: net: cls_u32: be more strict about skip-sw flag for knodes Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). This patch fixes the knode handling. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 54ab32a..ffe593e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -508,27 +508,28 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, tp, flags)) { - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; #else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; #endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; - if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } -- cgit v1.1 From 0a46baaf634663d28038fc137239b71bf5385e5a Mon Sep 17 00:00:00 2001 From: Shweta Choudaha Date: Wed, 8 Jun 2016 20:15:43 +0100 Subject: ip6gre: Allow live link address change The ip6 GRE tap device should not be forced to down state to change the mac address and should allow live address change for tap device similar to ipv4 gre. Signed-off-by: Shweta Choudaha Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f4ac284..fdc9de2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1256,6 +1256,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (ret) return ret; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); @@ -1289,6 +1291,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], -- cgit v1.1 From 9b15350f0d5c401c02eca15c4e6ca0603cff1a41 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Jun 2016 23:23:01 +0200 Subject: qfq: don't leak skb if kzalloc fails When we need to create a new aggregate to enqueue the skb we call kzalloc. If that fails we returned ENOBUFS without freeing the skb. Spotted during code review. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d2d8d9..f18857f 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1235,8 +1235,10 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, qdisc_pkt_len(skb)); - if (err) - return err; + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch); + } } err = qdisc_enqueue(skb, cl->qdisc); -- cgit v1.1 From 35639a0e98391036a4c7f23253c321d6621a8897 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 9 Jun 2016 11:32:14 +0530 Subject: ALSA: hda - Add PCI ID for Kabylake Kabylake shows up as PCI ID 0xa171. And Kabylake-LP as 0x9d71. Since these are similar to Skylake add these to SKL_PLUS macro Signed-off-by: Vinod Koul Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 9a0d144..94089fc 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -365,8 +365,11 @@ enum { #define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) +#define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171) +#define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) -#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ + IS_KBL(pci) || IS_KBL_LP(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2181,6 +2184,12 @@ static const struct pci_device_id azx_ids[] = { /* Sunrise Point-LP */ { PCI_DEVICE(0x8086, 0x9d70), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake */ + { PCI_DEVICE(0x8086, 0xa171), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake-LP */ + { PCI_DEVICE(0x8086, 0x9d71), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, -- cgit v1.1 From 6cbf6236d54c24b9a29e6892549c25b6902b44ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jun 2016 09:40:55 +0200 Subject: cfg80211: remove get/set antenna and tx power warnings Since set_tx_power and set_antenna are frequently implemented without the matching get_tx_power/get_antenna, we shouldn't have added warnings for those. Remove them. The remaining ones are correct and need to be implemented symmetrically for correct operation. Cc: stable@vger.kernel.org Fixes: de3bb771f471 ("cfg80211: add more warnings for inconsistent ops") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index d25c82b..ecca389 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -363,8 +363,6 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel); WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch); WARN_ON(ops->add_tx_ts && !ops->del_tx_ts); - WARN_ON(ops->set_tx_power && !ops->get_tx_power); - WARN_ON(ops->set_antenna && !ops->get_antenna); alloc_size = sizeof(*rdev) + sizeof_priv; -- cgit v1.1 From 3d5fdff46c4b2b9534fa2f9fc78e90a48e0ff724 Mon Sep 17 00:00:00 2001 From: Prasun Maiti Date: Mon, 6 Jun 2016 20:04:19 +0530 Subject: wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel iwpriv app uses iw_point structure to send data to Kernel. The iw_point structure holds a pointer. For compatibility Kernel converts the pointer as required for WEXT IOCTLs (SIOCIWFIRST to SIOCIWLAST). Some drivers may use iw_handler_def.private_args to populate iwpriv commands instead of iw_handler_def.private. For those case, the IOCTLs from SIOCIWFIRSTPRIV to SIOCIWLASTPRIV will follow the path ndo_do_ioctl(). Accordingly when the filled up iw_point structure comes from 32 bit iwpriv to 64 bit Kernel, Kernel will not convert the pointer and sends it to driver. So, the driver may get the invalid data. The pointer conversion for the IOCTLs (SIOCIWFIRSTPRIV to SIOCIWLASTPRIV), which follow the path ndo_do_ioctl(), is mandatory. This patch adds pointer conversion from 32 bit to 64 bit and vice versa, if the ioctl comes from 32 bit iwpriv to 64 bit Kernel. Cc: stable@vger.kernel.org Signed-off-by: Prasun Maiti Signed-off-by: Ujjal Roy Tested-by: Dibyajyoti Ghosh Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6250b1c..dbb2738e 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -958,8 +958,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) { +#ifdef CONFIG_COMPAT + if (info->flags & IW_REQUEST_FLAG_COMPAT) { + int ret = 0; + struct iwreq iwr_lcl; + struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; + + memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); + iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); + iwr_lcl.u.data.length = iwp_compat->length; + iwr_lcl.u.data.flags = iwp_compat->flags; + + ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); + + iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); + iwp_compat->length = iwr_lcl.u.data.length; + iwp_compat->flags = iwr_lcl.u.data.flags; + + return ret; + } else +#endif + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + } return -EOPNOTSUPP; } -- cgit v1.1 From d1a7f7aadc866eff61c422da0ecbe1f8e383e0f5 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Tue, 24 May 2016 09:26:27 +0800 Subject: drm/amdgpu/iceland: Set SC_PA_RASTER_CONFIG according to different RB enabled fix the raster config setting for different iceland configs. Signed-off-by: Ken Wang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index f19bab6..4d747ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3974,11 +3974,15 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, 0x3a00161a); amdgpu_ring_write(ring, 0x0000002e); break; - case CHIP_TOPAZ: case CHIP_CARRIZO: amdgpu_ring_write(ring, 0x00000002); amdgpu_ring_write(ring, 0x00000000); break; + case CHIP_TOPAZ: + amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? + 0x00000000 : 0x00000002); + amdgpu_ring_write(ring, 0x00000000); + break; case CHIP_STONEY: amdgpu_ring_write(ring, 0x00000000); amdgpu_ring_write(ring, 0x00000000); -- cgit v1.1 From 8b4af8a8e3e6ad82e0d32d1665d9a755c05c4c12 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 23 May 2016 18:24:41 +0800 Subject: drm/admgpu/powerplay/polaris: fix powertune table upload Exclude AVFS related fields when update powertune table to hw. The driver shouldn't set them directly. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c index 0b99ab3..ae96f14 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c @@ -286,7 +286,7 @@ int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr) if (polaris10_copy_bytes_to_smc(hwmgr->smumgr, pm_fuse_table_offset, (uint8_t *)&data->power_tune_table, - sizeof(struct SMU74_Discrete_PmFuses), data->sram_end)) + (sizeof(struct SMU74_Discrete_PmFuses) - 92), data->sram_end)) PP_ASSERT_WITH_CODE(false, "Attempt to download PmFuseTable Failed!", return -EINVAL); -- cgit v1.1 From 212cb3b6d79bb9f525da5593133d93b107184b27 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 19 May 2016 14:35:17 +0800 Subject: drm/amdgpu: add late_fini for ip_funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This give IP modules an optional late cleanup function. This is needed to handle tricky inter-module dependencies during tear down. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/amd_shared.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 6080951..afce1ed 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -157,6 +157,7 @@ struct amd_ip_funcs { int (*hw_init)(void *handle); /* tears down the hw state */ int (*hw_fini)(void *handle); + void (*late_fini)(void *handle); /* handles IP specific hw/sw changes for suspend */ int (*suspend)(void *handle); /* handles IP specific hw/sw changes for resume */ -- cgit v1.1 From 482587e3145ef4100b52946660ae52b457d09194 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 19 May 2016 14:36:01 +0800 Subject: drm/amdgpu: impl late_fini for amdgpu_pp_ip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements late_init support for powerplay. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 6bd961f..1cd53c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -223,6 +223,22 @@ static int amdgpu_pp_hw_fini(void *handle) return ret; } +static void amdgpu_pp_late_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +#ifdef CONFIG_DRM_AMD_POWERPLAY + if (adev->pp_enabled) { + amdgpu_pm_sysfs_fini(adev); + amd_powerplay_fini(adev->powerplay.pp_handle); + } + + if (adev->powerplay.ip_funcs->late_fini) + adev->powerplay.ip_funcs->late_fini( + adev->powerplay.pp_handle); +#endif +} + static int amdgpu_pp_suspend(void *handle) { int ret = 0; @@ -311,6 +327,7 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = { .sw_fini = amdgpu_pp_sw_fini, .hw_init = amdgpu_pp_hw_init, .hw_fini = amdgpu_pp_hw_fini, + .late_fini = amdgpu_pp_late_fini, .suspend = amdgpu_pp_suspend, .resume = amdgpu_pp_resume, .is_idle = amdgpu_pp_is_idle, -- cgit v1.1 From a6dcfd9cc55432e4dcbe058d6ae9f07fb3452992 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 19 May 2016 14:36:34 +0800 Subject: drm/amdgpu: fix pplib finish bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1,should use late_fini to kfree all resource otherwise the released pointer maybe accessed in IRQ ip fini routine. 2,hwmgr should not be kfree by pem_fini which is invoked by hw fini path. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 7 ------- drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c | 3 --- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bb8b149..1996670 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1325,6 +1325,11 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_block_status[i].valid = false; } + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (adev->ip_blocks[i].funcs->late_fini) + adev->ip_blocks[i].funcs->late_fini((void *)adev); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 1cd53c6..10b1be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -183,13 +183,6 @@ static int amdgpu_pp_sw_fini(void *handle) if (ret) return ret; -#ifdef CONFIG_DRM_AMD_POWERPLAY - if (adev->pp_enabled) { - amdgpu_pm_sysfs_fini(adev); - amd_powerplay_fini(adev->powerplay.pp_handle); - } -#endif - return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c index 46410e3..fb88e4e 100644 --- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c +++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c @@ -58,9 +58,6 @@ static void pem_fini(struct pp_eventmgr *eventmgr) pem_unregister_interrupts(eventmgr); pem_handle_event(eventmgr, AMD_PP_EVENT_UNINITIALIZE, &event_data); - - if (eventmgr != NULL) - kfree(eventmgr); } int eventmgr_init(struct pp_instance *handle) -- cgit v1.1 From 768c95e70c4bd33b3da32a15dd33486246f4ca79 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Jun 2016 11:09:01 -0400 Subject: drm/amdgpu: fix fw leak in non-powerplay dpm code We need to release the firmware on driver tear down. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 3 +++ drivers/gpu/drm/amd/amdgpu/fiji_dpm.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/iceland_dpm.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 5 +++++ 4 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index ea407db..5ec1f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -6221,6 +6221,9 @@ static int ci_dpm_sw_fini(void *handle) ci_dpm_fini(adev); mutex_unlock(&adev->pm.mutex); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c index 245cabf..ed03b75 100644 --- a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c @@ -72,6 +72,11 @@ static int fiji_dpm_sw_init(void *handle) static int fiji_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c index 460bc8a..825ccd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c @@ -72,6 +72,11 @@ static int iceland_dpm_sw_init(void *handle) static int iceland_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c index b7615ce..f06f6f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c @@ -71,6 +71,11 @@ static int tonga_dpm_sw_init(void *handle) static int tonga_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } -- cgit v1.1 From 9a005bef5b5b5ceb78ff1138e6d6baf4bbeb8061 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 May 2016 16:55:07 +0800 Subject: drm/amdgpu: clear SA bo when created MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This help fix reloading driver hang issue of SDMA ring Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 8bf84ef..48618ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -115,6 +115,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, return r; } r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); + memset(sa_manager->cpu_ptr, 0, sa_manager->size); amdgpu_bo_unreserve(sa_manager->bo); return r; } -- cgit v1.1 From d72f7c0685870aa7efda0a06f8ca160a94905031 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 May 2016 16:55:50 +0800 Subject: drm/amdgpu: init more register for sdma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This help fix reloading driver hang issue of SDMA ring Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 518dca4..76f73ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -419,6 +419,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index f4c3130..e11a374 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -461,6 +461,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 31d99b00..585d8fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -672,6 +672,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], -- cgit v1.1 From 505dfe76cd3203bb2dcf13d862f46b7f0e95869a Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 25 May 2016 16:57:14 +0800 Subject: drm/amdgpu: modify sdma start sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit should fist halt engine, and then doing the register programing, and later unhalt engine, and finally run ring_test. this help fix reloading driver hang issue of SDMA ring original sequence is wrong for it programing engine after unhalt, which will lead to fault behavior when doing driver reloading after unloaded. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 15 +++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 76f73ab..0079916 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -448,7 +448,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + + cik_sdma_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -531,8 +536,8 @@ static int cik_sdma_start(struct amdgpu_device *adev) if (r) return r; - /* unhalt the MEs */ - cik_sdma_enable(adev, true); + /* halt the engine before programing */ + cik_sdma_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = cik_sdma_gfx_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index e11a374..f6014b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -491,7 +491,11 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + sdma_v2_4_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -582,8 +586,8 @@ static int sdma_v2_4_start(struct amdgpu_device *adev) return -EINVAL; } - /* unhalt the MEs */ - sdma_v2_4_enable(adev, true); + /* halt the engine before programing */ + sdma_v2_4_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = sdma_v2_4_gfx_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 585d8fe..33605d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -713,7 +713,15 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + /* unhalt the MEs */ + sdma_v3_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v3_0_ctx_switch_enable(adev, true); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -806,10 +814,9 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* unhalt the MEs */ - sdma_v3_0_enable(adev, true); - /* enable sdma ring preemption */ - sdma_v3_0_ctx_switch_enable(adev, true); + /* disble sdma engine before programing it */ + sdma_v3_0_ctx_switch_enable(adev, false); + sdma_v3_0_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = sdma_v3_0_gfx_resume(adev); -- cgit v1.1 From fdff8cfa72b3e42d4d0c70684fa18b1dfee46d97 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 24 May 2016 13:23:46 +0800 Subject: drm/amdgpu: vBIOS post only call when mem_size zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1996670..1727a4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1518,8 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_atombios_has_gpu_virtualization_table(adev); /* Post card if necessary */ - if (!amdgpu_card_posted(adev) || - adev->virtualization.supports_sr_iov) { + if (!amdgpu_card_posted(adev)) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; -- cgit v1.1 From 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 27 Apr 2016 18:07:41 +0800 Subject: drm/amdgpu: add pipeline sync while vmid switch in same ctx Since vmid-mgr supports vmid sharing in one vm, the same ctx could get different vmids for two emits without vm flush, vm_flush could be done in another ring. Signed-off-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 992f00b..01c36b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -799,6 +799,7 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + int vmid; }; /* @@ -936,7 +937,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size); + uint32_t oa_base, uint32_t oa_size, + bool vmid_switch); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 34e3542..7a0b1e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -122,6 +122,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; + int vmid = 0, old_vmid = ring->vmid; struct fence *hwf; uint64_t ctx; @@ -135,9 +136,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job) { vm = job->vm; ctx = job->ctx; + vmid = job->vm_id; } else { vm = NULL; ctx = 0; + vmid = 0; } if (!ring->ready) { @@ -163,7 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, job->gds_base, job->gds_size, job->gws_base, job->gws_size, - job->oa_base, job->oa_size); + job->oa_base, job->oa_size, + (ring->current_ctx == ctx) && (old_vmid != vmid)); if (r) { amdgpu_ring_undo(ring); return r; @@ -180,7 +184,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; @@ -188,6 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; + ring->vmid = vmid; } if (ring->funcs->emit_hdp_invalidate) @@ -198,6 +202,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vm_id) amdgpu_vm_reset_id(adev, job->vm_id); + ring->vmid = old_vmid; amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9f36ed3..62a4c12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -298,7 +298,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) + uint32_t oa_base, uint32_t oa_size, + bool vmid_switch) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; @@ -312,8 +313,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || - ring->type == AMDGPU_RING_TYPE_COMPUTE)) + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch)) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && -- cgit v1.1 From 3a3e88804d44e41ef2182d2a6577a6803fdd9ee0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 27 May 2016 17:52:58 +0800 Subject: drm/amdgpu: fix mem leak in smumgr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index c483baf..0728c1e3 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -81,6 +81,7 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle) int smum_fini(struct pp_smumgr *smumgr) { + kfree(smumgr->device); kfree(smumgr); return 0; } -- cgit v1.1 From 61da601b95cd5565d047e42e73f984f5bdfbba70 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 27 May 2016 19:09:06 +0800 Subject: drm/amdgpu: fix mem leak in pplib/hwmgr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 1c48917..910d56d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -93,6 +93,13 @@ int hwmgr_fini(struct pp_hwmgr *hwmgr) if (hwmgr == NULL || hwmgr->ps == NULL) return -EINVAL; + /* do hwmgr finish*/ + kfree(hwmgr->backend); + + kfree(hwmgr->start_thermal_controller.function_list); + + kfree(hwmgr->set_temperature_range.function_list); + kfree(hwmgr->ps); kfree(hwmgr); return 0; -- cgit v1.1 From 89e0ec9f5e27a8c5b5954290bef703dc6aac44f3 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 27 May 2016 19:34:11 +0800 Subject: drm/amdgpu: fix mem leak in atombios MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1727a4d..964f314 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -827,8 +827,10 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) */ static void amdgpu_atombios_fini(struct amdgpu_device *adev) { - if (adev->mode_info.atom_context) + if (adev->mode_info.atom_context) { kfree(adev->mode_info.atom_context->scratch); + kfree(adev->mode_info.atom_context->iio); + } kfree(adev->mode_info.atom_context); adev->mode_info.atom_context = NULL; kfree(adev->mode_info.atom_card_info); -- cgit v1.1 From 9d8f086cd05954e03f10db1a9a52a240d086dc8c Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 13:43:45 +0800 Subject: drm/amdgpu: fix memleak in pptable_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 8 +++- .../amd/powerplay/hwmgr/tonga_processpptables.c | 54 ++++++++++------------ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 8e345bf..e629f8a 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -73,11 +73,14 @@ static int pp_sw_init(void *handle) ret = hwmgr->hwmgr_func->backend_init(hwmgr); if (ret) - goto err; + goto err1; pr_info("amdgpu: powerplay initialized\n"); return 0; +err1: + if (hwmgr->pptable_func->pptable_fini) + hwmgr->pptable_func->pptable_fini(hwmgr); err: pr_err("amdgpu: powerplay initialization failed\n"); return ret; @@ -100,6 +103,9 @@ static int pp_sw_fini(void *handle) if (hwmgr->hwmgr_func->backend_fini != NULL) ret = hwmgr->hwmgr_func->backend_fini(hwmgr); + if (hwmgr->pptable_func->pptable_fini) + hwmgr->pptable_func->pptable_fini(hwmgr); + return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c index 10e3630..296ec7e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c @@ -1040,48 +1040,44 @@ int tonga_pp_tables_uninitialize(struct pp_hwmgr *hwmgr) struct phm_ppt_v1_information *pp_table_information = (struct phm_ppt_v1_information *)(hwmgr->pptable); - if (NULL != hwmgr->soft_pp_table) { - kfree(hwmgr->soft_pp_table); + if (NULL != hwmgr->soft_pp_table) hwmgr->soft_pp_table = NULL; - } - if (NULL != pp_table_information->vdd_dep_on_sclk) - pp_table_information->vdd_dep_on_sclk = NULL; + kfree(pp_table_information->vdd_dep_on_sclk); + pp_table_information->vdd_dep_on_sclk = NULL; - if (NULL != pp_table_information->vdd_dep_on_mclk) - pp_table_information->vdd_dep_on_mclk = NULL; + kfree(pp_table_information->vdd_dep_on_mclk); + pp_table_information->vdd_dep_on_mclk = NULL; - if (NULL != pp_table_information->valid_mclk_values) - pp_table_information->valid_mclk_values = NULL; + kfree(pp_table_information->valid_mclk_values); + pp_table_information->valid_mclk_values = NULL; - if (NULL != pp_table_information->valid_sclk_values) - pp_table_information->valid_sclk_values = NULL; + kfree(pp_table_information->valid_sclk_values); + pp_table_information->valid_sclk_values = NULL; - if (NULL != pp_table_information->vddc_lookup_table) - pp_table_information->vddc_lookup_table = NULL; + kfree(pp_table_information->vddc_lookup_table); + pp_table_information->vddc_lookup_table = NULL; - if (NULL != pp_table_information->vddgfx_lookup_table) - pp_table_information->vddgfx_lookup_table = NULL; + kfree(pp_table_information->vddgfx_lookup_table); + pp_table_information->vddgfx_lookup_table = NULL; - if (NULL != pp_table_information->mm_dep_table) - pp_table_information->mm_dep_table = NULL; + kfree(pp_table_information->mm_dep_table); + pp_table_information->mm_dep_table = NULL; - if (NULL != pp_table_information->cac_dtp_table) - pp_table_information->cac_dtp_table = NULL; + kfree(pp_table_information->cac_dtp_table); + pp_table_information->cac_dtp_table = NULL; - if (NULL != hwmgr->dyn_state.cac_dtp_table) - hwmgr->dyn_state.cac_dtp_table = NULL; + kfree(hwmgr->dyn_state.cac_dtp_table); + hwmgr->dyn_state.cac_dtp_table = NULL; - if (NULL != pp_table_information->ppm_parameter_table) - pp_table_information->ppm_parameter_table = NULL; + kfree(pp_table_information->ppm_parameter_table); + pp_table_information->ppm_parameter_table = NULL; - if (NULL != pp_table_information->pcie_table) - pp_table_information->pcie_table = NULL; + kfree(pp_table_information->pcie_table); + pp_table_information->pcie_table = NULL; - if (NULL != hwmgr->pptable) { - kfree(hwmgr->pptable); - hwmgr->pptable = NULL; - } + kfree(hwmgr->pptable); + hwmgr->pptable = NULL; return result; } -- cgit v1.1 From 67a6a504af90e58c478b2e7fa6c0af8ed64c995b Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 14:17:42 +0800 Subject: drm/amdgpu: fix missing free wb for cond_exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 3b02272..870f949 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -343,6 +343,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->ring = NULL; ring->ring_obj = NULL; + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); amdgpu_wb_free(ring->adev, ring->fence_offs); amdgpu_wb_free(ring->adev, ring->rptr_offs); amdgpu_wb_free(ring->adev, ring->wptr_offs); -- cgit v1.1 From 13331ac384e8211a0bf3158a895ac8b22005a622 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 14:18:57 +0800 Subject: drm/amdgpu: fix gfx8 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4d747ba..9f6f866 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -836,6 +836,26 @@ err1: return r; } + +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + if ((adev->asic_type != CHIP_STONEY) && + (adev->asic_type != CHIP_TOPAZ)) + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + + kfree(adev->gfx.rlc.register_list_format); +} + static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -1983,7 +2003,7 @@ static int gfx_v8_0_sw_fini(void *handle) gfx_v8_0_rlc_fini(adev); - kfree(adev->gfx.rlc.register_list_format); + gfx_v8_0_free_microcode(adev); return 0; } -- cgit v1.1 From e517cd77ee76a26f3542d51a1b4f4d7c452f85cf Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 16:01:48 +0800 Subject: drm/amdgpu: fix gfx 7 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7f18a53..8c6ad1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -991,6 +991,22 @@ out: return err; } +static void gfx_v7_0_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; +} + /** * gfx_v7_0_tiling_mode_table_init - init the hw tiling table * @@ -4489,6 +4505,7 @@ static int gfx_v7_0_sw_fini(void *handle) gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_rlc_fini(adev); gfx_v7_0_mec_fini(adev); + gfx_v7_0_free_microcode(adev); return 0; } -- cgit v1.1 From 05f19eb5bd1d46aeeeb86a2a8538f35d7d55eb34 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:13:59 +0800 Subject: drm/amdgpu: fix uvd fini mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 01abfc2..e19520c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -253,19 +253,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int r; - if (adev->uvd.vcpu_bo == NULL) - return 0; + kfree(adev->uvd.saved_bo); amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); - r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); - if (!r) { - amdgpu_bo_kunmap(adev->uvd.vcpu_bo); - amdgpu_bo_unpin(adev->uvd.vcpu_bo); - amdgpu_bo_unreserve(adev->uvd.vcpu_bo); - } + if (adev->uvd.vcpu_bo) { + r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); + if (!r) { + amdgpu_bo_kunmap(adev->uvd.vcpu_bo); + amdgpu_bo_unpin(adev->uvd.vcpu_bo); + amdgpu_bo_unreserve(adev->uvd.vcpu_bo); + } - amdgpu_bo_unref(&adev->uvd.vcpu_bo); + amdgpu_bo_unref(&adev->uvd.vcpu_bo); + } amdgpu_ring_fini(&adev->uvd.ring); -- cgit v1.1 From 14d83e78c578a6c45163fb399ee760fe0d314bad Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:15:32 +0800 Subject: drm/amdgpu: fix sdma3 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 33605d4..532ea88 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -236,6 +236,15 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v3_0_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /** * sdma_v3_0_init_microcode - load ucode images from disk * @@ -1256,6 +1265,7 @@ static int sdma_v3_0_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + sdma_v3_0_free_microcode(adev); return 0; } -- cgit v1.1 From 9c55c5204445689c1d6b7b60e89de7f8fcf8a77f Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 16:05:58 +0800 Subject: drm/amdgpu: fix sdma24 ucode mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index f6014b0..b556bd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -105,6 +105,15 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v2_4_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /** * sdma_v2_4_init_microcode - load ucode images from disk * @@ -1018,6 +1027,7 @@ static int sdma_v2_4_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + sdma_v2_4_free_microcode(adev); return 0; } -- cgit v1.1 From d1ff53b7c2aa6e8c9dbd37ea7d858eeaba1ecb4a Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 16:07:40 +0800 Subject: drm/amdgpu: fix cik sdma ucode memleak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 0079916..9dc4e24 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -66,6 +66,16 @@ MODULE_FIRMWARE("radeon/mullins_sdma1.bin"); u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + +static void cik_sdma_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /* * sDMA - System DMA * Starting with CIK, the GPU has new asynchronous @@ -1005,6 +1015,7 @@ static int cik_sdma_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + cik_sdma_free_microcode(adev); return 0; } -- cgit v1.1 From e6232effab9091472689b1c5604a7e59d320a8e0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:16:04 +0800 Subject: drm/amdgpu: fix fiji smu fini mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 673a75c..0ac8642 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -1006,6 +1006,10 @@ static int fiji_smu_init(struct pp_smumgr *smumgr) static int fiji_smu_fini(struct pp_smumgr *smumgr) { + struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); + + smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle); + if (smumgr->backend) { kfree(smumgr->backend); smumgr->backend = NULL; -- cgit v1.1 From 86e4cdd675f489e0b3deaa3d6b75cddadd16e71c Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 30 May 2016 15:16:26 +0800 Subject: drm/amdgpu: fix tonga smu_fini mem leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 32820b6..70d3ecf 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -328,6 +328,11 @@ int tonga_write_smc_sram_dword(struct pp_smumgr *smumgr, static int tonga_smu_fini(struct pp_smumgr *smumgr) { + struct tonga_smumgr *priv = (struct tonga_smumgr *)(smumgr->backend); + + smu_free_memory(smumgr->device, (void *)priv->smu_buffer.handle); + smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle); + if (smumgr->backend != NULL) { kfree(smumgr->backend); smumgr->backend = NULL; -- cgit v1.1 From a392746a8c38de494a1a2d00c5cfd34a05449e35 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 31 May 2016 13:44:30 +0800 Subject: drm/amdgpu: add release firmware for cgs Powerplay uses cgs to load the firmware so add a function to release it as well to avoid leaking it on driver unload. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 12 ++++++++++++ drivers/gpu/drm/amd/include/cgs_common.h | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 199f76b..8943099 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -696,6 +696,17 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) return result; } +static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) +{ + CGS_FUNC_ADEV; + if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { + release_firmware(adev->pm.fw); + return 0; + } + /* cannot release other firmware because they are not created by cgs */ + return -EINVAL; +} + static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -1125,6 +1136,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { amdgpu_cgs_pm_query_clock_limits, amdgpu_cgs_set_camera_voltages, amdgpu_cgs_get_firmware_info, + amdgpu_cgs_rel_firmware, amdgpu_cgs_set_powergating_state, amdgpu_cgs_set_clockgating_state, amdgpu_cgs_get_active_displays_info, diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index a461e15..7464daf 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -581,6 +581,9 @@ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info); +typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device, + enum cgs_ucode_id type); + typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_powergating_state state); @@ -645,6 +648,7 @@ struct cgs_ops { cgs_set_camera_voltages_t set_camera_voltages; /* Firmware Info */ cgs_get_firmware_info get_firmware_info; + cgs_rel_firmware rel_firmware; /* cg pg interface*/ cgs_set_powergating_state set_powergating_state; cgs_set_clockgating_state set_clockgating_state; @@ -738,6 +742,8 @@ struct cgs_device CGS_CALL(set_camera_voltages,dev,mask,voltages) #define cgs_get_firmware_info(dev, type, info) \ CGS_CALL(get_firmware_info, dev, type, info) +#define cgs_rel_firmware(dev, type) \ + CGS_CALL(rel_firmware, dev, type) #define cgs_set_powergating_state(dev, block_type, state) \ CGS_CALL(set_powergating_state, dev, block_type, state) #define cgs_set_clockgating_state(dev, block_type, state) \ -- cgit v1.1 From 5bbc16cc7be89dbe6dd824570456c3340b6d2ef7 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 31 May 2016 13:44:48 +0800 Subject: drm/amdgpu: fix smu ucode memleak (v2) Properly release the smu ucode in powerplay. v2: agd: add polaris as well Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c | 2 ++ drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 1 + drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 0ac8642..8e52a2e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -1014,6 +1014,8 @@ static int fiji_smu_fini(struct pp_smumgr *smumgr) kfree(smumgr->backend); smumgr->backend = NULL; } + + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index de618ea..043b6ac 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -469,6 +469,7 @@ int polaris10_smu_fini(struct pp_smumgr *smumgr) kfree(smumgr->backend); smumgr->backend = NULL; } + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 70d3ecf..b22722e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -337,6 +337,8 @@ static int tonga_smu_fini(struct pp_smumgr *smumgr) kfree(smumgr->backend); smumgr->backend = NULL; } + + cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU); return 0; } -- cgit v1.1 From d2e312183b62cde0c44af35664f3b104b247dd9c Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 2 Jun 2016 19:11:01 +0800 Subject: drm/amd/powerplay: fix bug visit array out of bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rex Zhu Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 24a16e4..586f732 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -1830,7 +1830,7 @@ static uint16_t fiji_find_closest_vddci(struct pp_hwmgr *hwmgr, uint16_t vddci) PP_ASSERT_WITH_CODE(false, "VDDCI is larger than max VDDCI in VDDCI Voltage Table!", - return vddci_table->entries[i].value); + return vddci_table->entries[i-1].value); } static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 910d56d..20f20e0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -469,7 +469,7 @@ uint16_t phm_find_closest_vddci(struct pp_atomctrl_voltage_table *vddci_table, u PP_ASSERT_WITH_CODE(false, "VDDCI is larger than max VDDCI in VDDCI Voltage Table!", - return vddci_table->entries[i].value); + return vddci_table->entries[i-1].value); } int phm_find_boot_level(void *table, -- cgit v1.1 From 5f96ddb4607382528ef2eb23b49ce1856fdb316d Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 3 Jun 2016 19:12:42 +0800 Subject: drm/amd/powerplay: delete useless code as pptable changed in vbios. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vbios table changed so this code is useless now. Signed-off-by: Rex Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index 16fed48..d27e8c4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -2847,27 +2847,6 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) } } - /* Initialize Vddc DPM table based on allow Vddc values. And populate corresponding std values. */ - for (i = 0; i < allowed_vdd_sclk_table->count; i++) { - data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddc; - /* tonga_hwmgr->dpm_table.VddcTable.dpm_levels[i].param1 = stdVoltageTable->entries[i].Leakage; */ - /* param1 is for corresponding std voltage */ - data->dpm_table.vddc_table.dpm_levels[i].enabled = 1; - } - data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count; - - if (NULL != allowed_vdd_mclk_table) { - /* Initialize Vddci DPM table based on allow Mclk values */ - for (i = 0; i < allowed_vdd_mclk_table->count; i++) { - data->dpm_table.vdd_ci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddci; - data->dpm_table.vdd_ci_table.dpm_levels[i].enabled = 1; - data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].mvdd; - data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1; - } - data->dpm_table.vdd_ci_table.count = allowed_vdd_mclk_table->count; - data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count; - } - /* setup PCIE gen speed levels*/ tonga_setup_default_pcie_tables(hwmgr); -- cgit v1.1 From be94535f95313a013b844b563ef15ddd8fb43da8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jun 2016 09:51:39 +0200 Subject: mlxsw: spectrum: Make split flow match firmware requirements When a port is created following a split / unsplit we need to map it to the correct module and lane, enable it and then continue to initialize its various parameters such as MTU and VLAN filters. Under certain conditions, such as trying to split ports at the bottom row of the front panel by four, we get firmware errors. After evaluating this with the firmware team it was decided to alter the split / unsplit flow, so that first all the affected ports are mapped, then enabled and finally each is initialized separately. Fix the split / unsplit flow by first mapping and enabling all the affected ports. Newer firmware versions will support both flows. Fixes: 18f1e70c4137 ("mlxsw: spectrum: Introduce port splitting") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 147 ++++++++++++++++--------- 1 file changed, 95 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4a72737..cc62d4c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -247,15 +247,23 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); } -static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) +static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 swid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pspa_pl[MLXSW_REG_PSPA_LEN]; - mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port); + mlxsw_reg_pspa_pack(pspa_pl, swid, local_port); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); } +static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return __mlxsw_sp_port_swid_set(mlxsw_sp, mlxsw_sp_port->local_port, + swid); +} + static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) { @@ -1681,8 +1689,8 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -1839,28 +1847,6 @@ err_port_active_vlans_alloc: return err; } -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) -{ - int err; - - err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, - lane); - if (err) - return err; - - err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module, - width); - if (err) - goto err_port_create; - - return 0; - -err_port_create: - mlxsw_sp_port_module_unmap(mlxsw_sp, local_port); - return err; -} - static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; @@ -1927,7 +1913,7 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); if (err) goto err_port_create; } @@ -1948,12 +1934,85 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) return local_port - offset; } +static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, + u8 module, unsigned int count) +{ + u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; + int err, i; + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module, + width, i * width); + if (err) + goto err_port_module_map; + } + + for (i = 0; i < count; i++) { + err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0); + if (err) + goto err_port_swid_set; + } + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, + module, width); + if (err) + goto err_port_create; + } + + return 0; + +err_port_create: + for (i--; i >= 0; i--) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + i = count; +err_port_swid_set: + for (i--; i >= 0; i--) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, + MLXSW_PORT_SWID_DISABLED_PORT); + i = count; +err_port_module_map: + for (i--; i >= 0; i--) + mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i); + return err; +} + +static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, + u8 base_port, unsigned int count) +{ + u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH; + int i; + + /* Split by four means we need to re-create two ports, otherwise + * only one. + */ + count = count / 2; + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, + 0); + } + + for (i = 0; i < count; i++) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0); + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, + width); + } +} + static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, unsigned int count) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; u8 module, cur_width, base_port; int i; int err; @@ -2001,25 +2060,16 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count; i++) { - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width, i * width); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n"); - goto err_port_create; - } + err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n"); + goto err_port_split_create; } return 0; -err_port_create: - for (i--; i >= 0; i--) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, 0); - } +err_port_split_create: + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return err; } @@ -2061,14 +2111,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, - 0); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n"); - } + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return 0; } -- cgit v1.1 From d664b41e2adf5851e4d0d39f450b2f3f808b65d6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jun 2016 09:51:40 +0200 Subject: mlxsw: spectrum: Don't sleep during ndo_get_phys_port_name() When rtnl_fill_ifinfo() is called for a certain netdevice it queries its various parameters such as switch id and physical port name. The function might get called in an atomic context, which means the underlying driver must not sleep during the query operation. Don't query the device and sleep during ndo_get_phys_port_name(), but instead store the needed parameters in port creation time. Fixes: 2bf9a58675c5 ("mlxsw: spectrum: Add support for physical port names") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 64 +++++++++----------------- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 5 ++ 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cc62d4c..6f9e3dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -313,9 +313,9 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); } -static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width, u8 *p_lane) +static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 *p_module, + u8 *p_width, u8 *p_lane) { char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; @@ -330,16 +330,6 @@ static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width) -{ - u8 lane; - - return __mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, p_module, - p_width, &lane); -} - static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 module, u8 width, u8 lane) { @@ -957,17 +947,11 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, size_t len) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - u8 module, width, lane; + u8 module = mlxsw_sp_port->mapping.module; + u8 width = mlxsw_sp_port->mapping.width; + u8 lane = mlxsw_sp_port->mapping.lane; int err; - err = __mlxsw_sp_port_module_info_get(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - &module, &width, &lane); - if (err) { - netdev_err(dev, "Failed to retrieve module information\n"); - return err; - } - if (!mlxsw_sp_port->split) err = snprintf(name, len, "p%d", module + 1); else @@ -1690,7 +1674,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) } static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) + bool split, u8 module, u8 width, u8 lane) { struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *dev; @@ -1705,6 +1689,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; mlxsw_sp_port->split = split; + mlxsw_sp_port->mapping.module = module; + mlxsw_sp_port->mapping.width = width; + mlxsw_sp_port->mapping.lane = lane; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { @@ -1895,8 +1882,8 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { + u8 module, width, lane; size_t alloc_size; - u8 module, width; int i; int err; @@ -1907,13 +1894,14 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, - &width); + &width, &lane); if (err) goto err_port_module_info_get; if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width, + lane); if (err) goto err_port_create; } @@ -1955,7 +1943,7 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, for (i = 0; i < count; i++) { err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width); + module, width, i * width); if (err) goto err_port_create; } @@ -2004,7 +1992,7 @@ static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, module = mlxsw_sp->port_to_module[local_port]; mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, - width); + width, 0); } } @@ -2024,18 +2012,14 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, return -EINVAL; } + module = mlxsw_sp_port->mapping.module; + cur_width = mlxsw_sp_port->mapping.width; + if (count != 2 && count != 4) { netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n"); return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } - if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) { netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); return -EINVAL; @@ -2077,10 +2061,9 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 module, cur_width, base_port; + u8 cur_width, base_port; unsigned int count; int i; - int err; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { @@ -2094,12 +2077,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } + cur_width = mlxsw_sp_port->mapping.width; count = cur_width == 1 ? 4 : 2; base_port = mlxsw_sp_cluster_base_port_get(local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e2c022d..13b30ea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -229,6 +229,11 @@ struct mlxsw_sp_port { struct ieee_maxrate *maxrate; struct ieee_pfc *pfc; } dcb; + struct { + u8 module; + u8 width; + u8 lane; + } mapping; /* 802.1Q bridge VLANs */ unsigned long *active_vlans; unsigned long *untagged_vlans; -- cgit v1.1 From 67961f9db8c477026ea20ce05761bde6f8bf85b0 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 8 Jun 2016 15:33:42 -0700 Subject: mm/hugetlb: fix huge page reserve accounting for private mappings When creating a private mapping of a hugetlbfs file, it is possible to unmap pages via ftruncate or fallocate hole punch. If subsequent faults repopulate these mappings, the reserve counts will go negative. This is because the code currently assumes all faults to private mappings will consume reserves. The problem can be recreated as follows: - mmap(MAP_PRIVATE) a file in hugetlbfs filesystem - write fault in pages in the mapping - fallocate(FALLOC_FL_PUNCH_HOLE) some pages in the mapping - write fault in pages in the hole This will result in negative huge page reserve counts and negative subpool usage counts for the hugetlbfs. Note that this can also be recreated with ftruncate, but fallocate is more straight forward. This patch modifies the routines vma_needs_reserves and vma_has_reserves to examine the reserve map associated with private mappings similar to that for shared mappings. However, the reserve map semantics for private and shared mappings are very different. This results in subtly different code that is explained in the comments. Link: http://lkml.kernel.org/r/1464720957-15698-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Hillf Danton Cc: Dave Hansen Cc: Kirill Shutemov Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Aneesh Kumar Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d26162e..388c2bb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) * Only the process that called mmap() has reserves for * private mappings. */ - if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) - return true; + if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { + /* + * Like the shared case above, a hole punch or truncate + * could have been performed on the private mapping. + * Examine the value of chg to determine if reserves + * actually exist or were previously consumed. + * Very Subtle - The value of chg comes from a previous + * call to vma_needs_reserves(). The reserve map for + * private mappings has different (opposite) semantics + * than that of shared mappings. vma_needs_reserves() + * has already taken this difference in semantics into + * account. Therefore, the meaning of chg is the same + * as in the shared case above. Code could easily be + * combined, but keeping it separate draws attention to + * subtle differences. + */ + if (chg) + return false; + else + return true; + } return false; } @@ -1816,6 +1835,25 @@ static long __vma_reservation_common(struct hstate *h, if (vma->vm_flags & VM_MAYSHARE) return ret; + else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) { + /* + * In most cases, reserves always exist for private mappings. + * However, a file associated with mapping could have been + * hole punched or truncated after reserves were consumed. + * As subsequent fault on such a range will not use reserves. + * Subtle - The reserve map for private mappings has the + * opposite meaning than that of shared mappings. If NO + * entry is in the reserve map, it means a reservation exists. + * If an entry exists in the reserve map, it means the + * reservation has already been consumed. As a result, the + * return value of this routine is the opposite of the + * value returned from reserve map manipulation routines above. + */ + if (ret) + return 0; + else + return 1; + } else return ret < 0 ? ret : 0; } -- cgit v1.1 From 91a4c272145652d798035c17e1c02c91001d3f51 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 8 Jun 2016 15:33:45 -0700 Subject: kasan: change memory hot-add error messages to info messages Change the following memory hot-add error messages to info messages. There is no need for these to be errors. kasan: WARNING: KASAN doesn't support memory hot-add kasan: Memory hot-add will be disabled Link: http://lkml.kernel.org/r/1464794430-5486-1-git-send-email-shuahkh@osg.samsung.com Signed-off-by: Shuah Khan Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 18b6a2b..28439ac 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -763,8 +763,8 @@ static int kasan_mem_notifier(struct notifier_block *nb, static int __init kasan_memhotplug_init(void) { - pr_err("WARNING: KASAN doesn't support memory hot-add\n"); - pr_err("Memory hot-add will be disabled\n"); + pr_info("WARNING: KASAN doesn't support memory hot-add\n"); + pr_info("Memory hot-add will be disabled\n"); hotplug_memory_notifier(kasan_mem_notifier, 0); -- cgit v1.1 From d0db7afa1b767d95e3e14632718da5a9794129bc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 8 Jun 2016 15:33:47 -0700 Subject: revert "mm: memcontrol: fix possible css ref leak on oom" Revert commit 1383399d7be0 ("mm: memcontrol: fix possible css ref leak on oom"). Johannes points out "There is a task_in_memcg_oom() check before calling mem_cgroup_oom()". Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 58c69c9..75e7440 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1608,7 +1608,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - if (!current->memcg_may_oom || current->memcg_in_oom) + if (!current->memcg_may_oom) return; /* * We are in the middle of the charge context here, so we -- cgit v1.1 From 770a5370226cb207461bbad902543381c1fad521 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 8 Jun 2016 15:33:50 -0700 Subject: mm: thp: broken page count after commit aa88b68c3b1d Christian Borntraeger reported a kernel panic after corrupt page counts, and it turned out to be a regression introduced with commit aa88b68c3b1d ("thp: keep huge zero page pinned until tlb flush"), at least on s390. put_huge_zero_page() was moved over from zap_huge_pmd() to release_pages(), and it was replaced by tlb_remove_page(). However, release_pages() might not always be triggered by (the arch-specific) tlb_remove_page(). On s390 we call free_page_and_swap_cache() from tlb_remove_page(), and not tlb_flush_mmu() -> free_pages_and_swap_cache() like the generic version, because we don't use the MMU-gather logic. Although both functions have very similar names, they are doing very unsimilar things, in particular free_page_xxx is just doing a put_page(), while free_pages_xxx calls release_pages(). This of course results in very harmful put_page()s on the huge zero page, on architectures where tlb_remove_page() is implemented in this way. It seems to affect only s390 and sh, but sh doesn't have THP support, so the problem (currently) probably only exists on s390. The following quick hack fixed the issue: Link: http://lkml.kernel.org/r/20160602172141.75c006a9@thinkpad Signed-off-by: Gerald Schaefer Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Dave Hansen Cc: Vlastimil Babka Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: [4.6.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_state.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index 0d457e7..c99463a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -252,7 +252,10 @@ static inline void free_swap_cache(struct page *page) void free_page_and_swap_cache(struct page *page) { free_swap_cache(page); - put_page(page); + if (is_huge_zero_page(page)) + put_huge_zero_page(); + else + put_page(page); } /* -- cgit v1.1 From ba62bafe942b159a6109cbec780d36496e06b6c5 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 8 Jun 2016 15:33:53 -0700 Subject: kernel/relay.c: fix potential memory leak When relay_open_buf() fails in relay_open(), code will goto free_bufs, but chan is nowhere freed. Link: http://lkml.kernel.org/r/1464777927-19675-1-git-send-email-yizhouzhou@ict.ac.cn Signed-off-by: Zhouyi Zhou Cc: Jens Axboe Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/relay.c b/kernel/relay.c index 074994b..04d7cf3 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -614,6 +614,7 @@ free_bufs: kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); + kfree(chan); return NULL; } EXPORT_SYMBOL_GPL(relay_open); -- cgit v1.1 From f3a932baa7f65072434f1c04c02c8a4d2746fcfc Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Wed, 8 Jun 2016 15:33:56 -0700 Subject: mm: introduce dedicated WQ_MEM_RECLAIM workqueue to do lru_add_drain_all This patch is based on https://patchwork.ozlabs.org/patch/574623/. Tejun submitted commit 23d11a58a9a6 ("workqueue: skip flush dependency checks for legacy workqueues") for the legacy create*_workqueue() interface. But some workq created by alloc_workqueue still reports warning on memory reclaim, e.g nvme_workq with flag WQ_MEM_RECLAIM set: workqueue: WQ_MEM_RECLAIM nvme:nvme_reset_work is flushing !WQ_MEM_RECLAIM events:lru_add_drain_per_cpu ------------[ cut here ]------------ WARNING: CPU: 0 PID: 6 at SoC/linux/kernel/workqueue.c:2448 check_flush_dependency+0xb4/0x10c ... check_flush_dependency+0xb4/0x10c flush_work+0x54/0x140 lru_add_drain_all+0x138/0x188 migrate_prep+0xc/0x18 alloc_contig_range+0xf4/0x350 cma_alloc+0xec/0x1e4 dma_alloc_from_contiguous+0x38/0x40 __dma_alloc+0x74/0x25c nvme_alloc_queue+0xcc/0x36c nvme_reset_work+0x5c4/0xda8 process_one_work+0x128/0x2ec worker_thread+0x58/0x434 kthread+0xd4/0xe8 ret_from_fork+0x10/0x50 That's because lru_add_drain_all() will schedule the drain work on system_wq, whose flag is set to 0, !WQ_MEM_RECLAIM. Introduce a dedicated WQ_MEM_RECLAIM workqueue to do lru_add_drain_all(), aiding in getting memory freed. Link: http://lkml.kernel.org/r/1464917521-9775-1-git-send-email-shhuiw@foxmail.com Signed-off-by: Wang Sheng-Hui Acked-by: Tejun Heo Cc: Keith Busch Cc: Peter Zijlstra Cc: Thierry Reding Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index 9591614..59f5faf 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -667,6 +667,24 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); +/* + * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM + * workqueue, aiding in getting memory freed. + */ +static struct workqueue_struct *lru_add_drain_wq; + +static int __init lru_init(void) +{ + lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0); + + if (WARN(!lru_add_drain_wq, + "Failed to create workqueue lru_add_drain_wq")) + return -ENOMEM; + + return 0; +} +early_initcall(lru_init); + void lru_add_drain_all(void) { static DEFINE_MUTEX(lock); @@ -686,7 +704,7 @@ void lru_add_drain_all(void) pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || need_activate_page_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); - schedule_work_on(cpu, work); + queue_work_on(cpu, lru_add_drain_wq, work); cpumask_set_cpu(cpu, &has_work); } } -- cgit v1.1 From 18aba41cbfbcd138e9f6d8d446427d8b7691c194 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Wed, 8 Jun 2016 15:33:59 -0700 Subject: mm/fadvise.c: do not discard partial pages with POSIX_FADV_DONTNEED I noticed that the logic in the fadvise64_64 syscall is incorrect for partial pages. While first page of the region is correctly skipped if it is partial, the last page of the region is mistakenly discarded. This leads to problems for applications that read data in non-page-aligned chunks discarding already processed data between the reads. A somewhat misguided application that does something like write(XX bytes (non-page-alligned)); drop the data it just wrote; repeat gets a significant penalty in performance as a result. Link: http://lkml.kernel.org/r/1464917140-1506698-1-git-send-email-green@linuxhacker.ru Signed-off-by: Oleg Drokin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/fadvise.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/fadvise.c b/mm/fadvise.c index b8024fa..6c707bf 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -126,6 +126,17 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) */ start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; end_index = (endbyte >> PAGE_SHIFT); + if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) { + /* First page is tricky as 0 - 1 = -1, but pgoff_t + * is unsigned, so the end_index >= start_index + * check below would be true and we'll discard the whole + * file cache which is not what was asked. + */ + if (end_index == 0) + break; + + end_index--; + } if (end_index >= start_index) { unsigned long count = invalidate_mapping_pages(mapping, -- cgit v1.1 From 29ccf7590ec49647b3442b6b2c64c4406a931c80 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Jun 2016 11:40:49 +1000 Subject: drm/amdgpu: fix warning with powerplay disabled. This just fixes a warning when you disable powerplay. Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 10b1be5..8225655 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -218,9 +218,9 @@ static int amdgpu_pp_hw_fini(void *handle) static void amdgpu_pp_late_fini(void *handle) { +#ifdef CONFIG_DRM_AMD_POWERPLAY struct amdgpu_device *adev = (struct amdgpu_device *)handle; -#ifdef CONFIG_DRM_AMD_POWERPLAY if (adev->pp_enabled) { amdgpu_pm_sysfs_fini(adev); amd_powerplay_fini(adev->powerplay.pp_handle); -- cgit v1.1 From 418f8399a8bedf376ec13eb01088f04a76ebdd6f Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:28 +0300 Subject: net/mlx5: Fix the size of modify QP mailbox Add 16 reserved bytes at the end of mlx5_modify_qp_mbox_in to match the hardware spec definition. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6422102..1532dcf 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -560,6 +560,7 @@ struct mlx5_modify_qp_mbox_in { __be32 optparam; u8 rsvd0[4]; struct mlx5_qp_context ctx; + u8 rsvd2[16]; }; struct mlx5_modify_qp_mbox_out { -- cgit v1.1 From 9cd3411c42c5d5ba55d6e745edfe7df53c1ffa41 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:29 +0300 Subject: net/mlx5: Fix masking of reserved bits in XRCD number Mask the reserved bits when reading the number of newly created XRCD. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b720a27..b82d658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -418,7 +418,7 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) if (out.hdr.status) err = mlx5_cmd_status_to_err(&out.hdr); else - *xrcdn = be32_to_cpu(out.xrcdn); + *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff; return err; } -- cgit v1.1 From 86d56a1a6b7352542661d8a9463758c7f285fce3 Mon Sep 17 00:00:00 2001 From: Shahar Klein Date: Fri, 10 Jun 2016 00:07:30 +0300 Subject: net/mlx5: Fix MLX5_CMD_OP_MAX to be defined correctly Having MLX5_CMD_OP_MAX on another file causes us to repeatedly miss accounting new commands added to the driver and hence there're no entries for them in debugfs. To solve that, we integrate it into the commands enum as the last entry. Fixes: 34a40e689393 ('net/mlx5_core: Introduce modify flow table command') Signed-off-by: Shahar Klein Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 2 -- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 035abdf..51f0caf 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1240,8 +1240,6 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; -#define MLX5_CMD_OP_MAX 0x920 - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7..986a615 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -205,7 +205,8 @@ enum { MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, - MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_MAX }; struct mlx5_ifc_flow_table_fields_supported_bits { -- cgit v1.1 From 2fee37a47cebc26d58eec5dafc8ba787a6ee5350 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:31 +0300 Subject: net/mlx5: Fix root flow table update When we destroy the last flow table we need to update the root_ft to NULL. It fixes an issue for when the last flow table is destroyed and recreated again, root_ft pointer will not be updated, as a result traffic will be dropped. Fixes: 2cc43b494a6c ('net/mlx5_core: Managing root flow table') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8b5f0b2..fa6fec1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1292,8 +1292,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) ft->id); return err; } - root->root_ft = new_root_ft; } + root->root_ft = new_root_ft; return 0; } -- cgit v1.1 From 876d634d19e41603aab91455f2c52a78a28372d5 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:32 +0300 Subject: net/mlx5: Fix flow steering NIC capabilities check Flow steering infrastructure is currently used only on link layer ethernet, therefore the driver should initialize the flow steering when the device link layer is ethernet. In addition, add missing capability check before initializing the namespace of NIC RX flow tables. Fixes: 2530236303d9 ('net/mlx5_core: Flow steering tree initialization') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +++++++++- include/linux/mlx5/device.h | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fa6fec1..c1efa55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1767,6 +1767,9 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + cleanup_root_ns(dev); cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); @@ -1828,15 +1831,20 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) { int err = 0; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + err = mlx5_init_fc_stats(dev); if (err) return err; - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(dev); if (err) goto err; } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { err = init_fdb_root_ns(dev); if (err) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 51f0caf..73a4847 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1367,6 +1367,12 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) -- cgit v1.1 From bd02ef8eec0b98abe6d5efe280c87903b2eb9874 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:33 +0300 Subject: net/mlx5: Fix E-Switch flow steering capabilities check Add missing capabilities check for E-Switch FDB and ACLs flow tables before creating their namespace in flow steering. Fixes: efdc810ba39d ('net/mlx5: Flow steering, Add vport ACL support') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c1efa55..e912a3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1846,19 +1846,21 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = init_fdb_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = init_egress_acl_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acl_root_ns(dev); - if (err) - goto err; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = init_egress_acl_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = init_ingress_acl_root_ns(dev); + if (err) + goto err; + } } return 0; -- cgit v1.1 From 3fe3d819d5015d56d0d7289ae16db5e612640c5b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Jun 2016 00:07:34 +0300 Subject: net/mlx5: E-Switch, Use the correct free() function We must use kvfree() for something that could have been allocated with vzalloc(), do that. Fixes: 5742df0f7dbe ('net/mlx5: E-Switch, Introduce VST vport ingress/egress ACLs') Fixes: 86d722ad2c3b ('net/mlx5: Use flow steering infrastructure for mlx5_en') Signed-off-by: Or Gerlitz Reported-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b84a691..5374796 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -529,7 +529,7 @@ out: } } - kfree(flow_group_in); + kvfree(flow_group_in); return err; } @@ -1097,7 +1097,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->egress.drop_grp = drop_grp; vport->egress.allowed_vlans_grp = vlan_grp; out: - kfree(flow_group_in); + kvfree(flow_group_in); if (err && !IS_ERR_OR_NULL(vlan_grp)) mlx5_destroy_flow_group(vlan_grp); if (err && !IS_ERR_OR_NULL(acl)) @@ -1259,7 +1259,7 @@ out: mlx5_destroy_flow_table(vport->ingress.acl); } - kfree(flow_group_in); + kvfree(flow_group_in); } static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, -- cgit v1.1 From 3f42ac6648723e906c1c10edc0c523aff29963cc Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Jun 2016 00:07:35 +0300 Subject: net/mlx5: E-Switch, Use the correct error check on returned pointers The mlx5 flow-steering API (mlx5_create_flow_table/group/rule) never returns null pointer on error. Even if it was doing that, checking for IS_ERR_OR_NULL(p) and then returning PTR_ERR(p) would have cause bugs, since PTR_ERR(NULL) --> success, crash. To make things more robust and protect against related future bugs, convert all IS_ERR_OR_NULL checks on returned values to IS_ERR. Fixes: 5742df0f7dbe ('net/mlx5: E-Switch, Introduce VST vport ingress/egress ACLs') Fixes: 86d722ad2c3b ('net/mlx5: Use flow steering infrastructure for mlx5_en') Signed-off-by: Or Gerlitz Reported-by: Ilya Lesokhin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 34 +++++++++++------------ 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5374796..a350af2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -383,7 +383,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); - if (IS_ERR_OR_NULL(flow_rule)) { + if (IS_ERR(flow_rule)) { pr_warn( "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); @@ -457,7 +457,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); - if (IS_ERR_OR_NULL(fdb)) { + if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); goto out; @@ -474,7 +474,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); eth_broadcast_addr(dmac); g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; @@ -489,7 +489,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) eth_zero_addr(dmac); dmac[0] = 0x01; g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; @@ -506,7 +506,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; @@ -1060,7 +1060,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, return; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR_OR_NULL(acl)) { + if (IS_ERR(acl)) { err = PTR_ERR(acl); esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", vport->vport, err); @@ -1075,7 +1075,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); vlan_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(vlan_grp)) { + if (IS_ERR(vlan_grp)) { err = PTR_ERR(vlan_grp); esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", vport->vport, err); @@ -1086,7 +1086,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); drop_grp = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(drop_grp)) { + if (IS_ERR(drop_grp)) { err = PTR_ERR(drop_grp); esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", vport->vport, err); @@ -1174,7 +1174,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, return; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR_OR_NULL(acl)) { + if (IS_ERR(acl)) { err = PTR_ERR(acl); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", vport->vport, err); @@ -1192,7 +1192,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", vport->vport, err); @@ -1207,7 +1207,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", vport->vport, err); @@ -1223,7 +1223,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", vport->vport, err); @@ -1236,7 +1236,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); g = mlx5_create_flow_group(acl, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", vport->vport, err); @@ -1363,7 +1363,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); - if (IS_ERR_OR_NULL(vport->ingress.allow_rule)) { + if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", vport->vport, err); @@ -1380,7 +1380,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); - if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) { + if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", vport->vport, err); @@ -1439,7 +1439,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); - if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", vport->vport, err); @@ -1457,7 +1457,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, match_v, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); - if (IS_ERR_OR_NULL(vport->egress.drop_rule)) { + if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err); -- cgit v1.1 From 25fff58cb24c8880090d8b8ef7746001a135e876 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 10 Jun 2016 00:07:36 +0300 Subject: net/mlx5: E-Switch, Fix vport enable flow Reorder vport enable flow to mark the vport as enabled before calling the vport change handler which was modified to handle the case for when vport is not enabled. This fixes the case for when the PF netdev is open before sriov is enabled, once sriov is enabled at esw_enable_vport, esw_vport_change_handle_locked didn't read the PF context since it thought the PF vport was not enabled. When we enable the vport, arming for events is not required anymore, since it's done on the vport change handle Fixes: 586cfa7f1d58 ('net/mlx5: E-Switch, Use vport event handler for vport cleanup') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a350af2..cfec20c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1491,14 +1491,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, /* Sync with current vport context */ vport->enabled_events = enable_events; - esw_vport_change_handle_locked(vport); - vport->enabled = true; /* only PF is trusted by default */ vport->trusted = (vport_num) ? false : true; - - arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + esw_vport_change_handle_locked(vport); esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); -- cgit v1.1 From 23898c763f4af6f5c80b0230b1ea788a0ce3cf73 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Fri, 10 Jun 2016 00:07:37 +0300 Subject: net/mlx5: E-Switch, Modify node guid on vf set MAC In RoCE, the RDMA-CM needs the node guid to establish connection between nodes. Today, the node guid exposed to mlx5 Ethernet VFs is zero, therefore RDMA-CM on the VF is broken. Whenever the administrator sets a MAC for a VF, derive the node guid from it and set it as well in the following way: MAC: e4:1d:2d:b3:f4:01 -> node_guid: e4:1d:2d:ff:fe:b3:f4:01 Fixes: 77256579c6b43 ('net/mlx5: E-Switch, Introduce Vport...') Signed-off-by: Noa Osherovich Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 23 ++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 38 +++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 9 ++++-- include/linux/mlx5/vport.h | 2 ++ 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index cfec20c..9b1855b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1725,11 +1725,24 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { - int err = 0; struct mlx5_vport *evport; + u64 node_guid; + int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1753,11 +1766,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport, err); + mutex_lock(&esw->state_lock); if (evport->enabled) err = esw_vport_ingress_config(esw, evport); mutex_unlock(&esw->state_lock); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b69dadc..daf44cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -508,6 +508,44 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + u8 *guid; + void *in; + int err; + + if (!vport) + return -EINVAL; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) + return -ENOTSUPP; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context, + node_guid); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 986a615..e955a28 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -501,7 +501,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x1b]; + u8 reserved_at_5[0x19]; + u8 nic_vport_node_guid_modify[0x1]; + u8 nic_vport_port_guid_modify[0x1]; u8 reserved_at_20[0x7e0]; }; @@ -4584,7 +4586,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x19]; + u8 reserved_at_0[0x16]; + u8 node_guid[0x1]; + u8 port_guid[0x1]; + u8 reserved_at_18[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 301da4a..6c16c19 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -50,6 +50,8 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, -- cgit v1.1 From 62e3c24ac4f0ee307c41a3652636f88a3f8d165c Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Fri, 10 Jun 2016 00:07:38 +0300 Subject: net/mlx5: E-Switch, always set mc_promisc for allmulti vports Set the mc_promisc flag also in the case of adding new mc address to existing allmulti vport. Fixes: a35f71f27a61 ('net/mlx5: E-Switch, Implement promiscuous rx modes vf request handling') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9b1855b..aebbd6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -651,6 +651,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, esw_fdb_set_vport_rule(esw, mac, vport_idx); + iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: if (!iter_vaddr) -- cgit v1.1 From 811afeaa3797bdf4eabed286100811b33005c9e0 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 10 Jun 2016 00:07:39 +0300 Subject: net/mlx5e: Use ndo_stop explicitly at shutdown flow The current implementation copies the flow of ndo_stop instead of calling it explicitly, Fixed it. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fd43929..f5c8d5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3192,10 +3192,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); - mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_close_locked(netdev); - mutex_unlock(&priv->state_lock); + mlx5e_close(netdev); } else { unregister_netdev(netdev); } -- cgit v1.1 From 0ca00fc1f808602137dc6d51f17747b3bb0fc34d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 10 Jun 2016 00:07:40 +0300 Subject: net/mlx5e: Fix blue flame quota logic Blue flame is a latency enhancement feature that allows the driver to write the packet data directly to the NIC's registers thus making the read of the packet data from host memory redundant. We maintain a quota for the blue flame which is reloaded whenever we identify that the hardware is processing send requests and processes them fast enough so by the time we post the next send request it was able to process all the pending ones. This indicates that the hardware is capable of processing more blue flame requests efficiently. The blue flame quota is decremented whenever we send using blue flame. The current code erroneously clears the budget if we did not use blue flame for the current post send operation and we fix it here. Fixes: 88a85f99e51f ('net/mlx5e: TX latency optimization to save DMA reads') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 229ab16..b000ddc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -317,7 +317,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); - sq->bf_budget = bf ? sq->bf_budget - 1 : 0; + if (bf) + sq->bf_budget--; sq->stats.packets++; sq->stats.bytes += num_bytes; -- cgit v1.1 From ca8bdaf13abbdcbb12ff12164aa2d5b522ec524d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 8 Jun 2016 19:21:17 +0100 Subject: stmmac: fix parameter to dwmac4_set_umac_addr() The dwmac4_set_umac_addr() takes a struct mac_device_info as the first parameter, but is being passed a ioaddr instead from dwmac4_set_filter(). Fix the warning/bug by changing the first parameter. drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: warning: incorrect type in argument 1 (different address spaces) drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: expected struct mac_device_info *hw drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c:159:46: got void [noderef] *ioaddr Note, only compile tested this as do not have any hardware with it in. Signed-off-by: Ben Dooks Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 4f7283d..44da877 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -156,7 +156,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, struct netdev_hw_addr *ha; netdev_for_each_uc_addr(ha, dev) { - dwmac4_set_umac_addr(ioaddr, ha->addr, reg); + dwmac4_set_umac_addr(hw, ha->addr, reg); reg++; } } -- cgit v1.1 From 719c44d340beeecd22cbda91b00ef55585b3c1a0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Jun 2016 12:06:34 -0400 Subject: packet: compat support for sock_fprog Socket option PACKET_FANOUT_DATA takes a struct sock_fprog as argument if PACKET_FANOUT has mode PACKET_FANOUT_CBPF. This structure contains a pointer into user memory. If userland is 32-bit and kernel is 64-bit the two disagree about the layout of struct sock_fprog. Add compat setsockopt support to convert a 32-bit compat_sock_fprog to a 64-bit sock_fprog. This is analogous to compat_sock_fprog support for SO_REUSEPORT added in commit 1957598840f4 ("soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF"). Reported-by: Daniel Borkmann Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/compat.h | 1 + net/compat.c | 17 +++++++++++++++-- net/packet/af_packet.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/net/compat.h b/include/net/compat.h index 48103cf..13de0cc 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -42,6 +42,7 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *, unsigned int); asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, diff --git a/net/compat.c b/net/compat.c index 1373947..1cd2ec0 100644 --- a/net/compat.c +++ b/net/compat.c @@ -309,8 +309,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) __scm_destroy(scm); } -static int do_set_attach_filter(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) +/* allocate a 64-bit sock_fprog on the user stack for duration of syscall. */ +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval) { struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval; struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog)); @@ -323,6 +323,19 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, __get_user(ptr, &fprog32->filter) || __put_user(len, &kfprog->len) || __put_user(compat_ptr(ptr), &kfprog->filter)) + return NULL; + + return kfprog; +} +EXPORT_SYMBOL_GPL(get_compat_bpf_fprog); + +static int do_set_attach_filter(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock_fprog __user *kfprog; + + kfprog = get_compat_bpf_fprog(optval); + if (!kfprog) return -EFAULT; return sock_setsockopt(sock, level, optname, (char __user *)kfprog, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4040eb9..9bff6ef 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,7 @@ #include #endif #include +#include #include "internal.h" @@ -3940,6 +3941,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } +#ifdef CONFIG_COMPAT +static int compat_packet_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct packet_sock *po = pkt_sk(sock->sk); + + if (level != SOL_PACKET) + return -ENOPROTOOPT; + + if (optname == PACKET_FANOUT_DATA && + po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { + optval = (char __user *)get_compat_bpf_fprog(optval); + if (!optval) + return -EFAULT; + optlen = sizeof(struct sock_fprog); + } + + return packet_setsockopt(sock, level, optname, optval, optlen); +} +#endif + static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { @@ -4416,6 +4438,9 @@ static const struct proto_ops packet_ops = { .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_packet_setsockopt, +#endif .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, -- cgit v1.1 From 50219538ffc0493a2b451a3aa0191138ef8bfe9d Mon Sep 17 00:00:00 2001 From: Shrikrishna Khare Date: Wed, 8 Jun 2016 07:40:53 -0700 Subject: vmxnet3: segCnt can be 1 for LRO packets The device emulation may send segCnt of 1 for LRO packets. Signed-off-by: Shrikrishna Khare Signed-off-by: Jin Heo Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index db8022a..08885bc 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1369,7 +1369,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; segCnt = rcdlro->segCnt; - BUG_ON(segCnt <= 1); + WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index c482539..3d2b64e 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.7.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.8.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040700 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040800 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ -- cgit v1.1 From aaee8c3c5cce2d9107310dd9f3026b4f901d441c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:54:04 -0700 Subject: x86/entry/traps: Don't force in_interrupt() to return true in IST handlers Forcing in_interrupt() to return true if we're not in a bona fide interrupt confuses the softirq code. This fixes warnings like: NOHZ: local_softirq_pending 282 ... which can happen when running things like selftests/x86. This will change perf's static percpu buffer usage in IST context. I think this is okay, and it's changing the behavior to match historical (pre-4.0) behavior. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 959274753857 ("x86, traps: Track entry into and exit from IST context") Link: http://lkml.kernel.org/r/cdc215f94d118d691d73df35275022331156fb45.1464130360.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d159048..00f03d8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -96,6 +96,12 @@ static inline void cond_local_irq_disable(struct pt_regs *regs) local_irq_disable(); } +/* + * In IST context, we explicitly disable preemption. This serves two + * purposes: it makes it much less likely that we would accidentally + * schedule in IST context and it will force a warning if we somehow + * manage to schedule by accident. + */ void ist_enter(struct pt_regs *regs) { if (user_mode(regs)) { @@ -110,13 +116,7 @@ void ist_enter(struct pt_regs *regs) rcu_nmi_enter(); } - /* - * We are atomic because we're on the IST stack; or we're on - * x86_32, in which case we still shouldn't schedule; or we're - * on x86_64 and entered from user mode, in which case we're - * still atomic unless ist_begin_non_atomic is called. - */ - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); /* This code is a bit fragile. Test it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); @@ -124,7 +124,7 @@ void ist_enter(struct pt_regs *regs) void ist_exit(struct pt_regs *regs) { - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); if (!user_mode(regs)) rcu_nmi_exit(); @@ -155,7 +155,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) BUG_ON((unsigned long)(current_top_of_stack() - current_stack_pointer()) >= THREAD_SIZE); - preempt_count_sub(HARDIRQ_OFFSET); + preempt_enable_no_resched(); } /** @@ -165,7 +165,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) */ void ist_end_non_atomic(void) { - preempt_count_add(HARDIRQ_OFFSET); + preempt_disable(); } static nokprobe_inline int -- cgit v1.1 From 9d98bcec731756b8688b59ec998707924d716d7b Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 8 Jun 2016 14:59:52 +0800 Subject: x86/ioapic: Fix incorrect pointers in ioapic_setup_resources() On a 4-socket Brickland system, hot-removing one ioapic is fine. Hot-removing the 2nd one causes panic in mp_unregister_ioapic() while calling release_resource(). It is because the iomem_res pointer has already been released when removing the first ioapic. To explain the use of &res[num] here: res is assigned to ioapic_resources, and later in ioapic_insert_resources() we do: struct resource *r = ioapic_resources; for_each_ioapic(i) { insert_resource(&iomem_resource, r); r++; } Here 'r' is treated as an arry of 'struct resource', and the r++ ensures that each element of the array is inserted separately. Thus we should call release_resouce() on each element at &res[num]. Fix it by assigning the correct pointers to ioapics[i].iomem_res in ioapic_setup_resources(). Signed-off-by: Rui Wang Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1465369193-4816-3-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 84e33ff..446702e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2588,8 +2588,8 @@ static struct resource * __init ioapic_setup_resources(void) res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; + ioapics[i].iomem_res = &res[num]; num++; - ioapics[i].iomem_res = res; } ioapic_resources = res; -- cgit v1.1