From 4f3532506a339d91e593da59eea4e7143e698ca3 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 17 May 2016 08:42:12 +0000 Subject: ASoC: rsnd: open 31bit of SSICKR mask SSICKR (Gen2) / BRGCKR (Gen3) 31bit mask should be opened, because BRGB (= for 48kHz) might select it. Special thanks Yokoyama-san Reported-by: Hiroyuki Yokoyama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/adg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 606399d..0891014 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -522,7 +522,7 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, } } - rsnd_mod_bset(adg_mod, SSICKR, 0x00FF0000, ckr); + rsnd_mod_bset(adg_mod, SSICKR, 0x80FF0000, ckr); rsnd_mod_write(adg_mod, BRRA, rbga); rsnd_mod_write(adg_mod, BRRB, rbgb); -- cgit v1.1 From 99cf4b267e4863b95c7d33f59371b6ccdfd4a4ce Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 25 May 2016 10:49:52 -0300 Subject: ASoC: ak4613: Remove owner assignment from platform_driver This platform_driver does not need to set an owner as it will be populated by the driver core. Generated by scripts/coccinelle/api/platform_no_drv_owner.cocci. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/codecs/ak4613.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index 647f69d..33d2f2e1 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -530,7 +530,6 @@ static int ak4613_i2c_remove(struct i2c_client *client) static struct i2c_driver ak4613_i2c_driver = { .driver = { .name = "ak4613-codec", - .owner = THIS_MODULE, .of_match_table = ak4613_of_match, }, .probe = ak4613_i2c_probe, -- cgit v1.1 From 500e06b9a3f50d3db992eb3b8dc309f695d33f63 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 31 May 2016 19:09:55 +0530 Subject: ASoC: hdac_hdmi: Fix potential NULL dereference Static checker warns: Pointer 'hlink' returned from call to function 'snd_hdac_ext_bus_get_link' at line may be NULL and will be dereferenced" So we should always check the return of snd_hdac_ext_bus_get_link() before referencing the link pointer Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 181cd3b..2abb742 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1474,6 +1474,11 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec) * exit, we call pm_runtime_suspend() so that will do for us */ hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev)); + if (!hlink) { + dev_err(&edev->hdac.dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_get(edev->ebus, hlink); ret = create_fill_widget_route_map(dapm); @@ -1634,6 +1639,11 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) /* hold the ref while we probe */ hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev)); + if (!hlink) { + dev_err(&edev->hdac.dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_get(edev->ebus, hlink); hdmi_priv = devm_kzalloc(&codec->dev, sizeof(*hdmi_priv), GFP_KERNEL); @@ -1744,6 +1754,11 @@ static int hdac_hdmi_runtime_suspend(struct device *dev) } hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev)); + if (!hlink) { + dev_err(dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_put(ebus, hlink); return 0; @@ -1765,6 +1780,11 @@ static int hdac_hdmi_runtime_resume(struct device *dev) return 0; hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev)); + if (!hlink) { + dev_err(dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_get(ebus, hlink); err = snd_hdac_display_power(bus, true); -- cgit v1.1 From 8e1cc0e4bab7b44a55ea59d26683b618b34950bc Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 2 Jun 2016 12:55:05 +0300 Subject: ASoC: davinci-mcasp: Fix dra7 DMA offset when using CFG port The TX and RX offset is different for each serializers when using the CFG port for DMA access. When using the CFG port only one serializer can be used per direction so print error message and only configure the first serializer's offset. Reported-by: Misael Lopez Cruz Suggested-by: Misael Lopez Cruz Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- sound/soc/davinci/davinci-mcasp.c | 56 ++++++++++++++++++++++++++++++++++++--- sound/soc/davinci/davinci-mcasp.h | 4 +-- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 0f66fda..237dc67 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1513,8 +1513,9 @@ static struct davinci_mcasp_pdata am33xx_mcasp_pdata = { }; static struct davinci_mcasp_pdata dra7_mcasp_pdata = { - .tx_dma_offset = 0x200, - .rx_dma_offset = 0x284, + /* The CFG port offset will be calculated if it is needed */ + .tx_dma_offset = 0, + .rx_dma_offset = 0, .version = MCASP_VERSION_4, }; @@ -1734,6 +1735,52 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp) return PCM_EDMA; } +static u32 davinci_mcasp_txdma_offset(struct davinci_mcasp_pdata *pdata) +{ + int i; + u32 offset = 0; + + if (pdata->version != MCASP_VERSION_4) + return pdata->tx_dma_offset; + + for (i = 0; i < pdata->num_serializer; i++) { + if (pdata->serial_dir[i] == TX_MODE) { + if (!offset) { + offset = DAVINCI_MCASP_TXBUF_REG(i); + } else { + pr_err("%s: Only one serializer allowed!\n", + __func__); + break; + } + } + } + + return offset; +} + +static u32 davinci_mcasp_rxdma_offset(struct davinci_mcasp_pdata *pdata) +{ + int i; + u32 offset = 0; + + if (pdata->version != MCASP_VERSION_4) + return pdata->rx_dma_offset; + + for (i = 0; i < pdata->num_serializer; i++) { + if (pdata->serial_dir[i] == RX_MODE) { + if (!offset) { + offset = DAVINCI_MCASP_RXBUF_REG(i); + } else { + pr_err("%s: Only one serializer allowed!\n", + __func__); + break; + } + } + } + + return offset; +} + static int davinci_mcasp_probe(struct platform_device *pdev) { struct snd_dmaengine_dai_dma_data *dma_data; @@ -1862,7 +1909,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (dat) dma_data->addr = dat->start; else - dma_data->addr = mem->start + pdata->tx_dma_offset; + dma_data->addr = mem->start + davinci_mcasp_txdma_offset(pdata); dma = &mcasp->dma_request[SNDRV_PCM_STREAM_PLAYBACK]; res = platform_get_resource(pdev, IORESOURCE_DMA, 0); @@ -1883,7 +1930,8 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (dat) dma_data->addr = dat->start; else - dma_data->addr = mem->start + pdata->rx_dma_offset; + dma_data->addr = + mem->start + davinci_mcasp_rxdma_offset(pdata); dma = &mcasp->dma_request[SNDRV_PCM_STREAM_CAPTURE]; res = platform_get_resource(pdev, IORESOURCE_DMA, 1); diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h index 1e8787f..afddc80 100644 --- a/sound/soc/davinci/davinci-mcasp.h +++ b/sound/soc/davinci/davinci-mcasp.h @@ -85,9 +85,9 @@ (n << 2)) /* Transmit Buffer for Serializer n */ -#define DAVINCI_MCASP_TXBUF_REG 0x200 +#define DAVINCI_MCASP_TXBUF_REG(n) (0x200 + (n << 2)) /* Receive Buffer for Serializer n */ -#define DAVINCI_MCASP_RXBUF_REG 0x280 +#define DAVINCI_MCASP_RXBUF_REG(n) (0x280 + (n << 2)) /* McASP FIFO Registers */ #define DAVINCI_MCASP_V2_AFIFO_BASE (0x1010) -- cgit v1.1 From 6de7df8d1b1a45a07d6ecc6b4f94179e1e68f5ec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 31 May 2016 23:12:00 +0200 Subject: ASoC: hdmi-codec: select CONFIG_HDMI SND_SOC_HDMI_CODEC can be enabled without HDMI support, leading to a link error: In function `hdmi_codec_hw_params': sound/soc/codecs/hdmi-codec.c:188: undefined reference to `hdmi_audio_infoframe_init' sound/built-in.o:(.debug_addr+0x1a5c0): undefined reference to `hdmi_audio_infoframe_init' This changes the Kconfig file to select HDMI, as the other codec using hdmi_audio_infoframe_init already does. Signed-off-by: Arnd Bergmann Acked-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 4d82a58..f3fb98f 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -483,9 +483,10 @@ config SND_SOC_DMIC tristate config SND_SOC_HDMI_CODEC - tristate - select SND_PCM_ELD - select SND_PCM_IEC958 + tristate + select SND_PCM_ELD + select SND_PCM_IEC958 + select HDMI config SND_SOC_ES8328 tristate "Everest Semi ES8328 CODEC" -- cgit v1.1 From 572f1f613a109e03b52e72b62df2d2c0e595d3bd Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 7 Jun 2016 11:03:08 +0800 Subject: ASoC: rt5670: fix HP Playback Volume control The register setting for HP Playback Volume is inverted. So, set the invert flag in SOC_DOUBLE_TLV. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 49a9e70..0af5ddb 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -619,7 +619,7 @@ static const struct snd_kcontrol_new rt5670_snd_controls[] = { RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1), SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL, RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, - 39, 0, out_vol_tlv), + 39, 1, out_vol_tlv), /* OUTPUT Control */ SOC_DOUBLE("OUT Channel Switch", RT5670_LOUT1, RT5670_VOL_L_SFT, RT5670_VOL_R_SFT, 1, 1), -- cgit v1.1 From f5967101e9de12addcda4510dfbac66d7c5779c3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2016 12:56:27 +0200 Subject: x86/hweight: Get rid of the special calling convention People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench into kcov, lto, etc, experimentations. Add asm versions for __sw_hweight{32,64}() and do explicit saving and restoring of clobbered registers. This gets rid of the special calling convention. We get to call those functions on !X86_FEATURE_POPCNT CPUs. We still need to hardcode POPCNT and register operands as some old gas versions which we support, do not know about POPCNT. Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives can do padding now. Suggested-by: H. Peter Anvin Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464605787-20603-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 --- arch/x86/include/asm/arch_hweight.h | 24 +++++------- arch/x86/kernel/i386_ksyms_32.c | 2 + arch/x86/kernel/x8664_ksyms_64.c | 3 ++ arch/x86/lib/Makefile | 2 +- arch/x86/lib/hweight.S | 77 +++++++++++++++++++++++++++++++++++++ lib/Makefile | 5 --- lib/hweight.c | 4 ++ 8 files changed, 97 insertions(+), 25 deletions(-) create mode 100644 arch/x86/lib/hweight.S diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885..729d41d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -294,11 +294,6 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR -config ARCH_HWEIGHT_CFLAGS - string - default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 - default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 02e799f..e7cd631 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -4,8 +4,8 @@ #include #ifdef CONFIG_64BIT -/* popcnt %edi, %eax -- redundant REX prefix for alignment */ -#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %edi, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" @@ -17,19 +17,15 @@ #define REG_OUT "a" #endif -/* - * __sw_hweightXX are called from within the alternatives below - * and callee-clobbered registers need to be taken care of. See - * ARCH_HWEIGHT_CFLAGS in for the respective - * compiler switches. - */ +#define __HAVE_ARCH_SW_HWEIGHT + static __always_inline unsigned int __arch_hweight32(unsigned int w) { - unsigned int res = 0; + unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } @@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w) #else static __always_inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; + unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa..d40ee8a 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd05942..f1aebfb 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__sw_hweight32); +EXPORT_SYMBOL(__sw_hweight64); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a5767..ec969cc 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S new file mode 100644 index 0000000..02de3d7 --- /dev/null +++ b/arch/x86/lib/hweight.S @@ -0,0 +1,77 @@ +#include + +#include + +/* + * unsigned int __sw_hweight32(unsigned int w) + * %rdi: w + */ +ENTRY(__sw_hweight32) + +#ifdef CONFIG_X86_64 + movl %edi, %eax # w +#endif + __ASM_SIZE(push,) %__ASM_REG(dx) + movl %eax, %edx # w -> t + shrl %edx # t >>= 1 + andl $0x55555555, %edx # t &= 0x55555555 + subl %edx, %eax # w -= t + + movl %eax, %edx # w -> t + shrl $2, %eax # w_tmp >>= 2 + andl $0x33333333, %edx # t &= 0x33333333 + andl $0x33333333, %eax # w_tmp &= 0x33333333 + addl %edx, %eax # w = w_tmp + t + + movl %eax, %edx # w -> t + shrl $4, %edx # t >>= 4 + addl %edx, %eax # w_tmp += t + andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret +ENDPROC(__sw_hweight32) + +ENTRY(__sw_hweight64) +#ifdef CONFIG_X86_64 + pushq %rdx + + movq %rdi, %rdx # w -> t + movabsq $0x5555555555555555, %rax + shrq %rdx # t >>= 1 + andq %rdx, %rax # t &= 0x5555555555555555 + movabsq $0x3333333333333333, %rdx + subq %rax, %rdi # w -= t + + movq %rdi, %rax # w -> t + shrq $2, %rdi # w_tmp >>= 2 + andq %rdx, %rax # t &= 0x3333333333333333 + andq %rdi, %rdx # w_tmp &= 0x3333333333333333 + addq %rdx, %rax # w = w_tmp + t + + movq %rax, %rdx # w -> t + shrq $4, %rdx # t >>= 4 + addq %rdx, %rax # w_tmp += t + movabsq $0x0f0f0f0f0f0f0f0f, %rdx + andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f + movabsq $0x0101010101010101, %rdx + imulq %rdx, %rax # w_tmp *= 0x0101010101010101 + shrq $56, %rax # w = w_tmp >> 56 + + popq %rdx + ret +#else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx + + call __sw_hweight32 + movl %eax, %ecx # stash away result + movl %edx, %eax # second part of input + call __sw_hweight32 + addl %ecx, %eax # result + + popl %ecx + ret +#endif +ENDPROC(__sw_hweight64) diff --git a/lib/Makefile b/lib/Makefile index ff6a7a6..07d06a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n -# Kernel does not boot if we instrument this file as it uses custom calling -# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS). -KCOV_INSTRUMENT_hweight.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ @@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -GCOV_PROFILE_hweight.o := n -CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/hweight.c b/lib/hweight.c index 9a5c1f2..43273a7 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,6 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned int __sw_hweight32(unsigned int w) { #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER @@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w) #endif } EXPORT_SYMBOL(__sw_hweight32); +#endif unsigned int __sw_hweight16(unsigned int w) { @@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w) } EXPORT_SYMBOL(__sw_hweight8); +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 @@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w) #endif } EXPORT_SYMBOL(__sw_hweight64); +#endif -- cgit v1.1 From 2823d4da5d8a0c222747b24eceb65f5b30717d02 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:37 -0700 Subject: x86, bitops: remove use of "sbb" to return CF Use SETC instead of SBB to return the value of CF from assembly. Using SETcc enables uniformity with other flags-returning pieces of assembly code. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-2-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 24 ++++++++++++------------ arch/x86/include/asm/percpu.h | 12 ++++++------ arch/x86/include/asm/signal.h | 6 +++--- arch/x86/include/asm/sync_bitops.h | 18 +++++++++--------- arch/x86/kernel/vm86_32.c | 5 +---- 5 files changed, 31 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 7766d1c..b2b797d 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -230,11 +230,11 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) */ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm("bts %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -270,11 +270,11 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a */ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -282,11 +282,11 @@ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long /* WARNING: non atomic and it can be reordered! */ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -313,11 +313,11 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("bt %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + "setc %0" + : "=qm" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66c..65039e9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,9 +510,9 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - int old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (var) \ + unsigned char old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ + : "=qm" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) @@ -532,11 +532,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, static inline int x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - int oldbit; + unsigned char oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + "setc %0" + : "=qm" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 2138c9a..dd1e7d6 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - int ret; - asm("btl %2,%1\n\tsbbl %0,%0" - : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + unsigned char ret; + asm("btl %2,%1\n\tsetc %0" + : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); return ret; } diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index f28a24b..cbf8847 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -79,10 +79,10 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; bts %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btr %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btc %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 3dce1ca..01f30e5 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) static inline int is_revectored(int nr, struct revectored_struct *bitmap) { - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap), "r" (nr)); - return nr; + return test_bit(nr, bitmap->__map); } #define val_byte(val, n) (((__u8 *)&val)[n]) -- cgit v1.1 From 117780eef7740729e803bdcc0d5f2f48137ea8e3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:38 -0700 Subject: x86, asm: use bool for bitops and other assembly outputs The gcc people have confirmed that using "bool" when combined with inline assembly always is treated as a byte-sized operand that can be assumed to be 0 or 1, which is exactly what the SET instruction emits. Change the output types and intermediate variables of as many operations as practical to "bool". Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-3-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/boot/bitops.h | 8 +++++--- arch/x86/boot/boot.h | 8 ++++---- arch/x86/boot/string.c | 2 +- arch/x86/include/asm/apm.h | 6 +++--- arch/x86/include/asm/archrandom.h | 16 ++++++++-------- arch/x86/include/asm/atomic.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 10 +++++----- arch/x86/include/asm/bitops.h | 28 ++++++++++++++-------------- arch/x86/include/asm/local.h | 8 ++++---- arch/x86/include/asm/percpu.h | 8 ++++---- arch/x86/include/asm/rmwcc.h | 4 ++-- arch/x86/include/asm/rwsem.h | 17 +++++++++-------- include/linux/random.h | 12 ++++++------ 13 files changed, 69 insertions(+), 66 deletions(-) diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9..0d41d68 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -16,14 +16,16 @@ #define BOOT_BITOPS_H #define _LINUX_BITOPS_H /* Inhibit inclusion of */ -static inline int constant_test_bit(int nr, const void *addr) +#include + +static inline bool constant_test_bit(int nr, const void *addr) { const u32 *p = (const u32 *)addr; return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; } -static inline int variable_test_bit(int nr, const void *addr) +static inline bool variable_test_bit(int nr, const void *addr) { - u8 v; + bool v; const u32 *p = (const u32 *)addr; asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88..2edb2d5 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -176,16 +176,16 @@ static inline void wrgs32(u32 v, addr_t addr) } /* Note: these only return true/false, not a signed return value! */ -static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { - u8 diff; + bool diff; asm volatile("fs; repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { - u8 diff; + bool diff; asm volatile("gs; repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b846..cc3bd58 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -17,7 +17,7 @@ int memcmp(const void *s1, const void *s2, size_t len) { - u8 diff; + bool diff; asm("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6..93eebc63 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, : "memory", "cc"); } -static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, - u32 ecx_in, u32 *eax) +static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) { int cx, dx, si; - u8 error; + bool error; /* * N.B. We do NOT need a cld after the BIOS call diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 69f1366..ab6f599 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -43,7 +43,7 @@ #ifdef CONFIG_ARCH_RANDOM /* Instead of arch_get_random_long() when alternatives haven't run. */ -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { int ok; asm volatile("1: " RDRAND_LONG "\n\t" @@ -53,13 +53,13 @@ static inline int rdrand_long(unsigned long *v) "2:" : "=r" (ok), "=a" (*v) : "0" (RDRAND_RETRY_LOOPS)); - return ok; + return !!ok; } /* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { - unsigned char ok; + bool ok; asm volatile(RDSEED_LONG "\n\t" "setc %0" : "=qm" (ok), "=a" (*v)); @@ -67,7 +67,7 @@ static inline bool rdseed_long(unsigned long *v) } #define GET_RANDOM(name, type, rdrand, nop) \ -static inline int name(type *v) \ +static inline bool name(type *v) \ { \ int ok; \ alternative_io("movl $0, %0\n\t" \ @@ -80,13 +80,13 @@ static inline int name(type *v) \ X86_FEATURE_RDRAND, \ ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ "0" (RDRAND_RETRY_LOOPS)); \ - return ok; \ + return !!ok; \ } #define GET_SEED(name, type, rdseed, nop) \ -static inline int name(type *v) \ +static inline bool name(type *v) \ { \ - unsigned char ok; \ + bool ok; \ alternative_io("movb $0, %0\n\t" \ nop, \ rdseed "\n\t" \ @@ -119,7 +119,7 @@ GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); #else -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { return 0; } diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e86742..17d8812 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -75,7 +75,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -112,7 +112,7 @@ static __always_inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static __always_inline int atomic_dec_and_test(atomic_t *v) +static __always_inline bool atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -125,7 +125,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_inc_and_test(atomic_t *v) +static __always_inline bool atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -139,7 +139,7 @@ static __always_inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static __always_inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline bool atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 0373510..4f881d7 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -70,7 +70,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); } @@ -109,7 +109,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline bool atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); } @@ -122,7 +122,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline bool atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); } @@ -136,7 +136,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long i, atomic64_t *v) +static inline bool atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); } @@ -180,7 +180,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b2b797d..8cbb7f4 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } @@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add * * This is the same as test_and_set_bit on x86. */ -static __always_inline int +static __always_inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); @@ -228,9 +228,9 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm("bts %2,%1\n\t" "setc %0" @@ -247,7 +247,7 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } @@ -268,9 +268,9 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("btr %2,%1\n\t" "setc %0" @@ -280,9 +280,9 @@ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long } /* WARNING: non atomic and it can be reordered! */ -static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("btc %2,%1\n\t" "setc %0" @@ -300,20 +300,20 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } -static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("bt %2,%1\n\t" "setc %0" @@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo * @nr: bit number to test * @addr: Address to start counting from */ -static int test_bit(int nr, const volatile unsigned long *addr); +static bool test_bit(int nr, const volatile unsigned long *addr); #endif #define test_bit(nr, addr) \ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 4ad6560..0cdc65b 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -50,7 +50,7 @@ static inline void local_sub(long i, local_t *l) * true if the result is zero, or false for all * other cases. */ -static inline int local_sub_and_test(long i, local_t *l) +static inline bool local_sub_and_test(long i, local_t *l) { GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); } @@ -63,7 +63,7 @@ static inline int local_sub_and_test(long i, local_t *l) * returns true if the result is 0, or false for all other * cases. */ -static inline int local_dec_and_test(local_t *l) +static inline bool local_dec_and_test(local_t *l) { GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); } @@ -76,7 +76,7 @@ static inline int local_dec_and_test(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -static inline int local_inc_and_test(local_t *l) +static inline bool local_inc_and_test(local_t *l) { GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); } @@ -90,7 +90,7 @@ static inline int local_inc_and_test(local_t *l) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int local_add_negative(long i, local_t *l) +static inline bool local_add_negative(long i, local_t *l) { GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); } diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 65039e9..184d7f3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,14 +510,14 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - unsigned char old__; \ + bool old__; \ asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ : "=qm" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) -static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, +static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; @@ -529,10 +529,10 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, #endif } -static inline int x86_this_cpu_variable_test_bit(int nr, +static inline bool x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" "setc %0" diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8f7866a..a15b73d 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -23,11 +23,11 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - char c; \ + bool c; \ asm volatile (fullop "; set" cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ - return c != 0; \ + return c; \ } while (0) #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c..c508770 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" @@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) : "+m" (sem->count), "=&a" (result), "=&r" (tmp) : "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); - return result >= 0 ? 1 : 0; + return result >= 0; } /* @@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline bool __down_write_trylock(struct rw_semaphore *sem) { - long result, tmp; + bool result; + long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* was the active mask 0 before? */ " jnz 2f\n\t" " mov %1,%2\n\t" - " add %3,%2\n\t" + " add %4,%2\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %b1\n\t" - " movzbl %b1, %k1\n\t" + " sete %3\n\t" "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), + "=qm" (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; diff --git a/include/linux/random.h b/include/linux/random.h index e47e533..3d6e981 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -95,27 +95,27 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) #ifdef CONFIG_ARCH_RANDOM # include #else -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { return 0; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { return 0; } -static inline int arch_has_random(void) +static inline bool arch_has_random(void) { return 0; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { return 0; } -static inline int arch_get_random_seed_int(unsigned int *v) +static inline bool arch_get_random_seed_int(unsigned int *v) { return 0; } -static inline int arch_has_random_seed(void) +static inline bool arch_has_random_seed(void) { return 0; } -- cgit v1.1 From 18fe58229d80c7f4f138a07e84ba608e1ebd232b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:39 -0700 Subject: x86, asm: change the GEN_*_RMWcc() macros to not quote the condition Change the lexical defintion of the GEN_*_RMWcc() macros to not take the condition code as a quoted string. This will help support changing them to use the new __GCC_ASM_FLAG_OUTPUTS__ feature in a subsequent patch. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-4-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/atomic.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 8 ++++---- arch/x86/include/asm/bitops.h | 6 +++--- arch/x86/include/asm/local.h | 8 ++++---- arch/x86/include/asm/preempt.h | 2 +- arch/x86/include/asm/rmwcc.h | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 17d8812..7322c15 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -77,7 +77,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) */ static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** @@ -114,7 +114,7 @@ static __always_inline void atomic_dec(atomic_t *v) */ static __always_inline bool atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** @@ -127,7 +127,7 @@ static __always_inline bool atomic_dec_and_test(atomic_t *v) */ static __always_inline bool atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** @@ -141,7 +141,7 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v) */ static __always_inline bool atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 4f881d7..57bf925 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** @@ -111,7 +111,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) */ static inline bool atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** @@ -124,7 +124,7 @@ static inline bool atomic64_dec_and_test(atomic64_t *v) */ static inline bool atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** @@ -138,7 +138,7 @@ static inline bool atomic64_inc_and_test(atomic64_t *v) */ static inline bool atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 8cbb7f4..ed8f485 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -203,7 +203,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); } /** @@ -249,7 +249,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); } /** @@ -302,7 +302,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 0cdc65b..7511978 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline bool local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); } /** @@ -65,7 +65,7 @@ static inline bool local_sub_and_test(long i, local_t *l) */ static inline bool local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); } /** @@ -78,7 +78,7 @@ static inline bool local_dec_and_test(local_t *l) */ static inline bool local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); } /** @@ -92,7 +92,7 @@ static inline bool local_inc_and_test(local_t *l) */ static inline bool local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index d397deb..17f2186 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } /* diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index a15b73d..e3264c4 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -5,7 +5,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ @@ -24,7 +24,7 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ bool c; \ - asm volatile (fullop "; set" cc " %1" \ + asm volatile (fullop "; set" #cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ return c; \ -- cgit v1.1 From ff3554b409b82d349f71e9d7082648b7b0a1a5bb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:40 -0700 Subject: x86, asm: define CC_SET() and CC_OUT() macros The CC_SET() and CC_OUT() macros can be used together to take advantage of the new __GCC_ASM_FLAG_OUTPUTS__ feature in gcc 6+ while remaining backwards compatible. CC_SET() generates a SET instruction on older compilers; CC_OUT() makes sure the output is received in the correct variable. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-5-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/asm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f5063b6..7acb51c 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -42,6 +42,18 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ -- cgit v1.1 From ba741e356c49bfce0adcfa851080666870867f6b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:41 -0700 Subject: x86, asm: change GEN_*_RMWcc() to use CC_SET()/CC_OUT() Change the GEN_*_RMWcc() macros to use the CC_SET()/CC_OUT() macros defined in , and disable the use of asm goto if __GCC_ASM_FLAG_OUTPUTS__ is enabled. This allows gcc to receive the flags output directly in gcc 6+. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-6-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/rmwcc.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index e3264c4..661dd30 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -1,7 +1,9 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#ifdef CC_HAVE_ASM_GOTO +#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) + +/* Use asm goto */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ @@ -19,13 +21,15 @@ cc_label: \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) -#else /* !CC_HAVE_ASM_GOTO */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ + +/* Use flags output or a set instruction */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ bool c; \ - asm volatile (fullop "; set" #cc " %1" \ - : "+m" (var), "=qm" (c) \ + asm volatile (fullop ";" CC_SET(cc) \ + : "+m" (var), CC_OUT(cc) (c) \ : __VA_ARGS__ : "memory"); \ return c; \ } while (0) @@ -36,6 +40,6 @@ do { \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) -#endif /* CC_HAVE_ASM_GOTO */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #endif /* _ASM_X86_RMWcc */ -- cgit v1.1 From 86b61240d4c233b440cd29daf0baa440daf4a148 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:42 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-7-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ed8f485..68557f52 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -233,8 +233,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * bool oldbit; asm("bts %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -273,8 +273,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long bool oldbit; asm volatile("btr %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -285,8 +285,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon bool oldbit; asm volatile("btc %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -316,8 +316,8 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l bool oldbit; asm volatile("bt %2,%1\n\t" - "setc %0" - : "=qm" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; -- cgit v1.1 From 64be6d36f5674f3424d1901772f76e21874f4954 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:43 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-8-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/percpu.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 184d7f3..e02e3f8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -511,8 +511,9 @@ do { \ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ bool old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ - : "=qm" (old__), "+m" (var) \ + asm volatile("btr %2,"__percpu_arg(1)"\n\t" \ + CC_SET(c) \ + : CC_OUT(c) (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) @@ -535,8 +536,8 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "setc %0" - : "=qm" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; -- cgit v1.1 From 35ccfb7114e2f0f454f264c049b03c31f4c6bbc0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:44 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-9-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/rwsem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index c508770..1e8be26 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -149,10 +149,10 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem) LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %3\n\t" + CC_SET(e) "# ending __down_write_trylock\n\t" : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), - "=qm" (result) + CC_OUT(e) (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; -- cgit v1.1 From 66928b4eb92dfb6d87c204238057b9278b36452b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:45 -0700 Subject: x86, asm, boot: Use CC_SET()/CC_OUT() in arch/x86/boot/boot.h Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in arch/x86/boot/boot.h. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-10-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/boot/boot.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 2edb2d5..7c1495f 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "bitops.h" #include "ctype.h" #include "cpuflags.h" @@ -179,15 +180,15 @@ static inline void wrgs32(u32 v, addr_t addr) static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("fs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("fs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("gs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("gs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -- cgit v1.1 From 3b290398638ee4e57f1fb2e35c02005cba9a737f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:46 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() and static_cpu_has() in archrandom.h Use CC_SET()/CC_OUT() and static_cpu_has(). This produces code good enough to eliminate ad hoc use of alternatives in , greatly simplifying the code. While we are at it, make x86_init_rdrand() compile out completely if we don't need it. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-11-git-send-email-hpa@linux.intel.com v2: fix a conflict between and discovered by Ingo Molnar. There are a few places in x86-specific code where we need all of even when CONFIG_ARCH_RANDOM is disabled, so does not suffice. --- arch/x86/include/asm/archrandom.h | 128 ++++++++++++++++++-------------------- arch/x86/kernel/cpu/rdrand.c | 4 +- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index ab6f599..5b0579a 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -25,8 +25,6 @@ #include #include -#include -#include #define RDRAND_RETRY_LOOPS 10 @@ -40,97 +38,91 @@ # define RDSEED_LONG RDSEED_INT #endif -#ifdef CONFIG_ARCH_RANDOM +/* Unconditional execution of RDRAND and RDSEED */ -/* Instead of arch_get_random_long() when alternatives haven't run. */ static inline bool rdrand_long(unsigned long *v) { - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return !!ok; + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_LONG "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; +} + +static inline bool rdrand_int(unsigned int *v) +{ + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; } -/* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { bool ok; asm volatile(RDSEED_LONG "\n\t" - "setc %0" - : "=qm" (ok), "=a" (*v)); + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); return ok; } -#define GET_RANDOM(name, type, rdrand, nop) \ -static inline bool name(type *v) \ -{ \ - int ok; \ - alternative_io("movl $0, %0\n\t" \ - nop, \ - "\n1: " rdrand "\n\t" \ - "jc 2f\n\t" \ - "decl %0\n\t" \ - "jnz 1b\n\t" \ - "2:", \ - X86_FEATURE_RDRAND, \ - ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ - "0" (RDRAND_RETRY_LOOPS)); \ - return !!ok; \ -} - -#define GET_SEED(name, type, rdseed, nop) \ -static inline bool name(type *v) \ -{ \ - bool ok; \ - alternative_io("movb $0, %0\n\t" \ - nop, \ - rdseed "\n\t" \ - "setc %0", \ - X86_FEATURE_RDSEED, \ - ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ - return ok; \ +static inline bool rdseed_int(unsigned int *v) +{ + bool ok; + asm volatile(RDSEED_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + return ok; } -#ifdef CONFIG_X86_64 - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#else - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#endif /* CONFIG_X86_64 */ - +/* Conditional execution based on CPU type */ #define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) -#else +/* + * These are the generic interfaces; they must not be declared if the + * stubs in are to be invoked, + * i.e. CONFIG_ARCH_RANDOM is not defined. + */ +#ifdef CONFIG_ARCH_RANDOM -static inline bool rdrand_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return arch_has_random() ? rdrand_long(v) : false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return arch_has_random() ? rdrand_int(v) : false; } -#endif /* CONFIG_ARCH_RANDOM */ +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_has_random_seed() ? rdseed_long(v) : false; +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_has_random_seed() ? rdseed_int(v) : false; +} extern void x86_init_rdrand(struct cpuinfo_x86 *c); +#else /* !CONFIG_ARCH_RANDOM */ + +static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } + +#endif /* !CONFIG_ARCH_RANDOM */ + #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index f6f50c4..cfa97ff 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup); */ #define SANITY_CHECK_LOOPS 8 +#ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { -#ifdef CONFIG_ARCH_RANDOM unsigned long tmp; int i; @@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } -#endif } +#endif -- cgit v1.1 From 99158f10e91768d34c5004c40c42f802b719bcae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:48:38 -0700 Subject: x86/xen: Simplify set_aliased_prot() A year ago, via the following commit: aa1acff356bb ("x86/xen: Probe target addresses in set_aliased_prot() before the hypercall") I added an explicit probe to work around a hypercall issue. The code can be simplified by using probe_kernel_read(). No change in functionality. Signed-off-by: Andy Lutomirski Reviewed-by: Andrew Cooper Acked-by: David Vrabel Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/0706f1a2538e481194514197298cca6b5e3f2638.1464129798.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 760789a..0f87db2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -521,9 +521,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - pagefault_disable(); /* Avoid warnings due to being atomic. */ - __get_user(dummy, (unsigned char __user __force *)v); - pagefault_enable(); + probe_kernel_read(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); -- cgit v1.1 From d72fea6214f3fee2355d8cb6c51348059d98768b Mon Sep 17 00:00:00 2001 From: Ajit Pandey Date: Mon, 13 Jun 2016 13:35:15 +0100 Subject: ASoC: wm5110: Add missing route from OUT3R to SYSCLK Output 3 is stereo on wm5110 and all inputs/outputs should have a connection to SYSCLK. This patch adds the missing DAPM route. Signed-off-by: Ajit Pandey Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm5110.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index b5820e4..d54f1b4 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -1723,6 +1723,7 @@ static const struct snd_soc_dapm_route wm5110_dapm_routes[] = { { "OUT2L", NULL, "SYSCLK" }, { "OUT2R", NULL, "SYSCLK" }, { "OUT3L", NULL, "SYSCLK" }, + { "OUT3R", NULL, "SYSCLK" }, { "OUT4L", NULL, "SYSCLK" }, { "OUT4R", NULL, "SYSCLK" }, { "OUT5L", NULL, "SYSCLK" }, -- cgit v1.1 From 19edeb30c55079c9bba4573d700bbcafed03303c Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 13 Jun 2016 13:35:16 +0100 Subject: ASoC: wm5102: Correct supported channels on trace compressed DAI The audio trace firmware on wm5102 only supports 4 channels correct the DAI driver structure to reflect this. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm5102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index da60e3f..e7fe6b7 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -1872,7 +1872,7 @@ static struct snd_soc_dai_driver wm5102_dai[] = { .capture = { .stream_name = "Audio Trace CPU", .channels_min = 1, - .channels_max = 6, + .channels_max = 4, .rates = WM5102_RATES, .formats = WM5102_FORMATS, }, -- cgit v1.1 From 8d950d2fb23b696d393020486ab6a350bcb2c582 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 9 Jun 2016 13:24:19 -0400 Subject: MAINTAINERS: Update the Calgary IOMMU entry Update the contact info for Muli, clean-up my name, and update the mailing list to the IOMMU mailing list. Signed-off-by: Jon Mason Cc: Andrew Morton Cc: Bartlomiej Zolnierkiewicz Cc: Greg Kroah-Hartman Cc: Krzysztof Kozlowski Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465493059-11840-2-git-send-email-jdmason@kudzu.us Signed-off-by: Ingo Molnar --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 16700e4..f589a9d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2773,9 +2773,9 @@ F: include/net/caif/ F: net/caif/ CALGARY x86-64 IOMMU -M: Muli Ben-Yehuda -M: "Jon D. Mason" -L: discuss@x86-64.org +M: Muli Ben-Yehuda +M: Jon Mason +L: iommu@lists.linux-foundation.org S: Maintained F: arch/x86/kernel/pci-calgary_64.c F: arch/x86/kernel/tce_64.c -- cgit v1.1 From f0702555b16d31d61dc758fac6efb994c3fe3ec6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 9 Jun 2016 13:57:04 -0700 Subject: x86/vdso/32: Assemble sigreturn.S separately sigreturn.S was historically included by the various __kernel_vsyscall implementations due to assumptions about all the 32-bit vDSO images having the sigreturn symbols at the same address. Those assumptions were removed in v3.16, and as of v4.4, there is only a single 32-bit vDSO left. Simplify the build process by assembling sigreturn.S into a normal object file. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d7b6dfde3c7397aa26977320da90448363b5a7e9.1465505753.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/Makefile | 5 +++-- arch/x86/entry/vdso/vdso32/sigreturn.S | 8 -------- arch/x86/entry/vdso/vdso32/system_call.S | 7 +------ 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72e..68b63fd 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o +targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO @@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ - $(obj)/vdso32/system_call.o + $(obj)/vdso32/system_call.o \ + $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) # diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index d7ec4e2..20633e0 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,11 +1,3 @@ -/* - * Common code for the sigreturn entry points in vDSO images. - * So far this code is the same for both int80 and sysenter versions. - * This file is #include'd by int80.S et al to define them first thing. - * The kernel assumes that the addresses of these routines are constant - * for all vDSO implementations. - */ - #include #include #include diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 0109ac6..ed4bc97 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -2,16 +2,11 @@ * AT_SYSINFO entry point */ +#include #include #include #include -/* - * First get the common code for the sigreturn entry points. - * This must come first. - */ -#include "sigreturn.S" - .text .globl __kernel_vsyscall .type __kernel_vsyscall,@function -- cgit v1.1 From a4455082dc6f0b5d51a23523f77600e8ede47c79 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:33 -0700 Subject: x86/signals: Add missing signal_compat code for x86 features The 32-bit siginfo is a different binary format than the 64-bit one. So, when running 32-bit binaries on 64-bit kernels, we have to convert the kernel's 64-bit version to a 32-bit version that userspace can grok. We've added a few features to siginfo over the past few years and neglected to add them to arch/x86/kernel/signal_compat.c: 1. The si_addr_lsb used in SIGBUS's sent for machine checks 2. The upper/lower bounds for MPX SIGSEGV faults 3. The protection key for pkey faults I caught this with some protection keys unit tests and realized it affected a few more features. This was tested only with my protection keys patch that looks for a proper value in si_pkey. I didn't actually test the machine check or MPX code. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20160608172533.F8F05637@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/compat.h | 11 +++++++++++ arch/x86/kernel/signal_compat.c | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 5a3b2c1..a188061 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -40,6 +40,7 @@ typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; +typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; typedef u32 compat_uptr_t; @@ -181,6 +182,16 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ + short int _addr_lsb; /* Valid LSB of the reported address. */ + union { + /* used when si_code=SEGV_BNDERR */ + struct { + compat_uptr_t _lower; + compat_uptr_t _upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + compat_u32 _pkey; + }; } _sigfault; /* SIGPOLL */ diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index dc3c0b1..5335ad9 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -32,6 +32,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || + from->si_code == BUS_MCEERR_AO)) + put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); + + if (from->si_signo == SIGSEGV) { + if (from->si_code == SEGV_BNDERR) { + compat_uptr_t lower = (unsigned long)&to->si_lower; + compat_uptr_t upper = (unsigned long)&to->si_upper; + put_user_ex(lower, &to->si_lower); + put_user_ex(upper, &to->si_upper); + } + if (from->si_code == SEGV_PKUERR) + put_user_ex(from->si_pkey, &to->si_pkey); + } break; case __SI_SYS >> 16: put_user_ex(from->si_syscall, &to->si_syscall); -- cgit v1.1 From 02e8fda2cc00419a11cf38199afea4c0d7172be8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:34 -0700 Subject: x86/signals: Add build-time checks to the siginfo compat code There were at least 3 features added to the __SI_FAULT area of the siginfo struct that did not make it to the compat siginfo: 1. The si_addr_lsb used in SIGBUS's sent for machine checks 2. The upper/lower bounds for MPX SIGSEGV faults 3. The protection key for pkey faults There was also some turmoil when I was attempting to add the pkey field because it needs to be a fixed size on 32 and 64-bit and not have any alignment constraints. This patch adds some compile-time checks to the compat code to make it harder to screw this up. Basically, the checks are supposed to trip any time someone changes the siginfo structure. That sounds bad, but it's what we want. If someone changes siginfo, we want them to also be _forced_ to go look at the compat code. The details are in the comments. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20160608172534.C73DAFC3@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_compat.c | 93 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 5335ad9..b44564b 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,11 +1,104 @@ #include #include +/* + * The compat_siginfo_t structure and handing code is very easy + * to break in several ways. It must always be updated when new + * updates are made to the main siginfo_t, and + * copy_siginfo_to_user32() must be updated when the + * (arch-independent) copy_siginfo_to_user() is updated. + * + * It is also easy to put a new member in the compat_siginfo_t + * which has implicit alignment which can move internal structure + * alignment around breaking the ABI. This can happen if you, + * for instance, put a plain 64-bit value in there. + */ +static inline void signal_compat_build_tests(void) +{ + int _sifields_offset = offsetof(compat_siginfo_t, _sifields); + + /* + * If adding a new si_code, there is probably new data in + * the siginfo. Make sure folks bumping the si_code + * limits also have to look at this code. Make sure any + * new fields are handled in copy_siginfo_to_user32()! + */ + BUILD_BUG_ON(NSIGILL != 8); + BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGBUS != 5); + BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGCHLD != 6); + BUILD_BUG_ON(NSIGSYS != 1); + + /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + /* + * The offsets of all the (unioned) si_fields are fixed + * in the ABI, of course. Make sure none of them ever + * move and are always at the beginning: + */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); +#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + + /* + * Ensure that the size of each si_field never changes. + * If it does, it is a sign that the + * copy_siginfo_to_user32() code below needs to updated + * along with the size in the CHECK_SI_SIZE(). + * + * We repeat this check for both the generic and compat + * siginfos. + * + * Note: it is OK for these to grow as long as the whole + * structure stays within the padding size (checked + * above). + */ +#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name)) +#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name)) + + CHECK_CSI_OFFSET(_kill); + CHECK_CSI_SIZE (_kill, 2*sizeof(int)); + CHECK_SI_SIZE (_kill, 2*sizeof(int)); + + CHECK_CSI_OFFSET(_timer); + CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_SI_SIZE (_timer, 6*sizeof(int)); + + CHECK_CSI_OFFSET(_rt); + CHECK_CSI_SIZE (_rt, 3*sizeof(int)); + CHECK_SI_SIZE (_rt, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld); + CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); + CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld_x32); + CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); + /* no _sigchld_x32 in the generic siginfo_t */ + + CHECK_CSI_OFFSET(_sigfault); + CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); + CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigpoll); + CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); + CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigsys); + CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); + CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + + /* any new si_fields should be added here */ +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err = 0; bool ia32 = test_thread_flag(TIF_IA32); + signal_compat_build_tests(); + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; -- cgit v1.1 From e754aedc26efde6baef2d7824fbecf998a5510a4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:35 -0700 Subject: x86/mpx, selftests: Add MPX self test I've had this code for a while, but never submitted it upstream. Now that Skylake hardware is out in the wild, folks can actually run this for real. It tests the following: 1. The MPX hardware is enabled by the kernel and doing what it is supposed to 2. The MPX management code is present and enabled in the kernel 3. MPX Signal handling 4. The MPX bounds table population code (on-demand population) 5. The MPX bounds table unmapping code (kernel-initiated freeing when unused) This has also caught bugs in the XSAVE code because MPX state is saved/restored with XSAVE. I'm submitting it now because it would have caught the recent issues with the compat_siginfo code not being properly augmented when new siginfo state is added. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160608172535.5B40B0EE@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/mpx-debug.h | 14 + tools/testing/selftests/x86/mpx-dig.c | 498 +++++++++ tools/testing/selftests/x86/mpx-hw.h | 123 +++ tools/testing/selftests/x86/mpx-mini-test.c | 1585 +++++++++++++++++++++++++++ tools/testing/selftests/x86/mpx-mm.h | 9 + 6 files changed, 2230 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/mpx-debug.h create mode 100644 tools/testing/selftests/x86/mpx-dig.c create mode 100644 tools/testing/selftests/x86/mpx-hw.h create mode 100644 tools/testing/selftests/x86/mpx-mini-test.c create mode 100644 tools/testing/selftests/x86/mpx-mm.h diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index c73425de..abe9c35 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ - check_initial_reg_state sigreturn ldt_gdt iopl + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h new file mode 100644 index 0000000..9230981 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-debug.h @@ -0,0 +1,14 @@ +#ifndef _MPX_DEBUG_H +#define _MPX_DEBUG_H + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) +#define dprintf5(args...) dprintf_level(5, args) + +#endif /* _MPX_DEBUG_H */ diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c new file mode 100644 index 0000000..ce85356 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-dig.c @@ -0,0 +1,498 @@ +/* + * Written by Dave Hansen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mpx-debug.h" +#include "mpx-mm.h" +#include "mpx-hw.h" + +unsigned long bounds_dir_global; + +#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__) +static void inline __mpx_dig_abort(const char *file, const char *func, int line) +{ + fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func); + printf("MPX dig abort @ %s::%d in %s()\n", file, line, func); + abort(); +} + +/* + * run like this (BDIR finds the probably bounds directory): + * + * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \ + * | head -1 | awk -F- '{print $1}')"; + * ./mpx-dig $pid 0x$BDIR + * + * NOTE: + * assumes that the only 2097152-kb VMA is the bounds dir + */ + +long nr_incore(void *ptr, unsigned long size_bytes) +{ + int i; + long ret = 0; + long vec_len = size_bytes / PAGE_SIZE; + unsigned char *vec = malloc(vec_len); + int incore_ret; + + if (!vec) + mpx_dig_abort(); + + incore_ret = mincore(ptr, size_bytes, vec); + if (incore_ret) { + printf("mincore ret: %d\n", incore_ret); + perror("mincore"); + mpx_dig_abort(); + } + for (i = 0; i < vec_len; i++) + ret += vec[i]; + free(vec); + return ret; +} + +int open_proc(int pid, char *file) +{ + static char buf[100]; + int fd; + + snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file); + fd = open(&buf[0], O_RDONLY); + if (fd < 0) + perror(buf); + + return fd; +} + +struct vaddr_range { + unsigned long start; + unsigned long end; +}; +struct vaddr_range *ranges; +int nr_ranges_allocated; +int nr_ranges_populated; +int last_range = -1; + +int __pid_load_vaddrs(int pid) +{ + int ret = 0; + int proc_maps_fd = open_proc(pid, "maps"); + char linebuf[10000]; + unsigned long start; + unsigned long end; + char rest[1000]; + FILE *f = fdopen(proc_maps_fd, "r"); + + if (!f) + mpx_dig_abort(); + nr_ranges_populated = 0; + while (!feof(f)) { + char *readret = fgets(linebuf, sizeof(linebuf), f); + int parsed; + + if (readret == NULL) { + if (feof(f)) + break; + mpx_dig_abort(); + } + + parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest); + if (parsed != 3) + mpx_dig_abort(); + + dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest); + if (nr_ranges_populated >= nr_ranges_allocated) { + ret = -E2BIG; + break; + } + ranges[nr_ranges_populated].start = start; + ranges[nr_ranges_populated].end = end; + nr_ranges_populated++; + } + last_range = -1; + fclose(f); + close(proc_maps_fd); + return ret; +} + +int pid_load_vaddrs(int pid) +{ + int ret; + + dprintf2("%s(%d)\n", __func__, pid); + if (!ranges) { + nr_ranges_allocated = 4; + ranges = malloc(nr_ranges_allocated * sizeof(ranges[0])); + dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid, + nr_ranges_allocated, ranges); + assert(ranges != NULL); + } + do { + ret = __pid_load_vaddrs(pid); + if (!ret) + break; + if (ret == -E2BIG) { + dprintf2("%s(%d) need to realloc\n", __func__, pid); + nr_ranges_allocated *= 2; + ranges = realloc(ranges, + nr_ranges_allocated * sizeof(ranges[0])); + dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, + pid, nr_ranges_allocated, ranges); + assert(ranges != NULL); + dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated); + } + } while (1); + + dprintf2("%s(%d) done\n", __func__, pid); + + return ret; +} + +static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r) +{ + if (vaddr < r->start) + return 0; + if (vaddr >= r->end) + return 0; + return 1; +} + +static inline int vaddr_mapped_by_range(unsigned long vaddr) +{ + int i; + + if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range])) + return 1; + + for (i = 0; i < nr_ranges_populated; i++) { + struct vaddr_range *r = &ranges[i]; + + if (vaddr_in_range(vaddr, r)) + continue; + last_range = i; + return 1; + } + return 0; +} + +const int bt_entry_size_bytes = sizeof(unsigned long) * 4; + +void *read_bounds_table_into_buf(unsigned long table_vaddr) +{ +#ifdef MPX_DIG_STANDALONE + static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES]; + off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET); + if (seek_ret != table_vaddr) + mpx_dig_abort(); + + int read_ret = read(fd, &bt_buf, sizeof(bt_buf)); + if (read_ret != sizeof(bt_buf)) + mpx_dig_abort(); + return &bt_buf; +#else + return (void *)table_vaddr; +#endif +} + +int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr, + unsigned long bde_vaddr) +{ + unsigned long offset_inside_bt; + int nr_entries = 0; + int do_abort = 0; + char *bt_buf; + + dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n", + __func__, base_controlled_vaddr, bde_vaddr); + + bt_buf = read_bounds_table_into_buf(table_vaddr); + + dprintf4("%s() read done\n", __func__); + + for (offset_inside_bt = 0; + offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES; + offset_inside_bt += bt_entry_size_bytes) { + unsigned long bt_entry_index; + unsigned long bt_entry_controls; + unsigned long this_bt_entry_for_vaddr; + unsigned long *bt_entry_buf; + int i; + + dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__, + offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES); + bt_entry_buf = (void *)&bt_buf[offset_inside_bt]; + if (!bt_buf) { + printf("null bt_buf\n"); + mpx_dig_abort(); + } + if (!bt_entry_buf) { + printf("null bt_entry_buf\n"); + mpx_dig_abort(); + } + dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__, + bt_entry_buf); + if (!bt_entry_buf[0] && + !bt_entry_buf[1] && + !bt_entry_buf[2] && + !bt_entry_buf[3]) + continue; + + nr_entries++; + + bt_entry_index = offset_inside_bt/bt_entry_size_bytes; + bt_entry_controls = sizeof(void *); + this_bt_entry_for_vaddr = + base_controlled_vaddr + bt_entry_index*bt_entry_controls; + /* + * We sign extend vaddr bits 48->63 which effectively + * creates a hole in the virtual address space. + * This calculation corrects for the hole. + */ + if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL) + this_bt_entry_for_vaddr |= 0xffff800000000000; + + if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) { + printf("bt_entry_buf: %p\n", bt_entry_buf); + printf("there is a bte for %lx but no mapping\n", + this_bt_entry_for_vaddr); + printf(" bde vaddr: %016lx\n", bde_vaddr); + printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr); + printf(" table_vaddr: %016lx\n", table_vaddr); + printf(" entry vaddr: %016lx @ offset %lx\n", + table_vaddr + offset_inside_bt, offset_inside_bt); + do_abort = 1; + mpx_dig_abort(); + } + if (DEBUG_LEVEL < 4) + continue; + + printf("table entry[%lx]: ", offset_inside_bt); + for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long)) + printf("0x%016lx ", bt_entry_buf[i]); + printf("\n"); + } + if (do_abort) + mpx_dig_abort(); + dprintf4("%s() done\n", __func__); + return nr_entries; +} + +int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes, + int *nr_populated_bdes) +{ + unsigned long i; + int total_entries = 0; + + dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf, + len_bytes, bd_offset_bytes, buf + len_bytes); + + for (i = 0; i < len_bytes; i += sizeof(unsigned long)) { + unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long); + unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i]; + unsigned long bounds_dir_entry; + unsigned long bd_for_vaddr; + unsigned long bt_start; + unsigned long bt_tail; + int nr_entries; + + dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i, + bounds_dir_entry_ptr); + + bounds_dir_entry = *bounds_dir_entry_ptr; + if (!bounds_dir_entry) { + dprintf4("no bounds dir at index 0x%lx / 0x%lx " + "start at offset:%lx %lx\n", bd_index, bd_index, + bd_offset_bytes, i); + continue; + } + dprintf3("found bounds_dir_entry: 0x%lx @ " + "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i, + &buf[i]); + /* mask off the enable bit: */ + bounds_dir_entry &= ~0x1; + (*nr_populated_bdes)++; + dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes); + dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes); + + bt_start = bounds_dir_entry; + bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1; + if (!vaddr_mapped_by_range(bt_start)) { + printf("bounds directory 0x%lx points to nowhere\n", + bounds_dir_entry); + mpx_dig_abort(); + } + if (!vaddr_mapped_by_range(bt_tail)) { + printf("bounds directory end 0x%lx points to nowhere\n", + bt_tail); + mpx_dig_abort(); + } + /* + * Each bounds directory entry controls 1MB of virtual address + * space. This variable is the virtual address in the process + * of the beginning of the area controlled by this bounds_dir. + */ + bd_for_vaddr = bd_index * (1UL<<20); + + nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr, + bounds_dir_global+bd_offset_bytes+i); + total_entries += nr_entries; + dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries " + "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n", + bd_index, buf+i, + bounds_dir_entry, nr_entries, total_entries, + bd_for_vaddr, bd_for_vaddr + (1UL<<20)); + } + dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes, + bd_offset_bytes); + return total_entries; +} + +int proc_pid_mem_fd = -1; + +void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir, + long buffer_size_bytes, void *buffer) +{ + unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir; + int read_ret; + off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET); + + if (seek_ret != seekto) + mpx_dig_abort(); + + read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes); + /* there shouldn't practically be short reads of /proc/$pid/mem */ + if (read_ret != buffer_size_bytes) + mpx_dig_abort(); + + return buffer; +} +void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir, + long buffer_size_bytes, void *buffer) + +{ + unsigned char vec[buffer_size_bytes / PAGE_SIZE]; + char *dig_bounds_dir_ptr = + (void *)(bounds_dir_global + byte_offset_inside_bounds_dir); + /* + * use mincore() to quickly find the areas of the bounds directory + * that have memory and thus will be worth scanning. + */ + int incore_ret; + + int incore = 0; + int i; + + dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr); + + incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]); + if (incore_ret) { + printf("mincore ret: %d\n", incore_ret); + perror("mincore"); + mpx_dig_abort(); + } + for (i = 0; i < sizeof(vec); i++) + incore += vec[i]; + dprintf4("%s() total incore: %d\n", __func__, incore); + if (!incore) + return NULL; + dprintf3("%s() total incore: %d\n", __func__, incore); + return dig_bounds_dir_ptr; +} + +int inspect_pid(int pid) +{ + static int dig_nr; + long offset_inside_bounds_dir; + char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)]; + char *dig_bounds_dir_ptr; + int total_entries = 0; + int nr_populated_bdes = 0; + int inspect_self; + + if (getpid() == pid) { + dprintf4("inspecting self\n"); + inspect_self = 1; + } else { + dprintf4("inspecting pid %d\n", pid); + mpx_dig_abort(); + } + + for (offset_inside_bounds_dir = 0; + offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES; + offset_inside_bounds_dir += sizeof(bounds_dir_buf)) { + static int bufs_skipped; + int this_entries; + + if (inspect_self) { + dig_bounds_dir_ptr = + fill_bounds_dir_buf_self(offset_inside_bounds_dir, + sizeof(bounds_dir_buf), + &bounds_dir_buf[0]); + } else { + dig_bounds_dir_ptr = + fill_bounds_dir_buf_other(offset_inside_bounds_dir, + sizeof(bounds_dir_buf), + &bounds_dir_buf[0]); + } + if (!dig_bounds_dir_ptr) { + bufs_skipped++; + continue; + } + this_entries = search_bd_buf(dig_bounds_dir_ptr, + sizeof(bounds_dir_buf), + offset_inside_bounds_dir, + &nr_populated_bdes); + total_entries += this_entries; + } + printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr, + total_entries, nr_populated_bdes); + return total_entries + nr_populated_bdes; +} + +#ifdef MPX_DIG_REMOTE +int main(int argc, char **argv) +{ + int err; + char *c; + unsigned long bounds_dir_entry; + int pid; + + printf("mpx-dig starting...\n"); + err = sscanf(argv[1], "%d", &pid); + printf("parsing: '%s', err: %d\n", argv[1], err); + if (err != 1) + mpx_dig_abort(); + + err = sscanf(argv[2], "%lx", &bounds_dir_global); + printf("parsing: '%s': %d\n", argv[2], err); + if (err != 1) + mpx_dig_abort(); + + proc_pid_mem_fd = open_proc(pid, "mem"); + if (proc_pid_mem_fd < 0) + mpx_dig_abort(); + + inspect_pid(pid); + return 0; +} +#endif + +long inspect_me(struct mpx_bounds_dir *bounds_dir) +{ + int pid = getpid(); + + pid_load_vaddrs(pid); + bounds_dir_global = (unsigned long)bounds_dir; + dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir); + return inspect_pid(pid); +} diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h new file mode 100644 index 0000000..093c190 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-hw.h @@ -0,0 +1,123 @@ +#ifndef _MPX_HW_H +#define _MPX_HW_H + +#include + +/* Describe the MPX Hardware Layout in here */ + +#define NR_MPX_BOUNDS_REGISTERS 4 + +#ifdef __i386__ + +#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */ +#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */ +#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4 +#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */ + +#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2 +#define MPX_BOUNDS_TABLE_TOP_BIT 11 +#define MPX_BOUNDS_DIR_BOTTOM_BIT 12 +#define MPX_BOUNDS_DIR_TOP_BIT 31 + +#else + +/* + * Linear Address of "pointer" (LAp) + * 0 -> 2: ignored + * 3 -> 19: index in to bounds table + * 20 -> 47: index in to bounds directory + * 48 -> 63: ignored + */ + +#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32 +#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */ +#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8 +#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */ + +#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3 +#define MPX_BOUNDS_TABLE_TOP_BIT 19 +#define MPX_BOUNDS_DIR_BOTTOM_BIT 20 +#define MPX_BOUNDS_DIR_TOP_BIT 47 + +#endif + +#define MPX_BOUNDS_DIR_NR_ENTRIES \ + (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES) +#define MPX_BOUNDS_TABLE_NR_ENTRIES \ + (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES) + +#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1 + +struct mpx_bd_entry { + union { + char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; + void *contents[1]; + }; +} __attribute__((packed)); + +struct mpx_bt_entry { + union { + char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; + unsigned long contents[1]; + }; +} __attribute__((packed)); + +struct mpx_bounds_dir { + struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES]; +} __attribute__((packed)); + +struct mpx_bounds_table { + struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES]; +} __attribute__((packed)); + +static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit) +{ + int total_nr_bits = topbit - bottombit; + unsigned long mask = (1UL << total_nr_bits)-1; + return (val >> bottombit) & mask; +} + +static inline unsigned long __vaddr_bounds_table_index(void *vaddr) +{ + return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT, + MPX_BOUNDS_TABLE_TOP_BIT); +} + +static inline unsigned long __vaddr_bounds_directory_index(void *vaddr) +{ + return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT, + MPX_BOUNDS_DIR_TOP_BIT); +} + +static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr, + struct mpx_bounds_dir *bounds_dir) +{ + unsigned long index = __vaddr_bounds_directory_index(vaddr); + return &bounds_dir->entries[index]; +} + +static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry) +{ + unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents; + return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT); +} + +static inline struct mpx_bounds_table * +__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry) +{ + unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents; + assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT); + __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT; + return (struct mpx_bounds_table *)__bd_entry; +} + +static inline struct mpx_bt_entry * +mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir) +{ + struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir); + struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde); + unsigned long index = __vaddr_bounds_table_index(vaddr); + return &bt->entries[index]; +} + +#endif /* _MPX_HW_H */ diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c new file mode 100644 index 0000000..616ee96 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -0,0 +1,1585 @@ +/* + * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions) + * + * Written by: + * "Ren, Qiaowei" + * "Wei, Gang" + * "Hansen, Dave" + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2. + */ + +/* + * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure + * it works on 32-bit. + */ + +int inspect_every_this_many_mallocs = 100; +int zap_all_every_this_many_mallocs = 1000; + +#define _GNU_SOURCE +#define _LARGEFILE64_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpx-hw.h" +#include "mpx-debug.h" +#include "mpx-mm.h" + +#ifndef __always_inline +#define __always_inline inline __attribute__((always_inline) +#endif + +#ifndef TEST_DURATION_SECS +#define TEST_DURATION_SECS 3 +#endif + +void write_int_to(char *prefix, char *file, int int_to_write) +{ + char buf[100]; + int fd = open(file, O_RDWR); + int len; + int ret; + + assert(fd >= 0); + len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write); + assert(len >= 0); + assert(len < sizeof(buf)); + ret = write(fd, buf, len); + assert(ret == len); + ret = close(fd); + assert(!ret); +} + +void write_pid_to(char *prefix, char *file) +{ + write_int_to(prefix, file, getpid()); +} + +void trace_me(void) +{ +/* tracing events dir */ +#define TED "/sys/kernel/debug/tracing/events/" +/* + write_pid_to("common_pid=", TED "signal/filter"); + write_pid_to("common_pid=", TED "exceptions/filter"); + write_int_to("", TED "signal/enable", 1); + write_int_to("", TED "exceptions/enable", 1); +*/ + write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid"); + write_int_to("", "/sys/kernel/debug/tracing/trace", 0); +} + +#define test_failed() __test_failed(__FILE__, __LINE__) +static void __test_failed(char *f, int l) +{ + fprintf(stderr, "abort @ %s::%d\n", f, l); + abort(); +} + +/* Error Printf */ +#define eprintf(args...) fprintf(stderr, args) + +#ifdef __i386__ + +/* i386 directory size is 4MB */ +#define REG_IP_IDX REG_EIP +#define REX_PREFIX + +#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate) + +/* + * __cpuid() is from the Linux Kernel: + */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "push %%ebx;" + "cpuid;" + "mov %%ebx, %1;" + "pop %%ebx" + : "=a" (*eax), + "=g" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +#else /* __i386__ */ + +#define REG_IP_IDX REG_RIP +#define REX_PREFIX "0x48, " + +#define XSAVE_OFFSET_IN_FPMEM 0 + +/* + * __cpuid() is from the Linux Kernel: + */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +#endif /* !__i386__ */ + +struct xsave_hdr_struct { + uint64_t xstate_bv; + uint64_t reserved1[2]; + uint64_t reserved2[5]; +} __attribute__((packed)); + +struct bndregs_struct { + uint64_t bndregs[8]; +} __attribute__((packed)); + +struct bndcsr_struct { + uint64_t cfg_reg_u; + uint64_t status_reg; +} __attribute__((packed)); + +struct xsave_struct { + uint8_t fpu_sse[512]; + struct xsave_hdr_struct xsave_hdr; + uint8_t ymm[256]; + uint8_t lwp[128]; + struct bndregs_struct bndregs; + struct bndcsr_struct bndcsr; +} __attribute__((packed)); + +uint8_t __attribute__((__aligned__(64))) buffer[4096]; +struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer; + +uint8_t __attribute__((__aligned__(64))) test_buffer[4096]; +struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer; + +uint64_t num_bnd_chk; + +static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static __always_inline void xsave_state_1(void *_fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + unsigned char *fx = _fx; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static uint64_t read_mpx_status_sig(ucontext_t *uctxt) +{ + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, + (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM, + sizeof(struct xsave_struct)); + + return xsave_buf->bndcsr.status_reg; +} + +#include + +static uint8_t *get_next_inst_ip(uint8_t *addr) +{ + uint8_t *ip = addr; + uint8_t sib; + uint8_t rm; + uint8_t mod; + uint8_t base; + uint8_t modrm; + + /* determine the prefix. */ + switch(*ip) { + case 0xf2: + case 0xf3: + case 0x66: + ip++; + break; + } + + /* look for rex prefix */ + if ((*ip & 0x40) == 0x40) + ip++; + + /* Make sure we have a MPX instruction. */ + if (*ip++ != 0x0f) + return addr; + + /* Skip the op code byte. */ + ip++; + + /* Get the modrm byte. */ + modrm = *ip++; + + /* Break it down into parts. */ + rm = modrm & 7; + mod = (modrm >> 6); + + /* Init the parts of the address mode. */ + base = 8; + + /* Is it a mem mode? */ + if (mod != 3) { + /* look for scaled indexed addressing */ + if (rm == 4) { + /* SIB addressing */ + sib = *ip++; + base = sib & 7; + switch (mod) { + case 0: + if (base == 5) + ip += 4; + break; + + case 1: + ip++; + break; + + case 2: + ip += 4; + break; + } + + } else { + /* MODRM addressing */ + switch (mod) { + case 0: + /* DISP32 addressing, no base */ + if (rm == 5) + ip += 4; + break; + + case 1: + ip++; + break; + + case 2: + ip += 4; + break; + } + } + } + return ip; +} + +#ifdef si_lower +static inline void *__si_bounds_lower(siginfo_t *si) +{ + return si->si_lower; +} + +static inline void *__si_bounds_upper(siginfo_t *si) +{ + return si->si_upper; +} +#else +static inline void **__si_bounds_hack(siginfo_t *si) +{ + void *sigfault = &si->_sifields._sigfault; + void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); + void **__si_lower = end_sigfault; + + return __si_lower; +} + +static inline void *__si_bounds_lower(siginfo_t *si) +{ + return *__si_bounds_hack(si); +} + +static inline void *__si_bounds_upper(siginfo_t *si) +{ + return (*__si_bounds_hack(si)) + sizeof(void *); +} +#endif + +static int br_count; +static int expected_bnd_index = -1; +uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ +unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; + +/* + * The kernel is supposed to provide some information about the bounds + * exception in the siginfo. It should match what we have in the bounds + * registers that we are checking against. Just check against the shadow copy + * since it is easily available, and we also check that *it* matches the real + * registers. + */ +void check_siginfo_vs_shadow(siginfo_t* si) +{ + int siginfo_ok = 1; + void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0]; + void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1]; + + if ((expected_bnd_index < 0) || + (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) { + fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n", + expected_bnd_index); + exit(6); + } + if (__si_bounds_lower(si) != shadow_lower) + siginfo_ok = 0; + if (__si_bounds_upper(si) != shadow_upper) + siginfo_ok = 0; + + if (!siginfo_ok) { + fprintf(stderr, "ERROR: siginfo bounds do not match " + "shadow bounds for register %d\n", expected_bnd_index); + exit(7); + } +} + +void handler(int signum, siginfo_t *si, void *vucontext) +{ + int i; + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + + dprintf1("entered signal handler\n"); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + + if (trapno == 5) { + typeof(si->si_addr) *si_addr_ptr = &si->si_addr; + uint64_t status = read_mpx_status_sig(uctxt); + uint64_t br_reason = status & 0x3; + + br_count++; + dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); + +#define __SI_FAULT (3 << 16) +#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ + + dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", + status, ip, br_reason); + dprintf2("si_signo: %d\n", si->si_signo); + dprintf2(" signum: %d\n", signum); + dprintf2("info->si_code == SEGV_BNDERR: %d\n", + (si->si_code == SEGV_BNDERR)); + dprintf2("info->si_code: %d\n", si->si_code); + dprintf2("info->si_lower: %p\n", __si_bounds_lower(si)); + dprintf2("info->si_upper: %p\n", __si_bounds_upper(si)); + + check_siginfo_vs_shadow(si); + + for (i = 0; i < 8; i++) + dprintf3("[%d]: %p\n", i, si_addr_ptr[i]); + switch (br_reason) { + case 0: /* traditional BR */ + fprintf(stderr, + "Undefined status with bound exception:%jx\n", + status); + exit(5); + case 1: /* #BR MPX bounds exception */ + /* these are normal and we expect to see them */ + dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n", + status, (void *)ip, si->si_addr); + num_bnd_chk++; + uctxt->uc_mcontext.gregs[REG_IP_IDX] = + (greg_t)get_next_inst_ip((uint8_t *)ip); + break; + case 2: + fprintf(stderr, "#BR status == 2, missing bounds table," + "kernel should have handled!!\n"); + exit(4); + break; + default: + fprintf(stderr, "bound check error: status 0x%jx at %p\n", + status, (void *)ip); + num_bnd_chk++; + uctxt->uc_mcontext.gregs[REG_IP_IDX] = + (greg_t)get_next_inst_ip((uint8_t *)ip); + fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr); + exit(3); + } + } else if (trapno == 14) { + eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", + trapno, ip); + eprintf("si_addr %p\n", si->si_addr); + eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + test_failed(); + } else { + eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip); + eprintf("si_addr %p\n", si->si_addr); + eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + test_failed(); + } +} + +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +#define XSTATE_CPUID 0x0000000d + +/* + * List of XSAVE features Linux knows about: + */ +enum xfeature_bit { + XSTATE_BIT_FP, + XSTATE_BIT_SSE, + XSTATE_BIT_YMM, + XSTATE_BIT_BNDREGS, + XSTATE_BIT_BNDCSR, + XSTATE_BIT_OPMASK, + XSTATE_BIT_ZMM_Hi256, + XSTATE_BIT_Hi16_ZMM, + + XFEATURES_NR_MAX, +}; + +#define XSTATE_FP (1 << XSTATE_BIT_FP) +#define XSTATE_SSE (1 << XSTATE_BIT_SSE) +#define XSTATE_YMM (1 << XSTATE_BIT_YMM) +#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) +#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) +#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) +#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) +#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) + +#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */ + +bool one_bit(unsigned int x, int bit) +{ + return !!(x & (1<xsave_hdr.xstate_bv = 0x10; + xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1; + xsave_buf->bndcsr.status_reg = 0; + + dprintf2("bf xrstor\n"); + dprintf2("xsave cndcsr: status %jx, configu %jx\n", + xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); + xrstor_state(xsave_buf, 0x18); + dprintf2("after xrstor\n"); + + xsave_state_1(xsave_buf, 0x18); + + dprintf1("xsave bndcsr: status %jx, configu %jx\n", + xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); +} + +#include + +struct mpx_bounds_dir *bounds_dir_ptr; + +unsigned long __bd_incore(const char *func, int line) +{ + unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES); + return ret; +} +#define bd_incore() __bd_incore(__func__, __LINE__) + +void check_clear(void *ptr, unsigned long sz) +{ + unsigned long *i; + + for (i = ptr; (void *)i < ptr + sz; i++) { + if (*i) { + dprintf1("%p is NOT clear at %p\n", ptr, i); + assert(0); + } + } + dprintf1("%p is clear for %lx\n", ptr, sz); +} + +void check_clear_bd(void) +{ + check_clear(bounds_dir_ptr, 2UL << 30); +} + +#define USE_MALLOC_FOR_BOUNDS_DIR 1 +bool process_specific_init(void) +{ + unsigned long size; + unsigned long *dir; + /* Guarantee we have the space to align it, add padding: */ + unsigned long pad = getpagesize(); + + size = 2UL << 30; /* 2GB */ + if (sizeof(unsigned long) == 4) + size = 4UL << 20; /* 4MB */ + dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20)); + + if (USE_MALLOC_FOR_BOUNDS_DIR) { + unsigned long _dir; + + dir = malloc(size + pad); + assert(dir); + _dir = (unsigned long)dir; + _dir += 0xfffUL; + _dir &= ~0xfffUL; + dir = (void *)_dir; + } else { + /* + * This makes debugging easier because the address + * calculations are simpler: + */ + dir = mmap((void *)0x200000000000, size + pad, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (dir == (void *)-1) { + perror("unable to allocate bounds directory"); + abort(); + } + check_clear(dir, size); + } + bounds_dir_ptr = (void *)dir; + madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE); + bd_incore(); + dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr, + (char *)bounds_dir_ptr + size); + check_clear(dir, size); + enable_mpx(dir); + check_clear(dir, size); + if (prctl(43, 0, 0, 0, 0)) { + printf("no MPX support\n"); + abort(); + return false; + } + return true; +} + +bool process_specific_finish(void) +{ + if (prctl(44)) { + printf("no MPX support\n"); + return false; + } + return true; +} + +void setup_handler() +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #BR is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; /* void(*)(int)*/ + newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */ + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + assert(r == 0); +} + +void mpx_prepare(void) +{ + dprintf2("%s()\n", __func__); + setup_handler(); + process_specific_init(); +} + +void mpx_cleanup(void) +{ + printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk); + process_specific_finish(); +} + +/*-------------- the following is test case ---------------*/ +#include +#include +#include +#include +#include + +uint64_t num_lower_brs; +uint64_t num_upper_brs; + +#define MPX_CONFIG_OFFSET 1024 +#define MPX_BOUNDS_OFFSET 960 +#define MPX_HEADER_OFFSET 512 +#define MAX_ADDR_TESTED (1<<28) +#define TEST_ROUNDS 100 + +/* + 0F 1A /r BNDLDX-Load + 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation + 66 0F 1A /r BNDMOV bnd1, bnd2/m128 + 66 0F 1B /r BNDMOV bnd1/m128, bnd2 + F2 0F 1A /r BNDCU bnd, r/m64 + F2 0F 1B /r BNDCN bnd, r/m64 + F3 0F 1A /r BNDCL bnd, r/m64 + F3 0F 1B /r BNDMK bnd, m64 +*/ + +static __always_inline void xsave_state(void *_fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + unsigned char *fx = _fx; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static __always_inline void mpx_clear_bnd0(void) +{ + long size = 0; + void *ptr = NULL; + /* F3 0F 1B /r BNDMK bnd, m64 */ + /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr), "d" (size-1) + : "memory"); +} + +static __always_inline void mpx_make_bound_helper(unsigned long ptr, + unsigned long size) +{ + /* F3 0F 1B /r BNDMK bnd, m64 */ + /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr), "d" (size-1) + : "memory"); +} + +static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr) +{ + /* F3 0F 1A /r NDCL bnd, r/m64 */ + /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t" + : : "c" (ptr) + : "memory"); +} + +static __always_inline void mpx_check_upperbound_helper(unsigned long ptr) +{ + /* F2 0F 1A /r BNDCU bnd, r/m64 */ + /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */ + asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t" + : : "c" (ptr) + : "memory"); +} + +static __always_inline void mpx_movbndreg_helper() +{ + /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ + /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */ + + asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t"); +} + +static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem) +{ + /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ + /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */ + asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t" + : : "c" (mem) + : "memory"); +} + +static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem) +{ + /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */ + /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */ + asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t" + : : "c" (mem) + : "memory"); +} + +static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr, + unsigned long ptr_val) +{ + /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */ + /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */ + asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr_addr), "d" (ptr_val) + : "memory"); +} + +static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr, + unsigned long ptr_val) +{ + /* 0F 1A /r BNDLDX-Load */ + /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t" + : : "c" (ptr_addr), "d" (ptr_val) + : "memory"); +} + +void __print_context(void *__print_xsave_buffer, int line) +{ + uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET); + uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET); + + int i; + eprintf("%s()::%d\n", "print_context", line); + for (i = 0; i < 4; i++) { + eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i, + (unsigned long)bounds[i*2], + ~(unsigned long)bounds[i*2+1], + (unsigned long)bounds[i*2+1]); + } + + eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]); +} +#define print_context(x) __print_context(x, __LINE__) +#ifdef DEBUG +#define dprint_context(x) print_context(x) +#else +#define dprint_context(x) do{}while(0) +#endif + +void init() +{ + int i; + + srand((unsigned int)time(NULL)); + + for (i = 0; i < 4; i++) { + shadow_plb[i][0] = 0; + shadow_plb[i][1] = ~(unsigned long)0; + } +} + +long int __mpx_random(int line) +{ +#ifdef NOT_SO_RANDOM + static long fake = 722122311; + fake += 563792075; + return fakse; +#else + return random(); +#endif +} +#define mpx_random() __mpx_random(__LINE__) + +uint8_t *get_random_addr() +{ + uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED); + return (addr - (unsigned long)addr % sizeof(uint8_t *)); +} + +static inline bool compare_context(void *__xsave_buffer) +{ + uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET); + + int i; + for (i = 0; i < 4; i++) { + dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n", + i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1], + i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]); + if ((shadow_plb[i][0] != bounds[i*2]) || + (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) { + eprintf("ERROR comparing shadow to real bound register %d\n", i); + eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n", + (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1], + (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]); + return false; + } + } + + return true; +} + +void mkbnd_shadow(uint8_t *ptr, int index, long offset) +{ + uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]); + uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]); + *lower = (unsigned long)ptr; + *upper = (unsigned long)ptr + offset - 1; +} + +void check_lowerbound_shadow(uint8_t *ptr, int index) +{ + uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]); + if (*lower > (uint64_t)(unsigned long)ptr) + num_lower_brs++; + else + dprintf1("LowerBoundChk passed:%p\n", ptr); +} + +void check_upperbound_shadow(uint8_t *ptr, int index) +{ + uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]); + if (upper < (uint64_t)(unsigned long)ptr) + num_upper_brs++; + else + dprintf1("UpperBoundChk passed:%p\n", ptr); +} + +__always_inline void movbndreg_shadow(int src, int dest) +{ + shadow_plb[dest][0] = shadow_plb[src][0]; + shadow_plb[dest][1] = shadow_plb[src][1]; +} + +__always_inline void movbnd2mem_shadow(int src, unsigned long *dest) +{ + unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]); + unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]); + *dest = *lower; + *(dest+1) = *upper; +} + +__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest) +{ + unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]); + unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]); + *lower = *src; + *upper = *(src+1); +} + +__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) +{ + shadow_map[0] = (unsigned long)shadow_plb[index][0]; + shadow_map[1] = (unsigned long)shadow_plb[index][1]; + shadow_map[2] = (unsigned long)ptr_val; + dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__, + index, ptr, ptr_val, ptr_val); + /*ptr ignored */ +} + +void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) +{ + uint64_t lower = shadow_map[0]; + uint64_t upper = shadow_map[1]; + uint8_t *value = (uint8_t *)shadow_map[2]; + + if (value != ptr_val) { + dprintf2("%s(%d, %p, %p) init shadow bounds[%d] " + "because %p != %p\n", __func__, index, ptr, + ptr_val, index, value, ptr_val); + shadow_plb[index][0] = 0; + shadow_plb[index][1] = ~(unsigned long)0; + } else { + shadow_plb[index][0] = lower; + shadow_plb[index][1] = upper; + } + /* ptr ignored */ +} + +static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr) +{ + mpx_make_bound_helper((unsigned long)ptr, 0x1800); +} + +static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr) +{ + mkbnd_shadow(ptr, 0, 0x1800); +} + +static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr) +{ + /* these are hard-coded to check bnd0 */ + expected_bnd_index = 0; + mpx_check_lowerbound_helper((unsigned long)(ptr-1)); + mpx_check_upperbound_helper((unsigned long)(ptr+0x1800)); + /* reset this since we do not expect any more bounds exceptions */ + expected_bnd_index = -1; +} + +static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr) +{ + check_lowerbound_shadow(ptr-1, 0); + check_upperbound_shadow(ptr+0x1800, 0); +} + +static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr) +{ + mpx_make_bound_helper((unsigned long)ptr, 0x1800); + mpx_movbndreg_helper(); + mpx_movbnd2mem_helper(buf); + mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); +} + +static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr) +{ + mkbnd_shadow(ptr, 0, 0x1800); + movbndreg_shadow(0, 2); + movbnd2mem_shadow(0, (unsigned long *)buf); + mkbnd_shadow(ptr+0x12, 0, 0x1800); +} + +static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr) +{ + mpx_movbnd_from_mem_helper(buf); +} + +static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr) +{ + movbnd_from_mem_shadow((unsigned long *)buf, 0); +} + +static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr) +{ + mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr); + mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); +} + +static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr) +{ + stdsc_shadow(0, buf, ptr); + mkbnd_shadow(ptr+0x12, 0, 0x1800); +} + +static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr) +{ + mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr); +} + +static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr) +{ + lddsc_shadow(0, buf, ptr); +} + +#define NR_MPX_TEST_FUNCTIONS 6 + +/* + * For compatibility reasons, MPX will clear the bounds registers + * when you make function calls (among other things). We have to + * preserve the registers in between calls to the "helpers" since + * they build on each other. + * + * Be very careful not to make any function calls inside the + * helpers, or anywhere else beween the xrstor and xsave. + */ +#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \ + xrstor_state(xsave_test_buf, flags); \ + mpx_test_helper##helper_nr(buf, ptr); \ + xsave_state(xsave_test_buf, flags); \ + mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \ +} while (0) + +static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr) +{ + uint64_t flags = 0x18; + + dprint_context(xsave_test_buf); + switch (nr) { + case 0: + run_helper(0, buf, buf_shadow, ptr); + break; + case 1: + run_helper(1, buf, buf_shadow, ptr); + break; + case 2: + run_helper(2, buf, buf_shadow, ptr); + break; + case 3: + run_helper(3, buf, buf_shadow, ptr); + break; + case 4: + run_helper(4, buf, buf_shadow, ptr); + break; + case 5: + run_helper(5, buf, buf_shadow, ptr); + break; + default: + test_failed(); + break; + } + dprint_context(xsave_test_buf); +} + +unsigned long buf_shadow[1024]; /* used to check load / store descriptors */ +extern long inspect_me(struct mpx_bounds_dir *bounds_dir); + +long cover_buf_with_bt_entries(void *buf, long buf_len) +{ + int i; + long nr_to_fill; + int ratio = 1000; + unsigned long buf_len_in_ptrs; + + /* Fill about 1/100 of the space with bt entries */ + nr_to_fill = buf_len / (sizeof(unsigned long) * ratio); + + if (!nr_to_fill) + dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill); + + /* Align the buffer to pointer size */ + while (((unsigned long)buf) % sizeof(void *)) { + buf++; + buf_len--; + } + /* We are storing pointers, so make */ + buf_len_in_ptrs = buf_len / sizeof(void *); + + for (i = 0; i < nr_to_fill; i++) { + long index = (mpx_random() % buf_len_in_ptrs); + void *ptr = buf + index * sizeof(unsigned long); + unsigned long ptr_addr = (unsigned long)ptr; + + /* ptr and size can be anything */ + mpx_make_bound_helper((unsigned long)ptr, 8); + + /* + * take bnd0 and put it in to bounds tables "buf + index" is an + * address inside the buffer where we are pretending that we + * are going to put a pointer We do not, though because we will + * never load entries from the table, so it doesn't matter. + */ + mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr); + dprintf4("storing bound table entry for %lx (buf start @ %p)\n", + ptr_addr, buf); + } + return nr_to_fill; +} + +unsigned long align_down(unsigned long alignme, unsigned long align_to) +{ + return alignme & ~(align_to-1); +} + +unsigned long align_up(unsigned long alignme, unsigned long align_to) +{ + return (alignme + align_to - 1) & ~(align_to-1); +} + +/* + * Using 1MB alignment guarantees that each no allocation + * will overlap with another's bounds tables. + * + * We have to cook our own allocator here. malloc() can + * mix other allocation with ours which means that even + * if we free all of our allocations, there might still + * be bounds tables for the *areas* since there is other + * valid memory there. + * + * We also can't use malloc() because a free() of an area + * might not free it back to the kernel. We want it + * completely unmapped an malloc() does not guarantee + * that. + */ +#ifdef __i386__ +long alignment = 4096; +long sz_alignment = 4096; +#else +long alignment = 1 * MB; +long sz_alignment = 1 * MB; +#endif +void *mpx_mini_alloc(unsigned long sz) +{ + unsigned long long tries = 0; + static void *last; + void *ptr; + void *try_at; + + sz = align_up(sz, sz_alignment); + + try_at = last + alignment; + while (1) { + ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (ptr == (void *)-1) + return NULL; + if (ptr == try_at) + break; + + munmap(ptr, sz); + try_at += alignment; +#ifdef __i386__ + /* + * This isn't quite correct for 32-bit binaries + * on 64-bit kernels since they can use the + * entire 32-bit address space, but it's close + * enough. + */ + if (try_at > (void *)0xC0000000) +#else + if (try_at > (void *)0x0000800000000000) +#endif + try_at = (void *)0x0; + if (!(++tries % 10000)) + dprintf1("stuck in %s(), tries: %lld\n", __func__, tries); + continue; + } + last = ptr; + dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr); + return ptr; +} +void mpx_mini_free(void *ptr, long sz) +{ + dprintf2("%s() ptr: %p\n", __func__, ptr); + if ((unsigned long)ptr > 0x100000000000) { + dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr); + test_failed(); + } + sz = align_up(sz, sz_alignment); + dprintf3("%s() ptr: %p before munmap\n", __func__, ptr); + munmap(ptr, sz); + dprintf3("%s() ptr: %p DONE\n", __func__, ptr); +} + +#define NR_MALLOCS 100 +struct one_malloc { + char *ptr; + int nr_filled_btes; + unsigned long size; +}; +struct one_malloc mallocs[NR_MALLOCS]; + +void free_one_malloc(int index) +{ + unsigned long free_ptr; + unsigned long mask; + + if (!mallocs[index].ptr) + return; + + mpx_mini_free(mallocs[index].ptr, mallocs[index].size); + dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr); + + free_ptr = (unsigned long)mallocs[index].ptr; + mask = alignment-1; + dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr, + (free_ptr & mask), mask); + assert((free_ptr & mask) == 0); + + mallocs[index].ptr = NULL; +} + +#ifdef __i386__ +#define MPX_BOUNDS_TABLE_COVERS 4096 +#else +#define MPX_BOUNDS_TABLE_COVERS (1 * MB) +#endif +void zap_everything(void) +{ + long after_zap; + long before_zap; + int i; + + before_zap = inspect_me(bounds_dir_ptr); + dprintf1("zapping everything start: %ld\n", before_zap); + for (i = 0; i < NR_MALLOCS; i++) + free_one_malloc(i); + + after_zap = inspect_me(bounds_dir_ptr); + dprintf1("zapping everything done: %ld\n", after_zap); + /* + * We only guarantee to empty the thing out if our allocations are + * exactly aligned on the boundaries of a boudns table. + */ + if ((alignment >= MPX_BOUNDS_TABLE_COVERS) && + (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) { + if (after_zap != 0) + test_failed(); + + assert(after_zap == 0); + } +} + +void do_one_malloc(void) +{ + static int malloc_counter; + long sz; + int rand_index = (mpx_random() % NR_MALLOCS); + void *ptr = mallocs[rand_index].ptr; + + dprintf3("%s() enter\n", __func__); + + if (ptr) { + dprintf3("freeing one malloc at index: %d\n", rand_index); + free_one_malloc(rand_index); + if (mpx_random() % (NR_MALLOCS*3) == 3) { + int i; + dprintf3("zapping some more\n"); + for (i = rand_index; i < NR_MALLOCS; i++) + free_one_malloc(i); + } + if ((mpx_random() % zap_all_every_this_many_mallocs) == 4) + zap_everything(); + } + + /* 1->~1M */ + sz = (1 + mpx_random() % 1000) * 1000; + ptr = mpx_mini_alloc(sz); + if (!ptr) { + /* + * If we are failing allocations, just assume we + * are out of memory and zap everything. + */ + dprintf3("zapping everything because out of memory\n"); + zap_everything(); + goto out; + } + + dprintf3("malloc: %p size: 0x%lx\n", ptr, sz); + mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz); + mallocs[rand_index].ptr = ptr; + mallocs[rand_index].size = sz; +out: + if ((++malloc_counter) % inspect_every_this_many_mallocs == 0) + inspect_me(bounds_dir_ptr); +} + +void run_timed_test(void (*test_func)(void)) +{ + int done = 0; + long iteration = 0; + static time_t last_print; + time_t now; + time_t start; + + time(&start); + while (!done) { + time(&now); + if ((now - start) > TEST_DURATION_SECS) + done = 1; + + test_func(); + iteration++; + + if ((now - last_print > 1) || done) { + printf("iteration %ld complete, OK so far\n", iteration); + last_print = now; + } + } +} + +void check_bounds_table_frees(void) +{ + printf("executing unmaptest\n"); + inspect_me(bounds_dir_ptr); + run_timed_test(&do_one_malloc); + printf("done with malloc() fun\n"); +} + +void insn_test_failed(int test_nr, int test_round, void *buf, + void *buf_shadow, void *ptr) +{ + print_context(xsave_test_buf); + eprintf("ERROR: test %d round %d failed\n", test_nr, test_round); + while (test_nr == 5) { + struct mpx_bt_entry *bte; + struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr; + struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd); + + printf(" bd: %p\n", bd); + printf("&bde: %p\n", bde); + printf("*bde: %lx\n", *(unsigned long *)bde); + if (!bd_entry_valid(bde)) + break; + + bte = mpx_vaddr_to_bt_entry(buf, bd); + printf(" te: %p\n", bte); + printf("bte[0]: %lx\n", bte->contents[0]); + printf("bte[1]: %lx\n", bte->contents[1]); + printf("bte[2]: %lx\n", bte->contents[2]); + printf("bte[3]: %lx\n", bte->contents[3]); + break; + } + test_failed(); +} + +void check_mpx_insns_and_tables(void) +{ + int successes = 0; + int failures = 0; + int buf_size = (1024*1024); + unsigned long *buf = malloc(buf_size); + const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS; + int i, j; + + memset(buf, 0, buf_size); + memset(buf_shadow, 0, sizeof(buf_shadow)); + + for (i = 0; i < TEST_ROUNDS; i++) { + uint8_t *ptr = get_random_addr() + 8; + + for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) { + if (0 && j != 5) { + successes++; + continue; + } + dprintf2("starting test %d round %d\n", j, i); + dprint_context(xsave_test_buf); + /* + * test5 loads an address from the bounds tables. + * The load will only complete if 'ptr' matches + * the load and the store, so with random addrs, + * the odds of this are very small. Make it + * higher by only moving 'ptr' 1/10 times. + */ + if (random() % 10 <= 0) + ptr = get_random_addr() + 8; + dprintf3("random ptr{%p}\n", ptr); + dprint_context(xsave_test_buf); + run_helpers(j, (void *)buf, (void *)buf_shadow, ptr); + dprint_context(xsave_test_buf); + if (!compare_context(xsave_test_buf)) { + insn_test_failed(j, i, buf, buf_shadow, ptr); + failures++; + goto exit; + } + successes++; + dprint_context(xsave_test_buf); + dprintf2("finished test %d round %d\n", j, i); + dprintf3("\n"); + dprint_context(xsave_test_buf); + } + } + +exit: + dprintf2("\nabout to free:\n"); + free(buf); + dprintf1("successes: %d\n", successes); + dprintf1(" failures: %d\n", failures); + dprintf1(" tests: %d\n", total_nr_tests); + dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); + dprintf1(" saw: %d #BRs\n", br_count); + if (failures) { + eprintf("ERROR: non-zero number of failures\n"); + exit(20); + } + if (successes != total_nr_tests) { + eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n", + successes, total_nr_tests); + exit(21); + } + if (num_upper_brs + num_lower_brs != br_count) { + eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n", + num_upper_brs, num_lower_brs, br_count); + eprintf("successes: %d\n", successes); + eprintf(" failures: %d\n", failures); + eprintf(" tests: %d\n", total_nr_tests); + eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); + eprintf(" saw: %d #BRs\n", br_count); + exit(22); + } +} + +/* + * This is supposed to SIGSEGV nicely once the kernel + * can no longer allocate vaddr space. + */ +void exhaust_vaddr_space(void) +{ + unsigned long ptr; + /* Try to make sure there is no room for a bounds table anywhere */ + unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE; +#ifdef __i386__ + unsigned long max_vaddr = 0xf7788000UL; +#else + unsigned long max_vaddr = 0x800000000000UL; +#endif + + dprintf1("%s() start\n", __func__); + /* do not start at 0, we aren't allowed to map there */ + for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { + void *ptr_ret; + int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL); + + if (!ret) { + dprintf1("madvise() %lx ret: %d\n", ptr, ret); + continue; + } + ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (ptr_ret != (void *)ptr) { + perror("mmap"); + dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); + break; + } + if (!(ptr & 0xffffff)) + dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); + } + for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { + dprintf2("covering 0x%lx with bounds table entries\n", ptr); + cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE); + } + dprintf1("%s() end\n", __func__); + printf("done with vaddr space fun\n"); +} + +void mpx_table_test(void) +{ + printf("starting mpx bounds table test\n"); + run_timed_test(check_mpx_insns_and_tables); + printf("done with mpx bounds table test\n"); +} + +int main(int argc, char **argv) +{ + int unmaptest = 0; + int vaddrexhaust = 0; + int tabletest = 0; + int i; + + check_mpx_support(); + mpx_prepare(); + srandom(11179); + + bd_incore(); + init(); + bd_incore(); + + trace_me(); + + xsave_state((void *)xsave_test_buf, 0x1f); + if (!compare_context(xsave_test_buf)) + printf("Init failed\n"); + + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "unmaptest")) + unmaptest = 1; + if (!strcmp(argv[i], "vaddrexhaust")) + vaddrexhaust = 1; + if (!strcmp(argv[i], "tabletest")) + tabletest = 1; + } + if (!(unmaptest || vaddrexhaust || tabletest)) { + unmaptest = 1; + /* vaddrexhaust = 1; */ + tabletest = 1; + } + if (unmaptest) + check_bounds_table_frees(); + if (tabletest) + mpx_table_test(); + if (vaddrexhaust) + exhaust_vaddr_space(); + printf("%s completed successfully\n", argv[0]); + exit(0); +} + +#include "mpx-dig.c" diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h new file mode 100644 index 0000000..af706a5 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-mm.h @@ -0,0 +1,9 @@ +#ifndef _MPX_MM_H +#define _MPX_MM_H + +#define PAGE_SIZE 4096 +#define MB (1UL<<20) + +extern long nr_incore(void *ptr, unsigned long size_bytes); + +#endif /* _MPX_MM_H */ -- cgit v1.1 From b914bb55f2abd66937c23fa0b89becaf9bcceb37 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 14 Jun 2016 21:33:44 +0530 Subject: ASoC: Intel: Skylake: Initialize module list for Broxton The module list was not initialized for Broxton DSP code, so initialize it. Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/bxt-sst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/bxt-sst.c b/sound/soc/intel/skylake/bxt-sst.c index 965ce40..8b95e09 100644 --- a/sound/soc/intel/skylake/bxt-sst.c +++ b/sound/soc/intel/skylake/bxt-sst.c @@ -291,6 +291,7 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, sst_dsp_mailbox_init(sst, (BXT_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ), SKL_ADSP_W0_UP_SZ, BXT_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ); + INIT_LIST_HEAD(&sst->module_list); ret = skl_ipc_init(dev, skl); if (ret) return ret; -- cgit v1.1 From 50c7a0ef2d97e56c7ce2f1ea5fe1d8e25aadc1bb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Jun 2016 14:34:32 +0200 Subject: ASoC: wm8940: Enable cache usage to fix crashes on resume The wm8940 driver is using a regmap cache sync to restore the configuration of the chip when switching from OFF to STANDBY, but does not actually define a register cache which means that the restore is never going to work and we trigger asserts in regmap. Fix this by enabling caching. Based on commit d3030d11961a8c10 ("ASoC: ak4642: Enable cache usage to fix crashes on resume") by Mark Brown . Signed-off-by: Geert Uytterhoeven Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm8940.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index f6f9395..1c60081 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -743,6 +743,7 @@ static const struct regmap_config wm8940_regmap = { .max_register = WM8940_MONOMIX, .reg_defaults = wm8940_reg_defaults, .num_reg_defaults = ARRAY_SIZE(wm8940_reg_defaults), + .cache_type = REGCACHE_RBTREE, .readable_reg = wm8940_readable_register, .volatile_reg = wm8940_volatile_register, -- cgit v1.1 From dcd2d1f78664fdc75eadaaf65257834e24383d01 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Jun 2016 14:34:30 +0200 Subject: ASoC: ak4613: Enable cache usage to fix crashes on resume During system resume: kernel BUG at drivers/base/regmap/regcache.c:347! ... PC is at regcache_sync+0x1c/0x128 LR is at ak4613_resume+0x28/0x34 The ak4613 driver is using a regmap cache sync to restore the configuration of the chip on resume but does not actually define a register cache which means that the resume is never going to work and we trigger asserts in regmap. Fix this by enabling caching. Based on commit d3030d11961a8c10 ("ASoC: ak4642: Enable cache usage to fix crashes on resume") by Mark Brown . Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- sound/soc/codecs/ak4613.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index 33d2f2e1..5013d2b 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -146,6 +146,7 @@ static const struct regmap_config ak4613_regmap_cfg = { .max_register = 0x16, .reg_defaults = ak4613_reg, .num_reg_defaults = ARRAY_SIZE(ak4613_reg), + .cache_type = REGCACHE_RBTREE, }; static const struct of_device_id ak4613_of_match[] = { -- cgit v1.1 From 86c0ae7cde7a8699116b365a5e3016753f0cd92a Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 16 Jun 2016 22:04:35 +0200 Subject: ASoC: cx20442: set tty->receiver_room in v253_open Commit 79901317ce80 ("n_tty: Don't flush buffer when closing ldisc"), introduced in v3.10, revealed a bug in the cx20442 codec driver which has never been setting tty->receive_room on line discipline open as it should from the beginning. Fix it. Created and tested on Amstrad Delta against Linux-4.7-rc3 Signed-off-by: Janusz Krzysztofik Signed-off-by: Mark Brown --- sound/soc/codecs/cx20442.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index d6f4abb..fb3885f 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -226,6 +226,7 @@ static int v253_open(struct tty_struct *tty) if (!tty->disc_data) return -ENODEV; + tty->receive_room = 16; if (tty->ops->write(tty, v253_init, len) != len) { ret = -EIO; goto err; -- cgit v1.1 From c39341cf0d08357f448f4c2fffe2ebcc9495fd01 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:21 +0200 Subject: ecryptfs: drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); // Signed-off-by: Julia Lawall Signed-off-by: Tyler Hicks --- fs/ecryptfs/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1698132..6120044 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -738,8 +738,7 @@ static void ecryptfs_free_kmem_caches(void) struct ecryptfs_cache_info *info; info = &ecryptfs_cache_infos[i]; - if (*(info->cache)) - kmem_cache_destroy(*(info->cache)); + kmem_cache_destroy(*(info->cache)); } } -- cgit v1.1 From 5f9f2c2abd16fcea6cf7cf87791a24687e2fc345 Mon Sep 17 00:00:00 2001 From: Wei Yuan Date: Wed, 17 Feb 2016 14:50:10 +0800 Subject: eCryptfs: fix typos in comment Signed-off-by: Weiyuan Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 0d8eb34..6a69b55 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -45,7 +45,7 @@ * ecryptfs_to_hex * @dst: Buffer to take hex character representation of contents of * src; must be at least of size (src_size * 2) - * @src: Buffer to be converted to a hex string respresentation + * @src: Buffer to be converted to a hex string representation * @src_size: number of bytes to convert */ void ecryptfs_to_hex(char *dst, char *src, size_t src_size) @@ -60,7 +60,7 @@ void ecryptfs_to_hex(char *dst, char *src, size_t src_size) * ecryptfs_from_hex * @dst: Buffer to take the bytes from src hex; must be at least of * size (src_size / 2) - * @src: Buffer to be converted from a hex string respresentation to raw value + * @src: Buffer to be converted from a hex string representation to raw value * @dst_size: size of dst buffer, or number of hex characters pairs to convert */ void ecryptfs_from_hex(char *dst, char *src, int dst_size) -- cgit v1.1 From 40f0fd372a623e8d32bae0b9361d2a7453ae7a2e Mon Sep 17 00:00:00 2001 From: Chris J Arges Date: Thu, 9 Jun 2016 15:31:29 -0500 Subject: ecryptfs: fix spelling mistakes Noticed some minor spelling errors when looking through the code. Signed-off-by: Chris J Arges Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 4 ++-- fs/ecryptfs/file.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 6a69b55..e5e29f8 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -953,7 +953,7 @@ struct ecryptfs_cipher_code_str_map_elem { }; /* Add support for additional ciphers by adding elements here. The - * cipher_code is whatever OpenPGP applicatoins use to identify the + * cipher_code is whatever OpenPGP applications use to identify the * ciphers. List in order of probability. */ static struct ecryptfs_cipher_code_str_map_elem ecryptfs_cipher_code_str_map[] = { @@ -1410,7 +1410,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, * * Common entry point for reading file metadata. From here, we could * retrieve the header information from the header region of the file, - * the xattr region of the file, or some other repostory that is + * the xattr region of the file, or some other repository that is * stored separately from the file itself. The current implementation * supports retrieving the metadata information from the file contents * and from the xattr region. diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 7000b96..53d0141 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -171,7 +171,7 @@ out: /** * ecryptfs_open - * @inode: inode speciying file to open + * @inode: inode specifying file to open * @file: Structure to return filled in * * Opens the file specified by inode. @@ -240,7 +240,7 @@ out: /** * ecryptfs_dir_open - * @inode: inode speciying file to open + * @inode: inode specifying file to open * @file: Structure to return filled in * * Opens the file specified by inode. -- cgit v1.1 From 81e43960dce1c8e58e682fb3ec26c1d8f83a9afc Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 25 Jun 2016 07:58:45 -0400 Subject: ALSA: hda - fix read before array start UBSAN reports the following warning from accessing path->path[-1] in set_path_power(): [ 16.078040] ================================================================================ [ 16.078124] UBSAN: Undefined behaviour in sound/pci/hda/hda_generic.c:3981:17 [ 16.078198] index -1 is out of range for type 'hda_nid_t [10]' [ 16.078270] CPU: 2 PID: 1738 Comm: modprobe Not tainted 4.7.0-rc1-wt+ #47 [ 16.078274] Hardware name: LENOVO 3443CTO/3443CTO, BIOS G6ET23WW (1.02 ) 08/14/2012 [ 16.078278] ffff8800cb246000 ffff8800cb3638b8 ffffffff815c4fe3 0000000000000032 [ 16.078286] ffff8800cb3638e0 ffffffffffffffff ffff8800cb3638d0 ffffffff8162443d [ 16.078294] ffffffffa0894200 ffff8800cb363920 ffffffff81624af7 0000000000000292 [ 16.078302] Call Trace: [ 16.078311] [] dump_stack+0x86/0xd3 [ 16.078317] [] ubsan_epilogue+0xd/0x40 [ 16.078324] [] __ubsan_handle_out_of_bounds+0x67/0x70 [ 16.078335] [] set_path_power+0x1bf/0x230 [snd_hda_codec_generic] [ 16.078344] [] add_pin_power_ctls+0x8d/0xc0 [snd_hda_codec_generic] [ 16.078352] [] ? pin_power_down_callback+0x20/0x20 [snd_hda_codec_generic] [ 16.078360] [] add_all_pin_power_ctls+0x107/0x150 [snd_hda_codec_generic] [ 16.078370] [] snd_hda_gen_parse_auto_config+0x2d73/0x49e0 [snd_hda_codec_generic] [ 16.078376] [] ? trace_hardirqs_on_caller+0x1b0/0x2c0 [ 16.078390] [] alc_parse_auto_config+0x147/0x310 [snd_hda_codec_realtek] [ 16.078402] [] patch_alc269+0x23a/0x560 [snd_hda_codec_realtek] [ 16.078417] [] hda_codec_driver_probe+0xa4/0x1a0 [snd_hda_codec] [ 16.078424] [] driver_probe_device+0x101/0x380 [ 16.078430] [] __driver_attach+0xb9/0x100 [ 16.078438] [] ? driver_probe_device+0x380/0x380 [ 16.078444] [] bus_for_each_dev+0x70/0xc0 [ 16.078449] [] driver_attach+0x27/0x50 [ 16.078454] [] bus_add_driver+0x166/0x2c0 [ 16.078460] [] ? 0xffffffffa0369000 [ 16.078465] [] driver_register+0x7d/0x130 [ 16.078477] [] __hda_codec_driver_register+0x6f/0x90 [snd_hda_codec] [ 16.078488] [] realtek_driver_init+0x1e/0x1000 [snd_hda_codec_realtek] [ 16.078493] [] do_one_initcall+0x4e/0x1d0 [ 16.078499] [] ? rcu_read_lock_sched_held+0x6d/0x80 [ 16.078504] [] ? kmem_cache_alloc_trace+0x391/0x560 [ 16.078510] [] ? do_init_module+0x28/0x273 [ 16.078515] [] do_init_module+0x9b/0x273 [ 16.078522] [] load_module+0x20b2/0x3410 [ 16.078527] [] ? m_show+0x210/0x210 [ 16.078533] [] ? kernel_read+0x66/0xe0 [ 16.078541] [] SYSC_finit_module+0xba/0xc0 [ 16.078547] [] SyS_finit_module+0xe/0x10 [ 16.078552] [] entry_SYSCALL_64_fastpath+0x1f/0xbd [ 16.078556] ================================================================================ Fix by checking path->depth before use. Signed-off-by: Bob Copeland Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 320445f..79c7b34 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3977,6 +3977,8 @@ static hda_nid_t set_path_power(struct hda_codec *codec, hda_nid_t nid, for (n = 0; n < spec->paths.used; n++) { path = snd_array_elem(&spec->paths, n); + if (!path->depth) + continue; if (path->path[0] == nid || path->path[path->depth - 1] == nid) { bool pin_old = path->pin_enabled; -- cgit v1.1 From 18f5839932e5c8c019f394d433dc5efcd6ae737a Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 23 Jun 2016 22:07:03 +0530 Subject: ASoC: Intel: atom: fix missing breaks that would cause the wrong operation to execute Now we correctly error an attempt to execute an unsupported operation. Signed-off-by: Alan Cox Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-mfld-platform-compress.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-compress.c b/sound/soc/intel/atom/sst-mfld-platform-compress.c index 3951689..1bead81 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-compress.c +++ b/sound/soc/intel/atom/sst-mfld-platform-compress.c @@ -182,24 +182,29 @@ static int sst_platform_compr_trigger(struct snd_compr_stream *cstream, int cmd) case SNDRV_PCM_TRIGGER_START: if (stream->compr_ops->stream_start) return stream->compr_ops->stream_start(sst->dev, stream->id); + break; case SNDRV_PCM_TRIGGER_STOP: if (stream->compr_ops->stream_drop) return stream->compr_ops->stream_drop(sst->dev, stream->id); + break; case SND_COMPR_TRIGGER_DRAIN: if (stream->compr_ops->stream_drain) return stream->compr_ops->stream_drain(sst->dev, stream->id); + break; case SND_COMPR_TRIGGER_PARTIAL_DRAIN: if (stream->compr_ops->stream_partial_drain) return stream->compr_ops->stream_partial_drain(sst->dev, stream->id); + break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: if (stream->compr_ops->stream_pause) return stream->compr_ops->stream_pause(sst->dev, stream->id); + break; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (stream->compr_ops->stream_pause_release) return stream->compr_ops->stream_pause_release(sst->dev, stream->id); - default: - return -EINVAL; + break; } + return -EINVAL; } static int sst_platform_compr_pointer(struct snd_compr_stream *cstream, -- cgit v1.1 From 7dd4912594daf769a46744848b05bd5bc6d62469 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jun 2016 15:53:54 +0200 Subject: sched/fair: Fix effective_load() to consistently use smoothed load Starting with the following commit: fde7d22e01aa ("sched/fair: Fix overly small weight for interactive group entities") calc_tg_weight() doesn't compute the right value as expected by effective_load(). The difference is in the 'correction' term. In order to ensure \Sum rw_j >= rw_i we cannot use tg->load_avg directly, since that might be lagging a correction on the current cfs_rq->avg.load_avg value. Therefore we use tg->load_avg - cfs_rq->tg_load_avg_contrib + cfs_rq->avg.load_avg. Now, per the referenced commit, calc_tg_weight() doesn't use cfs_rq->avg.load_avg, as is later used in @w, but uses cfs_rq->load.weight instead. So stop using calc_tg_weight() and do it explicitly. The effects of this bug are wake_affine() making randomly poor choices in cgroup-intense workloads. Signed-off-by: Peter Zijlstra (Intel) Cc: # v4.3+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: fde7d22e01aa ("sched/fair: Fix overly small weight for interactive group entities") Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bdcbeea..cc48bef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -735,8 +735,6 @@ void post_init_entity_util_avg(struct sched_entity *se) } } -static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq); -static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq); #else void init_entity_runnable_average(struct sched_entity *se) { @@ -4946,19 +4944,24 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) return wl; for_each_sched_entity(se) { - long w, W; + struct cfs_rq *cfs_rq = se->my_q; + long W, w = cfs_rq_load_avg(cfs_rq); - tg = se->my_q->tg; + tg = cfs_rq->tg; /* * W = @wg + \Sum rw_j */ - W = wg + calc_tg_weight(tg, se->my_q); + W = wg + atomic_long_read(&tg->load_avg); + + /* Ensure \Sum rw_j >= rw_i */ + W -= cfs_rq->tg_load_avg_contrib; + W += w; /* * w = rw_i + @wl */ - w = cfs_rq_load_avg(se->my_q) + wl; + w += wl; /* * wl = S * s'_i; see (2) -- cgit v1.1 From ea1dc6fc6242f991656e35e2ed3d90ec1cd13418 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jun 2016 16:11:02 +0200 Subject: sched/fair: Fix calc_cfs_shares() fixed point arithmetics width confusion Commit: fde7d22e01aa ("sched/fair: Fix overly small weight for interactive group entities") did something non-obvious but also did it buggy yet latent. The problem was exposed for real by a later commit in the v4.7 merge window: 2159197d6677 ("sched/core: Enable increased load resolution on 64-bit kernels") ... after which tg->load_avg and cfs_rq->load.weight had different units (10 bit fixed point and 20 bit fixed point resp.). Add a comment to explain the use of cfs_rq->load.weight over the 'natural' cfs_rq->avg.load_avg and add scale_load_down() to correct for the difference in unit. Since this is (now, as per a previous commit) the only user of calc_tg_weight(), collapse it. The effects of this bug should be randomly inconsistent SMP-balancing of cgroups workloads. Reported-by: Jirka Hladky Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 2159197d6677 ("sched/core: Enable increased load resolution on 64-bit kernels") Fixes: fde7d22e01aa ("sched/fair: Fix overly small weight for interactive group entities") Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cc48bef..c8c5d2d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2497,28 +2497,22 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) #ifdef CONFIG_FAIR_GROUP_SCHED # ifdef CONFIG_SMP -static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) +static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { - long tg_weight; + long tg_weight, load, shares; /* - * Use this CPU's real-time load instead of the last load contribution - * as the updating of the contribution is delayed, and we will use the - * the real-time load to calc the share. See update_tg_load_avg(). + * This really should be: cfs_rq->avg.load_avg, but instead we use + * cfs_rq->load.weight, which is its upper bound. This helps ramp up + * the shares for small weight interactive tasks. */ - tg_weight = atomic_long_read(&tg->load_avg); - tg_weight -= cfs_rq->tg_load_avg_contrib; - tg_weight += cfs_rq->load.weight; - - return tg_weight; -} + load = scale_load_down(cfs_rq->load.weight); -static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) -{ - long tg_weight, load, shares; + tg_weight = atomic_long_read(&tg->load_avg); - tg_weight = calc_tg_weight(tg, cfs_rq); - load = cfs_rq->load.weight; + /* Ensure tg_weight >= load */ + tg_weight -= cfs_rq->tg_load_avg_contrib; + tg_weight += load; shares = (tg->shares * load); if (tg_weight) @@ -2537,6 +2531,7 @@ static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) return tg->shares; } # endif /* CONFIG_SMP */ + static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { -- cgit v1.1 From 9c6795a9b3cbb56a9fbfaf43909c5c22999ba317 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 27 Jun 2016 21:06:51 +0200 Subject: ALSA: echoaudio: Fix memory allocation 'commpage_bak' is allocated with 'sizeof(struct echoaudio)' bytes. We then copy 'sizeof(struct comm_page)' bytes in it. On my system, smatch complains because one is 2960 and the other is 3072. This would result in memory corruption or a oops. Signed-off-by: Christophe JAILLET Cc: Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 1cb85ae..286f5e3 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -2200,11 +2200,11 @@ static int snd_echo_resume(struct device *dev) u32 pipe_alloc_mask; int err; - commpage_bak = kmalloc(sizeof(struct echoaudio), GFP_KERNEL); + commpage_bak = kmalloc(sizeof(*commpage), GFP_KERNEL); if (commpage_bak == NULL) return -ENOMEM; commpage = chip->comm_page; - memcpy(commpage_bak, commpage, sizeof(struct comm_page)); + memcpy(commpage_bak, commpage, sizeof(*commpage)); err = init_hw(chip, chip->pci->device, chip->pci->subsystem_device); if (err < 0) { -- cgit v1.1 From 54e430bbd490e18ab116afa4cd90dcc45787b3df Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 27 Jun 2016 09:09:40 -0500 Subject: ipr: Clear interrupt on croc/crocodile when running with LSI If we fall back to using LSI on the Croc or Crocodile chip we need to clear the interrupt so we don't hang the system. Cc: Tested-by: Benjamin Herrenschmidt Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index d6a691e..d6803a9 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -10093,6 +10093,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, ioa_cfg->intr_flag = IPR_USE_MSI; else { ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->clear_isr = 1; ioa_cfg->nvectors = 1; dev_info(&pdev->dev, "Cannot enable MSI.\n"); } -- cgit v1.1 From 5e7ff2ca7f2da55fe777167849d0c93403bd0dc8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 23 Jun 2016 15:05:26 -0400 Subject: SCSI: fix new bug in scsi_dev_info_list string matching Commit b704f70ce200 ("SCSI: fix bug in scsi_dev_info_list matching") changed the way vendor- and model-string matching was carried out in the routine that looks up entries in a SCSI devinfo list. The new matching code failed to take into account the case of a maximum-length string; in such cases it could end up testing for a terminating '\0' byte beyond the end of the memory allocated to the string. This out-of-bounds bug was detected by UBSAN. I don't know if anybody has actually encountered this bug. The symptom would be that a device entry in the blacklist might not be matched properly if it contained an 8-character vendor name or a 16-character model name. Such entries certainly exist in scsi_static_device_list. This patch fixes the problem by adding a check for a maximum-length string before the '\0' test. Signed-off-by: Alan Stern Fixes: b704f70ce200 ("SCSI: fix bug in scsi_dev_info_list matching") Tested-by: Wilfried Klaebe CC: # v4.4+ Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index ff41c31..eaccd65 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -429,7 +429,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor, * here, and we don't know what device it is * trying to work with, leave it as-is. */ - vmax = 8; /* max length of vendor */ + vmax = sizeof(devinfo->vendor); vskip = vendor; while (vmax > 0 && *vskip == ' ') { vmax--; @@ -439,7 +439,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor, while (vmax > 0 && vskip[vmax - 1] == ' ') --vmax; - mmax = 16; /* max length of model */ + mmax = sizeof(devinfo->model); mskip = model; while (mmax > 0 && *mskip == ' ') { mmax--; @@ -455,10 +455,12 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor, * Behave like the older version of get_device_flags. */ if (memcmp(devinfo->vendor, vskip, vmax) || - devinfo->vendor[vmax]) + (vmax < sizeof(devinfo->vendor) && + devinfo->vendor[vmax])) continue; if (memcmp(devinfo->model, mskip, mmax) || - devinfo->model[mmax]) + (mmax < sizeof(devinfo->model) && + devinfo->model[mmax])) continue; return devinfo; } else { -- cgit v1.1 From 6858107e78b4ecb9f244db814ffbdba1b5ce759b Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 29 Jun 2016 10:27:52 +0530 Subject: ALSA: hda - Add PCI ID for Kabylake-H Kabylake-H shows up as PCI ID 0xa2f0. We missed adding this earlier with other KBL IDs. Signed-off-by: Vinod Koul Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 94089fc..e320c44 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -367,9 +367,10 @@ enum { #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) #define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171) #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) +#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ - IS_KBL(pci) || IS_KBL_LP(pci) + IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2190,6 +2191,9 @@ static const struct pci_device_id azx_ids[] = { /* Kabylake-LP */ { PCI_DEVICE(0x8086, 0x9d71), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake-H */ + { PCI_DEVICE(0x8086, 0xa2f0), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, -- cgit v1.1 From 62db7152c924e4c060e42b34a69cd39658e8a0dc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 29 Jun 2016 15:23:08 +0200 Subject: ALSA: au88x0: Fix calculation in vortex_wtdma_bufshift() vortex_wtdma_bufshift() function does calculate the page index wrongly, first masking then shift, which always results in zero. The proper computation is to first shift, then mask. Reported-by: Dan Carpenter Cc: Signed-off-by: Takashi Iwai --- sound/pci/au88x0/au88x0_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index 4a054d7..d3125c1 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -1444,9 +1444,8 @@ static int vortex_wtdma_bufshift(vortex_t * vortex, int wtdma) int page, p, pp, delta, i; page = - (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) & - WT_SUBBUF_MASK) - >> WT_SUBBUF_SHIFT; + (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) + >> WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK; if (dma->nr_periods >= 4) delta = (page - dma->period_real) & 3; else { -- cgit v1.1 From 838086414b3cda5c592591f2b82256996306dab6 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 9 Jun 2016 19:50:13 -0400 Subject: e1000e: keep Rx/Tx HW_VLAN_CTAG in sync The bit in the e1000 driver that mentions explicitly that the hardware has no support for separate RX/TX VLAN accel toggling rings true for e1000e as well, and thus both NETIF_F_HW_VLAN_CTAG_RX and NETIF_F_HW_VLAN_CTAG_TX need to be kept in sync. Revert a portion of commit 889ad456660461 ("e1000e: keep VLAN interfaces functional after rxvlan off") since keeping the bits in sync resolves the original issue. Signed-off-by: Jarod Wilson Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 73f7452..2b2e2f8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -154,16 +154,6 @@ void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) writel(val, hw->hw_addr + reg); } -static bool e1000e_vlan_used(struct e1000_adapter *adapter) -{ - u16 vid; - - for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - return true; - - return false; -} - /** * e1000_regdump - register printout routine * @hw: pointer to the HW structure @@ -3453,8 +3443,7 @@ static void e1000e_set_rx_mode(struct net_device *netdev) ew32(RCTL, rctl); - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX || - e1000e_vlan_used(adapter)) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) e1000e_vlan_strip_enable(adapter); else e1000e_vlan_strip_disable(adapter); @@ -6926,6 +6915,14 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev, if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN)) features &= ~NETIF_F_RXFCS; + /* Since there is no support for separate Rx/Tx vlan accel + * enable/disable make sure Tx flag is always in same state as Rx. + */ + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; + else + features &= ~NETIF_F_HW_VLAN_CTAG_TX; + return features; } -- cgit v1.1 From b3a3c5176c146ec7de653a3062237620464175fb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 13 May 2016 01:51:55 +0800 Subject: ixgbevf: ixgbevf_write/read_posted_mbx should use IXGBE_ERR_MBX to initialize ret_val Now ixgbevf_write/read_posted_mbx use -IXGBE_ERR_MBX as the initiative return value, but it's incorrect, cause in ixgbevf_vlan_rx_add_vid(), it use err == IXGBE_ERR_MBX, the err returned from mac.ops.set_vfta, and in ixgbevf_set_vfta_vf, it return from write/read_posted. so we should initialize err with IXGBE_ERR_MBX, instead of -IXGBE_ERR_MBX. With this fix, the other functions that called it also can work well, cause they only care about if err is 0 or not. Signed-off-by: Xin Long Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/mbx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 61a80da..2819abc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -85,7 +85,7 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw) static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = -IXGBE_ERR_MBX; + s32 ret_val = IXGBE_ERR_MBX; if (!mbx->ops.read) goto out; @@ -111,7 +111,7 @@ out: static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = -IXGBE_ERR_MBX; + s32 ret_val = IXGBE_ERR_MBX; /* exit if either we can't write or there isn't a defined timeout */ if (!mbx->ops.write || !mbx->timeout) -- cgit v1.1 From 7b427a59538a98161321aa46c13f4ea81b43f4eb Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Mon, 27 Jun 2016 16:33:24 +0800 Subject: xen-blkfront: save uncompleted reqs in blkfront_resume() Uncompleted reqs used to be 'saved and resubmitted' in blkfront_recover() during migration, but that's too late after multi-queue was introduced. After a migrate to another host (which may not have multiqueue support), the number of rings (block hardware queues) may be changed and the ring and shadow structure will also be reallocated. The blkfront_recover() then can't 'save and resubmit' the real uncompleted reqs because shadow structure have been reallocated. This patch fixes this issue by moving the 'save' logic out of blkfront_recover() to earlier place in blkfront_resume(). The 'resubmit' is not changed and still in blkfront_recover(). Signed-off-by: Bob Liu Signed-off-by: Konrad Rzeszutek Wilk Cc: stable@vger.kernel.org --- drivers/block/xen-blkfront.c | 91 +++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 51 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2e6d1e9..fcc5b4e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -207,6 +207,9 @@ struct blkfront_info struct blk_mq_tag_set tag_set; struct blkfront_ring_info *rinfo; unsigned int nr_rings; + /* Save uncomplete reqs and bios for migration. */ + struct list_head requests; + struct bio_list bio_list; }; static unsigned int nr_minors; @@ -2002,69 +2005,22 @@ static int blkif_recover(struct blkfront_info *info) { unsigned int i, r_index; struct request *req, *n; - struct blk_shadow *copy; int rc; struct bio *bio, *cloned_bio; - struct bio_list bio_list, merge_bio; unsigned int segs, offset; int pending, size; struct split_bio *split_bio; - struct list_head requests; blkfront_gather_backend_features(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs); - bio_list_init(&bio_list); - INIT_LIST_HEAD(&requests); for (r_index = 0; r_index < info->nr_rings; r_index++) { - struct blkfront_ring_info *rinfo; - - rinfo = &info->rinfo[r_index]; - /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow), - GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); - if (!copy) - return -ENOMEM; - - /* Stage 2: Set up free list. */ - memset(&rinfo->shadow, 0, sizeof(rinfo->shadow)); - for (i = 0; i < BLK_RING_SIZE(info); i++) - rinfo->shadow[i].req.u.rw.id = i+1; - rinfo->shadow_free = rinfo->ring.req_prod_pvt; - rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; + struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; rc = blkfront_setup_indirect(rinfo); - if (rc) { - kfree(copy); + if (rc) return rc; - } - - for (i = 0; i < BLK_RING_SIZE(info); i++) { - /* Not in use? */ - if (!copy[i].request) - continue; - - /* - * Get the bios in the request so we can re-queue them. - */ - if (copy[i].request->cmd_flags & - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { - /* - * Flush operations don't contain bios, so - * we need to requeue the whole request - */ - list_add(©[i].request->queuelist, &requests); - continue; - } - merge_bio.head = copy[i].request->bio; - merge_bio.tail = copy[i].request->biotail; - bio_list_merge(&bio_list, &merge_bio); - copy[i].request->bio = NULL; - blk_end_request_all(copy[i].request, 0); - } - - kfree(copy); } xenbus_switch_state(info->xbdev, XenbusStateConnected); @@ -2079,7 +2035,7 @@ static int blkif_recover(struct blkfront_info *info) kick_pending_request_queues(rinfo); } - list_for_each_entry_safe(req, n, &requests, queuelist) { + list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); BUG_ON(req->nr_phys_segments > segs); @@ -2087,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info) } blk_mq_kick_requeue_list(info->rq); - while ((bio = bio_list_pop(&bio_list)) != NULL) { + while ((bio = bio_list_pop(&info->bio_list)) != NULL) { /* Traverse the list of pending bios and re-queue them */ if (bio_segments(bio) > segs) { /* @@ -2133,9 +2089,42 @@ static int blkfront_resume(struct xenbus_device *dev) { struct blkfront_info *info = dev_get_drvdata(&dev->dev); int err = 0; + unsigned int i, j; dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); + bio_list_init(&info->bio_list); + INIT_LIST_HEAD(&info->requests); + for (i = 0; i < info->nr_rings; i++) { + struct blkfront_ring_info *rinfo = &info->rinfo[i]; + struct bio_list merge_bio; + struct blk_shadow *shadow = rinfo->shadow; + + for (j = 0; j < BLK_RING_SIZE(info); j++) { + /* Not in use? */ + if (!shadow[j].request) + continue; + + /* + * Get the bios in the request so we can re-queue them. + */ + if (shadow[j].request->cmd_flags & + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { + /* + * Flush operations don't contain bios, so + * we need to requeue the whole request + */ + list_add(&shadow[j].request->queuelist, &info->requests); + continue; + } + merge_bio.head = shadow[j].request->bio; + merge_bio.tail = shadow[j].request->biotail; + bio_list_merge(&info->bio_list, &merge_bio); + shadow[j].request->bio = NULL; + blk_mq_end_request(shadow[j].request, 0); + } + } + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); err = negotiate_mq(info); -- cgit v1.1 From 4f14f5c11db161ab89b02f7196496ca32ca5dbf8 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 25 Jun 2016 07:59:22 +0300 Subject: ASoC: fsl_ssi: Fix number of words per frame for I2S-slave mode The i.MX51 datasheet says: Chapter 56.1.2.4 I2S Mode ... When I2S modes are entered (I2S master (01) or I2S slave (10)), the following settings are recommended: ... - TX Frame Rate should be 2 i.e. (STCCR[12:8] = 1) - RX Frame Rate should be 2 i.e. (SRCCR[12:8] = 1) Chapter 56.3.3.12 SSI Transmit and Receive Clock Control Registers (STCCR & SRCCR) ... Bits 12-8 DC4-DC0 Frame Rate Divider Control. These bits are used to control the divide ratio for the programmable frame rate dividers. The divide ratio works on the word clock. In Normal mode, this ratio determines the word transfer rate. In Network mode, this ratio sets the number of words per frame. The divide ratio ranges from 1 to 32 in Normal mode and from 2 to 32 in Network mode. In Normal mode, a divide ratio of 1 (DC=00000) provides continuous periodic data word transfer. A bit-length frame sync must be used in this case. Function fsl_ssi_hw_params() setup Normal mode for MONO output, so with DC=0, SSI enters to continuous periodic data word transfer. To fix this, setup DC for any I2S mode. Patch has tested on custom board based on Digi CCMX-51 module (i.MX51). Signed-off-by: Alexander Shiyan Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 632ecc0..bedec4a 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -952,16 +952,16 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, ssi_private->i2s_mode = CCSR_SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + regmap_update_bits(regs, CCSR_SSI_STCCR, + CCSR_SSI_SxCCR_DC_MASK, + CCSR_SSI_SxCCR_DC(2)); + regmap_update_bits(regs, CCSR_SSI_SRCCR, + CCSR_SSI_SxCCR_DC_MASK, + CCSR_SSI_SxCCR_DC(2)); switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFS: case SND_SOC_DAIFMT_CBS_CFS: ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER; - regmap_update_bits(regs, CCSR_SSI_STCCR, - CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(2)); - regmap_update_bits(regs, CCSR_SSI_SRCCR, - CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(2)); break; case SND_SOC_DAIFMT_CBM_CFM: ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE; -- cgit v1.1 From f8608985f851c917b3884b692d8e326b0210d34e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 18 May 2016 16:16:51 +0200 Subject: configfs: Remove ppos increment in configfs_write_bin_file The simple_write_to_buffer() already increments the @ppos on success, see fs/libfs.c simple_write_to_buffer() comment: " On success, the number of bytes written is returned and the offset @ppos advanced by this number, or negative value is returned on error. " If the configfs_write_bin_file() is invoked with @count smaller than the total length of the written binary file, it will be invoked multiple times. Since configfs_write_bin_file() increments @ppos on success, after calling simple_write_to_buffer(), the @ppos is incremented twice. Subsequent invocation of configfs_write_bin_file() will result in the next piece of data being written to the offset twice as long as the length of the previous write, thus creating buffer with "holes" in it. The simple testcase using DTO follows: $ mkdir /sys/kernel/config/device-tree/overlays/1 $ dd bs=1 if=foo.dtbo of=/sys/kernel/config/device-tree/overlays/1/dtbo Without this patch, the testcase will result in twice as big buffer in the kernel, which is then passed to the cfs_overlay_item_dtbo_write() . Signed-off-by: Marek Vasut Cc: Geert Uytterhoeven Cc: Christoph Hellwig Cc: Pantelis Antoniou --- fs/configfs/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 33b7ee3..bbc1252 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -357,8 +357,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf, len = simple_write_to_buffer(buffer->bin_buffer, buffer->bin_buffer_size, ppos, buf, count); - if (len > 0) - *ppos += len; out: mutex_unlock(&buffer->mutex); return len; -- cgit v1.1 From fedbb6b4ff341c1e2120f4ffbf367fd78ac3e8f3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 29 Jun 2016 21:47:03 +0300 Subject: ipv4: Fix ip_skb_dst_mtu to use the sk passed by ip_finish_output ip_skb_dst_mtu uses skb->sk, assuming it is an AF_INET socket (e.g. it calls ip_sk_use_pmtu which casts sk as an inet_sk). However, in the case of UDP tunneling, the skb->sk is not necessarily an inet socket (could be AF_PACKET socket, or AF_UNSPEC if arriving from tun/tap). OTOH, the sk passed as an argument throughout IP stack's output path is the one which is of PMTU interest: - In case of local sockets, sk is same as skb->sk; - In case of a udp tunnel, sk is the tunneling socket. Fix, by passing ip_finish_output's sk to ip_skb_dst_mtu. This augments 7026b1ddb6 'netfilter: Pass socket pointer down through okfn().' Signed-off-by: Shmulik Ladkani Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 5 ++--- net/bridge/br_netfilter_hooks.c | 2 +- net/ipv4/ip_output.c | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 37165fb..08f36cd 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -313,10 +313,9 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, return min(dst->dev->mtu, IP_MAX_MTU); } -static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) +static inline unsigned int ip_skb_dst_mtu(struct sock *sk, + const struct sk_buff *skb) { - struct sock *sk = skb->sk; - if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2d25979..77e7f69 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -700,7 +700,7 @@ static int br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { - unsigned int mtu = ip_skb_dst_mtu(skb); + unsigned int mtu = ip_skb_dst_mtu(sk, skb); struct iphdr *iph = ip_hdr(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 124bf0a..4bd4921 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk return dst_output(net, sk, skb); } #endif - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); @@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) mtu = IPCB(skb)->frag_max_size; -- cgit v1.1 From 43daa96b166c3cf5ff30dfac0c5efa2620e4beab Mon Sep 17 00:00:00 2001 From: Soohoon Lee Date: Wed, 29 Jun 2016 15:07:21 -0400 Subject: usbnet: Stop RX Q on MTU change When MTU is changed unlink_urbs() flushes RX Q but mean while usbnet_bh() can fill up the Q at the same time. Depends on which HCD is down there unlink takes long time then the flush never ends. Signed-off-by: Soohoon Lee Reviewed-by: Kimball Murray Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 61ba464..6086a01 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -395,8 +395,11 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu) dev->hard_mtu = net->mtu + net->hard_header_len; if (dev->rx_urb_size == old_hard_mtu) { dev->rx_urb_size = dev->hard_mtu; - if (dev->rx_urb_size > old_rx_urb_size) + if (dev->rx_urb_size > old_rx_urb_size) { + usbnet_pause_rx(dev); usbnet_unlink_rx_urbs(dev); + usbnet_resume_rx(dev); + } } /* max qlen depend on hard_mtu and rx_urb_size */ @@ -1508,8 +1511,9 @@ static void usbnet_bh (unsigned long param) } else if (netif_running (dev->net) && netif_device_present (dev->net) && netif_carrier_ok(dev->net) && - !timer_pending (&dev->delay) && - !test_bit (EVENT_RX_HALT, &dev->flags)) { + !timer_pending(&dev->delay) && + !test_bit(EVENT_RX_PAUSED, &dev->flags) && + !test_bit(EVENT_RX_HALT, &dev->flags)) { int temp = dev->rxq.qlen; if (temp < RX_QLEN(dev)) { -- cgit v1.1 From 0b340405fca980b12a2ddafdd4536ad6fb624755 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 20 Jun 2016 12:20:58 +0200 Subject: drm/sun4i: Report proper vblank The sun4i display engine doesn't have any vblank counter. Use the proper helper for that. Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 257d2b4..cbe4a25 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -92,7 +92,7 @@ static struct drm_driver sun4i_drv_driver = { /* Frame Buffer Operations */ /* VBlank Operations */ - .get_vblank_counter = drm_vblank_count, + .get_vblank_counter = drm_vblank_no_hw_counter, .enable_vblank = sun4i_drv_enable_vblank, .disable_vblank = sun4i_drv_disable_vblank, }; -- cgit v1.1 From 2cd368300aa5bcec40c079ee5096287847506504 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 20 Jun 2016 12:20:59 +0200 Subject: drm/sun4i: Send vblank event when the CRTC is disabled So far, we were missing to send the vblank event when disabling the CRTC, making us never report the last vblank event. This was causing a time out on the page flip, which should be solved now. Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_crtc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c index 4182a21..41cacec 100644 --- a/drivers/gpu/drm/sun4i/sun4i_crtc.c +++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c @@ -65,6 +65,14 @@ static void sun4i_crtc_disable(struct drm_crtc *crtc) DRM_DEBUG_DRIVER("Disabling the CRTC\n"); sun4i_tcon_disable(drv->tcon); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + spin_unlock_irq(&crtc->dev->event_lock); + + crtc->state->event = NULL; + } } static void sun4i_crtc_enable(struct drm_crtc *crtc) -- cgit v1.1 From 74524955556096a0b2a821a49b4d0abebad3ee16 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 16 Jun 2016 05:15:33 -0700 Subject: writeback: inode cgroup wb switch should not call ihold() Asynchronous wb switching of inodes takes an additional ref count on an inode to make sure inode remains valid until switchover is completed. However, anyone calling ihold() must already have a ref count on inode, but in this case inode->i_count may already be zero: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 917 at fs/inode.c:397 ihold+0x2b/0x30 CPU: 1 PID: 917 Comm: kworker/u4:5 Not tainted 4.7.0-rc2+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: writeback wb_workfn (flush-8:16) 0000000000000000 ffff88007ca0fb58 ffffffff805990af 0000000000000000 0000000000000000 ffff88007ca0fb98 ffffffff80268702 0000018d000004e2 ffff88007cef40e8 ffff88007c9b89a8 ffff880079e3a740 0000000000000003 Call Trace: [] dump_stack+0x4d/0x6e [] __warn+0xc2/0xe0 [] warn_slowpath_null+0x18/0x20 [] ihold+0x2b/0x30 [] inode_switch_wbs+0x11c/0x180 [] wbc_detach_inode+0x170/0x1a0 [] writeback_sb_inodes+0x21c/0x530 [] wb_writeback+0xee/0x1e0 [] wb_workfn+0xd7/0x280 [] ? try_to_wake_up+0x1b1/0x2b0 [] process_one_work+0x129/0x300 [] worker_thread+0x126/0x480 [] ? __schedule+0x1c7/0x561 [] ? process_one_work+0x300/0x300 [] kthread+0xc4/0xe0 [] ? kfree+0xc8/0x100 [] ret_from_fork+0x1f/0x40 [] ? __kthread_parkme+0x70/0x70 ---[ end trace aaefd2fd9f306bc4 ]--- Signed-off-by: Tahsin Erdogan Acked-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 989a2ce..fe7e83a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -483,9 +483,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) goto out_free; } inode->i_state |= I_WB_SWITCH; + __iget(inode); spin_unlock(&inode->i_lock); - ihold(inode); isw->inode = inode; atomic_inc(&isw_nr_in_flight); -- cgit v1.1 From 65c0554b73c920023cc8998802e508b798113b46 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 30 Jun 2016 18:11:41 +0200 Subject: x86/power/64: Fix kernel text mapping corruption during image restoration Logan Gunthorpe reports that hibernation stopped working reliably for him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata). That turns out to be a consequence of a long-standing issue with the 64-bit image restoration code on x86, which is that the temporary page tables set up by it to avoid page tables corruption when the last bits of the image kernel's memory contents are copied into their original page frames re-use the boot kernel's text mapping, but that mapping may very well get corrupted just like any other part of the page tables. Of course, if that happens, the final jump to the image kernel's entry point will go to nowhere. The exact reason why commit ab76f7b4ab23 matters here is that it sometimes causes a PMD of a large page to be split into PTEs that are allocated dynamically and get corrupted during image restoration as described above. To fix that issue note that the code copying the last bits of the image kernel's memory contents to the page frames occupied by them previoulsy doesn't use the kernel text mapping, because it runs from a special page covered by the identity mapping set up for that code from scratch. Hence, the kernel text mapping is only needed before that code starts to run and then it will only be used just for the final jump to the image kernel's entry point. Accordingly, the temporary page tables set up in swsusp_arch_resume() on x86-64 need to contain the kernel text mapping too. That mapping is only going to be used for the final jump to the image kernel, so it only needs to cover the image kernel's entry point, because the first thing the image kernel does after getting control back is to switch over to its own original page tables. Moreover, the virtual address of the image kernel's entry point in that mapping has to be the same as the one mapped by the image kernel's page tables. With that in mind, modify the x86-64's arch_hibernation_header_save() and arch_hibernation_header_restore() routines to pass the physical address of the image kernel's entry point (in addition to its virtual address) to the boot kernel (a small piece of assembly code involved in passing the entry point's virtual address to the image kernel is not necessary any more after that, so drop it). Update RESTORE_MAGIC too to reflect the image header format change. Next, in set_up_temporary_mappings(), use the physical and virtual addresses of the image kernel's entry point passed in the image header to set up a minimum kernel text mapping (using memory pages that won't be overwritten by the image kernel's memory contents) that will map those addresses to each other as appropriate. This makes the concern about the possible corruption of the original boot kernel text mapping go away and if the the minimum kernel text mapping used for the final jump marks the image kernel's entry point memory as executable, the jump to it is guaraneed to succeed. Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata) Link: http://marc.info/?l=linux-pm&m=146372852823760&w=2 Reported-by: Logan Gunthorpe Reported-and-tested-by: Borislav Petkov Tested-by: Kees Cook Signed-off-by: Rafael J. Wysocki --- arch/x86/power/hibernate_64.c | 97 ++++++++++++++++++++++++++++++++++----- arch/x86/power/hibernate_asm_64.S | 55 ++++++++++------------ 2 files changed, 109 insertions(+), 43 deletions(-) diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 009947d..f2b5e6a 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void); * kernel's text (this value is passed in the image header). */ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* + * The new mapping only has to cover the page containing the image + * kernel's entry point (jump_address_phys), because the switch over to + * it is carried out by relocated code running from a page allocated + * specifically for this purpose and covered by the identity mapping, so + * the temporary kernel text mapping is only needed for the final jump. + * Moreover, in that mapping the virtual address of the image kernel's + * entry point must be the same as its virtual address in the image + * kernel (restore_jump_address), so the image kernel's + * restore_registers() code doesn't find itself in a different area of + * the virtual address space after switching over to the original page + * tables used by the image kernel. + */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); + set_pud(pud + pud_index(restore_jump_address), + __pud(__pa(pmd) | _KERNPG_TABLE)); + set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + __pgd(__pa(pud) | _KERNPG_TABLE)); + + return 0; +} static void *alloc_pgt_page(void *context) { @@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void) if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); + /* Prepare a temporary mapping for the kernel text */ + result = set_up_temporary_text_mapping(); + if (result) + return result; /* Set up the direct mapping from scratch */ for (i = 0; i < nr_pfn_mapped; i++) { @@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void) return 0; } +static int relocate_restore_code(void) +{ + pgd_t *pgd; + pud_t *pud; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pud = pud_offset(pgd, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + } else { + pmd_t *pmd = pmd_offset(pud, relocated_restore_code); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + } else { + pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); + } + } + __flush_tlb_all(); + + return 0; +} + int swsusp_arch_resume(void) { int error; /* We have got enough memory and from now on we cannot recover */ - if ((error = set_up_temporary_mappings())) + error = set_up_temporary_mappings(); + if (error) return error; - relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - memcpy(relocated_restore_code, &core_restore_code, - &restore_registers - &core_restore_code); + error = relocate_restore_code(); + if (error) + return error; restore_image(); return 0; @@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn) struct restore_data_record { unsigned long jump_address; + unsigned long jump_address_phys; unsigned long cr3; unsigned long magic; }; -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +#define RESTORE_MAGIC 0x123456789ABCDEF0UL /** * arch_hibernation_header_save - populate the architecture specific part @@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = restore_jump_address; + rdr->jump_address = (unsigned long)&restore_registers; + rdr->jump_address_phys = __pa_symbol(&restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; return 0; @@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr) struct restore_data_record *rdr = addr; restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; restore_cr3 = rdr->cr3; return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 4400a43..3177c2b 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend) pushfq popq pt_regs_flags(%rax) - /* save the address of restore_registers */ - movq $restore_registers, %rax - movq %rax, restore_jump_address(%rip) /* save cr3 */ movq %cr3, %rax movq %rax, restore_cr3(%rip) @@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend) ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) - /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rdx - movq temp_level4_pgt(%rip), %rax - subq %rdx, %rax - movq %rax, %cr3 - /* Flush TLB */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3; - movq %rax, %cr4; # turn PGE back on - /* prepare to jump to the image kernel */ - movq restore_jump_address(%rip), %rax - movq restore_cr3(%rip), %rbx + movq restore_jump_address(%rip), %r8 + movq restore_cr3(%rip), %r9 + + /* prepare to switch to temporary page tables */ + movq temp_level4_pgt(%rip), %rax + movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + + /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx /* code below has been relocated to a safe page */ ENTRY(core_restore_code) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rcx + subq %rcx, %rax + movq %rax, %cr3 + /* flush TLB */ + movq %rbx, %rcx + andq $~(X86_CR4_PGE), %rcx + movq %rcx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rbx, %cr4; # turn PGE back on .Lloop: testq %rdx, %rdx jz .Ldone @@ -96,24 +96,17 @@ ENTRY(core_restore_code) /* progress to the next pbe */ movq pbe_next(%rdx), %rdx jmp .Lloop + .Ldone: /* jump to the restore_registers address from the image header */ - jmpq *%rax - /* - * NOTE: This assumes that the boot kernel's text mapping covers the - * image kernel's page containing restore_registers and the address of - * this page is the same as in the image kernel's text mapping (it - * should always be true, because the text mapping is linear, starting - * from 0, and is supposed to cover the entire kernel text for every - * kernel). - * - * code below belongs to the image kernel - */ + jmpq *%r8 + /* code below belongs to the image kernel */ + .align PAGE_SIZE ENTRY(restore_registers) FRAME_BEGIN /* go back to the original page tables */ - movq %rbx, %cr3 + movq %r9, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax -- cgit v1.1 From 1ead852dd88779eda12cb09cc894a03d9abfe1ec Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 16 Jun 2016 19:13:49 +0200 Subject: x86/amd_nb: Fix boot crash on non-AMD systems Fix boot crash that triggers if this driver is built into a kernel and run on non-AMD systems. AMD northbridges users call amd_cache_northbridges() and it returns a negative value to signal that we weren't able to cache/detect any northbridges on the system. At least, it should do so as all its callers expect it to do so. But it does return a negative value only when kmalloc() fails. Fix it to return -ENODEV if there are no NBs cached as otherwise, amd_nb users like amd64_edac, for example, which relies on it to know whether it should load or not, gets loaded on systems like Intel Xeons where it shouldn't. Reported-and-tested-by: Tony Battersby Signed-off-by: Borislav Petkov Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466097230-5333-2-git-send-email-bp@alien8.de Link: https://lkml.kernel.org/r/5761BEB0.9000807@cybernetics.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_nb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a147e67..e991d5c 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -71,8 +71,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) -- cgit v1.1 From c76a093dc1415d364020b8b33f1e194ef4d26fd0 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Fri, 1 Jul 2016 12:46:01 +0900 Subject: x86/Documentation: Fix various typos in Documentation/x86/ files Signed-off-by: Masanari Iida Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: corbet@lwn.net Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20160701034601.30308-1-standby24x7@gmail.com Signed-off-by: Ingo Molnar --- Documentation/x86/intel_mpx.txt | 6 +++--- Documentation/x86/tlb.txt | 4 ++-- Documentation/x86/x86_64/machinecheck | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt index 1a5a121..85d0549 100644 --- a/Documentation/x86/intel_mpx.txt +++ b/Documentation/x86/intel_mpx.txt @@ -45,7 +45,7 @@ is how we expect the compiler, application and kernel to work together. MPX-instrumented. 3) The kernel detects that the CPU has MPX, allows the new prctl() to succeed, and notes the location of the bounds directory. Userspace is - expected to keep the bounds directory at that locationWe note it + expected to keep the bounds directory at that location. We note it instead of reading it each time because the 'xsave' operation needed to access the bounds directory register is an expensive operation. 4) If the application needs to spill bounds out of the 4 registers, it @@ -167,7 +167,7 @@ If a #BR is generated due to a bounds violation caused by MPX. We need to decode MPX instructions to get violation address and set this address into extended struct siginfo. -The _sigfault feild of struct siginfo is extended as follow: +The _sigfault field of struct siginfo is extended as follow: 87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ 88 struct { @@ -240,5 +240,5 @@ them at the same bounds table. This is allowed architecturally. See more information "Intel(R) Architecture Instruction Set Extensions Programming Reference" (9.3.4). -However, if users did this, the kernel might be fooled in to unmaping an +However, if users did this, the kernel might be fooled in to unmapping an in-use bounds table since it does not recognize sharing. diff --git a/Documentation/x86/tlb.txt b/Documentation/x86/tlb.txt index 39d1723..6a0607b 100644 --- a/Documentation/x86/tlb.txt +++ b/Documentation/x86/tlb.txt @@ -5,7 +5,7 @@ memory, it has two choices: from areas other than the one we are trying to flush will be destroyed and must be refilled later, at some cost. 2. Use the invlpg instruction to invalidate a single page at a - time. This could potentialy cost many more instructions, but + time. This could potentially cost many more instructions, but it is a much more precise operation, causing no collateral damage to other TLB entries. @@ -19,7 +19,7 @@ Which method to do depends on a few things: work. 3. The size of the TLB. The larger the TLB, the more collateral damage we do with a full flush. So, the larger the TLB, the - more attrative an individual flush looks. Data and + more attractive an individual flush looks. Data and instructions have separate TLBs, as do different page sizes. 4. The microarchitecture. The TLB has become a multi-level cache on modern CPUs, and the global flushes have become more diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck index b1fb302..d0648a7 100644 --- a/Documentation/x86/x86_64/machinecheck +++ b/Documentation/x86/x86_64/machinecheck @@ -36,7 +36,7 @@ between all CPUs. check_interval How often to poll for corrected machine check errors, in seconds - (Note output is hexademical). Default 5 minutes. When the poller + (Note output is hexadecimal). Default 5 minutes. When the poller finds MCEs it triggers an exponential speedup (poll more often) on the polling interval. When the poller stops finding MCEs, it triggers an exponential backoff (poll less often) on the polling -- cgit v1.1 From 9216a97a12b069c62f0e927a9f54be4883648a0f Mon Sep 17 00:00:00 2001 From: Sony Chacko Date: Wed, 29 Jun 2016 17:51:34 -0400 Subject: qlcnic: add wmb() call in transmit data path. Call wmb() to ensure writes are complete before hardware fetches updated Tx descriptors. Signed-off-by: Sony Chacko Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 607bb7d..87c642d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -772,6 +772,8 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; + /* Ensure writes are complete before HW fetches Tx descriptors */ + wmb(); qlcnic_update_cmd_producer(tx_ring); return NETDEV_TX_OK; -- cgit v1.1 From f95ae8a0edba1cb85110df4f1c96786419a0472f Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 30 Jun 2016 15:33:35 +0800 Subject: r8152: clear LINK_OFF_WAKE_EN after autoresume LINK_OFF_WAKE_EN should be cleared after autoresume, otherwise after system suspend, the system would wake up when linking off occurs. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4e257b8..d7f20a9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2421,7 +2421,18 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } else { + u32 ocp_data; + __rtl_set_wol(tp, tp->saved_wolopts); + + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); + ocp_data &= ~LINK_OFF_WAKE_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); + r8153_u2p3en(tp, true); r8153_u1u2en(tp, true); } -- cgit v1.1 From 3d8c4530e50dc030efcec575aa69483439fc6fda Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 30 Jun 2016 10:36:15 +0100 Subject: net: mvneta: fix open() error cleanup If mvneta_mdio_probe() fails, a kernel warning is triggered due to missing cleanup in the error path. Add the necessary cleanup. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 281 at kernel/irq/manage.c:1814 __free_percpu_irq+0xfc/0x130 percpu IRQ 38 still enabled on CPU0! Modules linked in: bnep bluetooth xhci_plat_hcd xhci_hcd marvell_cesa armada_thermal des_generic ehci_orion mcp3021 spi_orion sfp mdio_i2c evbug fuse CPU: 1 PID: 281 Comm: connmand Not tainted 4.7.0-rc2+ #53 Hardware name: Marvell Armada 380/385 (Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r6:60010093 r5:ffffffff r4:00000000 r3:dc8ba500 [] (show_stack) from [] (dump_stack+0xa4/0xdc) [] (dump_stack) from [] (__warn+0xd8/0x104) r6:c081e6a0 r5:00000000 r4:edfe5d50 r3:dc8ba500 [] (__warn) from [] (warn_slowpath_fmt+0x40/0x48) r10:a0010013 r8:c09356f8 r7:00000026 r6:ef11a260 r5:edd7b980 r4:ef11a200 [] (warn_slowpath_fmt) from [] (__free_percpu_irq+0xfc/0x130) r3:00000026 r2:c081e7ac [] (__free_percpu_irq) from [] (free_percpu_irq+0x48/0x74) r10:00008914 r8:00000000 r7:ffffffed r6:c09356f8 r5:00000026 r4:ef11a200 [] (free_percpu_irq) from [] (mvneta_open+0x118/0x134) r6:ffffffed r5:ef01e640 r4:ef01e000 r3:ef01e000 [] (mvneta_open) from [] (__dev_open+0xa4/0x108) r7:ef01e030 r6:c06ff3d8 r5:ffff9003 r4:ef01e000 [] (__dev_open) from [] (__dev_change_flags+0x94/0x150) r7:00001002 r6:00000001 r5:ffff9003 r4:ef01e000 [] (__dev_change_flags) from [] (dev_change_flags+0x20/0x50) r8:00000000 r7:c09334c8 r6:00001002 r5:00000148 r4:ef01e000 r3:00008914 [] (dev_change_flags) from [] (devinet_ioctl+0x6f4/0x7e0) r8:00000000 r7:c09334c8 r6:00000000 r5:ee87200c r4:00000000 r3:00008914 [] (devinet_ioctl) from [] (inet_ioctl+0x1b8/0x1c8) r10:beb4499c r9:edfe4000 r8:ecf13280 r7:c096cf00 r6:beb4499c r5:eef7c240 r4:00008914 [] (inet_ioctl) from [] (sock_ioctl+0x78/0x300) [] (sock_ioctl) from [] (do_vfs_ioctl+0x98/0xa60) r7:00000011 r6:00008914 r5:00000011 r4:c01568d0 [] (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x60) r10:00000000 r9:edfe4000 r8:beb4499c r7:00000011 r6:00008914 r5:ecf13280 r4:ecf13280 [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x1c) r8:c0010004 r7:00000036 r6:00000011 r5:000a2978 r4:00000000 r3:00009003 ---[ end trace 711f625d5b04b3a7 ]--- Signed-off-by: Russell King Tested-by: Jon Nettleton Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index a6d26d3..d5d263b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3458,6 +3458,8 @@ static int mvneta_open(struct net_device *dev) return 0; err_free_irq: + unregister_cpu_notifier(&pp->cpu_notifier); + on_each_cpu(mvneta_percpu_disable, pp, true); free_percpu_irq(pp->dev->irq, pp->ports); err_cleanup_txqs: mvneta_cleanup_txqs(pp); -- cgit v1.1 From fdfe3b32db70aa49d1d60a1ddd2280099174bddb Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 1 Jul 2016 09:49:05 +0800 Subject: ASoC: rt5645: fix reg-2f default value. The default value of reg-2f in codec rt5650 is 0x5002, not 0x1002. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 3c6594d..d70847c 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -253,7 +253,7 @@ static const struct reg_default rt5650_reg[] = { { 0x2b, 0x5454 }, { 0x2c, 0xaaa0 }, { 0x2d, 0x0000 }, - { 0x2f, 0x1002 }, + { 0x2f, 0x5002 }, { 0x31, 0x5000 }, { 0x32, 0x0000 }, { 0x33, 0x0000 }, -- cgit v1.1 From f87fda00b6ed232a817c655b8d179b48bde8fdbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Jun 2016 16:13:41 +0200 Subject: bonding: prevent out of bound accesses ether_addr_equal_64bits() requires some care about its arguments, namely that 8 bytes might be read, even if last 2 byte values are not used. KASan detected a violation with null_mac_addr and lacpdu_mcast_addr in bond_3ad.c Same problem with mac_bcast[] and mac_v6_allmcast[] in bond_alb.c : Although the 8-byte alignment was there, KASan would detect out of bound accesses. Fixes: 815117adaf5b ("bonding: use ether_addr_equal_unaligned for bond addr compare") Fixes: bb54e58929f3 ("bonding: Verify RX LACPDU has proper dest mac-addr") Fixes: 885a136c52a8 ("bonding: use compare_ether_addr_64bits() in ALB") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Acked-by: Dmitry Vyukov Acked-by: Nikolay Aleksandrov Acked-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 11 +++++++---- drivers/net/bonding/bond_alb.c | 7 ++----- include/net/bonding.h | 7 ++++++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index ca81f46..edc70ff 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -101,11 +101,14 @@ enum ad_link_speed_type { #define MAC_ADDRESS_EQUAL(A, B) \ ether_addr_equal_64bits((const u8 *)A, (const u8 *)B) -static struct mac_addr null_mac_addr = { { 0, 0, 0, 0, 0, 0 } }; +static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = { + 0, 0, 0, 0, 0, 0 +}; static u16 ad_ticks_per_sec; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; -static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR; +static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = + MULTICAST_LACPDU_ADDR; /* ================= main 802.3ad protocol functions ================== */ static int ad_lacpdu_send(struct port *port); @@ -1739,7 +1742,7 @@ static void ad_clear_agg(struct aggregator *aggregator) aggregator->is_individual = false; aggregator->actor_admin_aggregator_key = 0; aggregator->actor_oper_aggregator_key = 0; - aggregator->partner_system = null_mac_addr; + eth_zero_addr(aggregator->partner_system.mac_addr_value); aggregator->partner_system_priority = 0; aggregator->partner_oper_aggregator_key = 0; aggregator->receive_state = 0; @@ -1761,7 +1764,7 @@ static void ad_initialize_agg(struct aggregator *aggregator) if (aggregator) { ad_clear_agg(aggregator); - aggregator->aggregator_mac_address = null_mac_addr; + eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value); aggregator->aggregator_identifier = 0; aggregator->slave = NULL; } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c5ac160..551f0f8 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -42,13 +42,10 @@ -#ifndef __long_aligned -#define __long_aligned __attribute__((aligned((sizeof(long))))) -#endif -static const u8 mac_bcast[ETH_ALEN] __long_aligned = { +static const u8 mac_bcast[ETH_ALEN + 2] __long_aligned = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = { +static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 }; static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; diff --git a/include/net/bonding.h b/include/net/bonding.h index 791800d..6360c25 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -34,6 +34,9 @@ #define BOND_DEFAULT_MIIMON 100 +#ifndef __long_aligned +#define __long_aligned __attribute__((aligned((sizeof(long))))) +#endif /* * Less bad way to call ioctl from within the kernel; this needs to be * done some other way to get the call out of interrupt context. @@ -138,7 +141,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; - u8 ad_actor_system[ETH_ALEN]; + + /* 2 bytes of padding : see ether_addr_equal_64bits() */ + u8 ad_actor_system[ETH_ALEN + 2]; }; struct bond_parm_tbl { -- cgit v1.1 From 0d834442cc247c7b3f3bd6019512ae03e96dd99a Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 30 Jun 2016 17:34:38 +0300 Subject: net/mlx5: Fix teardown errors that happen in pci error handler In case of internal error state we will simulate the commands status through the return value translation function, but we need to simulate all the teardown fw commands as successful so we will not have fw command failure prints. This also fix memory leaks that happen because we skip teardown stages due to failed fw commands. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 0b49862..fda43bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_FLOW_GROUP: case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: - case MLX5_CMD_OP_2ERR_QP: - case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_QUERY_QP: case MLX5_CMD_OP_SQD_RTS_QP: case MLX5_CMD_OP_INIT2INIT_QP: @@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: - case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: case MLX5_CMD_OP_SET_ROCE_ADDRESS: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_RQT: case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: - case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: -- cgit v1.1 From c1d4d2e92ad670168a17a57dfa182a5a5baa72d4 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 30 Jun 2016 17:34:39 +0300 Subject: net/mlx5: Avoid calling sleeping function by the health poll thread In internal error state the health poll thread will eventually call synchronize_irq() (to safely trigger command completions) which might sleep, so we are calling sleeping function from atomic context which is invalid. Here we move trigger_cmd_completions(dev) to enter error state which is the earliest stage in error state handling. This way we won't need to wait for next health poll to trigger command completions and will solve the scheduling while atomic issue. mlx5_enter_error_state can be called from two contexts, protect it with dev->intf_state_lock Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 42d16b9..96a5946 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev) { + mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return; + goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev)) + if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + trigger_cmd_completions(dev); + } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(&dev->intf_state_mutex); } static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) @@ -245,7 +251,6 @@ static void poll_health(unsigned long data) u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - trigger_cmd_completions(dev); mod_timer(&health->timer, get_next_poll_jiffies()); return; } -- cgit v1.1 From 5adff6a0886273eb426360400f120443833760a3 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 30 Jun 2016 17:34:40 +0300 Subject: net/mlx5: Fix incorrect page count when in internal error Change page cleanup flow when in internal error to properly decrement the page counts when reclaiming pages. The prevents timing out waiting for extra pages that were actually cleaned up previously. fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 63 +++++++++++++++------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 9eeee05..32dea35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -345,7 +345,6 @@ retry: func_id, npages, err); goto out_4k; } - dev->priv.fw_pages += npages; err = mlx5_cmd_status_to_err(&out.hdr); if (err) { @@ -373,6 +372,33 @@ out_free: return err; } +static int reclaim_pages_cmd(struct mlx5_core_dev *dev, + struct mlx5_manage_pages_inbox *in, int in_size, + struct mlx5_manage_pages_outbox *out, int out_size) +{ + struct fw_page *fwp; + struct rb_node *p; + u32 npages; + u32 i = 0; + + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size, + (u32 *)out, out_size); + + npages = be32_to_cpu(in->num_entries); + + p = rb_first(&dev->priv.page_root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + out->pas[i] = cpu_to_be64(fwp->addr); + p = rb_next(p); + i++; + } + + out->num_entries = cpu_to_be32(i); + return 0; +} + static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int *nclaimed) { @@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, in.func_id = cpu_to_be16(func_id); in.num_entries = cpu_to_be32(npages); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen); if (err) { - mlx5_core_err(dev, "failed reclaiming pages\n"); - goto out_free; - } - dev->priv.fw_pages -= npages; - - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } @@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, err = -EINVAL; goto out_free; } - if (nclaimed) - *nclaimed = num_claimed; for (i = 0; i < num_claimed; i++) { addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + + if (nclaimed) + *nclaimed = num_claimed; + dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; @@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) p = rb_first(&dev->priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - free_4k(dev, fwp->addr); - nclaimed = 1; - } else { - err = reclaim_pages(dev, fwp->func_id, - optimal_reclaimed_pages(), - &nclaimed); - } + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); + if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); @@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) } } while (p); + WARN(dev->priv.fw_pages, + "FW pages counter is %d after reclaiming all pages\n", + dev->priv.fw_pages); + WARN(dev->priv.vfs_pages, + "VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.vfs_pages); + return 0; } -- cgit v1.1 From d57847dc4177c6fd8d950cb533f5edf0eab45b11 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 30 Jun 2016 17:34:41 +0300 Subject: net/mlx5: Fix wait_vital for VFs and remove fixed sleep The device ID for VFs is in a different location than PFs. This results in the poll always timing out for VFs. There's no good way to read the VF device ID without using the PF's configuration space. Switch to waiting for the health poll to start incrementing. Also remove the 1s sleep at the beginning. fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 41 ++++++++++---------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c65f4a1..6695893 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1422,46 +1422,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) mlx5_pci_err_detected(dev->pdev, 0); } -/* wait for the device to show vital signs. For now we check - * that we can read the device ID and that the health buffer - * shows a non zero value which is different than 0xffffffff +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. */ -static void wait_vital(struct pci_dev *pdev) +static int wait_vital(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_health *health = &dev->priv.health; const int niter = 100; + u32 last_count = 0; u32 count; - u16 did; int i; - /* Wait for firmware to be ready after reset */ - msleep(1000); - for (i = 0; i < niter; i++) { - if (pci_read_config_word(pdev, 2, &did)) { - dev_warn(&pdev->dev, "failed reading config word\n"); - break; - } - if (did == pdev->device) { - dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i); - break; - } - msleep(50); - } - if (i == niter) - dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); - for (i = 0; i < niter; i++) { count = ioread32be(health->health_counter); if (count && count != 0xffffffff) { - dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); - break; + if (last_count && last_count != count) { + dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + return 0; + } + last_count = count; } msleep(50); } - if (i == niter) - dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); + return -ETIMEDOUT; } static void mlx5_pci_resume(struct pci_dev *pdev) @@ -1473,7 +1458,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); pci_save_state(pdev); - wait_vital(pdev); + err = wait_vital(pdev); + if (err) { + dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); + return; + } err = mlx5_load_one(dev, priv); if (err) -- cgit v1.1 From 9cba4ebcf374c3772f6eb61f2d065294b2451b49 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 30 Jun 2016 17:34:42 +0300 Subject: net/mlx5: Fix potential deadlock in command mode change Call command completion handler in case of timeout when working in interrupts mode. Avoid flushing the commands workqueue after acquiring the semaphores to prevent a potential deadlock. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 79 +++++++++++---------------- 1 file changed, 33 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index fda43bc..74067f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -710,13 +710,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) if (cmd->mode == CMD_MODE_POLLING) { wait_for_completion(&ent->done); - err = ent->ret; - } else { - if (!wait_for_completion_timeout(&ent->done, timeout)) - err = -ETIMEDOUT; - else - err = 0; + } else if (!wait_for_completion_timeout(&ent->done, timeout)) { + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); } + + err = ent->ret; + if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), @@ -774,28 +774,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; } - if (!callback) { - err = wait_func(dev, ent); - if (err == -ETIMEDOUT) - goto out; - - ds = ent->ts2 - ent->ts1; - op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); - if (op < ARRAY_SIZE(cmd->stats)) { - stats = &cmd->stats[op]; - spin_lock_irq(&stats->lock); - stats->sum += ds; - ++stats->n; - spin_unlock_irq(&stats->lock); - } - mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, - "fw exec time for %s is %lld nsec\n", - mlx5_command_str(op), ds); - *status = ent->status; - free_cmd(ent); - } + if (callback) + goto out; - return err; + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out_free; + + ds = ent->ts2 - ent->ts1; + op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[op]; + spin_lock_irq(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; out_free: free_cmd(ent); @@ -1185,41 +1183,30 @@ err_dbg: return err; } -void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; int i; for (i = 0; i < cmd->max_reg_cmds; i++) down(&cmd->sem); - down(&cmd->pages_sem); - flush_workqueue(cmd->wq); - - cmd->mode = CMD_MODE_EVENTS; + cmd->mode = mode; up(&cmd->pages_sem); for (i = 0; i < cmd->max_reg_cmds; i++) up(&cmd->sem); } -void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { - struct mlx5_cmd *cmd = &dev->cmd; - int i; - - for (i = 0; i < cmd->max_reg_cmds; i++) - down(&cmd->sem); - - down(&cmd->pages_sem); - - flush_workqueue(cmd->wq); - cmd->mode = CMD_MODE_POLLING; + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} - up(&cmd->pages_sem); - for (i = 0; i < cmd->max_reg_cmds; i++) - up(&cmd->sem); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) -- cgit v1.1 From 65ee67084589c1783a74b4a4a5db38d7264ec8b5 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 30 Jun 2016 17:34:43 +0300 Subject: net/mlx5: Add timeout handle to commands with callback The current implementation does not handle timeout in case of command with callback request, and this can lead to deadlock if the command doesn't get fw response. Add delayed callback timeout work before posting the command to fw. In case of real fw command completion we will cancel the delayed work. In case of fw command timeout the callback timeout handler will be called and it will simulate fw completion with timeout error. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 38 ++++++++++++++++++++++----- include/linux/mlx5/driver.h | 1 + 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 74067f5..d6e2a1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -606,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev, pr_debug("\n"); } +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); + + return be16_to_cpu(hdr->opcode); +} + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, + struct mlx5_cmd_work_ent, + cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, + cmd); + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; @@ -651,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work) dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); + if (ent->callback) + schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -695,13 +723,6 @@ static const char *deliv_status_to_str(u8 status) } } -static u16 msg_to_opcode(struct mlx5_cmd_msg *in) -{ - struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); - - return be16_to_cpu(hdr->opcode); -} - static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -765,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (!callback) init_completion(&ent->done); + INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); INIT_WORK(&ent->work, cmd_work_handler); if (page_queue) { cmd_work_handler(&ent->work); @@ -1242,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + if (ent->callback) + cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) sem = &cmd->pages_sem; else diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 80776d0..fd72ecf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -629,6 +629,7 @@ struct mlx5_cmd_work_ent { void *uout; int uout_size; mlx5_cmd_cbk_t callback; + struct delayed_work cb_timeout_work; void *context; int idx; struct completion done; -- cgit v1.1 From 29429f3300a378f7c29583b4c2c2ef29e2190a69 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 30 Jun 2016 17:34:44 +0300 Subject: net/mlx5e: Timeout if SQ doesn't flush during close Avoid an infinite loop by timing out waiting for the SQ to flush. Also clean up the TX descriptors if that happens. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 25 +++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 32 +++++++++++++++++++++++ 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index baa991a..c22d8c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -305,6 +305,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, MLX5E_SQ_STATE_BF_ENABLE, + MLX5E_SQ_STATE_TX_TIMEOUT, }; struct mlx5e_ico_wqe_info { @@ -589,6 +590,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_tx_descs(struct mlx5e_sq *sq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cb6defd..b94c84b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,6 +39,13 @@ #include "eswitch.h" #include "vxlan.h" +enum { + MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000, + MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20, + MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS / + MLX5_EN_QP_FLUSH_MSLEEP_QUANT, +}; + struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; @@ -782,6 +789,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { + int tout = 0; + int err; + if (sq->txq) { clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); /* prevent netif_tx_wake_queue */ @@ -792,15 +802,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_ERR); + if (err) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); } - while (sq->cc != sq->pc) /* wait till sq is empty */ - msleep(20); + /* wait till sq is empty, unless a TX timeout occurred on this SQ */ + while (sq->cc != sq->pc && + !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) { + msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); + if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); + } /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ napi_synchronize(&sq->channel->napi); + mlx5e_free_tx_descs(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5a750b9cd..65e3bce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -341,6 +341,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb); } +void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int i; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + skb = sq->skb[ci]; + wi = &sq->wqe_info[ci]; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + sq->cc += wi->num_wqebbs; + } +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq *sq; @@ -352,6 +381,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); + if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state))) + return false; + npkts = 0; nbytes = 0; -- cgit v1.1 From 3947ca1859999ac00698c0da0d233813a93d288a Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 30 Jun 2016 17:34:45 +0300 Subject: net/mlx5e: Implement ndo_tx_timeout callback Add callback to handle TX timeouts. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 46 +++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c22d8c8..244aced 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -539,6 +539,7 @@ struct mlx5e_priv { struct workqueue_struct *wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; struct delayed_work update_stats_work; struct mlx5_core_dev *mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b94c84b..38c1286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -98,6 +98,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } +static void mlx5e_tx_timeout_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + int err; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + if (err) + netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); +unlock: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); +} + static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -2609,6 +2629,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool sched_work = false; + int i; + + netdev_err(dev, "TX timeout detected\n"); + + for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { + struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + + if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) + continue; + sched_work = true; + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); + netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); + } + + if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) + schedule_work(&priv->tx_timeout_work); +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2626,6 +2669,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif + .ndo_tx_timeout = mlx5e_tx_timeout, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2655,6 +2699,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_tx_timeout = mlx5e_tx_timeout, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2857,6 +2902,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); } -- cgit v1.1 From 6cd392a082deca8accec5c50b5b3fc1a9de5bfa2 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 30 Jun 2016 17:34:46 +0300 Subject: net/mlx5e: Handle RQ flush in error cases Add a timeout to avoid an infinite loop waiting for RQ's to flush. This occurs during AER/EEH and will also happen if the device stops posting completions due to internal error or reset, or if moving the RQ to the error state fails. Also cleanup posted receive resources when closing the RQ. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 16 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 41 +++++++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 244aced..b429591 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -191,6 +191,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, + MLX5E_RQ_STATE_FLUSH_TIMEOUT, }; struct mlx5e_cq { @@ -220,6 +221,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + struct mlx5e_dma_info { struct page *page; dma_addr_t addr; @@ -241,6 +244,7 @@ struct mlx5e_rq { struct mlx5e_cq cq; mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; @@ -592,12 +596,15 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 38c1286..103feab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -332,6 +332,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); @@ -347,6 +348,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : @@ -552,17 +554,25 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { + int tout = 0; + int err; + clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); - while (!mlx5_wq_ll_is_empty(&rq->wq)) - msleep(20); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + while (!mlx5_wq_ll_is_empty(&rq->wq) && !err && + tout++ < MLX5_EN_QP_FLUSH_MAX_ITER) + msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); + + if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER) + set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state); /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ napi_synchronize(&rq->channel->napi); mlx5e_disable_rq(rq); + mlx5e_free_rx_descs(rq); mlx5e_destroy_rq(rq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 022acc2..9f2a16a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -212,6 +212,20 @@ err_free_skb: return -ENOMEM; } +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct sk_buff *skb = rq->skb[ix]; + + if (skb) { + rq->skb[ix] = NULL; + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + dev_kfree_skb(skb); + } +} + static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) { return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; @@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) return 0; } +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + + wi->free_wqe(rq, wi); +} + +void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_ix_be; + u16 wqe_ix; + + while (!mlx5_wq_ll_is_empty(wq)) { + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } +} + #define RQ_CANNOT_POST(rq) \ (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) @@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); int work_done = 0; + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state))) + return 0; + if (cq->decmprs_left) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); -- cgit v1.1 From e3a19b53cbb0e6738b7a547f262179065b72e3fa Mon Sep 17 00:00:00 2001 From: Matthew Finlay Date: Thu, 30 Jun 2016 17:34:47 +0300 Subject: net/mlx5e: Copy all L2 headers into inline segment ConnectX4-Lx uses an inline wqe mode that currently defaults to requiring the entire L2 header be included in the wqe. This patch fixes mlx5e_get_inline_hdr_size() to account for all L2 headers (VLAN, QinQ, etc) using skb_network_offset(skb). Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Matthew Finlay Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 65e3bce..42a5f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -123,7 +123,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE ETH_HLEN +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) if (bf) { u16 ihs = skb_headlen(skb); @@ -135,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, return skb_headlen(skb); } - return MLX5E_MIN_INLINE; + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, -- cgit v1.1 From 7ccdd0841b305323e10e779c476d3fbae2165756 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 30 Jun 2016 17:34:48 +0300 Subject: net/mlx5e: Fix select queue callback The default fallback function used by mlx5e select queue can return any TX queues in range [0..dev->num_real_tx_queues). The current implementation assumes that the fallback function returns a number in the range [0.. number of channels). Actually dev->num_real_tx_queues = (number of channels) * dev->num_tc; which is more than the expected range if num_tc is configured and could lead to crashes. To fix this we test if num_tc is not configured we can safely return the fallback suggestion, if not we will reciprocal_scale the fallback result and normalize it to the desired range. Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS') Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Reported-by: Doug Ledford Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 103feab..216fe3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1707,8 +1707,11 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_num_tc(netdev, ntc); + /* Map netdev TCs to offset 0 + * We have our own UP to TXQ mapping for QoS + */ for (tc = 0; tc < ntc; tc++) - netdev_set_tc_queue(netdev, tc, nch, tc * nch); + netdev_set_tc_queue(netdev, tc, nch, 0); } int mlx5e_open_locked(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 42a5f06..5740b46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); - int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ? - skb->vlan_tci >> VLAN_PRIO_SHIFT : 0; + int up = 0; + + if (!netdev_get_num_tc(dev)) + return channel_ix; + + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + + /* channel_ix can be larger than num_channels since + * dev->num_real_tx_queues = num_channels * num_tc + */ + if (channel_ix >= priv->params.num_channels) + channel_ix = reciprocal_scale(channel_ix, + priv->params.num_channels); return priv->channeltc_to_txq_map[channel_ix][up]; } -- cgit v1.1 From cdcf11212b22ff8e3de88ced1a6eda5bb950e120 Mon Sep 17 00:00:00 2001 From: Rana Shahout Date: Thu, 30 Jun 2016 17:34:49 +0300 Subject: net/mlx5e: Validate BW weight values of ETS Valid weight assigned to ETS TClass values are 1-100 Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS') Signed-off-by: Rana Shahout Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b429591..943b1bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -145,7 +145,6 @@ struct mlx5e_umr_wqe { #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ -#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif struct mlx5e_params { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index b2db180..c585349 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC; + tc_tx_bw[i] = ets->tc_tx_bw[i]; break; } } @@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) /* Validate Bandwidth Sum */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + if (!ets->tc_tx_bw[i]) + return -EINVAL; + bw_sum += ets->tc_tx_bw[i]; + } } if (bw_sum != 0 && bw_sum != 100) -- cgit v1.1 From 87424ad52d76535234f217637fcaadcd2ef2334d Mon Sep 17 00:00:00 2001 From: Shaker Daibes Date: Thu, 30 Jun 2016 17:34:50 +0300 Subject: net/mlx5e: Log link state changes Add Link UP/Down prints to kernel log when link state changes Signed-off-by: Shaker Daibes Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 216fe3e..7a0dca2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -81,10 +81,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); - if (port_state == VPORT_STATE_UP) + if (port_state == VPORT_STATE_UP) { + netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); - else + } else { + netdev_info(priv->netdev, "Link down\n"); netif_carrier_off(priv->netdev); + } } static void mlx5e_update_carrier_work(struct work_struct *work) -- cgit v1.1 From 8ba8682107ee2ca3347354e018865d8e1967c5f4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 1 Jul 2016 00:39:35 -0700 Subject: block: fix use-after-free in sys_ioprio_get() get_task_ioprio() accesses the task->io_context without holding the task lock and thus can race with exit_io_context(), leading to a use-after-free. The reproducer below hits this within a few seconds on my 4-core QEMU VM: #define _GNU_SOURCE #include #include #include #include int main(int argc, char **argv) { pid_t pid, child; long nproc, i; /* ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); */ syscall(SYS_ioprio_set, 1, 0, 0x6000); nproc = sysconf(_SC_NPROCESSORS_ONLN); for (i = 0; i < nproc; i++) { pid = fork(); assert(pid != -1); if (pid == 0) { for (;;) { pid = fork(); assert(pid != -1); if (pid == 0) { _exit(0); } else { child = wait(NULL); assert(child == pid); } } } pid = fork(); assert(pid != -1); if (pid == 0) { for (;;) { /* ioprio_get(IOPRIO_WHO_PGRP, 0); */ syscall(SYS_ioprio_get, 2, 0); } } } for (;;) { /* ioprio_get(IOPRIO_WHO_PGRP, 0); */ syscall(SYS_ioprio_get, 2, 0); } return 0; } This gets us KASAN dumps like this: [ 35.526914] ================================================================== [ 35.530009] BUG: KASAN: out-of-bounds in get_task_ioprio+0x7b/0x90 at addr ffff880066f34e6c [ 35.530009] Read of size 2 by task ioprio-gpf/363 [ 35.530009] ============================================================================= [ 35.530009] BUG blkdev_ioc (Not tainted): kasan: bad access detected [ 35.530009] ----------------------------------------------------------------------------- [ 35.530009] Disabling lock debugging due to kernel taint [ 35.530009] INFO: Allocated in create_task_io_context+0x2b/0x370 age=0 cpu=0 pid=360 [ 35.530009] ___slab_alloc+0x55d/0x5a0 [ 35.530009] __slab_alloc.isra.20+0x2b/0x40 [ 35.530009] kmem_cache_alloc_node+0x84/0x200 [ 35.530009] create_task_io_context+0x2b/0x370 [ 35.530009] get_task_io_context+0x92/0xb0 [ 35.530009] copy_process.part.8+0x5029/0x5660 [ 35.530009] _do_fork+0x155/0x7e0 [ 35.530009] SyS_clone+0x19/0x20 [ 35.530009] do_syscall_64+0x195/0x3a0 [ 35.530009] return_from_SYSCALL_64+0x0/0x6a [ 35.530009] INFO: Freed in put_io_context+0xe7/0x120 age=0 cpu=0 pid=1060 [ 35.530009] __slab_free+0x27b/0x3d0 [ 35.530009] kmem_cache_free+0x1fb/0x220 [ 35.530009] put_io_context+0xe7/0x120 [ 35.530009] put_io_context_active+0x238/0x380 [ 35.530009] exit_io_context+0x66/0x80 [ 35.530009] do_exit+0x158e/0x2b90 [ 35.530009] do_group_exit+0xe5/0x2b0 [ 35.530009] SyS_exit_group+0x1d/0x20 [ 35.530009] entry_SYSCALL_64_fastpath+0x1a/0xa4 [ 35.530009] INFO: Slab 0xffffea00019bcd00 objects=20 used=4 fp=0xffff880066f34ff0 flags=0x1fffe0000004080 [ 35.530009] INFO: Object 0xffff880066f34e58 @offset=3672 fp=0x0000000000000001 [ 35.530009] ================================================================== Fix it by grabbing the task lock while we poke at the io_context. Cc: stable@vger.kernel.org Reported-by: Dmitry Vyukov Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/ioprio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/ioprio.c b/block/ioprio.c index cc7800e..01b8116 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -150,8 +150,10 @@ static int get_task_ioprio(struct task_struct *p) if (ret) goto out; ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); + task_lock(p); if (p->io_context) ret = p->io_context->ioprio; + task_unlock(p); out: return ret; } -- cgit v1.1 From eb70db8756717b90c01ccc765fdefc4dd969fc74 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 1 Jul 2016 16:07:50 -0400 Subject: packet: Use symmetric hash for PACKET_FANOUT_HASH. People who use PACKET_FANOUT_HASH want a symmetric hash, meaning that they want packets going in both directions on a flow to hash to the same bucket. The core kernel SKB hash became non-symmetric when the ipv6 flow label and other entities were incorporated into the standard flow hash order to increase entropy. But there are no users of PACKET_FANOUT_HASH who want an assymetric hash, they all want a symmetric one. Therefore, use the flow dissector to compute a flat symmetric hash over only the protocol, addresses and ports. This hash does not get installed into and override the normal skb hash, so this change has no effect whatsoever on the rest of the stack. Reported-by: Eric Leblond Tested-by: Eric Leblond Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/flow_dissector.c | 43 +++++++++++++++++++++++++++++++++++++++++++ net/packet/af_packet.c | 2 +- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ee38a41..24859d4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1062,6 +1062,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a669dea..61ad43f 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; + +u32 __skb_get_hash_symmetric(struct sk_buff *skb) +{ + struct flow_keys keys; + + __flow_hash_secret_init(); + + memset(&keys, 0, sizeof(keys)); + __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, + NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + return __flow_hash_from_keys(&keys, hashrnd); +} +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, }; +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_dissector_symmetric, + flow_keys_dissector_symmetric_keys, + ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_buf_dissector, flow_keys_buf_dissector_keys, ARRAY_SIZE(flow_keys_buf_dissector_keys)); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9bff6ef..9f0983f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb_get_hash(skb), num); + return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, -- cgit v1.1 From 82a31b9231f02d9c1b7b290a46999d517b0d312a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 30 Jun 2016 10:15:22 -0700 Subject: net_sched: fix mirrored packets checksum Similar to commit 9b368814b336 ("net: fix bridge multicast packet checksum validation") we need to fixup the checksum for CHECKSUM_COMPLETE when pushing skb on RX path. Otherwise we get similar splats. Cc: Jamal Hadi Salim Cc: Tom Herbert Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/skbuff.h | 19 +++++++++++++++++++ net/core/skbuff.c | 18 ------------------ net/sched/act_mirred.c | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 24859d4..f39b371 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2871,6 +2871,25 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb, } /** + * skb_push_rcsum - push skb and update receive checksum + * @skb: buffer to update + * @len: length of data pulled + * + * This function performs an skb_push on the packet and updates + * the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_push unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. + */ +static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, + unsigned int len) +{ + skb_push(skb, len); + skb_postpush_rcsum(skb, skb->data, len); + return skb->data; +} + +/** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim * @len: new length diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f2b77e5..eb12d21 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3016,24 +3016,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** - * skb_push_rcsum - push skb and update receive checksum - * @skb: buffer to update - * @len: length of data pulled - * - * This function performs an skb_push on the packet and updates - * the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_push unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. - */ -static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) -{ - skb_push(skb, len); - skb_postpush_rcsum(skb, skb->data, len); - return skb->data; -} - -/** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 128942b..1f5bd6c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) - skb_push(skb2, skb->mac_len); + skb_push_rcsum(skb2, skb->mac_len); } /* mirror is always swallowed */ -- cgit v1.1 From 79c62220d74a4a3f961a2cb7320da09eebf5daf7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Jul 2016 00:00:54 +0200 Subject: macsec: set actual real device for xmit when !protect_frames Avoid recursions of dev_queue_xmit() to the wrong net device when frames are unprotected, since at that time skb->dev still points to our own macsec dev and unlike macsec_encrypt_finish() dev pointer doesn't get updated to real underlying device. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Daniel Borkmann Acked-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 0e7eff7..8bcd78f 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2640,6 +2640,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.OutPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); + skb->dev = macsec->real_dev; len = skb->len; ret = dev_queue_xmit(skb); count_tx(dev, ret, len); -- cgit v1.1 From 016eb55157166132b094e53434748cae35e18455 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 30 Jun 2016 13:27:20 -0700 Subject: net: bcmsysport: Device stats are unsigned long On 64bits kernels, device stats are 64bits wide, not 32bits. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 543bf38..bfa26a2 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -392,7 +392,7 @@ static void bcm_sysport_get_stats(struct net_device *dev, else p = (char *)priv; p += s->stat_offset; - data[i] = *(u32 *)p; + data[i] = *(unsigned long *)p; } } -- cgit v1.1 From 55e77a3e8297581c919b45adcc4d0815b69afa84 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 1 Jul 2016 11:11:21 +0200 Subject: tipc: fix nl compat regression for link statistics Fix incorrect use of nla_strlcpy() where the first NLA_HDRLEN bytes of the link name where left out. Making the output of tipc-config -ls look something like: Link statistics: dcast-link 1:data0-1.1.2:data0 1:data0-1.1.3:data0 Also, for the record, the patch that introduce this regression claims "Sending the whole object out can cause a leak". Which isn't very likely as this is a compat layer, where the data we are parsing is generated by us and we know the string to be NULL terminated. But you can of course never be to secure. Fixes: 5d2be1422e02 (tipc: fix an infoleak in tipc_nl_compat_link_dump) Signed-off-by: Richard Alpe Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 3ad9fab..1fd4647 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, -- cgit v1.1 From 4a6e68bf96c1fa293717d2f00a68a68c92fa4150 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 29 Jun 2016 04:27:35 -0400 Subject: ACPI,PCI,IRQ: factor in PCI possible The change introduced in commit 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) omitted the initially applied PCI_POSSIBLE penalty when the IRQ is active. Incorrect calculation of the penalty leads the ACPI code to assigning a wrong interrupt number to a PCI INTx interrupt. This would not be as bad as it sounds in theory. It would just cause the interrupts to be shared and result in performance penalty. However, some drivers (like the parallel port driver) don't like interrupt sharing and in the above case they will causes all of the PCI drivers wanting to share the interrupt to be unable to request it. The issue has not been caught in testing because the behavior is platform-specific and depends on the peripherals ending up sharing the IRQ and their drivers. Before the above commit the code would add the PCI_POSSIBLE value divided by the number of possible IRQ users to the IRQ penalty during initialization. Later in that code path, if the IRQ is chosen as the active IRQ or if it is used by ISA; additional penalties are added. Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) Signed-off-by: Sinan Kaya Tested-by: Wim Osterholt [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 4ed4061..db7be62 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -470,6 +470,7 @@ static int acpi_irq_pci_sharing_penalty(int irq) { struct acpi_pci_link *link; int penalty = 0; + int i; list_for_each_entry(link, &acpi_link_list, list) { /* @@ -478,18 +479,14 @@ static int acpi_irq_pci_sharing_penalty(int irq) */ if (link->irq.active && link->irq.active == irq) penalty += PIRQ_PENALTY_PCI_USING; - else { - int i; - - /* - * If a link is inactive, penalize the IRQs it - * might use, but not as severely. - */ - for (i = 0; i < link->irq.possible_count; i++) - if (link->irq.possible[i] == irq) - penalty += PIRQ_PENALTY_PCI_POSSIBLE / - link->irq.possible_count; - } + + /* + * penalize the IRQs PCI might use, but not as severely. + */ + for (i = 0; i < link->irq.possible_count; i++) + if (link->irq.possible[i] == irq) + penalty += PIRQ_PENALTY_PCI_POSSIBLE / + link->irq.possible_count; } return penalty; -- cgit v1.1 From 487cf917ed0d12afaf403d9d77684bf44b8c13be Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 29 Jun 2016 04:27:36 -0400 Subject: Revert "ACPI, PCI, IRQ: remove redundant code in acpi_irq_penalty_init()" Trying to make the ISA and PCI init functionality common turned out to be a bad idea, because the ISA path depends on external functionality. Restore the previous behavior and limit the refactoring to PCI interrupts only. Fixes: 1fcb6a813c4f "ACPI,PCI,IRQ: remove redundant code in acpi_irq_penalty_init()" Signed-off-by: Sinan Kaya Tested-by: Wim Osterholt Signed-off-by: Rafael J. Wysocki --- arch/x86/pci/acpi.c | 1 + drivers/acpi/pci_link.c | 36 ++++++++++++++++++++++++++++++++++++ include/acpi/acpi_drivers.h | 1 + 3 files changed, 38 insertions(+) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b2a4e2a..3cd6983 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -396,6 +396,7 @@ int __init pci_acpi_init(void) return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; x86_init.pci.init_irq = x86_init_noop; diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index db7be62..606083b 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -517,6 +517,42 @@ static int acpi_irq_get_penalty(int irq) return penalty; } +int __init acpi_irq_penalty_init(void) +{ + struct acpi_pci_link *link; + int i; + + /* + * Update penalties to facilitate IRQ balancing. + */ + list_for_each_entry(link, &acpi_link_list, list) { + + /* + * reflect the possible and active irqs in the penalty table -- + * useful for breaking ties. + */ + if (link->irq.possible_count) { + int penalty = + PIRQ_PENALTY_PCI_POSSIBLE / + link->irq.possible_count; + + for (i = 0; i < link->irq.possible_count; i++) { + if (link->irq.possible[i] < ACPI_MAX_ISA_IRQS) + acpi_isa_irq_penalty[link->irq. + possible[i]] += + penalty; + } + + } else if (link->irq.active && + (link->irq.active < ACPI_MAX_ISA_IRQS)) { + acpi_isa_irq_penalty[link->irq.active] += + PIRQ_PENALTY_PCI_POSSIBLE; + } + } + + return 0; +} + static int acpi_irq_balance = -1; /* 0: static, 1: balance */ static int acpi_pci_link_allocate(struct acpi_pci_link *link) diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 797ae2e..29c6912 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -78,6 +78,7 @@ /* ACPI PCI Interrupt Link (pci_link.c) */ +int acpi_irq_penalty_init(void); int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, int *polarity, char **name); int acpi_pci_link_free_irq(acpi_handle handle); -- cgit v1.1 From f7eca374f000bd8bd6aacc2619475fdba0b7ecca Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 29 Jun 2016 04:27:37 -0400 Subject: ACPI,PCI,IRQ: separate ISA penalty calculation Since commit 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) the penalty values are calculated on the fly rather than at boot time. This works fine for PCI interrupts but not so well for ISA interrupts. The information on whether or not an ISA interrupt is in use is not available to the pci_link.c code directly. That information is obtained from the outside via acpi_penalize_isa_irq(). [If its "active" argument is true, then the IRQ is in use by ISA.] Since the current code relies on PCI Link objects for determination of penalties, we are factoring in the PCI penalty twice after acpi_penalize_isa_irq() function is called. To avoid that, limit the newly added functionality to just PCI interrupts so that old behavior is still maintained. Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) Signed-off-by: Sinan Kaya Tested-by: Wim Osterholt Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 606083b..c983bf7 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -496,9 +496,6 @@ static int acpi_irq_get_penalty(int irq) { int penalty = 0; - if (irq < ACPI_MAX_ISA_IRQS) - penalty += acpi_isa_irq_penalty[irq]; - /* * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be @@ -513,6 +510,9 @@ static int acpi_irq_get_penalty(int irq) penalty += PIRQ_PENALTY_PCI_USING; } + if (irq < ACPI_MAX_ISA_IRQS) + return penalty + acpi_isa_irq_penalty[irq]; + penalty += acpi_irq_pci_sharing_penalty(irq); return penalty; } -- cgit v1.1 From a8b7d7709dc6db7d6d8a9a04aa9d49b029a27203 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 30 Jun 2016 19:46:16 +0000 Subject: net: stmmac: Fix null-function call in ISR on stmmac1000 (resent due to overhelpful mail client corrupting patch) At least on Meson GXBB, the CORE_IRQ_MTL_RX_OVERFLOW interrupt is thrown with the stmmac1000 driver, which does not support set_rx_tail_ptr. With this patch and the clock fixes, 1G ethernet works on ODROID-C2. Signed-off-by: Matt Corallo Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a473c18..e407126 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2804,7 +2804,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) priv->tx_path_in_lpi_mode = true; if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE) priv->tx_path_in_lpi_mode = false; - if (status & CORE_IRQ_MTL_RX_OVERFLOW) + if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr) priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, priv->rx_tail_addr, STMMAC_CHAN0); -- cgit v1.1 From 373819ec391de0d11f63b10b2fb69ef2854236ca Mon Sep 17 00:00:00 2001 From: Sergio Valverde Date: Fri, 1 Jul 2016 11:44:30 -0600 Subject: enc28j60: Fix race condition in enc28j60 driver The interrupt worker code for the enc28j60 relies only on the TXIF flag to determinate if the packet transmission was completed. However the datasheet specifies in section 12.1.3 that TXERIF will clear the TXRTS after a transmit abort. Also in section 12.1.4 that TXIF will be set when TXRTS transitions from '1' to '0'. Therefore the TXIF flag is enabled during transmission errors. This causes a race condition, since the worker code will invoke enc28j60_tx_clear() -> netif_wake_queue(), potentially invoking the ndo_start_xmit function to send a new packet. The enc28j60_send_packet function uses a workqueue that invokes enc28j60_hw_tx(). In between this function is called, the worker from the interrupt handler will enter the path for error handler because of the TXERIF flag, causing to invoke enc28j60_tx_clear() again and releasing the packet scheduled for transmission, causing a kernel crash with due a NULL pointer. These crashes due a NULL pointer were observed under stress conditions of the device. A BUG_ON() sequence was used to validate the issue was fixed, and has been running without problems for 2 years now. Signed-off-by: Diego Dompe Acked-by: Sergio Valverde Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/enc28j60.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 7066954..0a26b11 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1151,7 +1151,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) enc28j60_phy_read(priv, PHIR); } /* TX complete handler */ - if ((intflags & EIR_TXIF) != 0) { + if (((intflags & EIR_TXIF) != 0) && + ((intflags & EIR_TXERIF) == 0)) { bool err = false; loop++; if (netif_msg_intr(priv)) @@ -1203,7 +1204,7 @@ static void enc28j60_irq_work_handler(struct work_struct *work) enc28j60_tx_clear(ndev, true); } else enc28j60_tx_clear(ndev, true); - locked_reg_bfclr(priv, EIR, EIR_TXERIF); + locked_reg_bfclr(priv, EIR, EIR_TXERIF | EIR_TXIF); } /* RX Error handler */ if ((intflags & EIR_RXERIF) != 0) { @@ -1238,6 +1239,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) */ static void enc28j60_hw_tx(struct enc28j60_net *priv) { + BUG_ON(!priv->tx_skb); + if (netif_msg_tx_queued(priv)) printk(KERN_DEBUG DRV_NAME ": Tx Packet Len:%d\n", priv->tx_skb->len); -- cgit v1.1 From b291c418172f2cfbe009d81cd9a92f7a2de7c579 Mon Sep 17 00:00:00 2001 From: Stefan Hauser Date: Fri, 1 Jul 2016 22:35:03 +0200 Subject: net: phy: dp83867: Fix initialization of PHYCR register When initializing the PHY control register, the FIFO depth bits are written without reading the previous register value, i.e. all other bits are overwritten with zero. This disables automatic MDI-X configuration, which is enabled by default. Fix initialization by doing a read/modify/write operation. Signed-off-by: Stefan Hauser Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/dp83867.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 2afa61b..91177a4 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -57,6 +57,7 @@ /* PHY CTRL bits */ #define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14 +#define DP83867_PHYCR_FIFO_DEPTH_MASK (3 << 14) /* RGMIIDCTL bits */ #define DP83867_RGMII_TX_CLK_DELAY_SHIFT 4 @@ -133,8 +134,8 @@ static int dp83867_of_init(struct phy_device *phydev) static int dp83867_config_init(struct phy_device *phydev) { struct dp83867_private *dp83867; - int ret; - u16 val, delay; + int ret, val; + u16 delay; if (!phydev->priv) { dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867), @@ -151,8 +152,12 @@ static int dp83867_config_init(struct phy_device *phydev) } if (phy_interface_is_rgmii(phydev)) { - ret = phy_write(phydev, MII_DP83867_PHYCTRL, - (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT)); + val = phy_read(phydev, MII_DP83867_PHYCTRL); + if (val < 0) + return val; + val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK; + val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT); + ret = phy_write(phydev, MII_DP83867_PHYCTRL, val); if (ret) return ret; } -- cgit v1.1 From 9010ae4a8dee29e5886e86682799dde0eee7f447 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 1 Jul 2016 15:22:22 -0700 Subject: perf/x86/intel: Update event constraints when HT is off This patch updates the event constraints for non-PEBS mode for Intel Broadwell and Skylake processors. When HT is off, each CPU gets 8 generic counters. However, not all events can be programmed on any of the 8 counters. This patch adds the constraints for the MEM_* events which can only be measured on the bottom 4 counters. The constraints are also valid when HT is off because, then, there are only 4 generic counters and they are the bottom counters. Signed-off-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1467411742-13245-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c66695..9b4f9d3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -115,6 +115,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -139,6 +143,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -182,6 +190,16 @@ struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + EVENT_CONSTRAINT_END }; @@ -250,6 +268,10 @@ static struct event_constraint intel_hsw_event_constraints[] = { /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -264,6 +286,13 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ EVENT_CONSTRAINT_END }; -- cgit v1.1 From fc18822510721fe694d273c5211c71ea52796d76 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 1 Jul 2016 23:02:05 -0500 Subject: perf/x86: Fix 32-bit perf user callgraph collection A basic perf callgraph record operation causes an immediate panic on a 32-bit kernel compiled with CONFIG_CC_STACKPROTECTOR=y: $ perf record -g ls Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c0404fbd CPU: 0 PID: 998 Comm: ls Not tainted 4.7.0-rc5+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 c0dd5967 ff7afe1c 00000086 f41dbc2c c07445a0 464c457f f41dbca8 f41dbc44 c05646f4 f41dbca8 464c457f f41dbca8 464c457f f41dbc54 c04625be c0ce56fc c0404fbd f41dbc88 c0404fbd b74668f0 f41dc000 00000000 c0000000 00000000 Call Trace: [] dump_stack+0x58/0x78 [] panic+0x8e/0x1c6 [] __stack_chk_fail+0x1e/0x30 [] ? perf_callchain_user+0x22d/0x230 [] perf_callchain_user+0x22d/0x230 [] get_perf_callchain+0x1ff/0x270 [] perf_callchain+0x78/0x90 [] perf_prepare_sample+0x24b/0x370 [] perf_event_output_forward+0x24/0x70 [] __perf_event_overflow+0xa0/0x210 [] ? cpu_clock_event_read+0x43/0x50 [] perf_swevent_hrtimer+0x101/0x180 [] ? kmap_atomic_prot+0x35/0x140 [] ? get_page_from_freelist+0x279/0x950 [] ? vma_interval_tree_remove+0x158/0x230 [] ? wp_page_copy.isra.82+0x2f4/0x630 [] ? page_add_file_rmap+0x1d/0x50 [] ? unlock_page+0x61/0x80 [] ? filemap_map_pages+0x305/0x320 [] ? handle_mm_fault+0xb7f/0x1560 [] ? timerqueue_del+0x1b/0x70 [] ? __remove_hrtimer+0x2e/0x60 [] __hrtimer_run_queues+0xcb/0x2a0 [] ? __perf_event_overflow+0x210/0x210 [] hrtimer_interrupt+0x8a/0x180 [] local_apic_timer_interrupt+0x32/0x60 [] smp_apic_timer_interrupt+0x33/0x50 [] apic_timer_interrupt+0x34/0x3c Kernel Offset: disabled ---[ end Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c0404fbd The panic is caused by the fact that perf_callchain_user() mistakenly assumes it's 64-bit only and ends up corrupting the stack. Signed-off-by: Josh Poimboeuf Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.5+ Fixes: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") Link: http://lkml.kernel.org/r/1a547f5077ec30f75f9b57074837c3c80df86e5e.1467432113.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee..26ced53 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2319,7 +2319,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const void __user *fp; + const unsigned long __user *fp; if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ @@ -2332,7 +2332,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (void __user *)regs->bp; + fp = (unsigned long __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2345,16 +2345,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { unsigned long bytes; + frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 16)) + if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); if (bytes != 0) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); + bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); if (bytes != 0) break; -- cgit v1.1 From 0caa7616a6aca449dd68b58cb29bd491d296c2d5 Mon Sep 17 00:00:00 2001 From: Aaron Campbell Date: Sat, 2 Jul 2016 21:23:24 -0300 Subject: iommu/vt-d: Fix infinite loop in free_all_cpu_cached_iovas Per VT-d spec Section 10.4.2 ("Capability Register"), the maximum number of possible domains is 64K; indeed this is the maximum value that the cap_ndoms() macro will expand to. Since the value 65536 will not fix in a u16, the 'did' variable must be promoted to an int, otherwise the test for < 65536 will always be true and the loop will never end. The symptom, in my case, was a hung machine during suspend. Fixes: 3bd4f9112f87 ("iommu/vt-d: Fix overflow of iommu->domains array") Signed-off-by: Aaron Campbell Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cfe410e..323dac9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4602,13 +4602,13 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) for (i = 0; i < g_num_of_iommus; i++) { struct intel_iommu *iommu = g_iommus[i]; struct dmar_domain *domain; - u16 did; + int did; if (!iommu) continue; for (did = 0; did < cap_ndoms(iommu->cap); did++) { - domain = get_iommu_domain(iommu, did); + domain = get_iommu_domain(iommu, (u16)did); if (!domain) continue; -- cgit v1.1 From 3fa6993fef634e05d200d141a85df0b044572364 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 4 Jul 2016 14:02:15 +0200 Subject: ALSA: timer: Fix negative queue usage by racy accesses The user timer tu->qused counter may go to a negative value when multiple concurrent reads are performed since both the check and the decrement of tu->qused are done in two individual locked contexts. This results in bogus read outs, and the endless loop in the user-space side. The fix is to move the decrement of the tu->qused counter into the same spinlock context as the zero-check of the counter. Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index e722022..9a6157e 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1955,6 +1955,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, qhead = tu->qhead++; tu->qhead %= tu->queue_size; + tu->qused--; spin_unlock_irq(&tu->qlock); if (tu->tread) { @@ -1968,7 +1969,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, } spin_lock_irq(&tu->qlock); - tu->qused--; if (err < 0) goto _error; result += unit; -- cgit v1.1 From dbd1b8ea43b17e2ed4acda72f83ea17f69408682 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 1 Jul 2016 09:24:14 -0500 Subject: cpuidle: Fix last_residency division Snooze is a poll idle state in powernv and pseries platforms. Snooze has a timeout so that if a CPU stays in snooze for more than target residency of the next available idle state, then it would exit thereby giving chance to the cpuidle governor to re-evaluate and promote the CPU to a deeper idle state. Therefore whenever snooze exits due to this timeout, its last_residency will be target_residency of the next deeper state. Commit e93e59ce5b85 "cpuidle: Replace ktime_get() with local_clock()" changed the math around last_residency calculation. Specifically, while converting last_residency value from nano- to microseconds, it carries out right shift by 10. Because of that, in snooze timeout exit scenarios last_residency calculated is roughly 2.3% less than target_residency of the next available state. This pattern is picked up by get_typical_interval() in the menu governor and therefore expected_interval in menu_select() is frequently less than the target_residency of any state other than snooze. Due to this we are entering snooze at a higher rate, thereby affecting the single thread performance. Fix this by using more precise division via ktime_us_delta(). Fixes: e93e59ce5b85 "cpuidle: Replace ktime_get() with local_clock()" Reported-by: Anton Blanchard Bisected-by: Shilpasri G Bhat Signed-off-by: Shreyas B. Prabhu Acked-by: Daniel Lezcano Acked-by: Balbir Singh Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a4d0059..c73207a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -173,7 +173,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, struct cpuidle_state *target_state = &drv->states[index]; bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP); - u64 time_start, time_end; + ktime_t time_start, time_end; s64 diff; /* @@ -195,13 +195,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, sched_idle_set_state(target_state); trace_cpu_idle_rcuidle(index, dev->cpu); - time_start = local_clock(); + time_start = ns_to_ktime(local_clock()); stop_critical_timings(); entered_state = target_state->enter(dev, drv, index); start_critical_timings(); - time_end = local_clock(); + time_end = ns_to_ktime(local_clock()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* The cpu is no longer idle or about to enter idle. */ @@ -217,11 +217,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, if (!cpuidle_state_is_coupled(drv, index)) local_irq_enable(); - /* - * local_clock() returns the time in nanosecond, let's shift - * by 10 (divide by 1024) to have microsecond based time. - */ - diff = (time_end - time_start) >> 10; + diff = ktime_us_delta(time_end, time_start); if (diff > INT_MAX) diff = INT_MAX; -- cgit v1.1 From 87041a58d3b19455d39baed2a5e5bb43b58fb915 Mon Sep 17 00:00:00 2001 From: Colin Pitrat Date: Sat, 18 Jun 2016 19:05:04 +0100 Subject: gpio: sch: Fix Oops on module load on Asus Eee PC 1201 This fixes the issue descirbe in bug 117531 (https://bugzilla.kernel.org/show_bug.cgi?id=117531). It's a regression introduced in linux 4.5 that causes a Oops at load of gpio_sch and prevents powering off the computer. The issue is that sch_gpio_reg_set is called in sch_gpio_probe before gpio_chip data is initialized with the pointer to the sch_gpio struct. As sch_gpio_reg_set calls gpiochip_get_data, it returns NULL which causes the Oops. The patch follows Mika's advice (https://lkml.org/lkml/2016/5/9/61) and consists in modifying sch_gpio_reg_get and sch_gpio_reg_set to take a sch_gpio struct directly instead of a gpio_chip, which avoids the call to gpiochip_get_data. Thanks Mika for your patience with me :-) Cc: stable@vger.kernel.org Signed-off-by: Colin Pitrat Acked-by: Alexandre Courbot Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpio-sch.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index e85e753..eb43ae4 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -61,9 +61,8 @@ static unsigned sch_gpio_bit(struct sch_gpio *sch, unsigned gpio) return gpio % 8; } -static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg) +static int sch_gpio_reg_get(struct sch_gpio *sch, unsigned gpio, unsigned reg) { - struct sch_gpio *sch = gpiochip_get_data(gc); unsigned short offset, bit; u8 reg_val; @@ -75,10 +74,9 @@ static int sch_gpio_reg_get(struct gpio_chip *gc, unsigned gpio, unsigned reg) return reg_val; } -static void sch_gpio_reg_set(struct gpio_chip *gc, unsigned gpio, unsigned reg, +static void sch_gpio_reg_set(struct sch_gpio *sch, unsigned gpio, unsigned reg, int val) { - struct sch_gpio *sch = gpiochip_get_data(gc); unsigned short offset, bit; u8 reg_val; @@ -98,14 +96,15 @@ static int sch_gpio_direction_in(struct gpio_chip *gc, unsigned gpio_num) struct sch_gpio *sch = gpiochip_get_data(gc); spin_lock(&sch->lock); - sch_gpio_reg_set(gc, gpio_num, GIO, 1); + sch_gpio_reg_set(sch, gpio_num, GIO, 1); spin_unlock(&sch->lock); return 0; } static int sch_gpio_get(struct gpio_chip *gc, unsigned gpio_num) { - return sch_gpio_reg_get(gc, gpio_num, GLV); + struct sch_gpio *sch = gpiochip_get_data(gc); + return sch_gpio_reg_get(sch, gpio_num, GLV); } static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val) @@ -113,7 +112,7 @@ static void sch_gpio_set(struct gpio_chip *gc, unsigned gpio_num, int val) struct sch_gpio *sch = gpiochip_get_data(gc); spin_lock(&sch->lock); - sch_gpio_reg_set(gc, gpio_num, GLV, val); + sch_gpio_reg_set(sch, gpio_num, GLV, val); spin_unlock(&sch->lock); } @@ -123,7 +122,7 @@ static int sch_gpio_direction_out(struct gpio_chip *gc, unsigned gpio_num, struct sch_gpio *sch = gpiochip_get_data(gc); spin_lock(&sch->lock); - sch_gpio_reg_set(gc, gpio_num, GIO, 0); + sch_gpio_reg_set(sch, gpio_num, GIO, 0); spin_unlock(&sch->lock); /* @@ -182,13 +181,13 @@ static int sch_gpio_probe(struct platform_device *pdev) * GPIO7 is configured by the CMC as SLPIOVR * Enable GPIO[9:8] core powered gpios explicitly */ - sch_gpio_reg_set(&sch->chip, 8, GEN, 1); - sch_gpio_reg_set(&sch->chip, 9, GEN, 1); + sch_gpio_reg_set(sch, 8, GEN, 1); + sch_gpio_reg_set(sch, 9, GEN, 1); /* * SUS_GPIO[2:0] enabled by default * Enable SUS_GPIO3 resume powered gpio explicitly */ - sch_gpio_reg_set(&sch->chip, 13, GEN, 1); + sch_gpio_reg_set(sch, 13, GEN, 1); break; case PCI_DEVICE_ID_INTEL_ITC_LPC: -- cgit v1.1 From 85b03b3033fd4eba82665b3b9902c095a08cc52f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sun, 3 Jul 2016 18:32:05 +0200 Subject: Revert "gpiolib: Split GPIO flags parsing and GPIO configuration" This reverts commit 923b93e451db876d1479d3e4458fce14fec31d1c. Make sure consumers do not overwrite gpio flags for pins that have already been claimed. While adding support for gpio drivers to refuse a request using unsupported flags, the order of when the requested flag was checked and the new flags were applied was reversed to that consumers could overwrite flags for already requested gpios. This not only affects device-tree setups where two drivers could request the same gpio using conflicting configurations, but also allowed user space to clear gpio flags for already claimed pins simply by attempting to export them through the sysfs interface. By for example clearing the FLAG_ACTIVE_LOW flag this way, user space could effectively change the polarity of a signal. Reverting this change obviously prevents gpio drivers from doing sanity checks on the flags in their request callbacks. Fortunately only one recently added driver (gpio-tps65218 in v4.6) appears to do this, and a follow up patch could restore this functionality through a different interface. Cc: stable # 4.4 Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-legacy.c | 8 +++---- drivers/gpio/gpiolib.c | 52 +++++++++++++------------------------------ 2 files changed, 20 insertions(+), 40 deletions(-) diff --git a/drivers/gpio/gpiolib-legacy.c b/drivers/gpio/gpiolib-legacy.c index 3a5c701..8b83099 100644 --- a/drivers/gpio/gpiolib-legacy.c +++ b/drivers/gpio/gpiolib-legacy.c @@ -28,6 +28,10 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) if (!desc && gpio_is_valid(gpio)) return -EPROBE_DEFER; + err = gpiod_request(desc, label); + if (err) + return err; + if (flags & GPIOF_OPEN_DRAIN) set_bit(FLAG_OPEN_DRAIN, &desc->flags); @@ -37,10 +41,6 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) if (flags & GPIOF_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); - err = gpiod_request(desc, label); - if (err) - return err; - if (flags & GPIOF_DIR_IN) err = gpiod_direction_input(desc); else diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 570771e..be74bd3 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1352,14 +1352,6 @@ static int __gpiod_request(struct gpio_desc *desc, const char *label) spin_lock_irqsave(&gpio_lock, flags); } done: - if (status < 0) { - /* Clear flags that might have been set by the caller before - * requesting the GPIO. - */ - clear_bit(FLAG_ACTIVE_LOW, &desc->flags); - clear_bit(FLAG_OPEN_DRAIN, &desc->flags); - clear_bit(FLAG_OPEN_SOURCE, &desc->flags); - } spin_unlock_irqrestore(&gpio_lock, flags); return status; } @@ -2587,28 +2579,13 @@ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, } EXPORT_SYMBOL_GPL(gpiod_get_optional); -/** - * gpiod_parse_flags - helper function to parse GPIO lookup flags - * @desc: gpio to be setup - * @lflags: gpio_lookup_flags - returned from of_find_gpio() or - * of_get_gpio_hog() - * - * Set the GPIO descriptor flags based on the given GPIO lookup flags. - */ -static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags) -{ - if (lflags & GPIO_ACTIVE_LOW) - set_bit(FLAG_ACTIVE_LOW, &desc->flags); - if (lflags & GPIO_OPEN_DRAIN) - set_bit(FLAG_OPEN_DRAIN, &desc->flags); - if (lflags & GPIO_OPEN_SOURCE) - set_bit(FLAG_OPEN_SOURCE, &desc->flags); -} /** * gpiod_configure_flags - helper function to configure a given GPIO * @desc: gpio whose value will be assigned * @con_id: function within the GPIO consumer + * @lflags: gpio_lookup_flags - returned from of_find_gpio() or + * of_get_gpio_hog() * @dflags: gpiod_flags - optional GPIO initialization flags * * Return 0 on success, -ENOENT if no GPIO has been assigned to the @@ -2616,10 +2593,17 @@ static void gpiod_parse_flags(struct gpio_desc *desc, unsigned long lflags) * occurred while trying to acquire the GPIO. */ static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, - enum gpiod_flags dflags) + unsigned long lflags, enum gpiod_flags dflags) { int status; + if (lflags & GPIO_ACTIVE_LOW) + set_bit(FLAG_ACTIVE_LOW, &desc->flags); + if (lflags & GPIO_OPEN_DRAIN) + set_bit(FLAG_OPEN_DRAIN, &desc->flags); + if (lflags & GPIO_OPEN_SOURCE) + set_bit(FLAG_OPEN_SOURCE, &desc->flags); + /* No particular flag request, return here... */ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { pr_debug("no flags found for %s\n", con_id); @@ -2686,13 +2670,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, return desc; } - gpiod_parse_flags(desc, lookupflags); - status = gpiod_request(desc, con_id); if (status < 0) return ERR_PTR(status); - status = gpiod_configure_flags(desc, con_id, flags); + status = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (status < 0) { dev_dbg(dev, "setup of GPIO %s failed\n", con_id); gpiod_put(desc); @@ -2748,6 +2730,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, if (IS_ERR(desc)) return desc; + ret = gpiod_request(desc, NULL); + if (ret) + return ERR_PTR(ret); + if (active_low) set_bit(FLAG_ACTIVE_LOW, &desc->flags); @@ -2758,10 +2744,6 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, set_bit(FLAG_OPEN_SOURCE, &desc->flags); } - ret = gpiod_request(desc, NULL); - if (ret) - return ERR_PTR(ret); - return desc; } EXPORT_SYMBOL_GPL(fwnode_get_named_gpiod); @@ -2814,8 +2796,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, chip = gpiod_to_chip(desc); hwnum = gpio_chip_hwgpio(desc); - gpiod_parse_flags(desc, lflags); - local_desc = gpiochip_request_own_desc(chip, hwnum, name); if (IS_ERR(local_desc)) { status = PTR_ERR(local_desc); @@ -2824,7 +2804,7 @@ int gpiod_hog(struct gpio_desc *desc, const char *name, return status; } - status = gpiod_configure_flags(desc, name, dflags); + status = gpiod_configure_flags(desc, name, lflags, dflags); if (status < 0) { pr_err("setup of hog GPIO %s (chip %s, offset %d) failed, %d\n", name, chip->label, hwnum, status); -- cgit v1.1 From d5d5e8d55732c7c35c354e45e3b0af2795978a57 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sat, 2 Jul 2016 15:02:48 +0800 Subject: geneve: fix max_mtu setting For ipv6+udp+geneve encapsulation data, the max_mtu should subtract sizeof(ipv6hdr), instead of sizeof(iphdr). Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- drivers/net/geneve.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cc39cef..9b3dc3c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1072,12 +1072,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) { + struct geneve_dev *geneve = netdev_priv(dev); /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. */ - int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) - - dev->hard_header_len; + int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; + + if (geneve->remote.sa.sa_family == AF_INET6) + max_mtu -= sizeof(struct ipv6hdr); + else + max_mtu -= sizeof(struct iphdr); if (new_mtu < 68) return -EINVAL; -- cgit v1.1 From 3dad5424adfb346c871847d467f97dcdca64ea97 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 3 Jul 2016 10:54:54 +0200 Subject: RDS: fix rds_tcp_init() error path If register_pernet_subsys() fails, we shouldn't try to call unregister_pernet_subsys(). Fixes: 467fa15356 ("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Cc: stable@vger.kernel.org Cc: Sowmini Varadhan Cc: David S. Miller Signed-off-by: Vegard Nossum Acked-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 74ee126..c8a7b4c 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -616,7 +616,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -628,8 +628,9 @@ static int rds_tcp_init(void) out_recv: rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; -- cgit v1.1 From c086e7096170390594c425114d98172bc9aceb8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sun, 3 Jul 2016 22:24:50 +0200 Subject: cdc_ncm: workaround for EM7455 "silent" data interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several Lenovo users have reported problems with their Sierra Wireless EM7455 modem. The driver has loaded successfully and the MBIM management channel has appeared to work, including establishing a connection to the mobile network. But no frames have been received over the data interface. The problem affects all EM7455 and MC7455, and is assumed to affect other modems based on the same Qualcomm chipset and baseband firmware. Testing narrowed the problem down to what seems to be a firmware timing bug during initialization. Adding a short sleep while probing is sufficient to make the problem disappear. Experiments have shown that 1-2 ms is too little to have any effect, while 10-20 ms is enough to reliably succeed. Reported-by: Stefan Armbruster Reported-by: Ralph Plawetzki Reported-by: Andreas Fett Reported-by: Rasmus Lerdorf Reported-by: Samo Ratnik Reported-and-tested-by: Aleksander Morgado Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 53759c3..877c951 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -854,6 +854,13 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ if (cdc_ncm_init(dev)) goto error2; + /* Some firmwares need a pause here or they will silently fail + * to set up the interface properly. This value was decided + * empirically on a Sierra Wireless MC7455 running 02.08.02.00 + * firmware. + */ + usleep_range(10000, 20000); + /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) { -- cgit v1.1 From a788a4a040e003574b8ad17115706ab1601ec572 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 4 Jul 2016 07:46:42 +0200 Subject: fsl/fman: fix error handling This is likely that checking 'fman->fifo_offset' instead of 'fman->cam_offset' is expected here. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 1de2e1e..f634e769 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2036,7 +2036,7 @@ static int fman_init(struct fman *fman) /* allocate MURAM for FIFO according to total size */ fman->fifo_offset = fman_muram_alloc(fman->muram, fman->state->total_fifo_size); - if (IS_ERR_VALUE(fman->cam_offset)) { + if (IS_ERR_VALUE(fman->fifo_offset)) { free_init_resources(fman); dev_err(fman->dev, "%s: MURAM alloc for BMI FIFO failed\n", __func__); -- cgit v1.1 From c8e2ca30fdba613c088ab2fd5ac7b9c69b402559 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 4 Jul 2016 17:16:41 -0700 Subject: Revert "fsl/fman: fix error handling" This reverts commit a788a4a040e003574b8ad17115706ab1601ec572. This patch is wrong, the type returned doesn't fit what the error pointer macros expect. Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index f634e769..1de2e1e 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2036,7 +2036,7 @@ static int fman_init(struct fman *fman) /* allocate MURAM for FIFO according to total size */ fman->fifo_offset = fman_muram_alloc(fman->muram, fman->state->total_fifo_size); - if (IS_ERR_VALUE(fman->fifo_offset)) { + if (IS_ERR_VALUE(fman->cam_offset)) { free_init_resources(fman); dev_err(fman->dev, "%s: MURAM alloc for BMI FIFO failed\n", __func__); -- cgit v1.1 From 7831b4ff0d926e0deeaabef9db8800ed069a2757 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 4 Jul 2016 14:07:16 +0200 Subject: qeth: delete napi struct when removing a qeth device A qeth_card contains a napi_struct linked to the net_device during device probing. This struct must be deleted when removing the qeth device, otherwise Panic on oops can occur when qeth devices are repeatedly removed and added. Fixes: a1c3ed4c9ca ("qeth: NAPI support for l2 and l3 discipline") Cc: stable@vger.kernel.org # v2.6.37+ Signed-off-by: Ursula Braun Tested-by: Alexander Klein Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 1 + drivers/s390/net/qeth_l3_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 80b1979..df036b8 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1051,6 +1051,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_l2_set_offline(cgdev); if (card->dev) { + netif_napi_del(&card->napi); unregister_netdev(card->dev); card->dev = NULL; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ac54433..709b523 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3226,6 +3226,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_l3_set_offline(cgdev); if (card->dev) { + netif_napi_del(&card->napi); unregister_netdev(card->dev); card->dev = NULL; } -- cgit v1.1 From 3b8e64f6f8e2d437b15572f445b1932a45f9be6a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 5 Jul 2016 10:04:53 +0800 Subject: gpu: drm: sun4i_drv: add missing of_node_put after calling of_parse_phandle of_node_put needs to be called when the device node which is got from of_parse_phandle has finished using. Cc: Maxime Ripard Cc: Chen-Yu Tsai Signed-off-by: Peter Chen Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index cbe4a25..937394c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -310,6 +310,7 @@ static int sun4i_drv_probe(struct platform_device *pdev) count += sun4i_drv_add_endpoints(&pdev->dev, &match, pipeline); + of_node_put(pipeline); DRM_DEBUG_DRIVER("Queued %d outputs on pipeline %d\n", count, i); -- cgit v1.1 From 9cd25743765cfe851aed8d655a62d60156aed293 Mon Sep 17 00:00:00 2001 From: Torsten Hilbrich Date: Tue, 5 Jul 2016 10:40:22 +0200 Subject: ALSA: hda/realtek: Add Lenovo L460 to docking unit fixup This solves the issue that a headphone is not working on the docking unit. Signed-off-by: Torsten Hilbrich Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 900bfbc..5fac786 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5651,6 +5651,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), -- cgit v1.1 From 99ec8a3608330d202448085185cf28389b789b7b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 5 Jul 2016 14:25:59 +0100 Subject: irqchip/mips-gic: Map to VPs using HW VPNum When mapping an interrupt to a VP(E) we must use the identifier for the VP that the hardware expects, and this does not always match up with the Linux CPU number. Commit d46812bb0bef ("irqchip: mips-gic: Use HW IDs for VPE_OTHER_ADDR") corrected this for the cases that existed at the time it was written, but commit 2af70a962070 ("irqchip/mips-gic: Add a IPI hierarchy domain") added another case before the former patch was merged. This leads to incorrectly using Linux CPU numbers when mapping interrupts to VPs, which breaks on certain systems such as those with multi-core I6400 CPUs. Fix by adding the appropriate call to mips_cm_vp_id() to retrieve the expected VP identifier. Fixes: d46812bb0bef ("irqchip: mips-gic: Use HW IDs for VPE_OTHER_ADDR") Fixes: 2af70a962070 ("irqchip/mips-gic: Add a IPI hierarchy domain") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Qais Yousef Cc: Ralf Baechle Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160705132600.27730-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 8a4adbeb..69b1b82 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -718,7 +718,7 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, spin_lock_irqsave(&gic_lock, flags); gic_map_to_pin(intr, gic_cpu_pin); - gic_map_to_vpe(intr, vpe); + gic_map_to_vpe(intr, mips_cm_vp_id(vpe)); for (i = 0; i < min(gic_vpes, NR_CPUS); i++) clear_bit(intr, pcpu_masks[i].pcpu_mask); set_bit(intr, pcpu_masks[vpe].pcpu_mask); -- cgit v1.1 From 547aefc4db877e65245c3d95fcce703701bf3a0c Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 5 Jul 2016 14:26:00 +0100 Subject: irqchip/mips-gic: Match IPI IRQ domain by bus token only Commit fbde2d7d8290 ("MIPS: Add generic SMP IPI support") introduced code which calls irq_find_matching_host with a NULL node parameter in order to discover IPI IRQ domains which are not associated with the DT root node's interrupt parent. This suggests that implementations of IPI IRQ domains should effectively ignore the node parameter if it is NULL and search purely based upon the bus token. Commit 2af70a962070 ("irqchip/mips-gic: Add a IPI hierarchy domain") did not do this when implementing the GIC IPI IRQ domain, and on MIPS Boston boards this leads to no IPI domain being discovered and a NULL pointer dereference when attempting to send an IPI: CPU 0 Unable to handle kernel paging request at virtual address 0000000000000040, epc == ffffffff8016e70c, ra == ffffffff8010ff5c Oops[#1]: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc6-00223-gad0d1b6 #945 task: a8000000ff066fc0 ti: a8000000ff068000 task.ti: a8000000ff068000 $ 0 : 0000000000000000 0000000000000001 ffffffff80730000 0000000000000003 $ 4 : 0000000000000000 ffffffff8057e5b0 a800000001e3ee00 0000000000000000 $ 8 : 0000000000000000 0000000000000023 0000000000000001 0000000000000001 $12 : 0000000000000000 ffffffff803323d0 0000000000000000 0000000000000000 $16 : 0000000000000000 0000000000000000 0000000000000001 ffffffff801108fc $20 : 0000000000000000 ffffffff8057e5b0 0000000000000001 0000000000000000 $24 : 0000000000000000 ffffffff8012de28 $28 : a8000000ff068000 a8000000ff06fbc0 0000000000000000 ffffffff8010ff5c Hi : ffffffff8014c174 Lo : a800000001e1e140 epc : ffffffff8016e70c __ipi_send_mask+0x24/0x11c ra : ffffffff8010ff5c mips_smp_send_ipi_mask+0x68/0x178 Status: 140084e2 KX SX UX KERNEL EXL Cause : 00800008 (ExcCode 02) BadVA : 0000000000000040 PrId : 0001a920 (MIPS I6400) Process swapper/0 (pid: 1, threadinfo=a8000000ff068000, task=a8000000ff066fc0, tls=0000000000000000) Stack : 0000000000000000 0000000000000000 0000000000000001 ffffffff801108fc 0000000000000000 ffffffff8057e5b0 0000000000000001 ffffffff8010ff5c 0000000000000001 0000000000000020 0000000000000000 0000000000000000 0000000000000000 ffffffff801108fc 0000000000000000 0000000000000001 0000000000000001 0000000000000000 0000000000000000 ffffffff801865e8 a8000000ff0c7500 a8000000ff06fc90 0000000000000001 0000000000000002 ffffffff801108fc ffffffff801868b8 0000000000000000 ffffffff801108fc 0000000000000000 0000000000000003 ffffffff8068c700 0000000000000001 ffffffff80730000 0000000000000001 a8000000ff00a290 ffffffff80110c50 0000000000000003 a800000001e48308 0000000000000003 0000000000000008 ... Call Trace: [] __ipi_send_mask+0x24/0x11c [] mips_smp_send_ipi_mask+0x68/0x178 [] generic_exec_single+0x150/0x170 [] smp_call_function_single+0x108/0x160 [] cps_boot_secondary+0x328/0x394 [] __cpu_up+0x38/0x90 [] bringup_cpu+0x24/0xac [] cpuhp_up_callbacks+0x58/0xdc [] cpu_up+0x118/0x18c [] smp_init+0xbc/0xe8 [] kernel_init_freeable+0xa0/0x228 [] kernel_init+0x10/0xf0 [] ret_from_kernel_thread+0x14/0x1c Fix this by allowing the GIC IPI IRQ domain to match purely based upon the bus token if the node provided is NULL. Fixes: 2af70a962070 ("irqchip/mips-gic: Add a IPI hierarchy domain") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Qais Yousef Cc: Ralf Baechle Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160705132600.27730-2-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 69b1b82..70ed1d0 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -959,7 +959,7 @@ int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node, switch (bus_token) { case DOMAIN_BUS_IPI: is_ipi = d->bus_token == bus_token; - return to_of_node(d->fwnode) == node && is_ipi; + return (!node || to_of_node(d->fwnode) == node) && is_ipi; break; default: return 0; -- cgit v1.1 From 92c74bceb0dd4b74a6c0b56c2f9a5c93a0860808 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 5 Jul 2016 08:46:43 +0200 Subject: Revert "gpio: gpiolib-of: Allow compile testing" This reverts commit 1e4a80640338924b9f9fd7a121ac31d08134410a. This creates more problems than it solves right now. Compile testing needs to go in with patches fixing the problems it uncovers. Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index cebcb40..536112f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -49,7 +49,7 @@ config GPIO_DEVRES config OF_GPIO def_bool y - depends on OF || COMPILE_TEST + depends on OF config GPIO_ACPI def_bool y -- cgit v1.1 From 2609af19362d03332b55fc7836e7023bcd6d90bf Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 5 Jul 2016 16:11:46 +0800 Subject: r8152: fix runtime function for RTL8152 The RTL8152 doesn't have U1U2 and U2P3 features, so use different runtime functions for RTL812 and RTL8153 by adding autosuspend_en() to rtl_ops. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d7f20a9..0da72d3 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -31,7 +31,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "4" +#define NET_VERSION "5" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -624,6 +624,7 @@ struct r8152 { int (*eee_get)(struct r8152 *, struct ethtool_eee *); int (*eee_set)(struct r8152 *, struct ethtool_eee *); bool (*in_nway)(struct r8152 *); + void (*autosuspend_en)(struct r8152 *tp, bool enable); } rtl_ops; int intr_interval; @@ -2408,9 +2409,6 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) if (enable) { u32 ocp_data; - r8153_u1u2en(tp, false); - r8153_u2p3en(tp, false); - __rtl_set_wol(tp, WAKE_ANY); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); @@ -2432,7 +2430,17 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); + } +} +static void rtl8153_runtime_enable(struct r8152 *tp, bool enable) +{ + rtl_runtime_suspend_enable(tp, enable); + + if (enable) { + r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); + } else { r8153_u2p3en(tp, true); r8153_u1u2en(tp, true); } @@ -3523,7 +3531,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) napi_disable(&tp->napi); if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_stop_rx(tp); - rtl_runtime_suspend_enable(tp, true); + tp->rtl_ops.autosuspend_en(tp, true); } else { cancel_delayed_work_sync(&tp->schedule); tp->rtl_ops.down(tp); @@ -3549,7 +3557,7 @@ static int rtl8152_resume(struct usb_interface *intf) if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) { if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { - rtl_runtime_suspend_enable(tp, false); + tp->rtl_ops.autosuspend_en(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); @@ -3568,7 +3576,7 @@ static int rtl8152_resume(struct usb_interface *intf) usb_submit_urb(tp->intr_urb, GFP_KERNEL); } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { if (tp->netdev->flags & IFF_UP) - rtl_runtime_suspend_enable(tp, false); + tp->rtl_ops.autosuspend_en(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); } @@ -4148,6 +4156,7 @@ static int rtl_ops_init(struct r8152 *tp) ops->eee_get = r8152_get_eee; ops->eee_set = r8152_set_eee; ops->in_nway = rtl8152_in_nway; + ops->autosuspend_en = rtl_runtime_suspend_enable; break; case RTL_VER_03: @@ -4163,6 +4172,7 @@ static int rtl_ops_init(struct r8152 *tp) ops->eee_get = r8153_get_eee; ops->eee_set = r8153_set_eee; ops->in_nway = rtl8153_in_nway; + ops->autosuspend_en = rtl8153_runtime_enable; break; default: -- cgit v1.1 From a30b016808e214c6864ad579ef867b3fe0a314f8 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Tue, 5 Jul 2016 12:09:47 +0300 Subject: bonding: fix enslavement slave link notifications Currently, link notifications are not sent by bond_set_slave_link_state() upon enslavement if the slave is enslaved when up. This happens because slave->link default init value is 0, which is the same as BOND_LINK_UP, resulting in bond_set_slave_link_state() ignoring this transition. This patch sets the default value of slave->link to BOND_LINK_NOCHANGE, assuring it will count as a state transition and thus trigger notification logic. Signed-off-by: Aviv Heller Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 941ec99..a2afa3b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } /* check for initial state */ + new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { -- cgit v1.1 From eae033c1b86721ec54511607fc26bfb94a0e004b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 5 Jul 2016 12:17:12 +0300 Subject: net/mlx5: Avoid setting unused var when modifying vport node GUID GCC complains on unused-but-set-variable, clean this up. Fixes: 23898c763f4a ('net/mlx5: E-Switch, Modify node guid on vf set MAC') Signed-off-by: Or Gerlitz Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index daf44cd..91846df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -513,7 +513,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; - u8 *guid; void *in; int err; @@ -535,8 +534,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); - guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context, - node_guid); MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); err = mlx5_modify_nic_vport_context(mdev, in, inlen); -- cgit v1.1 From f5d6516120ee5c777fb7b1ba9d39031881ad511b Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 5 Jul 2016 18:07:24 +0530 Subject: cxgb4: update latest firmware version supported Change t4fw_version.h to update latest firmware version number Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index c4b262c..2accab3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x0E -#define T4FW_VERSION_MICRO 0x04 +#define T4FW_VERSION_MINOR 0x0F +#define T4FW_VERSION_MICRO 0x25 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x0E -#define T5FW_VERSION_MICRO 0x04 +#define T5FW_VERSION_MINOR 0x0F +#define T5FW_VERSION_MICRO 0x25 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x0E -#define T6FW_VERSION_MICRO 0x04 +#define T6FW_VERSION_MINOR 0x0F +#define T6FW_VERSION_MICRO 0x25 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- cgit v1.1 From 262e2bfd7d1e1f1ee48b870e5dfabb87c06b975e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Pr=C3=83=C2=A9mont?= Date: Thu, 30 Jun 2016 17:00:32 +0200 Subject: qla2xxx: Fix NULL pointer deref in QLA interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In qla24xx_process_response_queue() rsp->msix->cpuid may trigger NULL pointer dereference when rsp->msix is NULL: [ 5.622457] NULL pointer dereference at 0000000000000050 [ 5.622457] IP: [] qla24xx_process_response_queue+0x44/0x4b0 [ 5.622457] PGD 0 [ 5.622457] Oops: 0000 [#1] SMP [ 5.622457] Modules linked in: [ 5.622457] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.6.3-x86_64 #1 [ 5.622457] Hardware name: HP ProLiant DL360 G5, BIOS P58 05/02/2011 [ 5.622457] task: ffff8801a88f3740 ti: ffff8801a8954000 task.ti: ffff8801a8954000 [ 5.622457] RIP: 0010:[] [] qla24xx_process_response_queue+0x44/0x4b0 [ 5.622457] RSP: 0000:ffff8801afb03de8 EFLAGS: 00010002 [ 5.622457] RAX: 0000000000000000 RBX: 0000000000000032 RCX: 00000000ffffffff [ 5.622457] RDX: 0000000000000002 RSI: ffff8801a79bf8c8 RDI: ffff8800c8f7e7c0 [ 5.622457] RBP: ffff8801afb03e68 R08: 0000000000000000 R09: 0000000000000000 [ 5.622457] R10: 00000000ffff8c47 R11: 0000000000000002 R12: ffff8801a79bf8c8 [ 5.622457] R13: ffff8800c8f7e7c0 R14: ffff8800c8f60000 R15: 0000000000018013 [ 5.622457] FS: 0000000000000000(0000) GS:ffff8801afb00000(0000) knlGS:0000000000000000 [ 5.622457] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5.622457] CR2: 0000000000000050 CR3: 0000000001e07000 CR4: 00000000000006e0 [ 5.622457] Stack: [ 5.622457] ffff8801afb03e30 ffffffff810c0f2d 0000000000000086 0000000000000002 [ 5.622457] ffff8801afb03e28 ffffffff816570e1 ffff8800c8994628 0000000000000002 [ 5.622457] ffff8801afb03e60 ffffffff816772d4 b47c472ad6955e68 0000000000000032 [ 5.622457] Call Trace: [ 5.622457] [ 5.622457] [] ? __wake_up_common+0x4d/0x80 [ 5.622457] [] ? usb_hcd_resume_root_hub+0x51/0x60 [ 5.622457] [] ? uhci_hub_status_data+0x64/0x240 [ 5.622457] [] qla24xx_intr_handler+0xf0/0x2e0 [ 5.622457] [] ? get_next_timer_interrupt+0xce/0x200 [ 5.622457] [] handle_irq_event_percpu+0x64/0x100 [ 5.622457] [] handle_irq_event+0x27/0x50 [ 5.622457] [] handle_edge_irq+0x65/0x140 [ 5.622457] [] handle_irq+0x18/0x30 [ 5.622457] [] do_IRQ+0x46/0xd0 [ 5.622457] [] common_interrupt+0x7f/0x7f [ 5.622457] [ 5.622457] [] ? mwait_idle+0x68/0x80 [ 5.622457] [] arch_cpu_idle+0xa/0x10 [ 5.622457] [] default_idle_call+0x27/0x30 [ 5.622457] [] cpu_startup_entry+0x19b/0x230 [ 5.622457] [] start_secondary+0x136/0x140 [ 5.622457] Code: 00 00 65 48 8b 04 25 28 00 00 00 48 89 45 d0 31 c0 48 8b 47 58 a8 02 0f 84 c5 00 00 00 48 8b 46 50 49 89 f4 65 8b 15 34 bb aa 7e <39> 50 50 74 11 89 50 50 48 8b 46 50 8b 40 50 41 89 86 60 8b 00 [ 5.622457] RIP [] qla24xx_process_response_queue+0x44/0x4b0 [ 5.622457] RSP [ 5.622457] CR2: 0000000000000050 [ 5.622457] ---[ end trace fa2b19c25106d42b ]--- [ 5.622457] Kernel panic - not syncing: Fatal exception in interrupt The affected code was introduced by commit cdb898c52d1dfad4b4800b83a58b3fe5d352edde (qla2xxx: Add irq affinity notification). Only dereference rsp->msix when it has been set so the machine can boot fine. Possibly rsp->msix is unset because: [ 3.479679] qla2xxx [0000:00:00.0]-0005: : QLogic Fibre Channel HBA Driver: 8.07.00.33-k. [ 3.481839] qla2xxx [0000:13:00.0]-001d: : Found an ISP2432 irq 17 iobase 0xffffc90000038000. [ 3.484081] qla2xxx [0000:13:00.0]-0035:0: MSI-X; Unsupported ISP2432 (0x2, 0x3). [ 3.485804] qla2xxx [0000:13:00.0]-0037:0: Falling back-to MSI mode -258. [ 3.890145] scsi host0: qla2xxx [ 3.891956] qla2xxx [0000:13:00.0]-00fb:0: QLogic QLE2460 - PCI-Express Single Channel 4Gb Fibre Channel HBA. [ 3.894207] qla2xxx [0000:13:00.0]-00fc:0: ISP2432: PCIe (2.5GT/s x4) @ 0000:13:00.0 hdma+ host#=0 fw=7.03.00 (9496). [ 5.714774] qla2xxx [0000:13:00.0]-500a:0: LOOP UP detected (4 Gbps). Signed-off-by: Bruno Prémont Acked-by: Quinn Tran CC: # 4.5+ Fixes: cdb898c52d1dfad4b4800b83a58b3fe5d352edde Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_isr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 5649c20..a92a62d 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2548,7 +2548,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (!vha->flags.online) return; - if (rsp->msix->cpuid != smp_processor_id()) { + if (rsp->msix && rsp->msix->cpuid != smp_processor_id()) { /* if kernel does not notify qla of IRQ's CPU change, * then set it here. */ -- cgit v1.1 From 45209046c47b93fadf26dc59a9da724f387b9cf2 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 5 Jul 2016 13:53:12 +0800 Subject: ACPICA: Namespace: Fix namespace/interpreter lock ordering There is a lock order issue in acpi_load_tables(). The namespace lock is held before holding the interpreter lock. With ACPI_MUTEX_DEBUG enabled in the kernel, this is printed to the log during boot: [ 0.885699] ACPI Error: Invalid acquire order: Thread 405884224 owns [ACPI_MTX_Namespace], wants [ACPI_MTX_Interpreter] (20160422/utmutex-263) [ 0.885881] ACPI Error: Could not acquire AML Interpreter mutex (20160422/exutils-95) [ 0.893846] ACPI Error: Mutex [0x0] is not acquired, cannot release (20160422/utmutex-326) [ 0.894019] ACPI Error: Could not release AML Interpreter mutex (20160422/exutils-133) The issue has been introduced by the following commit: Commit: 2f38b1b16d9280689e5cfa47a4c50956bf437f0d ACPICA Commit: bfe03ffcde8ed56a7eae38ea0b188aeb12f9c52e Subject: ACPICA: Namespace: Fix a regression that MLC support triggers dead lock in dynamic table loading Which fixed a deadlock issue for acpi_ns_load_table() in acpi_ex_add_table() but didn't take care of the lock order in acpi_ns_load_table() correctly. Originally (before the above commit), ACPICA used the namespace/interpreter locks in the following 2 key code paths: 1. Table loading: acpi_ns_load_table L(Namespace) acpi_ns_parse_table acpi_ns_one_complete_parse U(Namespace) 2. Object evaluation: acpi_ns_evaluate L(Interpreter) acpi_ps_execute_method U(Interpreter) acpi_ns_load_table L(Namespace) U(Namespace) acpi_ev_initialize_region L(Namespace) U(Namespace) address_space.setup L(Namespace) U(Namespace) address_space.handler L(Namespace) U(Namespace) acpi_os_wait_semaphore acpi_os_acquire_mutex acpi_os_sleep L(Interpreter) U(Interpreter) During runtime, while acpi_ns_evaluate is called, the lock order is always Interpreter -> Namespace. In turn, the problematic commit acquires the locks in the following order: 3. Table loading: acpi_ns_load_table L(Namespace) acpi_ns_parse_table L(Interpreter) acpi_ns_one_complete_parse U(Interpreter) U(Namespace) To fix the lock order issue, move the interpreter lock to acpi_ns_load_table() to ensure the lock order correctness: 4. Table loading: acpi_ns_load_table L(Interpreter) L(Namespace) acpi_ns_parse_table acpi_ns_one_complete_parse U(Namespace) U(Interpreter) However, this doesn't fix the current design issues related to the namespace lock. For example, we can notice that in acpi_ns_evaluate(), outside of acpi_ns_load_table(), the namespace objects may be created by the named object creation control methods. And the creation of the method-owned namespace objects are not locked by the namespace lock. This patch doesn't try to fix such kind of existing issues. Fixes: 2f38b1b16d92 (ACPICA: Namespace: Fix a regression that MLC support triggers dead lock in dynamic table loading) Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsload.c | 7 ++++++- drivers/acpi/acpica/nsparse.c | 9 ++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c index b5e2b0a..297f6aa 100644 --- a/drivers/acpi/acpica/nsload.c +++ b/drivers/acpi/acpica/nsload.c @@ -46,6 +46,7 @@ #include "acnamesp.h" #include "acdispat.h" #include "actables.h" +#include "acinterp.h" #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsload") @@ -78,6 +79,8 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node) ACPI_FUNCTION_TRACE(ns_load_table); + acpi_ex_enter_interpreter(); + /* * Parse the table and load the namespace with all named * objects found within. Control methods are NOT parsed @@ -89,7 +92,7 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node) */ status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); + goto unlock_interp; } /* If table already loaded into namespace, just return */ @@ -130,6 +133,8 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node) unlock: (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); +unlock_interp: + (void)acpi_ex_exit_interpreter(); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c index 1783cd7..f631a47 100644 --- a/drivers/acpi/acpica/nsparse.c +++ b/drivers/acpi/acpica/nsparse.c @@ -47,7 +47,6 @@ #include "acparser.h" #include "acdispat.h" #include "actables.h" -#include "acinterp.h" #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsparse") @@ -171,8 +170,6 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) ACPI_FUNCTION_TRACE(ns_parse_table); - acpi_ex_enter_interpreter(); - /* * AML Parse, pass 1 * @@ -188,7 +185,7 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1, table_index, start_node); if (ACPI_FAILURE(status)) { - goto error_exit; + return_ACPI_STATUS(status); } /* @@ -204,10 +201,8 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node) status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2, table_index, start_node); if (ACPI_FAILURE(status)) { - goto error_exit; + return_ACPI_STATUS(status); } -error_exit: - acpi_ex_exit_interpreter(); return_ACPI_STATUS(status); } -- cgit v1.1 From 217215041b9285af2193a755b56a8f3ed408bfe2 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 6 Jul 2016 06:50:36 +1000 Subject: drm/nouveau/disp/sor/gf119: select correct sor when poking training pattern Fixes a regression caused by a stupid thinko from "disp/sor/gf119: both links use the same training register". Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index 22706c0..49bd5da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -40,7 +40,8 @@ static int gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern) { struct nvkm_device *device = outp->base.disp->engine.subdev.device; - nvkm_mask(device, 0x61c110, 0x0f0f0f0f, 0x01010101 * pattern); + const u32 soff = gf119_sor_soff(outp); + nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern); return 0; } -- cgit v1.1 From 096cdc6f52225835ff503f987a0d68ef770bb78e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Jun 2016 16:58:46 +0300 Subject: platform/chrome: cros_ec_dev - double fetch bug in ioctl We verify "u_cmd.outsize" and "u_cmd.insize" but we need to make sure that those values have not changed between the two copy_from_user() calls. Otherwise it could lead to a buffer overflow. Additionally, cros_ec_cmd_xfer() can set s_cmd->insize to a lower value. We should use the new smaller value so we don't copy too much data to the user. Reported-by: Pengfei Wang Fixes: a841178445bb ('mfd: cros_ec: Use a zero-length array for command data') Signed-off-by: Dan Carpenter Reviewed-by: Kees Cook Tested-by: Gwendal Grignou Cc: # v4.2+ Signed-off-by: Olof Johansson --- drivers/platform/chrome/cros_ec_dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c index 6d8ee3b..8abd80d 100644 --- a/drivers/platform/chrome/cros_ec_dev.c +++ b/drivers/platform/chrome/cros_ec_dev.c @@ -151,13 +151,19 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) goto exit; } + if (u_cmd.outsize != s_cmd->outsize || + u_cmd.insize != s_cmd->insize) { + ret = -EINVAL; + goto exit; + } + s_cmd->command += ec->cmd_offset; ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd); /* Only copy data to userland if data was received. */ if (ret < 0) goto exit; - if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + u_cmd.insize)) + if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize)) ret = -EFAULT; exit: kfree(s_cmd); -- cgit v1.1 From 7e3fd813717693597daaa95dee875f4cb2d911ef Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 5 Jul 2016 19:18:07 +0800 Subject: ACPI / debugger: Fix regression introduced by IS_ERR_VALUE() removal The FIFO unlocking mechanism in acpi_dbg has been broken by the following commit: Commit: 287980e49ffc0f6d911601e7e352a812ed27768e Subject: remove lots of IS_ERR_VALUE abuses It converted !IS_ERR_VALUE(ret) into !ret which was not entirely correct. Fix the regression by taking ret > 0 into account too as appropriate. Fixes: 287980e49ffc (remove lots of IS_ERR_VALUE abuses) Signed-off-by: Lv Zheng [ rjw: Simplifications, changelog & subject massage ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_dbg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c index 1f41284..dee8692 100644 --- a/drivers/acpi/acpi_dbg.c +++ b/drivers/acpi/acpi_dbg.c @@ -602,7 +602,7 @@ static int acpi_aml_read_user(char __user *buf, int len) crc->tail = (crc->tail + n) & (ACPI_AML_BUF_SIZE - 1); ret = n; out: - acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, !ret); + acpi_aml_unlock_fifo(ACPI_AML_OUT_USER, ret >= 0); return ret; } @@ -672,7 +672,7 @@ static int acpi_aml_write_user(const char __user *buf, int len) crc->head = (crc->head + n) & (ACPI_AML_BUF_SIZE - 1); ret = n; out: - acpi_aml_unlock_fifo(ACPI_AML_IN_USER, !ret); + acpi_aml_unlock_fifo(ACPI_AML_IN_USER, ret >= 0); return n; } -- cgit v1.1 From ab58298cf459fcd4f588a401d36abf0bd2215b51 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 5 Jul 2016 21:12:53 +0200 Subject: net: fix decnet rtnexthop parsing dn_fib_count_nhs() could enter an infinite loop if nhp->rtnh_len == 0 (i.e. if userspace passes a malformed netlink message). Let's use the helpers from net/nexthop.h which take care of all this stuff. We can do exactly the same as e.g. fib_count_nexthops() and fib_get_nhs() from net/ipv4/fib_semantics.c. This fixes the softlockup for me. Cc: Thomas Graf Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller --- net/decnet/dn_fib.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index df48034..a796fc7 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -41,6 +41,7 @@ #include #include #include +#include #define RT_MIN_TABLE 1 @@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr) struct rtnexthop *nhp = nla_data(attr); int nhs = 0, nhlen = nla_len(attr); - while(nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(nhp, nhlen)) { nhs++; - nhp = RTNH_NEXT(nhp); + nhp = rtnh_next(nhp, &nhlen); } - return nhs; + /* leftover implies invalid nexthop configuration, discard it */ + return nhlen > 0 ? 0 : nhs; } static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, @@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, int nhlen = nla_len(attr); change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(nhp, nhlen)) return -EINVAL; nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; nh->nh_oif = nhp->rtnh_ifindex; nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { + attrlen = rtnh_attrlen(nhp); + if (attrlen > 0) { struct nlattr *gw_attr; gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } - nhp = RTNH_NEXT(nhp); + + nhp = rtnh_next(nhp, &nhlen); } endfor_nexthops(fi); return 0; -- cgit v1.1 From 903ce4abdf374e3365d93bcb3df56c62008835ba Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 5 Jul 2016 12:10:23 -0700 Subject: ipv6: Fix mem leak in rt6i_pcpu It was first reported and reproduced by Petr (thanks!) in https://bugzilla.kernel.org/show_bug.cgi?id=119581 free_percpu(rt->rt6i_pcpu) used to always happen in ip6_dst_destroy(). However, after fixing a deadlock bug in commit 9c7370a166b4 ("ipv6: Fix a potential deadlock when creating pcpu rt"), free_percpu() is not called before setting non_pcpu_rt->rt6i_pcpu to NULL. It is worth to note that rt6i_pcpu is protected by table->tb6_lock. kmemleak somehow did not report it. We nailed it down by observing the pcpu entries in /proc/vmallocinfo (first suggested by Hannes, thanks!). Signed-off-by: Martin KaFai Lau Fixes: 9c7370a166b4 ("ipv6: Fix a potential deadlock when creating pcpu rt") Reported-by: Petr Novopashenniy Tested-by: Petr Novopashenniy Acked-by: Hannes Frederic Sowa Cc: Hannes Frederic Sowa Cc: Petr Novopashenniy Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bcef23..771be1f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } -- cgit v1.1 From 175a20c16fdb7700fcac63f1eeb2caa7e1dddd2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Jun 2016 18:06:49 +0300 Subject: x86/perf/intel/rapl: Fix module name collision with powercap intel-rapl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 4b6e2571bf00 the rapl perf module calls itself intel-rapl. That name was already in use by the rapl powercap driver, which now fails to load if the perf module is loaded. Fix the problem by renaming the perf module to intel-rapl-perf, so that both modules can coexist. Fixes: 4b6e2571bf00 ("x86/perf/intel/rapl: Make the Intel RAPL PMU driver modular") Signed-off-by: Ville Syrjälä Cc: Vince Weaver Cc: Alexander Shishkin Cc: Kan Liang Cc: Stephane Eranian Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1466694409-3620-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3660b2c..06c2baa 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o -intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o +intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o -- cgit v1.1 From 522e5cb76d0663c88f96b6a8301451c8efa37207 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 1 Jul 2016 16:42:55 +0200 Subject: iommu/amd: Fix unity mapping initialization race There is a race condition in the AMD IOMMU init code that causes requested unity mappings to be blocked by the IOMMU for a short period of time. This results on boot failures and IO_PAGE_FAULTs on some machines. Fix this by making sure the unity mappings are installed before all other DMA is blocked. Fixes: aafd8ba0ca74 ('iommu/amd: Implement add_device and remove_device') Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index d091def..59741ea 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1568,13 +1568,23 @@ static int __init amd_iommu_init_pci(void) break; } + /* + * Order is important here to make sure any unity map requirements are + * fulfilled. The unity mappings are created and written to the device + * table during the amd_iommu_init_api() call. + * + * After that we call init_device_table_dma() to make sure any + * uninitialized DTE will block DMA, and in the end we flush the caches + * of all IOMMUs to make sure the changes to the device table are + * active. + */ + ret = amd_iommu_init_api(); + init_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); - ret = amd_iommu_init_api(); - if (!ret) print_iommu_info(); -- cgit v1.1 From 095d28c62f9a05edc186e1b2b02bc44585402bdd Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 6 Jul 2016 09:31:35 +0800 Subject: drm/amd/powerplay: fix incorrect voltage table value for polaris10 Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index ec2a7ad..5785dc2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -733,7 +733,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, table->Smio[level] |= data->mvdd_voltage_table.entries[level].smio_low; } - table->SmioMask2 = data->vddci_voltage_table.mask_low; + table->SmioMask2 = data->mvdd_voltage_table.mask_low; table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count); } -- cgit v1.1 From 1dfefee8939b07dd65a35bb78f6a06df85578301 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 6 Jul 2016 09:32:24 +0800 Subject: drm/amd/powerplay: fix incorrect voltage table value for tonga Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index 233eb7f..5d0f655 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -1302,7 +1302,7 @@ static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, table->Smio[count] |= data->mvdd_voltage_table.entries[count].smio_low; } - table->SmioMask2 = data->vddci_voltage_table.mask_low; + table->SmioMask2 = data->mvdd_voltage_table.mask_low; CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount); } -- cgit v1.1 From 4b2427605e5325eafb5cfc2698f517db68e41075 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 5 Jul 2016 13:11:47 +0800 Subject: drm/amd/powerplay: incorrectly use of the function return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '0' means true. Signed-off-by: Rex Zhu Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c index 671fdb4..dccc859 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c @@ -302,7 +302,7 @@ static int init_dpm_2_parameters( (((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usPPMTableOffset)); if (0 != powerplay_table->usPPMTableOffset) { - if (1 == get_platform_power_management_table(hwmgr, atom_ppm_table)) { + if (get_platform_power_management_table(hwmgr, atom_ppm_table) == 0) { phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_EnablePlatformPowerManagement); } -- cgit v1.1 From e5eb37170b3cbbf948c6aeaccece818a59e76a6c Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 29 Jun 2016 16:37:35 +0800 Subject: drm/amd/powerplay: fix bug that get wrong polaris evv voltage. value is 32 bits for polaris, not 16. Signed-off-by: Rex Zhu Reviewed-by: Ken Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 7 ++++--- drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c | 4 ++-- drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 5785dc2..b8209ca 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -2685,7 +2685,7 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); uint16_t vv_id; - uint16_t vddc = 0; + uint32_t vddc = 0; uint16_t i, j; uint32_t sclk = 0; struct phm_ppt_v1_information *table_info = @@ -2716,8 +2716,9 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr) continue); - /* need to make sure vddc is less than 2v or else, it could burn the ASIC. */ - PP_ASSERT_WITH_CODE((vddc < 2000 && vddc != 0), + /* need to make sure vddc is less than 2v or else, it could burn the ASIC. + * real voltage level in unit of 0.01mv */ + PP_ASSERT_WITH_CODE((vddc < 200000 && vddc != 0), "Invalid VDDC value", result = -EINVAL;); /* the voltage should not be zero nor equal to leakage ID */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index bf4e18f..90b35c5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -1256,7 +1256,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, } int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, - uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage) + uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage) { int result; @@ -1274,7 +1274,7 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_ if (0 != result) return result; - *voltage = get_voltage_info_param_space.usVoltageLevel; + *voltage = ((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel; return result; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h index 248c5db..1e35a96 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h @@ -305,7 +305,7 @@ extern int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, uint8_t level); extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, - uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage); + uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage); extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table); extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param); -- cgit v1.1 From ab6bad05c886cf0ef0c86bd1f665cdbe8e5e75e7 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 28 Jun 2016 16:55:52 -0400 Subject: drm/amd/powerplay: Update CKS on/ CKS off voltage offset calculation. As get the right evv voltage, update them to latest coefficients to align with BB. agd: squash in Slava's 32 bit build fix Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index b8209ca..91e25f9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -98,7 +98,6 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) -#define CEILING_UCHAR(double) ((double-(uint8_t)(double)) > 0 ? (uint8_t)(double+1) : (uint8_t)(double)) static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] = { {600, 1050, 3, 0}, {600, 1050, 6, 1} }; @@ -1807,27 +1806,25 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) ro = efuse * (max -min)/255 + min; - /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset - * there is a little difference in calculating - * volt_with_cks with windows */ + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ for (i = 0; i < sclk_table->count; i++) { data->smc_state_table.Sclk_CKS_masterEn0_7 |= sclk_table->entries[i].cks_enable << i; if (hwmgr->chip_id == CHIP_POLARIS10) { - volt_without_cks = (uint32_t)((2753594000 + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \ + volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \ (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); - volt_with_cks = (uint32_t)((279720200 + sclk_table->entries[i].clk * 3232 - (ro - 65) * 100000000) / \ - (252248000 - sclk_table->entries[i].clk/100 * 115764)); + volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \ + (2522480 - sclk_table->entries[i].clk/100 * 115764/100)); } else { - volt_without_cks = (uint32_t)((2416794800 + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \ - (2625416 - (sclk_table->entries[i].clk/100) * 12586807/10000)); - volt_with_cks = (uint32_t)((2999656000 + sclk_table->entries[i].clk * 392803/100 - (ro - 44) * 1000000) / \ - (3422454 - sclk_table->entries[i].clk/100 * 18886376/10000)); + volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \ + (2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000))); + volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \ + (3422454 - sclk_table->entries[i].clk/100 * (18886376/10000))); } if (volt_without_cks >= volt_with_cks) - volt_offset = (uint8_t)CEILING_UCHAR((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 / 625); + volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 + 624) / 625); data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; } -- cgit v1.1 From 076501ff6ba265a473689c112eda9f1f34f620b5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Jul 2016 16:06:53 -0700 Subject: init/Kconfig: keep Expert users menu together The "expert" menu was broken (split) such that all entries in it after KALLSYMS were displayed in the "General setup" area instead of in the "Expert users" area. Fix this by adding one kconfig dependency. Yes, the Expert users menu is fragile. Problems like this have happened several times in the past. I will attempt to isolate the Expert users menu if there is interest in that. Fixes: 4d5d5664c900 ("x86: kallsyms: disable absolute percpu symbols on !SMP") Signed-off-by: Randy Dunlap Cc: Ard Biesheuvel Cc: stable@vger.kernel.org # 4.6 Signed-off-by: Linus Torvalds --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index f755a60..c02d897 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1458,6 +1458,7 @@ config KALLSYMS_ALL config KALLSYMS_ABSOLUTE_PERCPU bool + depends on KALLSYMS default X86_64 && SMP config KALLSYMS_BASE_RELATIVE -- cgit v1.1 From 2c81a6477081966fe80b8c6daa68459bca896774 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 14 Jun 2016 16:10:41 +0100 Subject: perf/core: Fix pmu::filter_match for SW-led groups The following commit: 66eb579e66ec ("perf: allow for PMU-specific event filtering") added the pmu::filter_match() callback. This was intended to avoid HW constraints on events from resulting in extremely pessimistic scheduling. However, pmu::filter_match() is only called for the leader of each event group. When the leader is a SW event, we do not filter the groups, and may fail at pmu::add() time, and when this happens we'll give up on scheduling any event groups later in the list until they are rotated ahead of the failing group. This can result in extremely sub-optimal event scheduling behaviour, e.g. if running the following on a big.LITTLE platform: $ taskset -c 0 ./perf stat \ -e 'a57{context-switches,armv8_cortex_a57/config=0x11/}' \ -e 'a53{context-switches,armv8_cortex_a53/config=0x11/}' \ ls context-switches (0.00%) armv8_cortex_a57/config=0x11/ (0.00%) 24 context-switches (37.36%) 57589154 armv8_cortex_a53/config=0x11/ (37.36%) Here the 'a53' event group was always eligible to be scheduled, but the 'a57' group never eligible to be scheduled, as the task was always affine to a Cortex-A53 CPU. The SW (group leader) event in the 'a57' group was eligible, but the HW event failed at pmu::add() time, resulting in ctx_flexible_sched_in giving up on scheduling further groups with HW events. One way of avoiding this is to check pmu::filter_match() on siblings as well as the group leader. If any of these fail their pmu::filter_match() call, we must skip the entire group before attempting to add any events. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Fixes: 66eb579e66ec ("perf: allow for PMU-specific event filtering") Link: http://lkml.kernel.org/r/1465917041-15339-1-git-send-email-mark.rutland@arm.com [ Small readability edits. ] Signed-off-by: Ingo Molnar --- kernel/events/core.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 85cd418..43d43a2d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1678,12 +1678,33 @@ static bool is_orphaned_event(struct perf_event *event) return event->state == PERF_EVENT_STATE_DEAD; } -static inline int pmu_filter_match(struct perf_event *event) +static inline int __pmu_filter_match(struct perf_event *event) { struct pmu *pmu = event->pmu; return pmu->filter_match ? pmu->filter_match(event) : 1; } +/* + * Check whether we should attempt to schedule an event group based on + * PMU-specific filtering. An event group can consist of HW and SW events, + * potentially with a SW leader, so we must check all the filters, to + * determine whether a group is schedulable: + */ +static inline int pmu_filter_match(struct perf_event *event) +{ + struct perf_event *child; + + if (!__pmu_filter_match(event)) + return 0; + + list_for_each_entry(child, &event->sibling_list, group_entry) { + if (!__pmu_filter_match(child)) + return 0; + } + + return 1; +} + static inline int event_filter_match(struct perf_event *event) { -- cgit v1.1 From 0beef634b86a1350c31da5fcc2992f0d7c8a622b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 01:23:57 -0600 Subject: xenbus: don't BUG() on user mode induced condition Inability to locate a user mode specified transaction ID should not lead to a kernel crash. For other than XS_TRANSACTION_START also don't issue anything to xenbus if the specified ID doesn't match that of any active transaction. Signed-off-by: Jan Beulich Cc: Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_dev_frontend.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index cacf30d..7487971 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -316,11 +316,18 @@ static int xenbus_write_transaction(unsigned msg_type, rc = -ENOMEM; goto out; } + } else { + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; + if (&trans->list == &u->transactions) + return -ESRCH; } reply = xenbus_dev_request_and_reply(&u->u.msg); if (IS_ERR(reply)) { - kfree(trans); + if (msg_type == XS_TRANSACTION_START) + kfree(trans); rc = PTR_ERR(reply); goto out; } @@ -333,12 +340,7 @@ static int xenbus_write_transaction(unsigned msg_type, list_add(&trans->list, &u->transactions); } } else if (u->u.msg.type == XS_TRANSACTION_END) { - list_for_each_entry(trans, &u->transactions, list) - if (trans->handle.id == u->u.msg.tx_id) - break; - BUG_ON(&trans->list == &u->transactions); list_del(&trans->list); - kfree(trans); } -- cgit v1.1 From e19a6ee2460bdd0d0055a6029383422773f9999a Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 20 Jun 2016 18:28:01 +0100 Subject: arm64: kernel: Save and restore UAO and addr_limit on exception entry If we take an exception while at EL1, the exception handler inherits the original context's addr_limit and PSTATE.UAO values. To be consistent always reset addr_limit and PSTATE.UAO on (re-)entry to EL1. This prevents accidental re-use of the original context's addr_limit. Based on a similar patch for arm from Russell King. Cc: # 4.6- Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 19 +++++++++++++++++-- arch/arm64/mm/fault.c | 3 ++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6..7f94755 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -117,6 +117,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47..2f4ba77 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,6 +60,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2b..6c3b734 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -406,7 +422,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cb..b1166d1 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) -- cgit v1.1 From 47c459beabe969c6751e2ea8d1f85c5fa1652d6c Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Thu, 7 Jul 2016 10:18:17 +0530 Subject: arm64: Enable workaround for Cavium erratum 27456 on thunderx-81xx Cavium erratum 27456 commit 104a0c02e8b1 ("arm64: Add workaround for Cavium erratum 27456") is applicable for thunderx-81xx pass1.0 SoC as well. Adding code to enable to 81xx. Signed-off-by: Ganapatrao Kulkarni Reviewed-by: Andrew Pinski Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985..9d9fd4b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d427894..af716b6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } -- cgit v1.1 From 78c4e172412de5d0456dc00d2b34050aa0b683b5 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 Jul 2016 17:32:29 -0400 Subject: Revert "ecryptfs: forbid opening files without mmap handler" This reverts commit 2f36db71009304b3f0b95afacd8eba1f9f046b87. It fixed a local root exploit but also introduced a dependency on the lower file system implementing an mmap operation just to open a file, which is a bit of a heavy hammer. The right fix is to have mmap depend on the existence of the mmap handler instead. Signed-off-by: Jeff Mahoney Cc: stable@vger.kernel.org Signed-off-by: Tyler Hicks --- fs/ecryptfs/kthread.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index e818f5a..866bb18 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "ecryptfs_kernel.h" struct ecryptfs_open_req { @@ -148,7 +147,7 @@ int ecryptfs_privileged_open(struct file **lower_file, flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR; (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) - goto have_file; + goto out; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; @@ -166,16 +165,8 @@ int ecryptfs_privileged_open(struct file **lower_file, mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); wait_for_completion(&req.done); - if (IS_ERR(*lower_file)) { + if (IS_ERR(*lower_file)) rc = PTR_ERR(*lower_file); - goto out; - } -have_file: - if ((*lower_file)->f_op->mmap == NULL) { - fput(*lower_file); - *lower_file = NULL; - rc = -EMEDIUMTYPE; - } out: return rc; } -- cgit v1.1 From 30a46a4647fd1df9cf52e43bf467f0d9265096ca Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 7 Jul 2016 13:41:11 -0700 Subject: apparmor: fix oops, validate buffer size in apparmor_setprocattr() When proc_pid_attr_write() was changed to use memdup_user apparmor's (interface violating) assumption that the setprocattr buffer was always a single page was violated. The size test is not strictly speaking needed as proc_pid_attr_write() will reject anything larger, but for the sake of robustness we can keep it in. SMACK and SELinux look safe to me, but somebody else should probably have a look just in case. Based on original patch from Vegard Nossum modified for the case that apparmor provides null termination. Fixes: bb646cdb12e75d82258c2f2e7746d5952d3e321a Reported-by: Vegard Nossum Cc: Al Viro Cc: John Johansen Cc: Paul Moore Cc: Stephen Smalley Cc: Eric Paris Cc: Casey Schaufler Cc: stable@kernel.org Signed-off-by: John Johansen Reviewed-by: Tyler Hicks Signed-off-by: James Morris --- security/apparmor/lsm.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 2660fbc..7798e16 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -500,34 +500,34 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, { struct common_audit_data sa; struct apparmor_audit_data aad = {0,}; - char *command, *args = value; + char *command, *largs = NULL, *args = value; size_t arg_size; int error; if (size == 0) return -EINVAL; - /* args points to a PAGE_SIZE buffer, AppArmor requires that - * the buffer must be null terminated or have size <= PAGE_SIZE -1 - * so that AppArmor can null terminate them - */ - if (args[size - 1] != '\0') { - if (size == PAGE_SIZE) - return -EINVAL; - args[size] = '\0'; - } - /* task can only write its own attributes */ if (current != task) return -EACCES; - args = value; + /* AppArmor requires that the buffer must be null terminated atm */ + if (args[size - 1] != '\0') { + /* null terminate */ + largs = args = kmalloc(size + 1, GFP_KERNEL); + if (!args) + return -ENOMEM; + memcpy(args, value, size); + args[size] = '\0'; + } + + error = -EINVAL; args = strim(args); command = strsep(&args, " "); if (!args) - return -EINVAL; + goto out; args = skip_spaces(args); if (!*args) - return -EINVAL; + goto out; arg_size = size - (args - (char *) value); if (strcmp(name, "current") == 0) { @@ -553,10 +553,12 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, goto fail; } else /* only support the "current" and "exec" process attributes */ - return -EINVAL; + goto fail; if (!error) error = size; +out: + kfree(largs); return error; fail: @@ -565,9 +567,9 @@ fail: aad.profile = aa_current_profile(); aad.op = OP_SETPROCATTR; aad.info = name; - aad.error = -EINVAL; + aad.error = error = -EINVAL; aa_audit_msg(AUDIT_APPARMOR_DENIED, &sa, NULL); - return -EINVAL; + goto out; } static int apparmor_task_setrlimit(struct task_struct *task, -- cgit v1.1 From 7469be95a487319514adce2304ad2af3553d2fc9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 01:32:04 -0600 Subject: xenbus: don't bail early from xenbus_dev_request_and_reply() xenbus_dev_request_and_reply() needs to track whether a transaction is open. For XS_TRANSACTION_START messages it calls transaction_start() and for XS_TRANSACTION_END messages it calls transaction_end(). If sending an XS_TRANSACTION_START message fails or responds with an an error, the transaction is not open and transaction_end() must be called. If sending an XS_TRANSACTION_END message fails, the transaction is still open, but if an error response is returned the transaction is closed. Commit 027bd7e89906 ("xen/xenbus: Avoid synchronous wait on XenBus stalling shutdown/restart") introduced a regression where failed XS_TRANSACTION_START messages were leaving the transaction open. This can cause problems with suspend (and migration) as all transactions must be closed before suspending. It appears that the problematic change was added accidentally, so just remove it. Signed-off-by: Jan Beulich Cc: Konrad Rzeszutek Wilk Cc: Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_xs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 374b12a..0bd3d47 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -249,9 +249,6 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) mutex_unlock(&xs_state.request_mutex); - if (IS_ERR(ret)) - return ret; - if ((msg->type == XS_TRANSACTION_END) || ((req_msg.type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) -- cgit v1.1 From e5a79475a7ae171fef82608c6e11f51bb85a6745 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 01:32:35 -0600 Subject: xenbus: simplify xenbus_dev_request_and_reply() No need to retain a local copy of the full request message, only the type is really needed. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_xs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 0bd3d47..22f7cd7 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -232,10 +232,10 @@ static void transaction_resume(void) void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) { void *ret; - struct xsd_sockmsg req_msg = *msg; + enum xsd_sockmsg_type type = msg->type; int err; - if (req_msg.type == XS_TRANSACTION_START) + if (type == XS_TRANSACTION_START) transaction_start(); mutex_lock(&xs_state.request_mutex); @@ -250,8 +250,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) mutex_unlock(&xs_state.request_mutex); if ((msg->type == XS_TRANSACTION_END) || - ((req_msg.type == XS_TRANSACTION_START) && - (msg->type == XS_ERROR))) + ((type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) transaction_end(); return ret; -- cgit v1.1 From 6f2d9d99213514360034c6d52d2c3919290b3504 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 8 Jul 2016 06:15:07 -0600 Subject: xen/acpi: allow xen-acpi-processor driver to load on Xen 4.7 As of Xen 4.7 PV CPUID doesn't expose either of CPUID[1].ECX[7] and CPUID[0x80000007].EDX[7] anymore, causing the driver to fail to load on both Intel and AMD systems. Doing any kind of hardware capability checks in the driver as a prerequisite was wrong anyway: With the hypervisor being in charge, all such checking should be done by it. If ACPI data gets uploaded despite some missing capability, the hypervisor is free to ignore part or all of that data. Ditch the entire check_prereq() function, and do the only valid check (xen_initial_domain()) in the caller in its place. Signed-off-by: Jan Beulich Cc: Signed-off-by: David Vrabel --- drivers/xen/xen-acpi-processor.c | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 076970a..4ce10bc 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -423,36 +423,7 @@ upload: return 0; } -static int __init check_prereq(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - if (!xen_initial_domain()) - return -ENODEV; - - if (!acpi_gbl_FADT.smi_command) - return -ENODEV; - - if (c->x86_vendor == X86_VENDOR_INTEL) { - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - return 0; - } - if (c->x86_vendor == X86_VENDOR_AMD) { - /* Copied from powernow-k8.h, can't include ../cpufreq/powernow - * as we get compile warnings for the static functions. - */ -#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 -#define USE_HW_PSTATE 0x00000080 - u32 eax, ebx, ecx, edx; - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE) - return -ENODEV; - return 0; - } - return -ENODEV; -} /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance __percpu *acpi_perf_data; @@ -509,10 +480,10 @@ struct notifier_block xen_acpi_processor_resume_nb = { static int __init xen_acpi_processor_init(void) { unsigned int i; - int rc = check_prereq(); + int rc; - if (rc) - return rc; + if (!xen_initial_domain()) + return -ENODEV; nr_acpi_bits = get_max_acpi_id() + 1; acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); -- cgit v1.1 From 9a7e7b571826c4399aa639af4a670642d96d935c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 8 Jul 2016 16:01:48 +0200 Subject: x86/asm/entry: Make thunk's restore a local label No need to have it appear in objdump output. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160708141016.GH3808@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/entry/thunk_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 027aec4..627ecbc 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -33,7 +33,7 @@ .endif call \func - jmp restore + jmp .L_restore _ASM_NOKPROBE(\name) .endm @@ -54,7 +54,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) -restore: +.L_restore: popq %r11 popq %r10 popq %r9 @@ -66,5 +66,5 @@ restore: popq %rdi popq %rbp ret - _ASM_NOKPROBE(restore) + _ASM_NOKPROBE(.L_restore) #endif -- cgit v1.1 From f0fe970df3838c202ef6c07a4c2b36838ef0a88b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 Jul 2016 17:32:30 -0400 Subject: ecryptfs: don't allow mmap when the lower fs doesn't support it There are legitimate reasons to disallow mmap on certain files, notably in sysfs or procfs. We shouldn't emulate mmap support on file systems that don't offer support natively. CVE-2016-1583 Signed-off-by: Jeff Mahoney Cc: stable@vger.kernel.org [tyhicks: clean up f_op check by using ecryptfs_file_to_lower()] Signed-off-by: Tyler Hicks --- fs/ecryptfs/file.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 53d0141..ca4e837 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -169,6 +169,19 @@ out: return rc; } +static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct file *lower_file = ecryptfs_file_to_lower(file); + /* + * Don't allow mmap on top of file systems that don't support it + * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs + * allows recursive mounting, this will need to be extended. + */ + if (!lower_file->f_op->mmap) + return -ENODEV; + return generic_file_mmap(file, vma); +} + /** * ecryptfs_open * @inode: inode specifying file to open @@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = generic_file_mmap, + .mmap = ecryptfs_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, -- cgit v1.1 From 2e9d1e150abf88cb63e5d34ca286edbb95b4c53d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jun 2016 16:58:29 +0200 Subject: x86/entry: Avoid interrupt flag save and restore Thanks to all the work that was done by Andy Lutomirski and others, enter_from_user_mode() and prepare_exit_to_usermode() are now called only with interrupts disabled. Let's provide them a version of user_enter()/user_exit() that skips saving and restoring the interrupt flag. On an AMD-based machine I tested this patch on, with force-enabled context tracking, the speed-up in system calls was 90 clock cycles or 6%, measured with the following simple benchmark: #include #include #include #include unsigned long rdtsc() { unsigned long result; asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n" "or %%rdx, %%rax" : "=a" (result) : : "rdx"); return result; } int main() { unsigned long tsc1, tsc2; int pid = getpid(); int i; tsc1 = rdtsc(); for (i = 0; i < 100000000; i++) kill(pid, SIGWINCH); tsc2 = rdtsc(); printf("%ld\n", tsc2 - tsc1); } Signed-off-by: Paolo Bonzini Reviewed-by: Rik van Riel Reviewed-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 4 ++-- include/linux/context_tracking.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec138e5..618bc61 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) __visible void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit(); + user_exit_irqoff(); } #else static inline void enter_from_user_mode(void) {} @@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) ti->status &= ~TS_COMPAT; #endif - user_enter(); + user_enter_irqoff(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d259274..d9aef2a 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -31,6 +31,19 @@ static inline void user_exit(void) context_tracking_exit(CONTEXT_USER); } +/* Called with interrupts disabled. */ +static inline void user_enter_irqoff(void) +{ + if (context_tracking_is_enabled()) + __context_tracking_enter(CONTEXT_USER); + +} +static inline void user_exit_irqoff(void) +{ + if (context_tracking_is_enabled()) + __context_tracking_exit(CONTEXT_USER); +} + static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; @@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void) #else static inline void user_enter(void) { } static inline void user_exit(void) { } +static inline void user_enter_irqoff(void) { } +static inline void user_exit_irqoff(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } -- cgit v1.1 From be8a18e2e98e04a5def5887d913b267865562448 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jun 2016 16:58:30 +0200 Subject: x86/entry: Inline enter_from_user_mode() This matches what is already done for prepare_exit_to_usermode(), and saves about 60 clock cycles (4% speedup) with the benchmark in the previous commit message. Signed-off-by: Paolo Bonzini Reviewed-by: Rik van Riel Reviewed-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 618bc61..9e1e27d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. */ -__visible void enter_from_user_mode(void) +__visible inline void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); -- cgit v1.1