From 392ea5dd0ff265f7557405fcbdf35acd34cf4ab8 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 22 Dec 2017 11:26:03 +0200 Subject: ARM: OMAP2+: hwmod_core: enable optional clocks before main clock The optional clocks must be enabled before the main clock after the transition to clkctrl controlled clocks is done. Otherwise the module we attempt to enable might be stuck in transition. Reported-by: Keerthy Tested-by: Keerthy Signed-off-by: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 5eff27e..b1d446c 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -975,6 +975,9 @@ static int _enable_clocks(struct omap_hwmod *oh) pr_debug("omap_hwmod: %s: enabling clocks\n", oh->name); + if (oh->flags & HWMOD_OPT_CLKS_NEEDED) + _enable_optional_clocks(oh); + if (oh->_clk) clk_enable(oh->_clk); @@ -983,9 +986,6 @@ static int _enable_clocks(struct omap_hwmod *oh) clk_enable(os->_clk); } - if (oh->flags & HWMOD_OPT_CLKS_NEEDED) - _enable_optional_clocks(oh); - /* The opt clocks are controlled by the device driver. */ return 0; -- cgit v1.1 From 787e1853ae8a951233ceab1b01c862d9c668358b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 8 Jan 2018 07:47:36 +0100 Subject: iio: adc: aspeed: Fix error handling path The labels and branching order of the error path of 'aspeed_adc_probe()' are broken. Re-order the labels and goto statements. Signed-off-by: Christophe JAILLET Signed-off-by: Jonathan Cameron --- drivers/iio/adc/aspeed_adc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 327a49b..9515ca1 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -243,7 +243,7 @@ static int aspeed_adc_probe(struct platform_device *pdev) ASPEED_ADC_INIT_POLLING_TIME, ASPEED_ADC_INIT_TIMEOUT); if (ret) - goto scaler_error; + goto poll_timeout_error; } /* Start all channels in normal mode. */ @@ -274,9 +274,10 @@ iio_register_error: writel(ASPEED_OPERATION_MODE_POWER_DOWN, data->base + ASPEED_REG_ENGINE_CONTROL); clk_disable_unprepare(data->clk_scaler->clk); -reset_error: - reset_control_assert(data->rst); clk_enable_error: +poll_timeout_error: + reset_control_assert(data->rst); +reset_error: clk_hw_unregister_divider(data->clk_scaler); scaler_error: clk_hw_unregister_divider(data->clk_prescaler); -- cgit v1.1 From 8bbfbc2df6e9a37bc5c9ee674c496ea277b0bd39 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 17 Jan 2018 16:39:17 +0300 Subject: ARCv2: cache: fix slc_entire_op: flush only instead of flush-n-inv slc_entire_op with OP_FLUSH command also invalidates it. This is a preventive fix as the current use of slc_entire_op is only with OP_FLUSH_N_INV where the invalidate is required. Signed-off-by: Eugeniy Paltsev [vgupta: fixed changelog] Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index eee924d..2072f34 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -780,7 +780,10 @@ noinline static void slc_entire_op(const int op) write_aux_reg(r, ctrl); - write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1); + else + write_aux_reg(ARC_REG_SLC_FLUSH, 0x1); /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ read_aux_reg(r); -- cgit v1.1 From a3142792f79884b867b7bf4c7d5a126a0f913332 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 18 Jan 2018 21:07:21 +0300 Subject: ARCv2: Don't pretend we may set L-bit in STATUS32 with kflag instruction As per PRM "kflag" instruction doesn't change state of L-flag ("Zero-Overhead loop disabled") in STATUS32 register so let's not act as if we can affect this bit. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-arcv2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 257a68f..309f4e6 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -184,7 +184,7 @@ .macro FAKE_RET_FROM_EXCPN lr r9, [status32] bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) - or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK) + or r9, r9, STATUS_IE_MASK kflag r9 .endm -- cgit v1.1 From 8ff3afc159f26e44471e174077e6d16cd2a2bb91 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 18 Jan 2018 16:48:47 +0300 Subject: ARC: Enable fatal signals on boot for dev platforms It's very convenient to have fatal signals enabled on developemnt platform as this allows to catch problems that happen early in user-space (like crashing init or dynamic loader). Otherwise we may either enable it later from alive taregt console by "echo 1 > /proc/sys/kernel/print-fatal-signals" but: 1. We might be unfortunate enough to not reach working console 2. Forget to enable fatal signals and miss something interesting Given we're talking about development platforms here it shouldn't be a problem if a bit more data gets printed to debug console. Moreover this makes behavior of all our dev platforms predictable as today some platforms already have it enabled and some don't - which is way too inconvenient. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axs101.dts | 2 +- arch/arc/boot/dts/haps_hs_idu.dts | 2 +- arch/arc/boot/dts/nsim_700.dts | 2 +- arch/arc/boot/dts/nsim_hs.dts | 2 +- arch/arc/boot/dts/nsim_hs_idu.dts | 2 +- arch/arc/boot/dts/nsimosci.dts | 2 +- arch/arc/boot/dts/nsimosci_hs.dts | 2 +- arch/arc/boot/dts/nsimosci_hs_idu.dts | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts index 70aec7d..626b694 100644 --- a/arch/arc/boot/dts/axs101.dts +++ b/arch/arc/boot/dts/axs101.dts @@ -17,6 +17,6 @@ compatible = "snps,axs101", "snps,arc-sdp"; chosen { - bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60"; + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=1280x720@60 print-fatal-signals=1"; }; }; diff --git a/arch/arc/boot/dts/haps_hs_idu.dts b/arch/arc/boot/dts/haps_hs_idu.dts index 215cddd..0c60330 100644 --- a/arch/arc/boot/dts/haps_hs_idu.dts +++ b/arch/arc/boot/dts/haps_hs_idu.dts @@ -22,7 +22,7 @@ }; chosen { - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index 5ee96b0..ff2f2c7 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -17,7 +17,7 @@ interrupt-parent = <&core_intc>; chosen { - bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 8d787b2..8e2489b 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -24,7 +24,7 @@ }; chosen { - bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts index 4f98ebf..ed12f4947 100644 --- a/arch/arc/boot/dts/nsim_hs_idu.dts +++ b/arch/arc/boot/dts/nsim_hs_idu.dts @@ -15,7 +15,7 @@ interrupt-parent = <&core_intc>; chosen { - bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index 3c391ba..7842e5e 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -20,7 +20,7 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts index 14a727c..b8838cf 100644 --- a/arch/arc/boot/dts/nsimosci_hs.dts +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -20,7 +20,7 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24 print-fatal-signals=1"; }; aliases { diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts index 5052917..72a2c72 100644 --- a/arch/arc/boot/dts/nsimosci_hs_idu.dts +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -18,7 +18,7 @@ chosen { /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24 print-fatal-signals=1"; }; aliases { -- cgit v1.1 From 7d82c5fa057c813d13e1f828f779727214573723 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 23 Jan 2018 15:16:08 +0000 Subject: ARC: dw2 unwind: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Vineet Gupta --- arch/arc/kernel/unwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 333daab..183391d 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table, return; ret_err: - panic("Attention !!! Dwarf FDE parsing errors\n");; + panic("Attention !!! Dwarf FDE parsing errors\n"); } #ifdef CONFIG_MODULES -- cgit v1.1 From a46f24acf8bce70b5fdd6774793d121e54b99e97 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 23 Jan 2018 15:16:09 +0000 Subject: ARC: boot log: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 9d27331..ec12fe1 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -373,7 +373,7 @@ static void arc_chk_core_config(void) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; int saved = 0, present = 0; - char *opt_nm = NULL;; + char *opt_nm = NULL; if (!cpu->extn.timer0) panic("Timer0 is not present!\n"); -- cgit v1.1 From e31b617d0a63c6558485aaa730fd162faa95a766 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 22 Jan 2018 11:53:12 +0200 Subject: staging: iio: adc: ad7192: fix external frequency setting The external clock frequency was set only when selecting the internal clock, which is fixed at 4.9152 Mhz. This is incorrect, since it should be set when any of the external clock or crystal settings is selected. Added range validation for the external (crystal/clock) frequency setting. Valid values are between 2.4576 and 5.12 Mhz. Signed-off-by: Alexandru Ardelean Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7192.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index f015955..425e8b8 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -141,6 +141,8 @@ #define AD7192_GPOCON_P1DAT BIT(1) /* P1 state */ #define AD7192_GPOCON_P0DAT BIT(0) /* P0 state */ +#define AD7192_EXT_FREQ_MHZ_MIN 2457600 +#define AD7192_EXT_FREQ_MHZ_MAX 5120000 #define AD7192_INT_FREQ_MHZ 4915200 /* NOTE: @@ -218,6 +220,12 @@ static int ad7192_calibrate_all(struct ad7192_state *st) ARRAY_SIZE(ad7192_calib_arr)); } +static inline bool ad7192_valid_external_frequency(u32 freq) +{ + return (freq >= AD7192_EXT_FREQ_MHZ_MIN && + freq <= AD7192_EXT_FREQ_MHZ_MAX); +} + static int ad7192_setup(struct ad7192_state *st, const struct ad7192_platform_data *pdata) { @@ -243,17 +251,20 @@ static int ad7192_setup(struct ad7192_state *st, id); switch (pdata->clock_source_sel) { - case AD7192_CLK_EXT_MCLK1_2: - case AD7192_CLK_EXT_MCLK2: - st->mclk = AD7192_INT_FREQ_MHZ; - break; case AD7192_CLK_INT: case AD7192_CLK_INT_CO: - if (pdata->ext_clk_hz) - st->mclk = pdata->ext_clk_hz; - else - st->mclk = AD7192_INT_FREQ_MHZ; + st->mclk = AD7192_INT_FREQ_MHZ; break; + case AD7192_CLK_EXT_MCLK1_2: + case AD7192_CLK_EXT_MCLK2: + if (ad7192_valid_external_frequency(pdata->ext_clk_hz)) { + st->mclk = pdata->ext_clk_hz; + break; + } + dev_err(&st->sd.spi->dev, "Invalid frequency setting %u\n", + pdata->ext_clk_hz); + ret = -EINVAL; + goto out; default: ret = -EINVAL; goto out; -- cgit v1.1 From a3b5655ebdb501a98a45c0d3265dca9f2fe0218a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 23 Jan 2018 17:04:56 +0100 Subject: iio: adc: stm32: fix stm32h7_adc_enable error handling Error handling in stm32h7_adc_enable routine doesn't unwind enable sequence correctly. ADEN can only be cleared by hardware (e.g. by writing one to ADDIS). It's also better to clear ADRDY just after it's been set by hardware. Fixes: 95e339b6e85d ("iio: adc: stm32: add support for STM32H7") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 7f5def4..9a2583ca 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -722,8 +722,6 @@ static int stm32h7_adc_enable(struct stm32_adc *adc) int ret; u32 val; - /* Clear ADRDY by writing one, then enable ADC */ - stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY); stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN); /* Poll for ADRDY to be set (after adc startup time) */ @@ -731,8 +729,11 @@ static int stm32h7_adc_enable(struct stm32_adc *adc) val & STM32H7_ADRDY, 100, STM32_ADC_TIMEOUT_US); if (ret) { - stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADEN); + stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADDIS); dev_err(&indio_dev->dev, "Failed to enable ADC\n"); + } else { + /* Clear ADRDY by writing one */ + stm32_adc_set_bits(adc, STM32H7_ADC_ISR, STM32H7_ADRDY); } return ret; -- cgit v1.1 From 7d2b8e6aaf9ee87910c2337e1c59bb5d3e3ba8c5 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 25 Jan 2018 14:30:45 +0200 Subject: staging: iio: ad5933: switch buffer mode to software Since commit 152a6a884ae1 ("staging:iio:accel:sca3000 move to hybrid hard / soft buffer design.") the buffer mechanism has changed and the INDIO_BUFFER_HARDWARE flag has been unused. Since commit 2d6ca60f3284 ("iio: Add a DMAengine framework based buffer") the INDIO_BUFFER_HARDWARE flag has been re-purposed for DMA buffers. This driver has lagged behind these changes, and in order for buffers to work, the INDIO_BUFFER_SOFTWARE needs to be used. Signed-off-by: Alexandru Ardelean Fixes: 2d6ca60f3284 ("iio: Add a DMAengine framework based buffer") Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 2b28fb9..3bcf494 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -648,8 +648,6 @@ static int ad5933_register_ring_funcs_and_init(struct iio_dev *indio_dev) /* Ring buffer functions - here trigger setup related */ indio_dev->setup_ops = &ad5933_ring_setup_ops; - indio_dev->modes |= INDIO_BUFFER_HARDWARE; - return 0; } @@ -762,7 +760,7 @@ static int ad5933_probe(struct i2c_client *client, indio_dev->dev.parent = &client->dev; indio_dev->info = &ad5933_info; indio_dev->name = id->name; - indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->modes = (INDIO_BUFFER_SOFTWARE | INDIO_DIRECT_MODE); indio_dev->channels = ad5933_channels; indio_dev->num_channels = ARRAY_SIZE(ad5933_channels); -- cgit v1.1 From 4a8842de8db4953fdda7866626b78b12fb8adb97 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 19 Jan 2018 16:22:05 +0100 Subject: scsi: mpt3sas: fix an out of bound write cpu_msix_table is allocated to store online cpus, but pci_irq_get_affinity may return cpu_possible_mask which is then used to access cpu_msix_table. That causes bad user experience. Fix limits access to only online cpus, I've also added an additional test to protect from an unlikely change in cpu_online_mask. [mkp: checkpatch] Fixes: 1d55abc0e98a ("scsi: mpt3sas: switch to pci_alloc_irq_vectors") Signed-off-by: Tomas Henzl Acked-by: Suganath Prabu Subramani Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 13d6e4e..59a87ca 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2410,8 +2410,11 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) continue; } - for_each_cpu(cpu, mask) + for_each_cpu_and(cpu, mask, cpu_online_mask) { + if (cpu >= ioc->cpu_msix_table_sz) + break; ioc->cpu_msix_table[cpu] = reply_q->msix_index; + } } return; } -- cgit v1.1 From 2ce87cc5b269510de9ca1185ca8a6e10ec78c069 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 23 Jan 2018 11:05:21 -0800 Subject: scsi: qla2xxx: Fix memory corruption during hba reset test This patch fixes memory corrpution while performing HBA Reset test. Following stack trace is seen: [ 466.397219] BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 [ 466.433669] IP: [] qlt_free_session_done+0x260/0x5f0 [qla2xxx] [ 466.467731] PGD 0 [ 466.476718] Oops: 0000 [#1] SMP Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 12ee6e0..afcb5567 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3625,6 +3625,8 @@ qla2x00_remove_one(struct pci_dev *pdev) } qla2x00_wait_for_hba_ready(base_vha); + qla2x00_wait_for_sess_deletion(base_vha); + /* * if UNLOAD flag is already set, then continue unload, * where it was set first. -- cgit v1.1 From c39813652700f3df552b6557530f1e5f782dbe2f Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 23 Jan 2018 20:11:32 -0600 Subject: scsi: ibmvfc: fix misdefined reserved field in ibmvfc_fcp_rsp_info The fcp_rsp_info structure as defined in the FC spec has an initial 3 bytes reserved field. The ibmvfc driver mistakenly defined this field as 4 bytes resulting in the rsp_code field being defined in what should be the start of the second reserved field and thus always being reported as zero by the driver. Ideally, we should wire ibmvfc up with libfc for the sake of code deduplication, and ease of maintaining standardized structures in a single place. However, for now simply fixup the definition in ibmvfc for backporting to distros on older kernels. Wiring up with libfc will be done in a followup patch. Cc: Reported-by: Hannes Reinecke Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 9a0696f..b81a53c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -367,7 +367,7 @@ enum ibmvfc_fcp_rsp_info_codes { }; struct ibmvfc_fcp_rsp_info { - __be16 reserved; + u8 reserved[3]; u8 rsp_code; u8 reserved2[4]; }__attribute__((packed, aligned (2))); -- cgit v1.1 From 84af7e8b895088d89f246d6b0f82717fafdebf61 Mon Sep 17 00:00:00 2001 From: Sujit Reddy Thumma Date: Wed, 24 Jan 2018 09:52:35 +0530 Subject: scsi: ufs: Enable quirk to ignore sending WRITE_SAME command WRITE_SAME command is not supported by UFS. Enable a quirk for the upper level drivers to not send WRITE SAME command. [mkp: botched patch, applied by hand] Signed-off-by: Sujit Reddy Thumma Signed-off-by: Subhash Jadavani Signed-off-by: Asutosh Das Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 011c336..8196976 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4352,6 +4352,8 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev) /* REPORT SUPPORTED OPERATION CODES is not supported */ sdev->no_report_opcodes = 1; + /* WRITE_SAME command is not supported */ + sdev->no_write_same = 1; ufshcd_set_queue_depth(sdev); -- cgit v1.1 From 52797a1d4b39716ddd300a3c463ffaf1330600a0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 24 Jan 2018 14:58:01 +0000 Subject: scsi: csiostor: remove redundant assignment to pointer 'ln' The pointer ln is assigned a value that is never read, it is re-assigned a new value in the list_for_each loop hence the initialization is redundant and can be removed. Cleans up clang warning: drivers/scsi/csiostor/csio_lnode.c:117:21: warning: Value stored to 'ln' during its initialization is never read Signed-off-by: Colin Ian King Acked-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/csiostor/csio_lnode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index be5ee2d..7dbbbb8 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -114,7 +114,7 @@ static enum csio_ln_ev fwevt_to_lnevt[] = { static struct csio_lnode * csio_ln_lookup_by_portid(struct csio_hw *hw, uint8_t portid) { - struct csio_lnode *ln = hw->rln; + struct csio_lnode *ln; struct list_head *tmp; /* Match siblings lnode with portid */ -- cgit v1.1 From ecf7ff49945f5741fa1da112f994939f942031d3 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 24 Jan 2018 08:07:06 -0800 Subject: scsi: bnx2fc: Fix check in SCSI completion handler for timed out request When a request times out we set the io_req flag BNX2FC_FLAG_IO_COMPL so that if a subsequent completion comes in on that task ID we will ignore it. The issue is that in the check for this flag there is a missing return so we will continue to process a request which may have already been returned to the ownership of the SCSI layer. This can cause unpredictable results. Solution is to add in the missing return. [mkp: typo plus title shortening] Signed-off-by: Chad Dupuis Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8e2f767..5a645b8 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1889,6 +1889,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, /* we will not receive ABTS response for this IO */ BNX2FC_IO_DBG(io_req, "Timer context finished processing " "this scsi cmd\n"); + return; } /* Cancel the timeout_work, as we received IO completion */ -- cgit v1.1 From e6f791d95313c85f3dd4a26141e28e50ae9aa0ae Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 Jan 2018 17:13:40 +0300 Subject: scsi: sym53c8xx_2: iterator underflow in sym_getsync() We wanted to exit the loop with "div" set to zero, but instead, if we don't hit the break then "div" is -1 when we finish the loop. It leads to an array underflow a few lines later. Signed-off-by: Dan Carpenter Reviewed-by: Johannes Thumshirn Acked-by: Matthew Wilcox Signed-off-by: Martin K. Petersen --- drivers/scsi/sym53c8xx_2/sym_hipd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index ca360da..378af30 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -536,7 +536,7 @@ sym_getsync(struct sym_hcb *np, u_char dt, u_char sfac, u_char *divp, u_char *fa * Look for the greatest clock divisor that allows an * input speed faster than the period. */ - while (div-- > 0) + while (--div > 0) if (kpc >= (div_10M[div] << 2)) break; /* -- cgit v1.1 From a7043e9529f3c367cc4d82997e00be034cbe57ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 Jan 2018 17:27:27 +0300 Subject: scsi: mptfusion: Add bounds check in mptctl_hp_targetinfo() My static checker complains about an out of bounds read: drivers/message/fusion/mptctl.c:2786 mptctl_hp_targetinfo() error: buffer overflow 'hd->sel_timeout' 255 <= u32max. It's true that we probably should have a bounds check here. Signed-off-by: Dan Carpenter Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/message/fusion/mptctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 8d12017..4470630 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -2687,6 +2687,8 @@ mptctl_hp_targetinfo(unsigned long arg) __FILE__, __LINE__, iocnum); return -ENODEV; } + if (karg.hdr.id >= MPT_MAX_FC_DEVICES) + return -EINVAL; dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n", ioc->name)); -- cgit v1.1 From c02189e12ce3bf3808cb880569d3b10249f50bd9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 Jan 2018 08:24:29 -0800 Subject: scsi: qla2xxx: Avoid triggering undefined behavior in qla2x00_mbx_completion() A left shift must shift less than the bit width of the left argument. Avoid triggering undefined behavior if ha->mbx_count == 32. This patch avoids that UBSAN reports the following complaint: UBSAN: Undefined behaviour in drivers/scsi/qla2xxx/qla_isr.c:275:14 shift exponent 32 is too large for 32-bit type 'int' Call Trace: dump_stack+0x4e/0x6c ubsan_epilogue+0xd/0x3b __ubsan_handle_shift_out_of_bounds+0x112/0x14c qla2x00_mbx_completion+0x1c5/0x25d [qla2xxx] qla2300_intr_handler+0x1ea/0x3bb [qla2xxx] qla2x00_mailbox_command+0x77b/0x139a [qla2xxx] qla2x00_mbx_reg_test+0x83/0x114 [qla2xxx] qla2x00_chip_diag+0x354/0x45f [qla2xxx] qla2x00_initialize_adapter+0x2c2/0xa4e [qla2xxx] qla2x00_probe_one+0x1681/0x392e [qla2xxx] pci_device_probe+0x10b/0x1f1 driver_probe_device+0x21f/0x3a4 __driver_attach+0xa9/0xe1 bus_for_each_dev+0x6e/0xb5 driver_attach+0x22/0x3c bus_add_driver+0x1d1/0x2ae driver_register+0x78/0x130 __pci_register_driver+0x75/0xa8 qla2x00_module_init+0x21b/0x267 [qla2xxx] do_one_initcall+0x5a/0x1e2 do_init_module+0x9d/0x285 load_module+0x20db/0x38e3 SYSC_finit_module+0xa8/0xbc SyS_finit_module+0x9/0xb do_syscall_64+0x77/0x271 entry_SYSCALL64_slow_path+0x25/0x25 Reported-by: Meelis Roos Signed-off-by: Bart Van Assche Cc: Himanshu Madhani Reviewed-by: Laurence Oberman Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 14109d8..89f93eb 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -272,7 +272,8 @@ qla2x00_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; /* Read all mbox registers? */ - mboxes = (1 << ha->mbx_count) - 1; + WARN_ON_ONCE(ha->mbx_count > 32); + mboxes = (1ULL << ha->mbx_count) - 1; if (!ha->mcp) ql_dbg(ql_dbg_async, vha, 0x5001, "MBX pointer ERROR.\n"); else @@ -2880,7 +2881,8 @@ qla24xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0) struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; /* Read all mbox registers? */ - mboxes = (1 << ha->mbx_count) - 1; + WARN_ON_ONCE(ha->mbx_count > 32); + mboxes = (1ULL << ha->mbx_count) - 1; if (!ha->mcp) ql_dbg(ql_dbg_async, vha, 0x504e, "MBX pointer ERROR.\n"); else -- cgit v1.1 From 7c0dde2b3d99fe3c54edada408d10dcd6ee0c1f7 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sun, 28 Jan 2018 07:23:54 +0000 Subject: scsi: aic7xxx: remove aiclib.c aiclib.c is unused (and contains no code) since commit 1ff927306e08 ("[SCSI] aic7xxx: remove aiclib.c") 13 years later, finish the cleaning by removing it from tree. [mkp: tweaked patch description] Signed-off-by: Corentin Labbe Signed-off-by: Martin K. Petersen --- drivers/scsi/aic7xxx/aiclib.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 drivers/scsi/aic7xxx/aiclib.c diff --git a/drivers/scsi/aic7xxx/aiclib.c b/drivers/scsi/aic7xxx/aiclib.c deleted file mode 100644 index 828ae3d..0000000 --- a/drivers/scsi/aic7xxx/aiclib.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Implementation of Utility functions for all SCSI device types. - * - * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs. - * Copyright (c) 1997, 1998 Kenneth D. Merry. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/cam/scsi/scsi_all.c,v 1.38 2002/09/23 04:56:35 mjacob Exp $ - * $Id$ - */ - -#include "aiclib.h" - -- cgit v1.1 From 2e8233ab17411920bee87c0dd71790f11904f3b8 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 29 Jan 2018 12:30:16 +0000 Subject: scsi: Remove Makefile entry for oktagon files Remove line using non-existent files which were removed in commit 642978beb483 ("[SCSI] remove m68k NCR53C9x based drivers") [mkp: tweaked patch description] Signed-off-by: Corentin Labbe Acked-by: Geert Uytterhoeven Signed-off-by: Martin K. Petersen --- drivers/scsi/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index fcfd28d..de1b3fc 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -185,7 +185,6 @@ ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \ CFLAGS_ncr53c8xx.o := $(ncr53c8xx-flags-y) $(ncr53c8xx-flags-m) zalon7xx-objs := zalon.o ncr53c8xx.o NCR_Q720_mod-objs := NCR_Q720.o ncr53c8xx.o -oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o # Files generated that shall be removed upon make clean clean-files := 53c700_d.h 53c700_u.h -- cgit v1.1 From f5572475e999a1e9cd44f8704023a815f611d377 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Jan 2018 15:50:03 -0800 Subject: scsi: scsi_dh: Document alua_rtpg_queue() arguments Since commit 3a025e1d1c2e ("Add optional check for bad kernel-doc comments") building with W=1 causes warnings to appear for issues in kernel-doc headers. This patch avoids that the following warnings are reported when building with W=1: drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'pg' drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'sdev' drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'qdata' drivers/scsi/device_handler/scsi_dh_alua.c:867: warning: No description found for parameter 'force' Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 022e421..4b44325 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -876,6 +876,11 @@ static void alua_rtpg_work(struct work_struct *work) /** * alua_rtpg_queue() - cause RTPG to be submitted asynchronously + * @pg: ALUA port group associated with @sdev. + * @sdev: SCSI device for which to submit an RTPG. + * @qdata: Information about the callback to invoke after the RTPG. + * @force: Whether or not to submit an RTPG if a work item that will submit an + * RTPG already has been scheduled. * * Returns true if and only if alua_rtpg_work() will be called asynchronously. * That function is responsible for calling @qdata->fn(). -- cgit v1.1 From c028c6309a9f9b385ba8c0c984eb2b6c3f368650 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jan 2018 13:17:38 +0100 Subject: cfg80211: use only 1Mbps for basic rates in mesh Mesh used to use the mandatory rates as basic rates, but we got the calculation of mandatory rates wrong until some time ago. Fix this this broke interoperability with older versions since now more basic rates are required, and thus the MBSS isn't the same and the network stops working. Fix this by simply using only 1Mbps as the basic rate in 2.4GHz. Since the changed mandatory rates only affected 2.4GHz, this is all we need to make it work again. Reported-and-tested-by: Matthias Schiffer Fixes: 1bd773c077de ("wireless: set correct mandatory rate flags") Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 51aa556..b12da6e 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -170,9 +170,28 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, enum nl80211_bss_scan_width scan_width; struct ieee80211_supported_band *sband = rdev->wiphy.bands[setup->chandef.chan->band]; - scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); - setup->basic_rates = ieee80211_mandatory_rates(sband, - scan_width); + + if (setup->chandef.chan->band == NL80211_BAND_2GHZ) { + int i; + + /* + * Older versions selected the mandatory rates for + * 2.4 GHz as well, but were broken in that only + * 1 Mbps was regarded as a mandatory rate. Keep + * using just 1 Mbps as the default basic rate for + * mesh to be interoperable with older versions. + */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) { + setup->basic_rates = BIT(i); + break; + } + } + } else { + scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); + setup->basic_rates = ieee80211_mandatory_rates(sband, + scan_width); + } } err = cfg80211_chandef_dfs_required(&rdev->wiphy, -- cgit v1.1 From c4de37ee2b55deac7d6aeac33e02e3d6be243898 Mon Sep 17 00:00:00 2001 From: Peter Oh Date: Fri, 26 Jan 2018 14:02:37 -0800 Subject: mac80211: mesh: fix wrong mesh TTL offset calculation mesh TTL offset in Mesh Channel Switch Parameters element depends on not only Secondary Channel Offset element, but also affected by HT Control field and Wide Bandwidth Channel Switch element. So use element structure to manipulate mesh channel swich param IE after removing its constant attribution to correct the miscalculation. Signed-off-by: Peter Oh Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mesh.c | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 885d00b..61db1fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1467,7 +1467,7 @@ struct ieee802_11_elems { const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; - const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; + struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie; /* length of them, respectively */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5e27364..2355553 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1253,13 +1253,12 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, } static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee802_11_elems *elems) { struct ieee80211_mgmt *mgmt_fwd; struct sk_buff *skb; struct ieee80211_local *local = sdata->local; - u8 *pos = mgmt->u.action.u.chan_switch.variable; - size_t offset_ttl; skb = dev_alloc_skb(local->tx_headroom + len); if (!skb) @@ -1267,13 +1266,9 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->tx_headroom); mgmt_fwd = skb_put(skb, len); - /* offset_ttl is based on whether the secondary channel - * offset is available or not. Subtract 1 from the mesh TTL - * and disable the initiator flag before forwarding. - */ - offset_ttl = (len < 42) ? 7 : 10; - *(pos + offset_ttl) -= 1; - *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; + elems->mesh_chansw_params_ie->mesh_ttl--; + elems->mesh_chansw_params_ie->mesh_flags &= + ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1321,7 +1316,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, /* forward or re-broadcast the CSA frame */ if (fwd_csa) { - if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0) + if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0) mcsa_dbg(sdata, "Failed to forward the CSA frame"); } } -- cgit v1.1 From 745fd50f3b044db6a3922e1718306555613164b0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 31 Jan 2018 12:04:50 +0100 Subject: drm/cirrus: Load lut in crtc_commit In the past the ast driver relied upon the fbdev emulation helpers to call ->load_lut at boot-up. But since commit b8e2b0199cc377617dc238f5106352c06dcd3fa2 Author: Peter Rosin Date: Tue Jul 4 12:36:57 2017 +0200 drm/fb-helper: factor out pseudo-palette that's cleaned up and drivers are expected to boot into a consistent lut state. This patch fixes that. Fixes: b8e2b0199cc3 ("drm/fb-helper: factor out pseudo-palette") Cc: Peter Rosin Cc: Daniel Vetter Cc: # v4.14+ References: https://bugzilla.kernel.org/show_bug.cgi?id=198123 Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20180131110450.22153-1-daniel.vetter@ffwll.ch Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/cirrus/cirrus_mode.c | 40 +++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index cd23b1b..c91b9b0 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -294,22 +294,7 @@ static void cirrus_crtc_prepare(struct drm_crtc *crtc) { } -/* - * This is called after a mode is programmed. It should reverse anything done - * by the prepare function - */ -static void cirrus_crtc_commit(struct drm_crtc *crtc) -{ -} - -/* - * The core can pass us a set of gamma values to program. We actually only - * use this for 8-bit mode so can't perform smooth fades on deeper modes, - * but it's a requirement that we provide the function - */ -static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size, - struct drm_modeset_acquire_ctx *ctx) +static void cirrus_crtc_load_lut(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct cirrus_device *cdev = dev->dev_private; @@ -317,7 +302,7 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, int i; if (!crtc->enabled) - return 0; + return; r = crtc->gamma_store; g = r + crtc->gamma_size; @@ -330,6 +315,27 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, WREG8(PALETTE_DATA, *g++ >> 8); WREG8(PALETTE_DATA, *b++ >> 8); } +} + +/* + * This is called after a mode is programmed. It should reverse anything done + * by the prepare function + */ +static void cirrus_crtc_commit(struct drm_crtc *crtc) +{ + cirrus_crtc_load_lut(crtc); +} + +/* + * The core can pass us a set of gamma values to program. We actually only + * use this for 8-bit mode so can't perform smooth fades on deeper modes, + * but it's a requirement that we provide the function + */ +static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, + u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) +{ + cirrus_crtc_load_lut(crtc); return 0; } -- cgit v1.1 From 54f809cfbd6b4a43959039f5d33596ed3297ce16 Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Wed, 17 Jan 2018 12:51:08 +0100 Subject: drm/atomic: Fix memleak on ERESTARTSYS during non-blocking commits During a non-blocking commit, it is possible to return before the commit_tail work is queued (-ERESTARTSYS, for example). Since a reference on the crtc commit object is obtained for the pending vblank event when preparing the commit, the above situation will leave us with an extra reference. Therefore, if the commit_tail worker has not consumed the event at the end of a commit, release it's reference. Changes since v1: - Also check for state->event->base.completion being set, to handle the case where stall_checks() fails in setup_crtc_commit(). Changes since v2: - Add a flag to drm_crtc_commit, to prevent dereferencing a freed event. i915 may unreference the state in a worker. Fixes: 24835e442f28 ("drm: reference count event->completion") Cc: # v4.11+ Signed-off-by: Leo (Sunpeng) Li Acked-by: Harry Wentland #v1 Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20180117115108.29608-1-maarten.lankhorst@linux.intel.com Reviewed-by: Sean Paul --- drivers/gpu/drm/drm_atomic_helper.c | 15 +++++++++++++++ include/drm/drm_atomic.h | 9 +++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index b16f1d6..e8c2493 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1778,6 +1778,8 @@ int drm_atomic_helper_setup_commit(struct drm_atomic_state *state, new_crtc_state->event->base.completion = &commit->flip_done; new_crtc_state->event->base.completion_release = release_crtc_commit; drm_crtc_commit_get(commit); + + commit->abort_completion = true; } for_each_oldnew_connector_in_state(state, conn, old_conn_state, new_conn_state, i) { @@ -3327,8 +3329,21 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state); void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state) { if (state->commit) { + /* + * In the event that a non-blocking commit returns + * -ERESTARTSYS before the commit_tail work is queued, we will + * have an extra reference to the commit object. Release it, if + * the event has not been consumed by the worker. + * + * state->event may be freed, so we can't directly look at + * state->event->base.completion. + */ + if (state->event && state->commit->abort_completion) + drm_crtc_commit_put(state->commit); + kfree(state->commit->event); state->commit->event = NULL; + drm_crtc_commit_put(state->commit); } diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 5afd6e3..c63b0b4 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -134,6 +134,15 @@ struct drm_crtc_commit { * &drm_pending_vblank_event pointer to clean up private events. */ struct drm_pending_vblank_event *event; + + /** + * @abort_completion: + * + * A flag that's set after drm_atomic_helper_setup_commit takes a second + * reference for the completion of $drm_crtc_state.event. It's used by + * the free code to remove the second reference if commit fails. + */ + bool abort_completion; }; struct __drm_planes_state { -- cgit v1.1 From 498e7e7ed1fd72c275a682f0903c4a20cc538658 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 1 Feb 2018 10:18:59 -0800 Subject: Input: mms114 - fix license module information The driver has been released with GNU Public License v2 as stated in the header, but the module license information has been tagged as "GPL" (GNU Public License v2 or later). Fix the module license information so that it matches the one in the header as "GPL v2". Fixes: 07b8481d4aff ("Input: add MELFAS mms114 touchscreen driver") Reported-by: Marcus Folkesson Signed-off-by: Andi Shyti Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/mms114.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index db4f6bb..6823674 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -624,4 +624,4 @@ module_i2c_driver(mms114_driver); /* Module information */ MODULE_AUTHOR("Joonyoung Shim "); MODULE_DESCRIPTION("MELFAS mms114 Touchscreen driver"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); -- cgit v1.1 From 0004520af32fca8b40abe78c11654be4e9e20fdf Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 1 Feb 2018 10:22:20 -0800 Subject: Input: mms114 - add SPDX identifier Replace the original license statement with the SPDX identifier. Add also one line of description as recommended by the COPYING file. Signed-off-by: Andi Shyti Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/mms114.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 6823674..a5ab774 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -1,11 +1,8 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ +// SPDX-License-Identifier: GPL-2.0 +// Melfas MMS114/MMS152 touchscreen device driver +// +// Copyright (c) 2012 Samsung Electronics Co., Ltd. +// Author: Joonyoung Shim #include #include -- cgit v1.1 From 511051d509ec54642dd6d30fdf2caa33c23619cc Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Thu, 1 Feb 2018 21:49:24 +0100 Subject: iio: srf08: fix link error "devm_iio_triggered_buffer_setup" undefined Functions for triggered buffer support are needed by this module. If they are not defined accidentally by another driver, there's an error thrown out while linking. Add a select of IIO_BUFFER and IIO_TRIGGERED_BUFFER in the Kconfig file. Signed-off-by: Andreas Klinger Fixes: a83195937151 ("iio: srf08: add triggered buffer support") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/proximity/Kconfig b/drivers/iio/proximity/Kconfig index fcb1c4b..f726f94 100644 --- a/drivers/iio/proximity/Kconfig +++ b/drivers/iio/proximity/Kconfig @@ -68,6 +68,8 @@ config SX9500 config SRF08 tristate "Devantech SRF02/SRF08/SRF10 ultrasonic ranger sensor" + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER depends on I2C help Say Y here to build a driver for Devantech SRF02/SRF08/SRF10 -- cgit v1.1 From 827cc2fa024dd6517d62de7a44c7b42f32af371b Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:31 +0100 Subject: ARC: Fix malformed ARC_EMUL_UNALIGNED default 'default N' should be 'default n', though they happen to have the same effect here, due to undefined symbols (N in this case) evaluating to n in a tristate sense. Remove the default from ARC_EMUL_UNALIGNED instead of changing it. bool and tristate symbols implicitly default to n. Discovered with the https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_ulfalizer_Kconfiglib_blob_master_examples_list-5Fundefined.py&d=DwIBAg&c=DPL6_X_6JkXFx7AXWqB0tg&r=c14YS-cH-kdhTOW89KozFhBtBJgs1zXscZojEZQ0THs&m=WxxD8ozR7QQUVzNCBksiznaisBGO_crN7PBOvAoju8s&s=1LmxsNqxwT-7wcInVpZ6Z1J27duZKSoyKxHIJclXU_M&e= script. Signed-off-by: Ulf Magnusson Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9d5fd00..6a232f7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -487,7 +487,6 @@ config ARC_CURR_IN_REG config ARC_EMUL_UNALIGNED bool "Emulate unaligned memory access (userspace only)" - default N select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_ARCH_UNALIGN_ALLOW depends on ISA_ARCOMPACT -- cgit v1.1 From 05382333595612204d6c91820bd77be20119cb9b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 6 Feb 2018 12:08:10 +0100 Subject: arc: dts: use 'atmel' as manufacturer for at24 in axs10x_mb Using compatible strings without the part for at24 is deprecated since commit 6da28acf745f ("dt-bindings: at24: consistently document the compatible property"). Use a correct 'atmel,' value. Signed-off-by: Bartosz Golaszewski Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axs10x_mb.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 74d070c..47b74fb 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -214,13 +214,13 @@ }; eeprom@0x54{ - compatible = "24c01"; + compatible = "atmel,24c01"; reg = <0x54>; pagesize = <0x8>; }; eeprom@0x57{ - compatible = "24c04"; + compatible = "atmel,24c04"; reg = <0x57>; pagesize = <0x8>; }; -- cgit v1.1 From 50dbd09c56db0555813aa2824dc4fe8f1fc06aaa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Jan 2018 16:33:46 -0800 Subject: scsi: qla2xxx: Fix a locking imbalance in qlt_24xx_handle_els() Ensure that upon return the tgt->ha->tgt.sess_lock spin lock is unlocked no matter which code path is taken through this function. This was detected by sparse. Fixes: 82abdcaf3ede ("scsi: qla2xxx: Allow target mode to accept PRLI in dual mode") Signed-off-by: Bart Van Assche Cc: Himanshu Madhani Cc: Quinn Tran Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fc89af8..896b2d8 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4871,8 +4871,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, sess); qlt_send_term_imm_notif(vha, iocb, 1); res = 0; - spin_lock_irqsave(&tgt->ha->tgt.sess_lock, - flags); break; } -- cgit v1.1 From fd2c19b2a28bb574b3914466a68ef830212d5cf7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jan 2018 09:16:56 +0100 Subject: netfilter: x_tables: remove size check Back in 2002 vmalloc used to BUG on too large sizes. We are much better behaved these days and vmalloc simply returns NULL for those. Remove the check as it simply not needed and the comment is even misleading. Link: http://lkml.kernel.org/r/20180131081916.GO21609@dhcp22.suse.cz Suggested-by: Andrew Morton Signed-off-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Florian Westphal Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2f685ee..97e06a0 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1004,10 +1004,6 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz < sizeof(*info)) return NULL; - /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((size >> PAGE_SHIFT) + 2 > totalram_pages) - return NULL; - /* __GFP_NORETRY is not fully supported by kvmalloc but it should * work reasonably well if sz is too large and bail out rather * than shoot all processes down before realizing there is nothing -- cgit v1.1 From b3e456fce9f51d6276e576d00271e2813c1b8b67 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 7 Feb 2018 21:59:17 -0800 Subject: netfilter: ipt_CLUSTERIP: fix a race condition of proc file creation There is a race condition between clusterip_config_entry_put() and clusterip_config_init(), after we release the spinlock in clusterip_config_entry_put(), a new proc file with a same IP could be created immediately since it is already removed from the configs list, therefore it triggers this warning: ------------[ cut here ]------------ proc_dir_entry 'ipt_CLUSTERIP/172.20.0.170' already registered WARNING: CPU: 1 PID: 4152 at fs/proc/generic.c:330 proc_register+0x2a4/0x370 fs/proc/generic.c:329 Kernel panic - not syncing: panic_on_warn set ... As a quick fix, just move the proc_remove() inside the spinlock. Reported-by: Fixes: 6c5d5cfbe3c5 ("netfilter: ipt_CLUSTERIP: check duplicate config when initializing") Tested-by: Paolo Abeni Cc: Xin Long Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Reviewed-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 3a84a60..1ff72b8 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -107,12 +107,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) local_bh_disable(); if (refcount_dec_and_lock(&c->entries, &cn->lock)) { - list_del_rcu(&c->list); - spin_unlock(&cn->lock); - local_bh_enable(); - - unregister_netdevice_notifier(&c->notifier); - /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ @@ -120,6 +114,12 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) if (cn->procdir) proc_remove(c->pde); #endif + list_del_rcu(&c->list); + spin_unlock(&cn->lock); + local_bh_enable(); + + unregister_netdevice_notifier(&c->notifier); + return; } local_bh_enable(); -- cgit v1.1 From 3a129cc2151425e5aeb69aeb25fbc994ec738137 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sun, 4 Feb 2018 18:45:21 +0100 Subject: vsprintf: avoid misleading "(null)" for %px Like %pK already does, print "00000000" instead. This confused people -- the convention is that "(null)" means you tried to dereference a null pointer as opposed to printing the address. Link: http://lkml.kernel.org/r/20180204174521.21383-1-kilobyte@angband.pl To: Sergey Senozhatsky To: Steven Rostedt To: linux-kernel@vger.kernel.org Cc: Andrew Morton Cc: Joe Perches Cc: Kees Cook Cc: "Roberts, William C" Cc: Linus Torvalds Cc: David Laight Cc: Randy Dunlap Cc: Geert Uytterhoeven Signed-off-by: Adam Borowski Signed-off-by: Petr Mladek --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 77ee6ce..d7a708f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1849,7 +1849,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, { const int default_width = 2 * sizeof(void *); - if (!ptr && *fmt != 'K') { + if (!ptr && *fmt != 'K' && *fmt != 'x') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. -- cgit v1.1 From f63248fac563125fd5a2f0bc780ce7a299872cab Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 8 Feb 2018 14:43:05 +0100 Subject: regulator: stm32-vrefbuf: fix check on ready flag stm32_vrefbuf_enable() wrongly checks VRR bit: 0 stands for not ready, 1 for ready. It currently checks the opposite. This makes enable routine to exit immediately without waiting for ready flag. Fixes: 0cdbf481e927 ("regulator: Add support for stm32-vrefbuf") Signed-off-by: Fabrice Gasnier Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/stm32-vrefbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/stm32-vrefbuf.c b/drivers/regulator/stm32-vrefbuf.c index 72c8b3e..e0a9c44 100644 --- a/drivers/regulator/stm32-vrefbuf.c +++ b/drivers/regulator/stm32-vrefbuf.c @@ -51,7 +51,7 @@ static int stm32_vrefbuf_enable(struct regulator_dev *rdev) * arbitrary timeout. */ ret = readl_poll_timeout(priv->base + STM32_VREFBUF_CSR, val, - !(val & STM32_VRR), 650, 10000); + val & STM32_VRR, 650, 10000); if (ret) { dev_err(&rdev->dev, "stm32 vrefbuf timed out!\n"); val = readl_relaxed(priv->base + STM32_VREFBUF_CSR); -- cgit v1.1 From ea4f7bd2aca9f68470e9aac0fc9432fd180b1fe7 Mon Sep 17 00:00:00 2001 From: Zhang Bo Date: Mon, 5 Feb 2018 14:56:21 -0800 Subject: Input: matrix_keypad - fix race when disabling interrupts If matrix_keypad_stop() is executing and the keypad interrupt is triggered, disable_row_irqs() may be called by both matrix_keypad_interrupt() and matrix_keypad_stop() at the same time, causing interrupts to be disabled twice and the keypad being "stuck" after resuming. Take lock when setting keypad->stopped to ensure that ISR will not race with matrix_keypad_stop() disabling interrupts. Signed-off-by: Zhang Bo Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 1f316d6..41614c1 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -218,8 +218,10 @@ static void matrix_keypad_stop(struct input_dev *dev) { struct matrix_keypad *keypad = input_get_drvdata(dev); + spin_lock_irq(&keypad->lock); keypad->stopped = true; - mb(); + spin_unlock_irq(&keypad->lock); + flush_work(&keypad->work.work); /* * matrix_keypad_scan() will leave IRQs enabled; -- cgit v1.1 From 54e02162d4454a99227f520948bf4494c3d972d0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sun, 11 Feb 2018 11:28:12 +0800 Subject: ptr_ring: prevent integer overflow when calculating size Switch to use dividing to prevent integer overflow when size is too big to calculate allocation size properly. Reported-by: Eric Biggers Fixes: 6e6e41c31122 ("ptr_ring: fail early if queue occupies more than KMALLOC_MAX_SIZE") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index b884b77..e633522 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -469,7 +469,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, */ static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - if (size * sizeof(void *) > KMALLOC_MAX_SIZE) + if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); } -- cgit v1.1 From 7ac8ff95f48cbfa609a060fd6a1e361dd62feeb3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 11 Feb 2018 18:10:28 -0500 Subject: mvpp2: fix multicast address filter IPv6 doesn't work on the MacchiatoBIN board. It is caused by broken multicast address filter in the mvpp2 driver. The driver loads doesn't load any multicast entries if "allmulti" is not set. This condition should be reversed. The condition !netdev_mc_empty(dev) is useless (because netdev_for_each_mc_addr is nop if the list is empty). This patch also fixes a possible overflow of the multicast list - if mvpp2_prs_mac_da_accept fails, we set the allmulti flag and retry. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index a1d7b88..5a1668c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -7137,6 +7137,7 @@ static void mvpp2_set_rx_mode(struct net_device *dev) int id = port->id; bool allmulti = dev->flags & IFF_ALLMULTI; +retry: mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC); mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti); mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti); @@ -7144,9 +7145,13 @@ static void mvpp2_set_rx_mode(struct net_device *dev) /* Remove all port->id's mcast enries */ mvpp2_prs_mcast_del_all(priv, id); - if (allmulti && !netdev_mc_empty(dev)) { - netdev_for_each_mc_addr(ha, dev) - mvpp2_prs_mac_da_accept(priv, id, ha->addr, true); + if (!allmulti) { + netdev_for_each_mc_addr(ha, dev) { + if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) { + allmulti = true; + goto retry; + } + } } } -- cgit v1.1 From 73e42e18669934fa96cf2bb54291da54177076d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Mon, 22 Jan 2018 18:46:22 +0100 Subject: arm64: dts: rockchip: fix rock64 gmac2io stability issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit enables thresh dma mode as this forces to disable checksuming, and chooses delay values which make the interface stable. These changes are needed, because ROCK64 is faced with two problems: 1. tx checksuming does not work with packets larger than 1498, 2. the default delays for tx/rx are not stable when using 1Gbps connection. Delays were found out with: https://github.com/ayufan-rock64/linux-build/tree/master/recipes/gmac-delays-test Signed-off-by: Kamil Trzciński Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 3890468..2825772 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,17 +132,16 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; clock_in_out = "input"; - /* shows instability at 1GBit right now */ - max-speed = <100>; phy-supply = <&vcc_io>; phy-mode = "rgmii"; pinctrl-names = "default"; pinctrl-0 = <&rgmiim1_pins>; + snps,force_thresh_dma_mode; snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x26>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; -- cgit v1.1 From 2b7d2ed1af2e2c0c90a1a8b97926b7b6c6cb03ed Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 9 Feb 2018 16:51:48 +0800 Subject: arm64: dts: rockchip: correct ep-gpios for rk3399-sapphire The endpoint control gpio for rk3399-sapphire boards is gpio2_a4, so correct it now. Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 0f873c8..ce592a4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -457,7 +457,7 @@ assigned-clocks = <&cru SCLK_PCIEPHY_REF>; assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>; assigned-clock-rates = <100000000>; - ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>; + ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; -- cgit v1.1 From 7b0390eabdd1dec50f60ad25e7e706875bfa223e Mon Sep 17 00:00:00 2001 From: Yakir Yang Date: Wed, 7 Feb 2018 17:31:48 +0100 Subject: arm64: dts: rockchip: introduce pclk_vio_grf in rk3399-eDP device node The pclk_vio_grf supply power for VIO GRF IOs, if it is disabled, driver would failed to operate the VIO GRF registers. The clock is optional but one of the side effects of don't have this clk is that the Samsung Chromebook Plus fails to recover display after a suspend/resume with following errors: rockchip-dp ff970000.edp: Input stream clock not detected. rockchip-dp ff970000.edp: Timeout of video streamclk ok rockchip-dp ff970000.edp: unable to config video Signed-off-by: Yakir Yang Signed-off-by: Enric Balletbo i Serra [this should also fix display failures when building rockchip-drm as module] Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 7aa2144..2605118 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1739,8 +1739,8 @@ compatible = "rockchip,rk3399-edp"; reg = <0x0 0xff970000 0x0 0x8000>; interrupts = ; - clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>; - clock-names = "dp", "pclk"; + clocks = <&cru PCLK_EDP>, <&cru PCLK_EDP_CTRL>, <&cru PCLK_VIO_GRF>; + clock-names = "dp", "pclk", "grf"; pinctrl-names = "default"; pinctrl-0 = <&edp_hpd>; power-domains = <&power RK3399_PD_EDP>; -- cgit v1.1 From bff52352e0ccc2481f2b6b0d612ff8ff56c50f3a Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Mon, 18 Dec 2017 16:14:36 +0100 Subject: usb: dwc3: of-simple: fix oops by unbalanced clk disable call dwc3_of_simple_dev_pm_ops has never been used since commit a0d8c4cfdf31 ("usb: dwc3: of-simple: set dev_pm_ops"), but this commit has brought and oops when unbind the device due this sequence: dwc3_of_simple_remove -> clk_disable ... -> pm_runtime_put_sync -> dwc3_of_simple_runtime_suspend -> clk_disable (again) This double call to clk_core_disable causes a kernel oops like this: WARNING: CPU: 1 PID: 4022 at drivers/clk/clk.c:656 clk_core_disable+0x78/0x80 CPU: 1 PID: 4022 Comm: bash Not tainted 4.15.0-rc4+ #44 Hardware name: Google Kevin (DT) pstate: 80000085 (Nzcv daIf -PAN -UAO) pc : clk_core_disable+0x78/0x80 lr : clk_core_disable_lock+0x20/0x38 sp : ffff00000bbf3a90 ... Call trace: clk_core_disable+0x78/0x80 clk_disable+0x1c/0x30 dwc3_of_simple_runtime_suspend+0x30/0x50 pm_generic_runtime_suspend+0x28/0x40 This patch fixes the unbalanced clk disable call by setting the num_clocks variable to zero once the clocks were disabled. Fixes: a0d8c4cfdf31 ("usb: dwc3: of-simple: set dev_pm_ops") Signed-off-by: Enric Balletbo i Serra Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-of-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 7ae0eef..e54c362 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -143,6 +143,7 @@ static int dwc3_of_simple_remove(struct platform_device *pdev) clk_disable_unprepare(simple->clks[i]); clk_put(simple->clks[i]); } + simple->num_clocks = 0; reset_control_assert(simple->resets); reset_control_put(simple->resets); -- cgit v1.1 From e3190868e5f52fb26544f16463593d54ce46ce61 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 12 Jan 2018 20:00:56 +0900 Subject: usb: gadget: udc: renesas_usb3: fix oops in renesas_usb3_remove() This patch fixes an issue that the renesas_usb3_remove() causes NULL pointer dereference because the usb3_to_dev() macro will use the gadget instance and it will be deleted before. Fixes: cf06df3fae28 ("usb: gadget: udc: renesas_usb3: move pm_runtime_{en,dis}able()") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 6e87af2..409cde4 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2410,7 +2410,7 @@ static int renesas_usb3_remove(struct platform_device *pdev) __renesas_usb3_ep_free_request(usb3->ep0_req); if (usb3->phy) phy_put(usb3->phy); - pm_runtime_disable(usb3_to_dev(usb3)); + pm_runtime_disable(&pdev->dev); return 0; } -- cgit v1.1 From 6180026341e852a250e1f97ebdcf71684a3c81b9 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 12 Jan 2018 18:18:05 -0800 Subject: usb: dwc3: gadget: Set maxpacket size for ep0 IN There are 2 control endpoint structures for DWC3. However, the driver only updates the OUT direction control endpoint structure during ConnectDone event. DWC3 driver needs to update the endpoint max packet size for control IN endpoint as well. If the max packet size is not properly set, then the driver will incorrectly calculate the data transfer size and fail to send ZLP for HS/FS 3-stage control read transfer. The fix is simply to update the max packet size for the ep0 IN direction during ConnectDone event. Cc: stable@vger.kernel.org Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 616ef49..2bda4eb 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2745,6 +2745,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc) break; } + dwc->eps[1]->endpoint.maxpacket = dwc->gadget.ep0->maxpacket; + /* Enable USB2 LPM Capability */ if ((dwc->revision > DWC3_REVISION_194A) && -- cgit v1.1 From f035d139ffece7b6a7b8bfb17bd0ba715ee57a04 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 12 Jan 2018 18:18:27 -0800 Subject: usb: dwc3: ep0: Reset TRB counter for ep0 IN DWC3 tracks TRB counter for each ep0 direction separately. In control read transfer completion handler, the driver needs to reset the TRB enqueue counter for ep0 IN direction. Currently the driver only resets the TRB counter for control OUT endpoint. Check for the data direction and properly reset the TRB counter from correct control endpoint. Cc: stable@vger.kernel.org Fixes: c2da2ff00606 ("usb: dwc3: ep0: don't use ep0in for transfers") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/ep0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 9c2e4a1..18be31d 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -854,7 +854,12 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, trb++; trb->ctrl &= ~DWC3_TRB_CTRL_HWO; trace_dwc3_complete_trb(ep0, trb); - ep0->trb_enqueue = 0; + + if (r->direction) + dwc->eps[1]->trb_enqueue = 0; + else + dwc->eps[0]->trb_enqueue = 0; + dwc->ep0_bounced = false; } -- cgit v1.1 From 8813a59ed892305b5ac1b5b901740b1ad4b5fefa Mon Sep 17 00:00:00 2001 From: John Keeping Date: Fri, 12 Jan 2018 18:43:32 +0000 Subject: usb: gadget: f_uac2: fix bFirstInterface in composite gadget If there are multiple functions associated with a configuration, then the UAC2 interfaces may not start at zero. Set the correct first interface number in the association descriptor so that the audio interfaces are enumerated correctly in this case. Reviewed-by: Krzysztof Opasiak Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uac2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 11fe788..d2dc1f0 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -524,6 +524,8 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); return ret; } + iad_desc.bFirstInterface = ret; + std_ac_if_desc.bInterfaceNumber = ret; uac2->ac_intf = ret; uac2->ac_alt = 0; -- cgit v1.1 From 00b42170c86f90ac9dea83a7dfcd3f0c38098fe2 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 17 Jan 2018 13:22:49 -0800 Subject: usb: dwc3: Undo PHY init if soft reset fails In this function, we init the USB2 and USB3 PHYs, but if soft reset times out, we don't unwind this. Noticed by inspection. Signed-off-by: Brian Norris Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index ade2ab0..bc2467f 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -234,6 +234,9 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) udelay(1); } while (--retries); + phy_exit(dwc->usb3_generic_phy); + phy_exit(dwc->usb2_generic_phy); + return -ETIMEDOUT; } -- cgit v1.1 From c4a5153e87fdf6805f63ff57556260e2554155a5 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Thu, 18 Jan 2018 16:54:30 +0530 Subject: usb: dwc3: core: Power-off core/PHYs on system_suspend in host mode Commit 689bf72c6e0d ("usb: dwc3: Don't reinitialize core during host bus-suspend/resume") updated suspend/resume routines to not power_off and reinit PHYs/core for host mode. It broke platforms that rely on DWC3 core to power_off PHYs to enter low power state on system suspend. Perform dwc3_core_exit/init only during host mode system_suspend/ resume to addresses power regression from above mentioned patch and also allow USB session to stay connected across runtime_suspend/resume in host mode. While at it also replace existing checks for HOST only dr_mode with current_dr_role to have similar core driver behavior for both Host-only and DRD+Host configurations. Fixes: 689bf72c6e0d ("usb: dwc3: Don't reinitialize core during host bus-suspend/resume") Reviewed-by: Roger Quadros Signed-off-by: Manu Gautam Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index bc2467f..59511f2 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -100,6 +100,8 @@ static void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) reg &= ~(DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG)); reg |= DWC3_GCTL_PRTCAPDIR(mode); dwc3_writel(dwc->regs, DWC3_GCTL, reg); + + dwc->current_dr_role = mode; } static void __dwc3_set_mode(struct work_struct *work) @@ -133,8 +135,6 @@ static void __dwc3_set_mode(struct work_struct *work) dwc3_set_prtcap(dwc, dwc->desired_dr_role); - dwc->current_dr_role = dwc->desired_dr_role; - spin_unlock_irqrestore(&dwc->lock, flags); switch (dwc->desired_dr_role) { @@ -219,7 +219,7 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) * XHCI driver will reset the host block. If dwc3 was configured for * host-only mode, then we can return early. */ - if (dwc->dr_mode == USB_DR_MODE_HOST) + if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) return 0; reg = dwc3_readl(dwc->regs, DWC3_DCTL); @@ -919,7 +919,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) switch (dwc->dr_mode) { case USB_DR_MODE_PERIPHERAL: - dwc->current_dr_role = DWC3_GCTL_PRTCAP_DEVICE; dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); if (dwc->usb2_phy) @@ -935,7 +934,6 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) } break; case USB_DR_MODE_HOST: - dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST; dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); if (dwc->usb2_phy) @@ -1287,7 +1285,7 @@ static int dwc3_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int dwc3_suspend_common(struct dwc3 *dwc) +static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { unsigned long flags; @@ -1299,6 +1297,10 @@ static int dwc3_suspend_common(struct dwc3 *dwc) dwc3_core_exit(dwc); break; case DWC3_GCTL_PRTCAP_HOST: + /* do nothing during host runtime_suspend */ + if (!PMSG_IS_AUTO(msg)) + dwc3_core_exit(dwc); + break; default: /* do nothing */ break; @@ -1307,7 +1309,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc) return 0; } -static int dwc3_resume_common(struct dwc3 *dwc) +static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) { unsigned long flags; int ret; @@ -1323,6 +1325,13 @@ static int dwc3_resume_common(struct dwc3 *dwc) spin_unlock_irqrestore(&dwc->lock, flags); break; case DWC3_GCTL_PRTCAP_HOST: + /* nothing to do on host runtime_resume */ + if (!PMSG_IS_AUTO(msg)) { + ret = dwc3_core_init(dwc); + if (ret) + return ret; + } + break; default: /* do nothing */ break; @@ -1334,12 +1343,11 @@ static int dwc3_resume_common(struct dwc3 *dwc) static int dwc3_runtime_checks(struct dwc3 *dwc) { switch (dwc->current_dr_role) { - case USB_DR_MODE_PERIPHERAL: - case USB_DR_MODE_OTG: + case DWC3_GCTL_PRTCAP_DEVICE: if (dwc->connected) return -EBUSY; break; - case USB_DR_MODE_HOST: + case DWC3_GCTL_PRTCAP_HOST: default: /* do nothing */ break; @@ -1356,7 +1364,7 @@ static int dwc3_runtime_suspend(struct device *dev) if (dwc3_runtime_checks(dwc)) return -EBUSY; - ret = dwc3_suspend_common(dwc); + ret = dwc3_suspend_common(dwc, PMSG_AUTO_SUSPEND); if (ret) return ret; @@ -1372,7 +1380,7 @@ static int dwc3_runtime_resume(struct device *dev) device_init_wakeup(dev, false); - ret = dwc3_resume_common(dwc); + ret = dwc3_resume_common(dwc, PMSG_AUTO_RESUME); if (ret) return ret; @@ -1419,7 +1427,7 @@ static int dwc3_suspend(struct device *dev) struct dwc3 *dwc = dev_get_drvdata(dev); int ret; - ret = dwc3_suspend_common(dwc); + ret = dwc3_suspend_common(dwc, PMSG_SUSPEND); if (ret) return ret; @@ -1435,7 +1443,7 @@ static int dwc3_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - ret = dwc3_resume_common(dwc); + ret = dwc3_resume_common(dwc, PMSG_RESUME); if (ret) return ret; -- cgit v1.1 From 499350865387f8b8c40a9e9453a9a7eb3cec5dc4 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 18 Jan 2018 00:22:45 -0200 Subject: usb: phy: mxs: Fix NULL pointer dereference on i.MX23/28 Commit e93650994a95 ("usb: phy: mxs: add usb charger type detection") causes the following kernel hang on i.MX28: [ 2.207973] usbcore: registered new interface driver usb-storage [ 2.235659] Unable to handle kernel NULL pointer dereference at virtual address 00000188 [ 2.244195] pgd = (ptrval) [ 2.246994] [00000188] *pgd=00000000 [ 2.250676] Internal error: Oops: 5 [#1] ARM [ 2.254979] Modules linked in: [ 2.258089] CPU: 0 PID: 1 Comm: swapper Not tainted 4.15.0-rc8-next-20180117-00002-g75d5f21 #7 [ 2.266724] Hardware name: Freescale MXS (Device Tree) [ 2.271921] PC is at regmap_read+0x0/0x5c [ 2.275977] LR is at mxs_phy_charger_detect+0x34/0x1dc mxs_phy_charger_detect() makes accesses to the anatop registers via regmap, however i.MX23/28 do not have such registers, which causes a NULL pointer dereference. Fix the issue by doing a NULL check on the 'regmap' pointer. Fixes: e93650994a95 ("usb: phy: mxs: add usb charger type detection") Cc: # v4.15 Reviewed-by: Li Jun Acked-by: Peter Chen Signed-off-by: Fabio Estevam Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-mxs-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c index da031c4..fbec863 100644 --- a/drivers/usb/phy/phy-mxs-usb.c +++ b/drivers/usb/phy/phy-mxs-usb.c @@ -602,6 +602,9 @@ static enum usb_charger_type mxs_phy_charger_detect(struct usb_phy *phy) void __iomem *base = phy->io_priv; enum usb_charger_type chgr_type = UNKNOWN_TYPE; + if (!regmap) + return UNKNOWN_TYPE; + if (mxs_charger_data_contact_detect(mxs_phy)) return chgr_type; -- cgit v1.1 From 6cf439e0d37463e42784271179c8a308fd7493c6 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 24 Jan 2018 00:11:53 -0800 Subject: usb: gadget: f_fs: Process all descriptors during bind During _ffs_func_bind(), the received descriptors are evaluated to prepare for binding with the gadget in order to allocate endpoints and optionally set up OS descriptors. However, the high- and super-speed descriptors are only parsed based on whether the gadget_is_dualspeed() and gadget_is_superspeed() calls are true, respectively. This is a problem in case a userspace program always provides all of the {full,high,super,OS} descriptors when configuring a function. Then, for example if a gadget device is not capable of SuperSpeed, the call to ffs_do_descs() for the SS descriptors is skipped, resulting in an incorrect offset calculation for the vla_ptr when moving on to the OS descriptors that follow. This causes ffs_do_os_descs() to fail as it is now looking at the SS descriptors' offset within the raw_descs buffer instead. _ffs_func_bind() should evaluate the descriptors unconditionally, so remove the checks for gadget speed. Fixes: f0175ab51993 ("usb: gadget: f_fs: OS descriptors support") Cc: stable@vger.kernel.org Co-Developed-by: Mayank Rana Signed-off-by: Mayank Rana Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 8f2cf3b..49fc589 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2979,10 +2979,8 @@ static int _ffs_func_bind(struct usb_configuration *c, struct ffs_data *ffs = func->ffs; const int full = !!func->ffs->fs_descs_count; - const int high = gadget_is_dualspeed(func->gadget) && - func->ffs->hs_descs_count; - const int super = gadget_is_superspeed(func->gadget) && - func->ffs->ss_descs_count; + const int high = !!func->ffs->hs_descs_count; + const int super = !!func->ffs->ss_descs_count; int fs_len, hs_len, ss_len, ret, i; struct ffs_ep *eps_ptr; -- cgit v1.1 From 675272d092e4a5570bace92593776f7348daf4c5 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 24 Jan 2018 23:58:20 -0800 Subject: usb: gadget: f_fs: Use config_ep_by_speed() In commit 2bfa0719ac2a ("usb: gadget: function: f_fs: pass companion descriptor along") there is a pointer arithmetic bug where the comp_desc is obtained as follows: comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + USB_DT_ENDPOINT_SIZE); Since ds is a pointer to usb_endpoint_descriptor, adding 7 to it ends up going out of bounds (7 * sizeof(struct usb_endpoint_descriptor), which is actually 7*9 bytes) past the SS descriptor. As a result the maxburst value will be read incorrectly, and the UDC driver will also get a garbage comp_desc (assuming it uses it). Since Felipe wrote, "Eventually, f_fs.c should be converted to use config_ep_by_speed() like all other functions, though", let's finally do it. This allows the other usb_ep fields to be properly populated, such as maxpacket and mult. It also eliminates the awkward speed-based descriptor lookup since config_ep_by_speed() does that already using the ones found in struct usb_function. Fixes: 2bfa0719ac2a ("usb: gadget: function: f_fs: pass companion descriptor along") Cc: stable@vger.kernel.org Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 49fc589..c2592d8 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1855,44 +1855,20 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); while(count--) { - struct usb_endpoint_descriptor *ds; - struct usb_ss_ep_comp_descriptor *comp_desc = NULL; - int needs_comp_desc = false; - int desc_idx; - - if (ffs->gadget->speed == USB_SPEED_SUPER) { - desc_idx = 2; - needs_comp_desc = true; - } else if (ffs->gadget->speed == USB_SPEED_HIGH) - desc_idx = 1; - else - desc_idx = 0; - - /* fall-back to lower speed if desc missing for current speed */ - do { - ds = ep->descs[desc_idx]; - } while (!ds && --desc_idx >= 0); - - if (!ds) { - ret = -EINVAL; - break; - } - ep->ep->driver_data = ep; - ep->ep->desc = ds; - if (needs_comp_desc) { - comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + - USB_DT_ENDPOINT_SIZE); - ep->ep->maxburst = comp_desc->bMaxBurst + 1; - ep->ep->comp_desc = comp_desc; + ret = config_ep_by_speed(func->gadget, &func->function, ep->ep); + if (ret) { + pr_err("%s: config_ep_by_speed(%s) returned %d\n", + __func__, ep->ep->name, ret); + break; } ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; - epfile->in = usb_endpoint_dir_in(ds); - epfile->isoc = usb_endpoint_xfer_isoc(ds); + epfile->in = usb_endpoint_dir_in(ep->ep->desc); + epfile->isoc = usb_endpoint_xfer_isoc(ep->ep->desc); } else { break; } -- cgit v1.1 From c49f63055e252810e5d6c83a4943b18db16b3cd8 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 22 Jan 2018 15:01:42 +0200 Subject: usb: dwc3: omap: don't miss events during suspend/resume The USB cable state can change during suspend/resume so be sure to check and update the extcon state. Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index a4719e8..ed8b865 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -582,9 +582,25 @@ static int dwc3_omap_resume(struct device *dev) return 0; } +static void dwc3_omap_complete(struct device *dev) +{ + struct dwc3_omap *omap = dev_get_drvdata(dev); + + if (extcon_get_state(omap->edev, EXTCON_USB)) + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF); + + if (extcon_get_state(omap->edev, EXTCON_USB_HOST)) + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT); +} + static const struct dev_pm_ops dwc3_omap_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_omap_suspend, dwc3_omap_resume) + .complete = dwc3_omap_complete, }; #define DEV_PM_OPS (&dwc3_omap_dev_pm_ops) -- cgit v1.1 From e74bd4d358e5455233f1dcc3975425905b270b91 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Thu, 21 Dec 2017 09:54:25 +0530 Subject: usb: gadget: core: Fix use-after-free of usb_request Driver is tracing usb_request after freeing it. Fix it by changing the order. Signed-off-by: Manu Gautam Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 859d5b1..1f8b19d 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(usb_ep_alloc_request); void usb_ep_free_request(struct usb_ep *ep, struct usb_request *req) { - ep->ops->free_request(ep, req); trace_usb_ep_free_request(ep, req, 0); + ep->ops->free_request(ep, req); } EXPORT_SYMBOL_GPL(usb_ep_free_request); -- cgit v1.1 From b16ea8b9492e99e03b1269fe93ebdbf8e4eabf8a Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 2 Feb 2018 13:21:35 -0800 Subject: usb: dwc3: Fix GDBGFIFOSPACE_TYPE values The FIFO/Queue type values are incorrect. Correct them according to DWC_usb3 programming guide section 1.2.27 (or DWC_usb31 section 1.2.25). Additionally, this patch includes ProtocolStatusQ and AuxEventQ types. Fixes: cf6d867d3b57 ("usb: dwc3: core: add fifo space helper") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 03c7aaa..185b960 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -158,13 +158,15 @@ #define DWC3_GDBGFIFOSPACE_TYPE(n) (((n) << 5) & 0x1e0) #define DWC3_GDBGFIFOSPACE_SPACE_AVAILABLE(n) (((n) >> 16) & 0xffff) -#define DWC3_TXFIFOQ 1 -#define DWC3_RXFIFOQ 3 -#define DWC3_TXREQQ 5 -#define DWC3_RXREQQ 7 -#define DWC3_RXINFOQ 9 -#define DWC3_DESCFETCHQ 13 -#define DWC3_EVENTQ 15 +#define DWC3_TXFIFOQ 0 +#define DWC3_RXFIFOQ 1 +#define DWC3_TXREQQ 2 +#define DWC3_RXREQQ 3 +#define DWC3_RXINFOQ 4 +#define DWC3_PSTATQ 5 +#define DWC3_DESCFETCHQ 6 +#define DWC3_EVENTQ 7 +#define DWC3_AUXEVENTQ 8 /* Global RX Threshold Configuration Register */ #define DWC3_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 19) -- cgit v1.1 From 20bf410ecf9e9c045f4b0548d516dd3de8691074 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:23 +0100 Subject: usb: gadget: udc: Remove USB_GADGET_DUALSPEED select USB_GADGET_DUALSPEED was removed by commit 85b8614d7223 ("usb: gadget: get rid of USB_GADGET_{DUAL,SUPER}SPEED"), but the USB_SNP_UDC_PLAT symbol still selects it. Remove the USB_GADGET_DUALSPEED select from USB_SNP_UDC_PLAT. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Signed-off-by: Ulf Magnusson Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 1e95670..0875d38 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -274,7 +274,6 @@ config USB_SNP_UDC_PLAT tristate "Synopsys USB 2.0 Device controller" depends on USB_GADGET && OF && HAS_DMA depends on EXTCON || EXTCON=n - select USB_GADGET_DUALSPEED select USB_SNP_CORE default ARCH_BCM_IPROC help -- cgit v1.1 From 17aa31f13cad25daa19d3f923323f552e87bc874 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 5 Feb 2018 17:12:35 +0900 Subject: usb: renesas_usbhs: missed the "running" flag in usb_dmac with rx path This fixes an issue that a gadget driver (usb_f_fs) is possible to stop rx transactions after the usb-dmac is used because the following functions missed to set/check the "running" flag. - usbhsf_dma_prepare_pop_with_usb_dmac() - usbhsf_dma_pop_done_with_usb_dmac() So, if next transaction uses pio, the usbhsf_prepare_pop() can not start the transaction because the "running" flag is 0. Fixes: 8355b2b3082d ("usb: renesas_usbhs: fix the behavior of some usbhs_pkt_handle") Cc: # v3.19+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/fifo.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 5925d11..39fa2fc 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -982,6 +982,10 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt, if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1)) goto usbhsf_pio_prepare_pop; + /* return at this time if the pipe is running */ + if (usbhs_pipe_is_running(pipe)) + return 0; + usbhs_pipe_config_change_bfre(pipe, 1); ret = usbhsf_fifo_select(pipe, fifo, 0); @@ -1172,6 +1176,7 @@ static int usbhsf_dma_pop_done_with_usb_dmac(struct usbhs_pkt *pkt, usbhsf_fifo_clear(pipe, fifo); pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len); + usbhs_pipe_running(pipe, 0); usbhsf_dma_stop(pipe, fifo); usbhsf_dma_unmap(pkt); usbhsf_fifo_unselect(pipe, pipe->fifo); -- cgit v1.1 From 20c63f4089cceab803438c383631963e34c4d8e5 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 12 Feb 2018 00:14:42 +0100 Subject: usb: gadget: fsl_udc_core: fix ep valid checks Clang reports the following warning: drivers/usb/gadget/udc/fsl_udc_core.c:1312:10: warning: address of array 'ep->name' will always evaluate to 'true' [-Wpointer-bool-conversion] if (ep->name) ~~ ~~~~^~~~ It seems that the authors intention was to check if the ep has been configured through struct_ep_setup. Check whether struct usb_ep name pointer has been set instead. Signed-off-by: Stefan Agner Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/fsl_udc_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index e5b4ee9..56b517a 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -1305,7 +1305,7 @@ static void udc_reset_ep_queue(struct fsl_udc *udc, u8 pipe) { struct fsl_ep *ep = get_ep_by_pipe(udc, pipe); - if (ep->name) + if (ep->ep.name) nuke(ep, -ESHUTDOWN); } @@ -1693,7 +1693,7 @@ static void dtd_complete_irq(struct fsl_udc *udc) curr_ep = get_ep_by_pipe(udc, i); /* If the ep is configured */ - if (curr_ep->name == NULL) { + if (!curr_ep->ep.name) { WARNING("Invalid EP?"); continue; } -- cgit v1.1 From 201ec568c57a43dbc73c7ac00e73c3c2d39559fc Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 16 Jan 2018 16:03:32 +0400 Subject: usb: dwc2: Add safety check in setting of descriptor chain pointers In some cases device sending ZLP IN on non EP0 which reassigning EP0 OUT descriptor pointer to that EP. Dedicated for EP0 OUT descriptor multiple time re-used by other EP while that descriptor already in use by EP0 OUT for SETUP transaction. As result when SETUP packet received BNA interrupt asserting. In dwc2_hsotg_program_zlp() function dwc2_gadget_set_ep0_desc_chain() must be called only for EP0. Acked-by: John Youn Signed-off-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e4c3ce0..57c7400 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1917,7 +1917,9 @@ static void dwc2_hsotg_program_zlp(struct dwc2_hsotg *hsotg, /* Not specific buffer needed for ep0 ZLP */ dma_addr_t dma = hs_ep->desc_list_dma; - dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep); + if (!index) + dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep); + dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, dma, 0); } else { dwc2_writel(DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) | -- cgit v1.1 From 9e95a66cce7250c358d496e1c3b62e29ce79ef40 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 16 Jan 2018 16:03:58 +0400 Subject: usb: dwc2: Add safety check for STSPHSERCVD intr STSPHSERCVD (status phase received) interrupt should be handled when EP0 is in DWC2_EP0_DATA_OUT state. Sometimes STSPHSERCVD interrupt asserted , when EP0 is not in DATA_OUT state. Spurios interrupt. Acked-by: John Youn Signed-off-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 57c7400..d6222e9 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2976,9 +2976,13 @@ static void dwc2_hsotg_epint(struct dwc2_hsotg *hsotg, unsigned int idx, if (ints & DXEPINT_STSPHSERCVD) { dev_dbg(hsotg->dev, "%s: StsPhseRcvd\n", __func__); - /* Move to STATUS IN for DDMA */ - if (using_desc_dma(hsotg)) - dwc2_hsotg_ep0_zlp(hsotg, true); + /* Safety check EP0 state when STSPHSERCVD asserted */ + if (hsotg->ep0_state == DWC2_EP0_DATA_OUT) { + /* Move to STATUS IN for DDMA */ + if (using_desc_dma(hsotg)) + dwc2_hsotg_ep0_zlp(hsotg, true); + } + } if (ints & DXEPINT_BACK2BACKSETUP) -- cgit v1.1 From 755d739534f998d92e348fba8ffb0478416576e7 Mon Sep 17 00:00:00 2001 From: Vardan Mikayelyan Date: Tue, 16 Jan 2018 16:04:24 +0400 Subject: usb: dwc2: Fix dwc2_hsotg_core_init_disconnected() We should call dwc2_hsotg_enqueue_setup() after properly setting lx_state. Because it may cause error-out from dwc2_hsotg_enqueue_setup() due to wrong value in lx_state. Issue can be reproduced by loading driver while connected A-Connector (start in A-HOST mode) then disconnect A-Connector to switch to B-DEVICE. Acked-by: John Youn Signed-off-by: Vardan Mikayelyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index d6222e9..5bcad1d 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3381,12 +3381,6 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, dwc2_writel(dwc2_hsotg_ep0_mps(hsotg->eps_out[0]->ep.maxpacket) | DXEPCTL_USBACTEP, hsotg->regs + DIEPCTL0); - dwc2_hsotg_enqueue_setup(hsotg); - - dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n", - dwc2_readl(hsotg->regs + DIEPCTL0), - dwc2_readl(hsotg->regs + DOEPCTL0)); - /* clear global NAKs */ val = DCTL_CGOUTNAK | DCTL_CGNPINNAK; if (!is_usb_reset) @@ -3397,6 +3391,12 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg, mdelay(3); hsotg->lx_state = DWC2_L0; + + dwc2_hsotg_enqueue_setup(hsotg); + + dev_dbg(hsotg->dev, "EP0: DIEPCTL0=0x%08x, DOEPCTL0=0x%08x\n", + dwc2_readl(hsotg->regs + DIEPCTL0), + dwc2_readl(hsotg->regs + DOEPCTL0)); } static void dwc2_hsotg_core_disconnect(struct dwc2_hsotg *hsotg) -- cgit v1.1 From 49527bc0e8d207e87bf3ebe8eb8cce7353372d79 Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Thu, 18 Jan 2018 22:17:57 +0800 Subject: pinctrl: meson-axg: adjust uart_ao_b pin group naming Simply adjust the pin group to _x _y _z style, as to keep the consistency in DT with previous naming scheme. Fixes: 83c566806a68 ("pinctrl: meson-axg: Add new pinctrl driver for Meson AXG SoC") Signed-off-by: Yixun Lan Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-axg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c index 1fda9d6..4b91ff7 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-axg.c +++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c @@ -716,7 +716,7 @@ static const char * const uart_b_groups[] = { "uart_tx_b_x", "uart_rx_b_x", "uart_cts_b_x", "uart_rts_b_x", }; -static const char * const uart_ao_b_gpioz_groups[] = { +static const char * const uart_ao_b_z_groups[] = { "uart_ao_tx_b_z", "uart_ao_rx_b_z", "uart_ao_cts_b_z", "uart_ao_rts_b_z", }; @@ -855,7 +855,7 @@ static struct meson_pmx_func meson_axg_periphs_functions[] = { FUNCTION(nand), FUNCTION(uart_a), FUNCTION(uart_b), - FUNCTION(uart_ao_b_gpioz), + FUNCTION(uart_ao_b_z), FUNCTION(i2c0), FUNCTION(i2c1), FUNCTION(i2c2), -- cgit v1.1 From 2623ab651ff6598ed0cae1f20673a9c6a057263f Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Thu, 25 Jan 2018 14:37:49 +0800 Subject: dt-bindings: thermal: imx: update the binding to new method Due to the old method has already been marked as deprecated in binding doc, so obviously it's better to update the example to new bindings as well. Cc: Shawn Guo Cc: Leonard Crestez Cc: Zhang Rui Cc: Eduardo Valentin Cc: Rob Herring Cc: Mark Rutland Signed-off-by: Dong Aisheng Acked-by: Shawn Guo Signed-off-by: Rob Herring --- .../devicetree/bindings/thermal/imx-thermal.txt | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt index 28be51a..379eb76 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt @@ -22,7 +22,32 @@ Optional properties: - clocks : thermal sensor's clock source. Example: +ocotp: ocotp@21bc000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,imx6sx-ocotp", "syscon"; + reg = <0x021bc000 0x4000>; + clocks = <&clks IMX6SX_CLK_OCOTP>; + tempmon_calib: calib@38 { + reg = <0x38 4>; + }; + + tempmon_temp_grade: temp-grade@20 { + reg = <0x20 4>; + }; +}; + +tempmon: tempmon { + compatible = "fsl,imx6sx-tempmon", "fsl,imx6q-tempmon"; + interrupts = ; + fsl,tempmon = <&anatop>; + nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>; + nvmem-cell-names = "calib", "temp_grade"; + clocks = <&clks IMX6SX_CLK_PLL3_USB_OTG>; +}; + +Legacy method (Deprecated): tempmon { compatible = "fsl,imx6q-tempmon"; fsl,tempmon = <&anatop>; -- cgit v1.1 From b4bf200bc05747fb69c67a133a25f51636d5bae3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 31 Jan 2018 16:02:55 +0200 Subject: auxdisplay: Move arm-charlcd binding to correct folder This is a follow up to the commit 00846a4425d3 ("auxdisplay: Move arm-charlcd.c to drivers/auxdisplay folder") for Device Tree binding. No functional change. Signed-off-by: Andy Shevchenko Signed-off-by: Rob Herring --- .../devicetree/bindings/auxdisplay/arm-charlcd.txt | 18 ++++++++++++++++++ Documentation/devicetree/bindings/misc/arm-charlcd.txt | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) create mode 100644 Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt delete mode 100644 Documentation/devicetree/bindings/misc/arm-charlcd.txt diff --git a/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt b/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt new file mode 100644 index 0000000..e28e2aa --- /dev/null +++ b/Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt @@ -0,0 +1,18 @@ +ARM Versatile Character LCD +----------------------------------------------------- +This binding defines the character LCD interface found on ARM Versatile AB +and PB reference platforms. + +Required properties: +- compatible : "arm,versatile-clcd" +- reg : Location and size of character LCD registers + +Optional properties: +- interrupts - single interrupt for character LCD. The character LCD can + operate in polled mode without an interrupt. + +Example: + lcd@10008000 { + compatible = "arm,versatile-lcd"; + reg = <0x10008000 0x1000>; + }; diff --git a/Documentation/devicetree/bindings/misc/arm-charlcd.txt b/Documentation/devicetree/bindings/misc/arm-charlcd.txt deleted file mode 100644 index e28e2aa..0000000 --- a/Documentation/devicetree/bindings/misc/arm-charlcd.txt +++ /dev/null @@ -1,18 +0,0 @@ -ARM Versatile Character LCD ------------------------------------------------------ -This binding defines the character LCD interface found on ARM Versatile AB -and PB reference platforms. - -Required properties: -- compatible : "arm,versatile-clcd" -- reg : Location and size of character LCD registers - -Optional properties: -- interrupts - single interrupt for character LCD. The character LCD can - operate in polled mode without an interrupt. - -Example: - lcd@10008000 { - compatible = "arm,versatile-lcd"; - reg = <0x10008000 0x1000>; - }; -- cgit v1.1 From 1b12580af1d0677c3c3a19e35bfe5d59b03f737f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Feb 2018 17:15:40 +0800 Subject: bridge: check brport attr show in brport_show Now br_sysfs_if file flush doesn't have attr show. To read it will cause kernel panic after users chmod u+r this file. Xiong found this issue when running the commands: ip link add br0 type bridge ip link add type veth ip link set veth0 master br0 chmod u+r /sys/devices/virtual/net/veth0/brport/flush timeout 3 cat /sys/devices/virtual/net/veth0/brport/flush kernel crashed with NULL a pointer dereference call trace. This patch is to fix it by return -EINVAL when brport_attr->show is null, just the same as the check for brport_attr->store in brport_store(). Fixes: 9cf637473c85 ("bridge: add sysfs hook to flush forwarding table") Reported-by: Xiong Zhou Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/bridge/br_sysfs_if.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 0254c35..126a8ea 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -255,6 +255,9 @@ static ssize_t brport_show(struct kobject *kobj, struct brport_attribute *brport_attr = to_brport_attr(attr); struct net_bridge_port *p = to_brport(kobj); + if (!brport_attr->show) + return -EINVAL; + return brport_attr->show(p, buf); } -- cgit v1.1 From 27af86bb038d9c8b8066cd17854ddaf2ea92bce1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Feb 2018 18:29:06 +0800 Subject: sctp: do not pr_err for the duplicated node in transport rhlist The pr_err in sctp_hash_transport was supposed to report a sctp bug for using rhashtable/rhlist. The err '-EEXIST' introduced in Commit cd2b70875058 ("sctp: check duplicate node before inserting a new transport") doesn't belong to that case. So just return -EEXIST back without pr_err any kmsg. Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport") Reported-by: Wei Chen Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 141c9c4..0247cc4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -897,15 +897,12 @@ int sctp_hash_transport(struct sctp_transport *t) rhl_for_each_entry_rcu(transport, tmp, list, node) if (transport->asoc->ep == t->asoc->ep) { rcu_read_unlock(); - err = -EEXIST; - goto out; + return -EEXIST; } rcu_read_unlock(); err = rhltable_insert_key(&sctp_transport_hashtable, &arg, &t->node, sctp_hash_params); - -out: if (err) pr_err_once("insert transport fail, errno %d\n", err); -- cgit v1.1 From 947820b9595aa99f73de033ddcfe4c729c903c75 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Feb 2018 18:29:51 +0800 Subject: sctp: add SCTP_CID_I_DATA and SCTP_CID_I_FWD_TSN conversion in sctp_cname After the support for SCTP_CID_I_DATA and SCTP_CID_I_FWD_TSN chunks, the corresp conversion in sctp_cname should also be added. Otherwise, in some places, pr_debug will print them as "unknown chunk". Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/debug.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 291c97b..8f6c2e8 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -81,6 +81,12 @@ const char *sctp_cname(const union sctp_subtype cid) case SCTP_CID_RECONF: return "RECONF"; + case SCTP_CID_I_DATA: + return "I_DATA"; + + case SCTP_CID_I_FWD_TSN: + return "I_FWD_TSN"; + default: break; } -- cgit v1.1 From fb23403536eabe81ee90d32cb3051030b871d988 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Feb 2018 18:31:24 +0800 Subject: sctp: remove the useless check in sctp_renege_events Remove the 'if (chunk)' check in sctp_renege_events for idata process, as all renege commands are generated in sctp_eat_data and it can't be NULL. The same thing we already did for common data in sctp_ulpq_renege. Fixes: 94014e8d871a ("sctp: implement renege_events for sctp_stream_interleave") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream_interleave.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 8c7cf8f..86c26ec 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -954,12 +954,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, __u32 freed = 0; __u16 needed; - if (chunk) { - needed = ntohs(chunk->chunk_hdr->length); - needed -= sizeof(struct sctp_idata_chunk); - } else { - needed = SCTP_DEFAULT_MAXWINDOW; - } + needed = ntohs(chunk->chunk_hdr->length) - + sizeof(struct sctp_idata_chunk); if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { freed = sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed); -- cgit v1.1 From 808cf9e38cd7923036a99f459ccc8cf2955e47af Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Mon, 12 Feb 2018 12:57:04 +0200 Subject: tcp: Honor the eor bit in tcp_mtu_probe Avoid SKB coalescing if eor bit is set in one of the relevant SKBs. Fixes: c134ecb87817 ("tcp: Make use of MSG_EOR in tcp_sendmsg") Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e9f985e..b2bca37 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2027,6 +2027,24 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk) } } +static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) +{ + struct sk_buff *skb, *next; + + skb = tcp_send_head(sk); + tcp_for_write_queue_from_safe(skb, next, sk) { + if (len <= skb->len) + break; + + if (unlikely(TCP_SKB_CB(skb)->eor)) + return false; + + len -= skb->len; + } + + return true; +} + /* Create a new MTU probe if we are ready. * MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -2099,6 +2117,9 @@ static int tcp_mtu_probe(struct sock *sk) return 0; } + if (!tcp_can_coalesce_send_queue_head(sk, probe_size)) + return -1; + /* We're allowed to probe. Build it now. */ nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); if (!nskb) @@ -2134,6 +2155,10 @@ static int tcp_mtu_probe(struct sock *sk) /* We've eaten all the data from this skb. * Throw it away. */ TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + /* If this is the last SKB we copy and eor is set + * we need to propagate it to the new skb. + */ + TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); } else { -- cgit v1.1 From 18a5b052bb1ae77453c5e50fffe3470ced9ed82f Mon Sep 17 00:00:00 2001 From: Ingo van Lil Date: Mon, 12 Feb 2018 12:02:52 +0100 Subject: net: phy: fix wrong mask to phy_modify() When forcing a specific link mode, the PHY driver must clear the existing speed and duplex bits in BMCR while preserving some other control bits. This logic was accidentally inverted with the introduction of phy_modify(). Fixes: fea23fb591cc ("net: phy: convert read-modify-write to phy_modify()") Signed-off-by: Ingo van Lil Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b13eed2..d39ae77 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1382,7 +1382,7 @@ int genphy_setup_forced(struct phy_device *phydev) ctl |= BMCR_FULLDPLX; return phy_modify(phydev, MII_BMCR, - BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN, ctl); + ~(BMCR_LOOPBACK | BMCR_ISOLATE | BMCR_PDOWN), ctl); } EXPORT_SYMBOL(genphy_setup_forced); -- cgit v1.1 From dd62c236c0fe1166d037485494ec5ff6545480eb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Feb 2018 14:40:00 +0100 Subject: ravb: Remove obsolete explicit clock handling for WoL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if Wake-on-LAN is enabled, the EtherAVB device's module clock is manually kept running during system suspend, to make sure the device stays active. Since commit 91c719f5ec6671f7 ("soc: renesas: rcar-sysc: Keep wakeup sources active during system suspend") , this workaround is no longer needed. Hence remove all explicit clock handling to keep the device active. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index c87f57c..a95fbd5 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2255,9 +2255,6 @@ static int ravb_wol_setup(struct net_device *ndev) /* Enable MagicPacket */ ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE); - /* Increased clock usage so device won't be suspended */ - clk_enable(priv->clk); - return enable_irq_wake(priv->emac_irq); } @@ -2276,9 +2273,6 @@ static int ravb_wol_restore(struct net_device *ndev) if (ret < 0) return ret; - /* Restore clock usage count */ - clk_disable(priv->clk); - return disable_irq_wake(priv->emac_irq); } -- cgit v1.1 From b4580c952e89a332f077038ef19a7582950c082d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Feb 2018 14:42:36 +0100 Subject: sh_eth: Remove obsolete explicit clock handling for WoL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if Wake-on-LAN is enabled, the SH-ETH device's module clock is manually kept running during system suspend, to make sure the device stays active. Since commits 91c719f5ec6671f7 ("soc: renesas: rcar-sysc: Keep wakeup sources active during system suspend") and 744dddcae84441b1 ("clk: renesas: mstp: Keep wakeup sources active during system suspend"), this workaround is no longer needed. Hence remove all explicit clock handling to keep the device active. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index a197e11..92dcf87 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include @@ -2304,7 +2303,7 @@ static void sh_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) wol->supported = 0; wol->wolopts = 0; - if (mdp->cd->magic && mdp->clk) { + if (mdp->cd->magic) { wol->supported = WAKE_MAGIC; wol->wolopts = mdp->wol_enabled ? WAKE_MAGIC : 0; } @@ -2314,7 +2313,7 @@ static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct sh_eth_private *mdp = netdev_priv(ndev); - if (!mdp->cd->magic || !mdp->clk || wol->wolopts & ~WAKE_MAGIC) + if (!mdp->cd->magic || wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; mdp->wol_enabled = !!(wol->wolopts & WAKE_MAGIC); @@ -3153,11 +3152,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev) goto out_release; } - /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */ - mdp->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(mdp->clk)) - mdp->clk = NULL; - ndev->base_addr = res->start; spin_lock_init(&mdp->lock); @@ -3278,7 +3272,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) if (ret) goto out_napi_del; - if (mdp->cd->magic && mdp->clk) + if (mdp->cd->magic) device_set_wakeup_capable(&pdev->dev, 1); /* print device information */ @@ -3331,9 +3325,6 @@ static int sh_eth_wol_setup(struct net_device *ndev) /* Enable MagicPacket */ sh_eth_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE); - /* Increased clock usage so device won't be suspended */ - clk_enable(mdp->clk); - return enable_irq_wake(ndev->irq); } @@ -3359,9 +3350,6 @@ static int sh_eth_wol_restore(struct net_device *ndev) if (ret < 0) return ret; - /* Restore clock usage count */ - clk_disable(mdp->clk); - return disable_irq_wake(ndev->irq); } -- cgit v1.1 From 8e021a14d908475fea89ef85b5421865f7ad650d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 12 Feb 2018 17:10:19 +0300 Subject: net: thunderbolt: Tear down connection properly on suspend When suspending to mem or disk the Thunderbolt controller typically goes down as well tearing down the connection automatically. However, when suspend to idle is used this does not happen so we need to make sure the connection is properly disconnected before it can be re-established during resume. Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index ca5e375..71cf9ab 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -1270,10 +1270,7 @@ static int __maybe_unused tbnet_suspend(struct device *dev) stop_login(net); if (netif_running(net->dev)) { netif_device_detach(net->dev); - tb_ring_stop(net->rx_ring.ring); - tb_ring_stop(net->tx_ring.ring); - tbnet_free_buffers(&net->rx_ring); - tbnet_free_buffers(&net->tx_ring); + tbnet_tear_down(net, true); } return 0; -- cgit v1.1 From 027d351c541744c0c780dd5801c63e4b90750b90 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 12 Feb 2018 17:10:20 +0300 Subject: net: thunderbolt: Run disconnect flow asynchronously when logout is received The control channel calls registered callbacks when control messages such as XDomain protocol messages are received. The control channel handling is done in a worker running on system workqueue which means the networking driver can't run tear down flow which includes sending disconnect request and waiting for a reply in the same worker. Otherwise reply is never received (as the work is already running) and the operation times out. To fix this run disconnect ThunderboltIP flow asynchronously once ThunderboltIP logout message is received. Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 71cf9ab..e0d6760 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -166,6 +166,8 @@ struct tbnet_ring { * @connected_work: Worker that finalizes the ThunderboltIP connection * setup and enables DMA paths for high speed data * transfers + * @disconnect_work: Worker that handles tearing down the ThunderboltIP + * connection * @rx_hdr: Copy of the currently processed Rx frame. Used when a * network packet consists of multiple Thunderbolt frames. * In host byte order. @@ -190,6 +192,7 @@ struct tbnet { int login_retries; struct delayed_work login_work; struct work_struct connected_work; + struct work_struct disconnect_work; struct thunderbolt_ip_frame_header rx_hdr; struct tbnet_ring rx_ring; atomic_t frame_id; @@ -445,7 +448,7 @@ static int tbnet_handle_packet(const void *buf, size_t size, void *data) case TBIP_LOGOUT: ret = tbnet_logout_response(net, route, sequence, command_id); if (!ret) - tbnet_tear_down(net, false); + queue_work(system_long_wq, &net->disconnect_work); break; default: @@ -659,6 +662,13 @@ static void tbnet_login_work(struct work_struct *work) } } +static void tbnet_disconnect_work(struct work_struct *work) +{ + struct tbnet *net = container_of(work, typeof(*net), disconnect_work); + + tbnet_tear_down(net, false); +} + static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf, const struct thunderbolt_ip_frame_header *hdr) { @@ -881,6 +891,7 @@ static int tbnet_stop(struct net_device *dev) napi_disable(&net->napi); + cancel_work_sync(&net->disconnect_work); tbnet_tear_down(net, true); tb_ring_free(net->rx_ring.ring); @@ -1195,6 +1206,7 @@ static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id) net = netdev_priv(dev); INIT_DELAYED_WORK(&net->login_work, tbnet_login_work); INIT_WORK(&net->connected_work, tbnet_connected_work); + INIT_WORK(&net->disconnect_work, tbnet_disconnect_work); mutex_init(&net->connection_lock); atomic_set(&net->command_id, 0); atomic_set(&net->frame_id, 0); -- cgit v1.1 From 07a2e1cf398187814b405665b19d36425ec7a962 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 12 Feb 2018 18:20:11 +0100 Subject: net: cavium: fix NULL pointer dereference in cavium_ptp_put Prevent a kernel panic on reboot if ptp_clock is NULL by checking the ptp pointer before using it. Signed-off-by: Jan Glauber Fixes: 8c56df372bc1 ("net: add support for Cavium PTP coprocessor") Cc: Radoslaw Biernacki Cc: Aleksey Makarov Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/common/cavium_ptp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c index c87c9c6..d59497a 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.c +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c @@ -75,6 +75,8 @@ EXPORT_SYMBOL(cavium_ptp_get); void cavium_ptp_put(struct cavium_ptp *ptp) { + if (!ptp) + return; pci_dev_put(ptp->pdev); } EXPORT_SYMBOL(cavium_ptp_put); -- cgit v1.1 From 5b4e64beb6ab40f5d8b44500fe0fc201c25a0f16 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2018 20:46:28 +0100 Subject: extcon: axp288: Constify the axp288_pwr_up_down_info array Make the axp288_pwr_up_down_info array const char * const, this leads to the following section size changes: .text 0x674 -> 0x664 .data 0x148 -> 0x0f0 .rodata 0x0b4 -> 0x114 Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 0a44d43..c8f7b64 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -106,7 +106,7 @@ struct axp288_extcon_info { }; /* Power up/down reason string array */ -static char *axp288_pwr_up_down_info[] = { +static const char * const axp288_pwr_up_down_info[] = { "Last wake caused by user pressing the power button", "Last wake caused by a charger insertion", "Last wake caused by a battery insertion", @@ -124,7 +124,7 @@ static char *axp288_pwr_up_down_info[] = { */ static void axp288_extcon_log_rsi(struct axp288_extcon_info *info) { - char **rsi; + const char * const *rsi; unsigned int val, i, clear_mask = 0; int ret; -- cgit v1.1 From d82e233cee26ceacb9feb937a21bfb61b1826860 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2018 20:46:29 +0100 Subject: Revert "extcon: axp288: Redo charger type detection a couple of seconds after probe()" Redoing the charger type detection to give the usb-role-switch code time to properly set the role-switch is no good for mainline, since the usb-role-switch code is not yet in mainline (my bad, sorry). Also once we've that code there are better ways to fix this which are not prone to racing as doing a retry after 2 seconds is. This reverts commit 50082c17bb1455acacd376ae30dff92f2e1addbd. Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index c8f7b64..3ec4c71 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -1,7 +1,6 @@ /* * extcon-axp288.c - X-Power AXP288 PMIC extcon cable detection driver * - * Copyright (C) 2016-2017 Hans de Goede * Copyright (C) 2015 Intel Corporation * Author: Ramakrishna Pallala * @@ -98,11 +97,9 @@ struct axp288_extcon_info { struct device *dev; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; - struct delayed_work det_work; int irq[EXTCON_IRQ_END]; struct extcon_dev *edev; unsigned int previous_cable; - bool first_detect_done; }; /* Power up/down reason string array */ @@ -140,25 +137,6 @@ static void axp288_extcon_log_rsi(struct axp288_extcon_info *info) regmap_write(info->regmap, AXP288_PS_BOOT_REASON_REG, clear_mask); } -static void axp288_chrg_detect_complete(struct axp288_extcon_info *info) -{ - /* - * We depend on other drivers to do things like mux the data lines, - * enable/disable vbus based on the id-pin, etc. Sometimes the BIOS has - * not set these things up correctly resulting in the initial charger - * cable type detection giving a wrong result and we end up not charging - * or charging at only 0.5A. - * - * So we schedule a second cable type detection after 2 seconds to - * give the other drivers time to load and do their thing. - */ - if (!info->first_detect_done) { - queue_delayed_work(system_wq, &info->det_work, - msecs_to_jiffies(2000)); - info->first_detect_done = true; - } -} - static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) { int ret, stat, cfg, pwr_stat; @@ -223,8 +201,6 @@ no_vbus: info->previous_cable = cable; } - axp288_chrg_detect_complete(info); - return 0; dev_det_ret: @@ -246,11 +222,8 @@ static irqreturn_t axp288_extcon_isr(int irq, void *data) return IRQ_HANDLED; } -static void axp288_extcon_det_work(struct work_struct *work) +static void axp288_extcon_enable(struct axp288_extcon_info *info) { - struct axp288_extcon_info *info = - container_of(work, struct axp288_extcon_info, det_work.work); - regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG, BC_GLOBAL_RUN, 0); /* Enable the charger detection logic */ @@ -272,7 +245,6 @@ static int axp288_extcon_probe(struct platform_device *pdev) info->regmap = axp20x->regmap; info->regmap_irqc = axp20x->regmap_irqc; info->previous_cable = EXTCON_NONE; - INIT_DELAYED_WORK(&info->det_work, axp288_extcon_det_work); platform_set_drvdata(pdev, info); @@ -318,7 +290,7 @@ static int axp288_extcon_probe(struct platform_device *pdev) } /* Start charger cable type detection */ - queue_delayed_work(system_wq, &info->det_work, 0); + axp288_extcon_enable(info); return 0; } -- cgit v1.1 From 2363ec931e88ee095e5ac2e87e1c3b3b741f6fdc Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 18 Dec 2017 11:27:13 +0100 Subject: ARM64: dts: meson-gxl: add internal ethernet PHY irq Add the interrupt of the internal ethernet PHY Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 4f355f1..c851411 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -631,6 +631,7 @@ internal_phy: ethernet-phy@8 { compatible = "ethernet-phy-id0181.4400", "ethernet-phy-ieee802.3-c22"; + interrupts = ; reg = <8>; max-speed = <100>; }; -- cgit v1.1 From 77f5cdbd78ec5e17022725a5da476f4ca08b1dfa Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Thu, 11 Jan 2018 10:33:57 +0800 Subject: ARM64: dts: meson: uart: fix address space range The address space range is actually 0x18, fixed here. Reviewed-by: Jerome Brunet Signed-off-by: Yixun Lan Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 4 ++-- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index a806326..70c776e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -165,14 +165,14 @@ uart_A: serial@24000 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart"; - reg = <0x0 0x24000 0x0 0x14>; + reg = <0x0 0x24000 0x0 0x18>; interrupts = ; status = "disabled"; }; uart_B: serial@23000 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-uart"; - reg = <0x0 0x23000 0x0 0x14>; + reg = <0x0 0x23000 0x0 0x18>; interrupts = ; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6cb3c2a..4ee2e79 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -235,14 +235,14 @@ uart_A: serial@84c0 { compatible = "amlogic,meson-gx-uart"; - reg = <0x0 0x84c0 0x0 0x14>; + reg = <0x0 0x84c0 0x0 0x18>; interrupts = ; status = "disabled"; }; uart_B: serial@84dc { compatible = "amlogic,meson-gx-uart"; - reg = <0x0 0x84dc 0x0 0x14>; + reg = <0x0 0x84dc 0x0 0x18>; interrupts = ; status = "disabled"; }; @@ -287,7 +287,7 @@ uart_C: serial@8700 { compatible = "amlogic,meson-gx-uart"; - reg = <0x0 0x8700 0x0 0x14>; + reg = <0x0 0x8700 0x0 0x18>; interrupts = ; status = "disabled"; }; @@ -404,14 +404,14 @@ uart_AO: serial@4c0 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart"; - reg = <0x0 0x004c0 0x0 0x14>; + reg = <0x0 0x004c0 0x0 0x18>; interrupts = ; status = "disabled"; }; uart_AO_B: serial@4e0 { compatible = "amlogic,meson-gx-uart", "amlogic,meson-ao-uart"; - reg = <0x0 0x004e0 0x0 0x14>; + reg = <0x0 0x004e0 0x0 0x18>; interrupts = ; status = "disabled"; }; -- cgit v1.1 From c662f77331c98018ed256501557b4dd67133fbd7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Feb 2018 15:16:01 +1100 Subject: KVM: PPC: Fix compile error that occurs when CONFIG_ALTIVEC=n Commit accb757d798c ("KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_run", 2017-12-04) added a "goto out" statement and an "out:" label to kvm_arch_vcpu_ioctl_run(). Since the only "goto out" is inside a CONFIG_VSX block, compiling with CONFIG_VSX=n gives a warning that label "out" is defined but not used, and because arch/powerpc is compiled with -Werror, that becomes a compile error that makes the kernel build fail. Merge commit 1ab03c072feb ("Merge tag 'kvm-ppc-next-4.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc", 2018-02-09) added a similar block of code inside a #ifdef CONFIG_ALTIVEC, with a "goto out" statement. In order to make the build succeed, this adds a #ifdef around the "out:" label. This is a minimal, ugly fix, to be replaced later by a refactoring of the code. Since CONFIG_VSX depends on CONFIG_ALTIVEC, it is sufficient to use #ifdef CONFIG_ALTIVEC here. Fixes: accb757d798c ("KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_run") Reported-by: Christian Zigotzky Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 403e642..0083142 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +#ifdef CONFIG_ALTIVEC out: +#endif vcpu_put(vcpu); return r; } -- cgit v1.1 From 6df3877fc962c2bb3d0438633dfd24a185af6838 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Feb 2018 15:45:21 +1100 Subject: KVM: PPC: Book3S: Fix compile error that occurs with some gcc versions Some versions of gcc generate a warning that the variable "emulated" may be used uninitialized in function kvmppc_handle_load128_by2x64(). It would be used uninitialized if kvmppc_handle_load128_by2x64 was ever called with vcpu->arch.mmio_vmx_copy_nums == 0, but neither of the callers ever do that, so there is no actual bug. When gcc generates a warning, it causes the build to fail because arch/powerpc is compiled with -Werror. This silences the warning by initializing "emulated" to EMULATE_DONE. Fixes: 09f984961c13 ("KVM: PPC: Book3S: Add MMIO emulation for VMX instructions") Reported-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0083142..52c2053 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian) { - enum emulation_result emulated; + enum emulation_result emulated = EMULATE_DONE; while (vcpu->arch.mmio_vmx_copy_nums) { emulated = __kvmppc_handle_load(run, vcpu, rt, 8, -- cgit v1.1 From bda44ca2954e8e812aec71161ee191ab708ce568 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 2 Feb 2018 14:45:21 +0100 Subject: MAINTAINERS: update email address for Gregory CLEMENT Free Electrons is now Bootlin, change my email address accordingly. Actually the free-electrons.com emails are still valid but as I don't know for how many time, it's better to do the change now. Signed-off-by: Gregory CLEMENT --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260..5f33e6c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1590,7 +1590,7 @@ ARM/Marvell Dove/MV78xx0/Orion SOC support M: Jason Cooper M: Andrew Lunn M: Sebastian Hesselbarth -M: Gregory Clement +M: Gregory Clement L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/soc/dove/ @@ -1604,7 +1604,7 @@ F: arch/arm/boot/dts/orion5x* ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K SOC support M: Jason Cooper M: Andrew Lunn -M: Gregory Clement +M: Gregory Clement M: Sebastian Hesselbarth L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -- cgit v1.1 From 8aa36a8dcde3183d84db7b0d622ffddcebb61077 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:13 +0100 Subject: ARM: mvebu: Fix broken PL310_ERRATA_753970 selects The MACH_ARMADA_375 and MACH_ARMADA_38X boards select ARM_ERRATA_753970, but it was renamed to PL310_ERRATA_753970 by commit fa0ce4035d48 ("ARM: 7162/1: errata: tidy up Kconfig options for PL310 errata workarounds"). Fix the selects to use the new name. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Fixes: fa0ce4035d48 ("ARM: 7162/1: errata: tidy up Kconfig options for PL310 errata workarounds" cc: stable@vger.kernel.org Signed-off-by: Ulf Magnusson Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 6b32dc5..2c20599 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -41,7 +41,7 @@ config MACH_ARMADA_375 depends on ARCH_MULTI_V7 select ARMADA_370_XP_IRQ select ARM_ERRATA_720789 - select ARM_ERRATA_753970 + select PL310_ERRATA_753970 select ARM_GIC select ARMADA_375_CLK select HAVE_ARM_SCU @@ -57,7 +57,7 @@ config MACH_ARMADA_38X bool "Marvell Armada 380/385 boards" depends on ARCH_MULTI_V7 select ARM_ERRATA_720789 - select ARM_ERRATA_753970 + select PL310_ERRATA_753970 select ARM_GIC select ARM_GLOBAL_TIMER select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK -- cgit v1.1 From da360299b6734135a5f66d7db458dcc7801c826a Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 12 Feb 2018 23:59:51 +0100 Subject: uapi/if_ether.h: move __UAPI_DEF_ETHHDR libc define This fixes a compile problem of some user space applications by not including linux/libc-compat.h in uapi/if_ether.h. linux/libc-compat.h checks which "features" the header files, included from the libc, provide to make the Linux kernel uapi header files only provide no conflicting structures and enums. If a user application mixes kernel headers and libc headers it could happen that linux/libc-compat.h gets included too early where not all other libc headers are included yet. Then the linux/libc-compat.h would not prevent all the redefinitions and we run into compile problems. This patch removes the include of linux/libc-compat.h from uapi/if_ether.h to fix the recently introduced case, but not all as this is more or less impossible. It is no problem to do the check directly in the if_ether.h file and not in libc-compat.h as this does not need any fancy glibc header detection as glibc never provided struct ethhdr and should define __UAPI_DEF_ETHHDR by them self when they will provide this. The following test program did not compile correctly any more: #include #include #include int main(void) { return 0; } Fixes: 6926e041a892 ("uapi/if_ether.h: prevent redefinition of struct ethhdr") Reported-by: Guillaume Nault Cc: # 4.15 Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 6 +++++- include/uapi/linux/libc-compat.h | 6 ------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index f8cb576..8bbbcb5 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,7 +23,6 @@ #define _UAPI_LINUX_IF_ETHER_H #include -#include /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -151,6 +150,11 @@ * This is an Ethernet frame header. */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index fc29efaa..8254c93 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,10 +264,4 @@ #endif /* __GLIBC__ */ -/* Definitions for if_ether.h */ -/* allow libcs like musl to deactivate this, glibc does not implement this. */ -#ifndef __UAPI_DEF_ETHHDR -#define __UAPI_DEF_ETHHDR 1 -#endif - #endif /* _UAPI_LIBC_COMPAT_H */ -- cgit v1.1 From d4e9a408ef5de35dd82c1337b9fe48348b70047c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 13 Feb 2018 11:11:30 +0100 Subject: net: af_unix: fix typo in UNIX_SKB_FRAGS_SZ comment Change "minimun" to "minimum". Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d545e1d..2d465bd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1825,7 +1825,7 @@ out: } /* We use paged skbs for stream sockets, and limit occupancy to 32768 - * bytes, and a minimun of a full page. + * bytes, and a minimum of a full page. */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) -- cgit v1.1 From 0f2d2b2736b08dafa3bde31d048750fbc8df3a31 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Feb 2018 11:22:42 +0100 Subject: mlxsw: spectrum_router: Fix error path in mlxsw_sp_vr_create Since mlxsw_sp_fib_create() and mlxsw_sp_mr_table_create() use ERR_PTR macro to propagate int err through return of a pointer, the return value is not NULL in case of failure. So if one of the calls fails, one of vr->fib4, vr->fib6 or vr->mr4_table is not NULL and mlxsw_sp_vr_is_used wrongly assumes that vr is in use which leads to crash like following one: [ 1293.949291] BUG: unable to handle kernel NULL pointer dereference at 00000000000006c9 [ 1293.952729] IP: mlxsw_sp_mr_table_flush+0x15/0x70 [mlxsw_spectrum] Fix this by using local variables to hold the pointers and set vr->* only in case everything went fine. Fixes: 76610ebbde18 ("mlxsw: spectrum_router: Refactor virtual router handling") Fixes: a3d9bc506d64 ("mlxsw: spectrum_router: Extend virtual routers with IPv6 support") Fixes: d42b0965b1d4 ("mlxsw: spectrum_router: Add multicast routes notification handling functionality") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 32 ++++++++++++---------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f0b25ba..dcc6305 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -788,6 +788,9 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, u32 tb_id, struct netlink_ext_ack *extack) { + struct mlxsw_sp_mr_table *mr4_table; + struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; struct mlxsw_sp_vr *vr; int err; @@ -796,29 +799,30 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); return ERR_PTR(-EBUSY); } - vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); - if (IS_ERR(vr->fib4)) - return ERR_CAST(vr->fib4); - vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); - if (IS_ERR(vr->fib6)) { - err = PTR_ERR(vr->fib6); + fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(fib4)) + return ERR_CAST(fib4); + fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib6)) { + err = PTR_ERR(fib6); goto err_fib6_create; } - vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, - MLXSW_SP_L3_PROTO_IPV4); - if (IS_ERR(vr->mr4_table)) { - err = PTR_ERR(vr->mr4_table); + mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(mr4_table)) { + err = PTR_ERR(mr4_table); goto err_mr_table_create; } + vr->fib4 = fib4; + vr->fib6 = fib6; + vr->mr4_table = mr4_table; vr->tb_id = tb_id; return vr; err_mr_table_create: - mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6); - vr->fib6 = NULL; + mlxsw_sp_fib_destroy(mlxsw_sp, fib6); err_fib6_create: - mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4); - vr->fib4 = NULL; + mlxsw_sp_fib_destroy(mlxsw_sp, fib4); return ERR_PTR(err); } -- cgit v1.1 From bb047ddd145860ff24820320a21f03cf8c071b22 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Feb 2018 12:00:16 +0100 Subject: net: sched: don't set q pointer for shared blocks It is pointless to set block->q for block which are shared among multiple qdiscs. So remove the assignment in that case. Do a bit of code reshuffle to make block->index initialized at that point so we can use tcf_block_shared() helper. Reported-by: Cong Wang Fixes: 4861738775d7 ("net: sched: introduce shared filter blocks infrastructure") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2bc1bc2..a7dc727 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -376,17 +376,12 @@ struct tcf_net { static unsigned int tcf_net_id; static int tcf_block_insert(struct tcf_block *block, struct net *net, - u32 block_index, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { struct tcf_net *tn = net_generic(net, tcf_net_id); - int err; - err = idr_alloc_u32(&tn->idr, block, &block_index, block_index, - GFP_KERNEL); - if (err) - return err; - block->index = block_index; - return 0; + return idr_alloc_u32(&tn->idr, block, &block->index, block->index, + GFP_KERNEL); } static void tcf_block_remove(struct tcf_block *block, struct net *net) @@ -397,6 +392,7 @@ static void tcf_block_remove(struct tcf_block *block, struct net *net) } static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, + u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; @@ -419,10 +415,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, err = -ENOMEM; goto err_chain_create; } - block->net = qdisc_net(q); block->refcnt = 1; block->net = net; - block->q = q; + block->index = block_index; + + /* Don't store q pointer for blocks which are shared */ + if (!tcf_block_shared(block)) + block->q = q; return block; err_chain_create: @@ -518,13 +517,12 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, } if (!block) { - block = tcf_block_create(net, q, extack); + block = tcf_block_create(net, q, ei->block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); created = true; - if (ei->block_index) { - err = tcf_block_insert(block, net, - ei->block_index, extack); + if (tcf_block_shared(block)) { + err = tcf_block_insert(block, net, extack); if (err) goto err_block_insert; } -- cgit v1.1 From 339c21d7c459238135d87da8fefbfd25d98bc375 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Feb 2018 12:00:17 +0100 Subject: net: sched: fix tc_u_common lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offending commit wrongly assumes 1:1 mapping between block and q. However, there are multiple blocks for a single q for classful qdiscs. Since the obscure tc_u_common sharing mechanism expects it to be shared among a qdisc, fix it by storing q pointer in case the block is not shared. Reported-by: Paweł Staszewski Reported-by: Cong Wang Fixes: 7fa9d974f3c2 ("net: sched: cls_u32: use block instead of q in tc_u_common") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6c7601a..ed8b6a2 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -96,7 +96,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; - struct tcf_block *block; + void *ptr; int refcnt; struct idr handle_idr; struct hlist_node hnode; @@ -330,9 +330,25 @@ static struct hlist_head *tc_u_common_hash; #define U32_HASH_SHIFT 10 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT) +static void *tc_u_common_ptr(const struct tcf_proto *tp) +{ + struct tcf_block *block = tp->chain->block; + + /* The block sharing is currently supported only + * for classless qdiscs. In that case we use block + * for tc_u_common identification. In case the + * block is not shared, block->q is a valid pointer + * and we can use that. That works for classful qdiscs. + */ + if (tcf_block_shared(block)) + return block; + else + return block->q; +} + static unsigned int tc_u_hash(const struct tcf_proto *tp) { - return hash_ptr(tp->chain->block, U32_HASH_SHIFT); + return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT); } static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) @@ -342,7 +358,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) h = tc_u_hash(tp); hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { - if (tc->block == tp->chain->block) + if (tc->ptr == tc_u_common_ptr(tp)) return tc; } return NULL; @@ -371,7 +387,7 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } - tp_c->block = tp->chain->block; + tp_c->ptr = tc_u_common_ptr(tp); INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); -- cgit v1.1 From b2c93e300a11b419b4c67ce08e16fa1436d8118c Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 6 Feb 2018 16:23:39 -0600 Subject: selftests: sync: missing CFLAGS while compiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on patch: https://patchwork.kernel.org/patch/10042045/ arch64-linux-gnu-gcc -c sync.c -o sync/sync.o sync.c:42:29: fatal error: linux/sync_file.h: No such file or directory #include ^ CFLAGS is not used during the compile step, so the system instead of kernel headers are used. Fix this by adding CFLAGS to the OBJS compile rule. Reported-by: Lei Yang Signed-off-by: Anders Roxell Signed-off-by: Daniel Díaz Signed-off-by: Shuah Khan --- tools/testing/selftests/sync/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile index b3c8ba3..d0121a8 100644 --- a/tools/testing/selftests/sync/Makefile +++ b/tools/testing/selftests/sync/Makefile @@ -30,7 +30,7 @@ $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS) $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS) $(OBJS): $(OUTPUT)/%.o: %.c - $(CC) -c $^ -o $@ + $(CC) -c $^ -o $@ $(CFLAGS) $(TESTS): $(OUTPUT)/%.o: %.c $(CC) -c $^ -o $@ -- cgit v1.1 From 70b574e7d719bdf96d26528cb289f3e782e83979 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Feb 2018 11:59:50 +0100 Subject: selftest/vDSO: fix O= The vDSO selftests ignored the O= or KBUILD_OUTPUT= parameters. Fix it. Signed-off-by: Dominik Brodowski Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 3d5a62ff..f5d7a78 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../lib.mk + ifndef CROSS_COMPILE CFLAGS := -std=gnu99 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector @@ -6,16 +8,14 @@ ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -TEST_PROGS := vdso_test vdso_standalone_test_x86 +TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86 all: $(TEST_PROGS) -vdso_test: parse_vdso.c vdso_test.c -vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c +$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c +$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ vdso_standalone_test_x86.c parse_vdso.c \ - -o vdso_standalone_test_x86 + -o $@ -include ../lib.mk -clean: - rm -fr $(TEST_PROGS) +EXTRA_CLEAN := $(TEST_PROGS) endif -- cgit v1.1 From d4014d8cc6dfa964e3e66df525de2384e3583018 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Tue, 13 Feb 2018 09:46:16 -0800 Subject: rds: do not call ->conn_alloc with GFP_KERNEL Commit ebeeb1ad9b8a ("rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and rds connection/workq management") adds an rcu read critical section to __rd_conn_create. The memory allocations in that critcal section need to use GFP_ATOMIC to avoid sleeping. This patch was verified with syzkaller reproducer. Reported-by: syzbot+a0564419941aaae3fe3c@syzkaller.appspotmail.com Fixes: ebeeb1ad9b8a ("rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and rds connection/workq management") Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/connection.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 94e190fe..2da3176 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -224,7 +224,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (rds_destroy_pending(conn)) ret = -ENETDOWN; else - ret = trans->conn_alloc(conn, gfp); + ret = trans->conn_alloc(conn, GFP_ATOMIC); if (ret) { rcu_read_unlock(); kfree(conn->c_path); -- cgit v1.1 From 64136fb76039defd193e9e885bb722919d220021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Tue, 6 Feb 2018 17:52:05 -0600 Subject: selftests/android: Fix line continuation in Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Makefile lacks a couple of line continuation backslashes in an `if' clause, which can make the subsequent rsync command go awry over the whole filesystem (`rsync -a / /`). /bin/sh: -c: line 5: syntax error: unexpected end of file make[1]: [all] Error 1 (ignored) TEST=$DIR"_test.sh"; \ if [ -e $DIR/$TEST ]; then /bin/sh: -c: line 2: syntax error: unexpected end of file make[1]: [all] Error 1 (ignored) rsync -a $DIR/$TEST $BUILD_TARGET/; [...a myriad of:] [ rsync: readlink_stat("...") failed: Permission denied (13)] [ skipping non-regular file "..."] [ rsync: opendir "..." failed: Permission denied (13)] [and many other errors...] fi make[1]: fi: Command not found make[1]: [all] Error 127 (ignored) done make[1]: done: Command not found make[1]: [all] Error 127 (ignored) Signed-off-by: Daniel Díaz Acked-by: Pintu Agarwal Signed-off-by: Shuah Khan --- tools/testing/selftests/android/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index 1a74922..f6304d2 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -11,11 +11,11 @@ all: BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ - #SUBDIR test prog name should be in the form: SUBDIR_test.sh + #SUBDIR test prog name should be in the form: SUBDIR_test.sh \ TEST=$$DIR"_test.sh"; \ - if [ -e $$DIR/$$TEST ]; then - rsync -a $$DIR/$$TEST $$BUILD_TARGET/; - fi + if [ -e $$DIR/$$TEST ]; then \ + rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \ + fi \ done override define RUN_TESTS -- cgit v1.1 From 9a379e77033f02c4a071891afdf0f0a01eff8ccb Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Wed, 7 Feb 2018 14:47:20 +0530 Subject: selftests: pstore: Adding config fragment CONFIG_PSTORE_RAM=m pstore_tests and pstore_post_reboot_tests need CONFIG_PSTORE_RAM=m Signed-off-by: Naresh Kamboju Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/pstore/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config index 6a8e5a9..d148f9f 100644 --- a/tools/testing/selftests/pstore/config +++ b/tools/testing/selftests/pstore/config @@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y CONFIG_PSTORE=y CONFIG_PSTORE_PMSG=y CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=m -- cgit v1.1 From 9a606f8d55cfc932ec02172aaed4124fdc150047 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 6 Feb 2018 16:20:44 -0600 Subject: selftests: memfd: add config fragment for fuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memfd test requires to insert the fuse module (CONFIG_FUSE_FS). Signed-off-by: Anders Roxell Signed-off-by: Daniel Díaz Signed-off-by: Shuah Khan --- tools/testing/selftests/memfd/config | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/memfd/config diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config new file mode 100644 index 0000000..835c7f4 --- /dev/null +++ b/tools/testing/selftests/memfd/config @@ -0,0 +1 @@ +CONFIG_FUSE_FS=m -- cgit v1.1 From 0434352d3d2e950cf5e743f6062abd87de22f960 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 13 Feb 2018 20:25:50 +0100 Subject: extcon: int3496: process id-pin first so that we start with the right status Some other drivers may be waiting for our extcon to show-up, exiting their probe methods with -EPROBE_DEFER until we show up. These drivers will typically get the cable state directly after getting the extcon, this commit changes the int3496 code to wait for the initial processing of the id-pin to complete before exiting probe() with 0, which will cause devices waiting on the defered probe to get reprobed. This fixes a race where the initial work might still be running while other drivers were already calling extcon_get_state(). Fixes: 2f556bdb9f2e ("extcon: int3496: Add Intel INT3496 ACPI ... driver") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index c8691b5..191e99f 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -153,8 +153,9 @@ static int int3496_probe(struct platform_device *pdev) return ret; } - /* queue initial processing of id-pin */ + /* process id-pin so that we start with the right status */ queue_delayed_work(system_wq, &data->work, 0); + flush_delayed_work(&data->work); platform_set_drvdata(pdev, data); -- cgit v1.1 From cabe92a55e3a12005a4ac4d3954c9a174b0efe2a Mon Sep 17 00:00:00 2001 From: "Michael Kelley (EOSG)" Date: Wed, 24 Jan 2018 22:49:57 +0000 Subject: scsi: storvsc: Increase cmd_per_lun for higher speed devices Increase cmd_per_lun to allow more I/Os in progress per device, particularly for NVMe's. The Hyper-V host side can handle the higher count with no issues. Signed-off-by: Michael Kelley Reviewed-by: K. Y. Srinivasan Acked-by: K. Y. Srinivasan Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e07907d..8eadb30 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1656,7 +1656,7 @@ static struct scsi_host_template scsi_driver = { .eh_timed_out = storvsc_eh_timed_out, .slave_alloc = storvsc_device_alloc, .slave_configure = storvsc_device_configure, - .cmd_per_lun = 255, + .cmd_per_lun = 2048, .this_id = -1, .use_clustering = ENABLE_CLUSTERING, /* Make sure we dont get a sg segment crosses a page boundary */ -- cgit v1.1 From eaf75d1815dad230dac2f1e8f1dc0349b2d50071 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 1 Feb 2018 10:33:17 -0800 Subject: scsi: qla2xxx: Fix double free bug after firmware timeout This patch is based on Max's original patch. When the qla2xxx firmware is unavailable, eventually qla2x00_sp_timeout() is reached, which calls the timeout function and frees the srb_t instance. The timeout function always resolves to qla2x00_async_iocb_timeout(), which invokes another callback function called "done". All of these qla2x00_*_sp_done() callbacks also free the srb_t instance; after returning to qla2x00_sp_timeout(), it is freed again. The fix is to remove the "sp->free(sp)" call from qla2x00_sp_timeout() and add it to those code paths in qla2x00_async_iocb_timeout() which do not already free the object. This is how it looks like with KASAN: BUG: KASAN: use-after-free in qla2x00_sp_timeout+0x228/0x250 Read of size 8 at addr ffff88278147a590 by task swapper/2/0 Allocated by task 1502: save_stack+0x33/0xa0 kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc+0xb8/0x1c0 mempool_alloc+0xd6/0x260 qla24xx_async_gnl+0x3c5/0x1100 Freed by task 0: save_stack+0x33/0xa0 kasan_slab_free+0x72/0xc0 kmem_cache_free+0x75/0x200 qla24xx_async_gnl_sp_done+0x556/0x9e0 qla2x00_async_iocb_timeout+0x1c7/0x420 qla2x00_sp_timeout+0x16d/0x250 call_timer_fn+0x36/0x200 The buggy address belongs to the object at ffff88278147a440 which belongs to the cache qla2xxx_srbs of size 344 The buggy address is located 336 bytes inside of 344-byte region [ffff88278147a440, ffff88278147a598) Reported-by: Max Kellermann Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Cc: Max Kellermann Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index aececf66..2dea112 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -59,8 +59,6 @@ qla2x00_sp_timeout(struct timer_list *t) req->outstanding_cmds[sp->handle] = NULL; iocb = &sp->u.iocb_cmd; iocb->timeout(sp); - if (sp->type != SRB_ELS_DCMD) - sp->free(sp); spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); } @@ -102,7 +100,6 @@ qla2x00_async_iocb_timeout(void *data) srb_t *sp = data; fc_port_t *fcport = sp->fcport; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct event_arg ea; if (fcport) { ql_dbg(ql_dbg_disc, fcport->vha, 0x2071, @@ -117,25 +114,13 @@ qla2x00_async_iocb_timeout(void *data) switch (sp->type) { case SRB_LOGIN_CMD: - if (!fcport) - break; /* Retry as needed. */ lio->u.logio.data[0] = MBS_COMMAND_ERROR; lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ? QLA_LOGIO_LOGIN_RETRIED : 0; - memset(&ea, 0, sizeof(ea)); - ea.event = FCME_PLOGI_DONE; - ea.fcport = sp->fcport; - ea.data[0] = lio->u.logio.data[0]; - ea.data[1] = lio->u.logio.data[1]; - ea.sp = sp; - qla24xx_handle_plogi_done_event(fcport->vha, &ea); + sp->done(sp, QLA_FUNCTION_TIMEOUT); break; case SRB_LOGOUT_CMD: - if (!fcport) - break; - qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); - break; case SRB_CT_PTHRU_CMD: case SRB_MB_IOCB: case SRB_NACK_PLOGI: @@ -235,12 +220,10 @@ static void qla2x00_async_logout_sp_done(void *ptr, int res) { srb_t *sp = ptr; - struct srb_iocb *lio = &sp->u.iocb_cmd; sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); - if (!test_bit(UNLOADING, &sp->vha->dpc_flags)) - qla2x00_post_async_logout_done_work(sp->vha, sp->fcport, - lio->u.logio.data); + sp->fcport->login_gen++; + qlt_logo_completion_handler(sp->fcport, res); sp->free(sp); } -- cgit v1.1 From f3767225021a48fc419d963559793e585da88b3d Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 1 Feb 2018 10:33:18 -0800 Subject: scsi: qla2xxx: Fix incorrect handle for abort IOCB This patch fixes incorrect handle used for abort IOCB. Fixes: b027a5ace443 ("scsi: qla2xxx: Fix queue ID for async abort with Multiqueue") Signed-off-by: Darren Trapp Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_iocb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 1b62e94..8d00d55 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3275,12 +3275,11 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) memset(abt_iocb, 0, sizeof(struct abort_entry_24xx)); abt_iocb->entry_type = ABORT_IOCB_TYPE; abt_iocb->entry_count = 1; - abt_iocb->handle = - cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no, - aio->u.abt.cmd_hndl)); + abt_iocb->handle = cpu_to_le32(MAKE_HANDLE(req->id, sp->handle)); abt_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); abt_iocb->handle_to_abort = - cpu_to_le32(MAKE_HANDLE(req->id, aio->u.abt.cmd_hndl)); + cpu_to_le32(MAKE_HANDLE(aio->u.abt.req_que_no, + aio->u.abt.cmd_hndl)); abt_iocb->port_id[0] = sp->fcport->d_id.b.al_pa; abt_iocb->port_id[1] = sp->fcport->d_id.b.area; abt_iocb->port_id[2] = sp->fcport->d_id.b.domain; -- cgit v1.1 From 1683ce57f568c7c92d53e9234624a53554a29cd5 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Wed, 7 Feb 2018 08:12:35 -0800 Subject: scsi: qedi: Fix truncation of CHAP name and secret The data in NVRAM is not guaranteed to be NUL terminated. Since snprintf expects byte-stream to accommodate null byte, the CHAP secret is truncated. Use sprintf instead of snprintf to fix the truncation of CHAP name and secret. Signed-off-by: Andrew Vasquez Signed-off-by: Nilesh Javali Reviewed-by: Bart Van Assche Acked-by: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 58596d1..7c05be6 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1830,8 +1830,8 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_INI_INITIATOR_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n", - initiator->initiator_name.byte); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + initiator->initiator_name.byte); break; default: rc = 0; @@ -1898,8 +1898,8 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, switch (type) { case ISCSI_BOOT_TGT_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n", - block->target[idx].target_name.byte); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + block->target[idx].target_name.byte); break; case ISCSI_BOOT_TGT_IP_ADDR: if (ipv6_en) @@ -1920,20 +1920,20 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, block->target[idx].lun.value[0]); break; case ISCSI_BOOT_TGT_CHAP_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n", - chap_name); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n", - chap_secret); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, "%s\n", - mchap_name); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = snprintf(str, NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, "%s\n", - mchap_secret); + rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); -- cgit v1.1 From 2c08fe64e4f3b8528f6880b2bd7a66cce6fbcec3 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Feb 2018 08:12:36 -0800 Subject: scsi: qedi: Cleanup local str variable Signed-off-by: Nilesh Javali Reviewed-by: Bart Van Assche Acked-by: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 7c05be6..8b637d1 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1723,7 +1723,6 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf) { struct qedi_ctx *qedi = data; struct nvm_iscsi_initiator *initiator; - char *str = buf; int rc = 1; u32 ipv6_en, dhcp_en, ip_len; struct nvm_iscsi_block *block; @@ -1757,32 +1756,32 @@ static ssize_t qedi_show_boot_eth_info(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_ETH_IP_ADDR: - rc = snprintf(str, ip_len, fmt, ip); + rc = snprintf(buf, ip_len, fmt, ip); break; case ISCSI_BOOT_ETH_SUBNET_MASK: - rc = snprintf(str, ip_len, fmt, sub); + rc = snprintf(buf, ip_len, fmt, sub); break; case ISCSI_BOOT_ETH_GATEWAY: - rc = snprintf(str, ip_len, fmt, gw); + rc = snprintf(buf, ip_len, fmt, gw); break; case ISCSI_BOOT_ETH_FLAGS: - rc = snprintf(str, 3, "%hhd\n", + rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); break; case ISCSI_BOOT_ETH_INDEX: - rc = snprintf(str, 3, "0\n"); + rc = snprintf(buf, 3, "0\n"); break; case ISCSI_BOOT_ETH_MAC: - rc = sysfs_format_mac(str, qedi->mac, ETH_ALEN); + rc = sysfs_format_mac(buf, qedi->mac, ETH_ALEN); break; case ISCSI_BOOT_ETH_VLAN: - rc = snprintf(str, 12, "%d\n", + rc = snprintf(buf, 12, "%d\n", GET_FIELD2(initiator->generic_cont0, NVM_ISCSI_CFG_INITIATOR_VLAN)); break; case ISCSI_BOOT_ETH_ORIGIN: if (dhcp_en) - rc = snprintf(str, 3, "3\n"); + rc = snprintf(buf, 3, "3\n"); break; default: rc = 0; @@ -1818,7 +1817,6 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf) { struct qedi_ctx *qedi = data; struct nvm_iscsi_initiator *initiator; - char *str = buf; int rc; struct nvm_iscsi_block *block; @@ -1830,7 +1828,7 @@ static ssize_t qedi_show_boot_ini_info(void *data, int type, char *buf) switch (type) { case ISCSI_BOOT_INI_INITIATOR_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, initiator->initiator_name.byte); break; default: @@ -1859,7 +1857,6 @@ static ssize_t qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, char *buf, enum qedi_nvm_tgts idx) { - char *str = buf; int rc = 1; u32 ctrl_flags, ipv6_en, chap_en, mchap_en, ip_len; struct nvm_iscsi_block *block; @@ -1898,48 +1895,48 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, switch (type) { case ISCSI_BOOT_TGT_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, block->target[idx].target_name.byte); break; case ISCSI_BOOT_TGT_IP_ADDR: if (ipv6_en) - rc = snprintf(str, ip_len, "%pI6\n", + rc = snprintf(buf, ip_len, "%pI6\n", block->target[idx].ipv6_addr.byte); else - rc = snprintf(str, ip_len, "%pI4\n", + rc = snprintf(buf, ip_len, "%pI4\n", block->target[idx].ipv4_addr.byte); break; case ISCSI_BOOT_TGT_PORT: - rc = snprintf(str, 12, "%d\n", + rc = snprintf(buf, 12, "%d\n", GET_FIELD2(block->target[idx].generic_cont0, NVM_ISCSI_CFG_TARGET_TCP_PORT)); break; case ISCSI_BOOT_TGT_LUN: - rc = snprintf(str, 22, "%.*d\n", + rc = snprintf(buf, 22, "%.*d\n", block->target[idx].lun.value[1], block->target[idx].lun.value[0]); break; case ISCSI_BOOT_TGT_CHAP_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = sprintf(str, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: - rc = snprintf(str, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); + rc = snprintf(buf, 3, "%hhd\n", SYSFS_FLAG_FW_SEL_BOOT); break; case ISCSI_BOOT_TGT_NIC_ASSOC: - rc = snprintf(str, 3, "0\n"); + rc = snprintf(buf, 3, "0\n"); break; default: rc = 0; -- cgit v1.1 From 00c20cdc79259c6c5bf978b21af96c2d3edb646d Mon Sep 17 00:00:00 2001 From: Meelis Roos Date: Fri, 9 Feb 2018 08:57:44 +0200 Subject: scsi: aacraid: fix shutdown crash when init fails When aacraid init fails with "AAC0: adapter self-test failed.", shutdown leads to UBSAN warning and then oops: [154316.118423] ================================================================================ [154316.118508] UBSAN: Undefined behaviour in drivers/scsi/scsi_lib.c:2328:27 [154316.118566] member access within null pointer of type 'struct Scsi_Host' [154316.118631] CPU: 2 PID: 14530 Comm: reboot Tainted: G W 4.15.0-dirty #89 [154316.118701] Hardware name: Hewlett Packard HP NetServer/HP System Board, BIOS 4.06.46 PW 06/25/2003 [154316.118774] Call Trace: [154316.118848] dump_stack+0x48/0x65 [154316.118916] ubsan_epilogue+0xe/0x40 [154316.118976] __ubsan_handle_type_mismatch+0xfb/0x180 [154316.119043] scsi_block_requests+0x20/0x30 [154316.119135] aac_shutdown+0x18/0x40 [aacraid] [154316.119196] pci_device_shutdown+0x33/0x50 [154316.119269] device_shutdown+0x18a/0x390 [...] [154316.123435] BUG: unable to handle kernel NULL pointer dereference at 000000f4 [154316.123515] IP: scsi_block_requests+0xa/0x30 This is because aac_shutdown() does struct Scsi_Host *shost = pci_get_drvdata(dev); scsi_block_requests(shost); and that assumes shost has been assigned with pci_set_drvdata(). However, pci_set_drvdata(pdev, shost) is done in aac_probe_one() far after bailing out with error from calling the init function ((*aac_drivers[index].init)(aac)), and when the init function fails, no error is returned from aac_probe_one() so PCI layer assumes there is driver attached, and tries to shut it down later. Fix it by returning error from aac_probe_one() when card-specific init function fails. This fixes reboot on my HP NetRAID-4M with dead battery. Signed-off-by: Meelis Roos Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/linit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index ad6ec57..b730e8e 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1690,8 +1690,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) * Map in the registers from the adapter. */ aac->base_size = AAC_MIN_FOOTPRINT_SIZE; - if ((*aac_drivers[index].init)(aac)) + if ((*aac_drivers[index].init)(aac)) { + error = -ENODEV; goto out_unmap; + } if (aac->sync_mode) { if (aac_sync_mode) -- cgit v1.1 From 1bc5ad3a6acdcf56f83272f2de1cd2389ea9e9e2 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Sun, 11 Feb 2018 22:48:41 -0800 Subject: scsi: qla4xxx: skip error recovery in case of register disconnect. A system crashes when continuously removing/re-adding the storage controller. Signed-off-by: Manish Rangankar Reviewed-by: Ewan D. Milne Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_def.h | 2 ++ drivers/scsi/qla4xxx/ql4_os.c | 46 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index fc23371..817f312 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -168,6 +168,8 @@ #define DEV_DB_NON_PERSISTENT 0 #define DEV_DB_PERSISTENT 1 +#define QL4_ISP_REG_DISCONNECT 0xffffffffU + #define COPY_ISID(dst_isid, src_isid) { \ int i, j; \ for (i = 0, j = ISID_SIZE - 1; i < ISID_SIZE;) \ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 82e889b..fc2c97d 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -262,6 +262,24 @@ static struct iscsi_transport qla4xxx_iscsi_transport = { static struct scsi_transport_template *qla4xxx_scsi_transport; +static int qla4xxx_isp_check_reg(struct scsi_qla_host *ha) +{ + u32 reg_val = 0; + int rval = QLA_SUCCESS; + + if (is_qla8022(ha)) + reg_val = readl(&ha->qla4_82xx_reg->host_status); + else if (is_qla8032(ha) || is_qla8042(ha)) + reg_val = qla4_8xxx_rd_direct(ha, QLA8XXX_PEG_ALIVE_COUNTER); + else + reg_val = readw(&ha->reg->ctrl_status); + + if (reg_val == QL4_ISP_REG_DISCONNECT) + rval = QLA_ERROR; + + return rval; +} + static int qla4xxx_send_ping(struct Scsi_Host *shost, uint32_t iface_num, uint32_t iface_type, uint32_t payload_size, uint32_t pid, struct sockaddr *dst_addr) @@ -9186,10 +9204,17 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd) struct srb *srb = NULL; int ret = SUCCESS; int wait = 0; + int rval; ql4_printk(KERN_INFO, ha, "scsi%ld:%d:%llu: Abort command issued cmd=%p, cdb=0x%x\n", ha->host_no, id, lun, cmd, cmd->cmnd[0]); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + spin_lock_irqsave(&ha->hardware_lock, flags); srb = (struct srb *) CMD_SP(cmd); if (!srb) { @@ -9241,6 +9266,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) struct scsi_qla_host *ha = to_qla_host(cmd->device->host); struct ddb_entry *ddb_entry = cmd->device->hostdata; int ret = FAILED, stat; + int rval; if (!ddb_entry) return ret; @@ -9260,6 +9286,12 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + /* FIXME: wait for hba to go online */ stat = qla4xxx_reset_lun(ha, ddb_entry, cmd->device->lun); if (stat != QLA_SUCCESS) { @@ -9303,6 +9335,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) struct scsi_qla_host *ha = to_qla_host(cmd->device->host); struct ddb_entry *ddb_entry = cmd->device->hostdata; int stat, ret; + int rval; if (!ddb_entry) return FAILED; @@ -9320,6 +9353,12 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) ha->host_no, cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + stat = qla4xxx_reset_target(ha, ddb_entry); if (stat != QLA_SUCCESS) { starget_printk(KERN_INFO, scsi_target(cmd->device), @@ -9374,9 +9413,16 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd) { int return_status = FAILED; struct scsi_qla_host *ha; + int rval; ha = to_qla_host(cmd->device->host); + rval = qla4xxx_isp_check_reg(ha); + if (rval != QLA_SUCCESS) { + ql4_printk(KERN_INFO, ha, "PCI/Register disconnect, exiting.\n"); + return FAILED; + } + if ((is_qla8032(ha) || is_qla8042(ha)) && ql4xdontresethba) qla4_83xx_set_idc_dontreset(ha); -- cgit v1.1 From 9a3efb6b661f71d5675369ace9257833f0e78ef3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 13 Feb 2018 19:00:21 -0800 Subject: bpf: fix memory leak in lpm_trie map_free callback function There is a memory leak happening in lpm_trie map_free callback function trie_free. The trie structure itself does not get freed. Also, trie_free function did not do synchronize_rcu before freeing various data structures. This is incorrect as some rcu_read_lock region(s) for lookup, update, delete or get_next_key may not complete yet. The fix is to add synchronize_rcu in the beginning of trie_free. The useless spin_lock is removed from this function as well. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Mathieu Malaterre Reported-by: Alexei Starovoitov Tested-by: Mathieu Malaterre Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 7b469d1..a75e02c 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map) struct lpm_trie_node __rcu **slot; struct lpm_trie_node *node; - raw_spin_lock(&trie->lock); + /* Wait for outstanding programs to complete + * update/lookup/delete/get_next_key and free the trie. + */ + synchronize_rcu(); /* Always start at the root and walk down to a node that has no * children. Then free that node, nullify its reference in the parent @@ -569,7 +572,7 @@ static void trie_free(struct bpf_map *map) node = rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)); if (!node) - goto unlock; + goto out; if (rcu_access_pointer(node->child[0])) { slot = &node->child[0]; @@ -587,8 +590,8 @@ static void trie_free(struct bpf_map *map) } } -unlock: - raw_spin_unlock(&trie->lock); +out: + kfree(trie); } static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) -- cgit v1.1 From 952fad8e323975c4e826b659087d2648777594a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2018 15:33:52 -0800 Subject: bpf: fix sock_map_alloc() error path In case user program provides silly parameters, we want a map_alloc() handler to return an error, not a NULL pointer, otherwise we crash later in find_and_alloc_map() Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 48c3341..a927e89 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock, static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - int err = -EINVAL; u64 cost; + int err; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ cost = (u64) stab->map.max_entries * sizeof(struct sock *); + err = -EINVAL; if (cost >= U32_MAX - PAGE_SIZE) goto free_stab; -- cgit v1.1 From a9810327726b01404ecde082c075a7468c433ddf Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 29 Jan 2018 12:22:45 +0100 Subject: KVM: s390: optimize wakeup for exitless interrupts For interrupt injection of floating interrupts we queue the interrupt either in the GISA or in the floating interrupt list. The first CPU that looks at these data structures - either in KVM code or hardware will then deliver that interrupt. To minimize latency we also: -a: choose a VCPU to deliver that interrupt. We prefer idle CPUs -b: we wake up the host thread that runs the VCPU -c: set an I/O intervention bit for that CPU so that it exits guest context as soon as the PSW I/O mask is enabled This will make sure that this CPU will execute the interrupt delivery code of KVM very soon. We can now optimize the injection case if we have exitless interrupts. The wakeup is still necessary in case the target CPU sleeps. We can avoid the I/O intervention request bit though. Whenever this intervention request would be handled, the hardware could also directly inject the interrupt on that CPU, no need to go through the interrupt injection loop of KVM. Cc: Michael Mueller Reviewed-by: Halil Pasic Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index aabf46f..337a69b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1701,7 +1701,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); + if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) + kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); break; default: kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT); -- cgit v1.1 From 8846f3175c6bf16382b06a4b9755e5296c0f921c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 12 Feb 2018 12:33:39 +0000 Subject: KVM: s390: do not set intervention requests for GISA interrupts If GISA is available, we do not have to kick CPUs out of SIE to deliver interrupts. The hardware can deliver such interrupts while running. Cc: Michael Mueller Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 337a69b..e399495 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -236,10 +236,15 @@ static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gis return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); } -static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) +static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.float_int.pending_irqs | - vcpu->arch.local_int.pending_irqs | + vcpu->arch.local_int.pending_irqs; +} + +static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) +{ + return pending_irqs_no_gisa(vcpu) | kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; } @@ -337,7 +342,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) { - if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) + if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) return; else if (psw_ioint_disabled(vcpu)) kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); -- cgit v1.1 From f315104ad8b0c32be13eac628569ae707c332cb5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 13 Feb 2018 13:55:49 +0000 Subject: KVM: s390: force bp isolation for VSIE If the guest runs with bp isolation when doing a SIE instruction, we must also run the nested guest with bp isolation when emulating that SIE instruction. This is done by activating BPBC in the lpar, which acts as an override for lower level guests. Signed-off-by: Christian Borntraeger Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/vsie.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ec77270..8961e39 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -821,6 +821,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + int guest_bp_isolation; int rc; handle_last_fault(vcpu, vsie_page); @@ -831,6 +832,20 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) s390_handle_mcck(); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + + /* save current guest state of bp isolation override */ + guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST); + + /* + * The guest is running with BPBC, so we have to force it on for our + * nested guest. This is done by enabling BPBC globally, so the BPBC + * control in the SCB (which the nested guest can modify) is simply + * ignored. + */ + if (test_kvm_facility(vcpu->kvm, 82) && + vcpu->arch.sie_block->fpf & FPF_BPBC) + set_thread_flag(TIF_ISOLATE_BP_GUEST); + local_irq_disable(); guest_enter_irqoff(); local_irq_enable(); @@ -840,6 +855,11 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) local_irq_disable(); guest_exit_irqoff(); local_irq_enable(); + + /* restore guest state for bp isolation override */ + if (!guest_bp_isolation) + clear_thread_flag(TIF_ISOLATE_BP_GUEST); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); if (rc == -EINTR) { -- cgit v1.1 From 6db4263fec9e550e0cdaed732f4af77a44c10f5f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 8 Apr 2016 17:52:39 +0200 Subject: KVM: s390: use switch vs jump table in priv.c Instead of having huge jump tables for function selection, let's use normal switch/case statements for the instruction handlers in priv.c This allows the compiler to make the right decision depending on the situation (e.g. avoid jump-tables for thunks). Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 183 +++++++++++++++++++++++++-------------------------- 1 file changed, 91 insertions(+), 92 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c4c4e15..a74578c 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -795,55 +795,60 @@ out: return rc; } -static const intercept_handler_t b2_handlers[256] = { - [0x02] = handle_stidp, - [0x04] = handle_set_clock, - [0x10] = handle_set_prefix, - [0x11] = handle_store_prefix, - [0x12] = handle_store_cpu_address, - [0x14] = kvm_s390_handle_vsie, - [0x21] = handle_ipte_interlock, - [0x29] = handle_iske, - [0x2a] = handle_rrbe, - [0x2b] = handle_sske, - [0x2c] = handle_test_block, - [0x30] = handle_io_inst, - [0x31] = handle_io_inst, - [0x32] = handle_io_inst, - [0x33] = handle_io_inst, - [0x34] = handle_io_inst, - [0x35] = handle_io_inst, - [0x36] = handle_io_inst, - [0x37] = handle_io_inst, - [0x38] = handle_io_inst, - [0x39] = handle_io_inst, - [0x3a] = handle_io_inst, - [0x3b] = handle_io_inst, - [0x3c] = handle_io_inst, - [0x50] = handle_ipte_interlock, - [0x56] = handle_sthyi, - [0x5f] = handle_io_inst, - [0x74] = handle_io_inst, - [0x76] = handle_io_inst, - [0x7d] = handle_stsi, - [0xb1] = handle_stfl, - [0xb2] = handle_lpswe, -}; - int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - /* - * A lot of B2 instructions are priviledged. Here we check for - * the privileged ones, that we can handle in the kernel. - * Anything else goes to userspace. - */ - handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); - - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x02: + return handle_stidp(vcpu); + case 0x04: + return handle_set_clock(vcpu); + case 0x10: + return handle_set_prefix(vcpu); + case 0x11: + return handle_store_prefix(vcpu); + case 0x12: + return handle_store_cpu_address(vcpu); + case 0x14: + return kvm_s390_handle_vsie(vcpu); + case 0x21: + case 0x50: + return handle_ipte_interlock(vcpu); + case 0x29: + return handle_iske(vcpu); + case 0x2a: + return handle_rrbe(vcpu); + case 0x2b: + return handle_sske(vcpu); + case 0x2c: + return handle_test_block(vcpu); + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + case 0x3c: + case 0x5f: + case 0x74: + case 0x76: + return handle_io_inst(vcpu); + case 0x56: + return handle_sthyi(vcpu); + case 0x7d: + return handle_stsi(vcpu); + case 0xb1: + return handle_stfl(vcpu); + case 0xb2: + return handle_lpswe(vcpu); + default: + return -EOPNOTSUPP; + } } static int handle_epsw(struct kvm_vcpu *vcpu) @@ -1105,25 +1110,22 @@ static int handle_essa(struct kvm_vcpu *vcpu) return 0; } -static const intercept_handler_t b9_handlers[256] = { - [0x8a] = handle_ipte_interlock, - [0x8d] = handle_epsw, - [0x8e] = handle_ipte_interlock, - [0x8f] = handle_ipte_interlock, - [0xab] = handle_essa, - [0xaf] = handle_pfmf, -}; - int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - /* This is handled just as for the B2 instructions. */ - handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); - - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x8a: + case 0x8e: + case 0x8f: + return handle_ipte_interlock(vcpu); + case 0x8d: + return handle_epsw(vcpu); + case 0xab: + return handle_essa(vcpu); + case 0xaf: + return handle_pfmf(vcpu); + default: + return -EOPNOTSUPP; + } } int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) @@ -1271,22 +1273,20 @@ static int handle_stctg(struct kvm_vcpu *vcpu) return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } -static const intercept_handler_t eb_handlers[256] = { - [0x2f] = handle_lctlg, - [0x25] = handle_stctg, - [0x60] = handle_ri, - [0x61] = handle_ri, - [0x62] = handle_ri, -}; - int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipb & 0x000000ff) { + case 0x25: + return handle_stctg(vcpu); + case 0x2f: + return handle_lctlg(vcpu); + case 0x60: + case 0x61: + case 0x62: + return handle_ri(vcpu); + default: + return -EOPNOTSUPP; + } } static int handle_tprot(struct kvm_vcpu *vcpu) @@ -1346,10 +1346,12 @@ out_unlock: int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) { - /* For e5xx... instructions we only handle TPROT */ - if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01) + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x01: return handle_tprot(vcpu); - return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } } static int handle_sckpf(struct kvm_vcpu *vcpu) @@ -1380,17 +1382,14 @@ static int handle_ptff(struct kvm_vcpu *vcpu) return 0; } -static const intercept_handler_t x01_handlers[256] = { - [0x04] = handle_ptff, - [0x07] = handle_sckpf, -}; - int kvm_s390_handle_01(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - - handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->ipa & 0x00ff) { + case 0x04: + return handle_ptff(vcpu); + case 0x07: + return handle_sckpf(vcpu); + default: + return -EOPNOTSUPP; + } } -- cgit v1.1 From cb7485da3ed1ac4ef6c71d4b2b715f8b87f118c8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 6 Feb 2018 10:19:28 +0000 Subject: KVM: s390: use switch vs jump table in intercept.c Instead of having huge jump tables for function selection, let's use normal switch/case statements for the instruction handlers in intercept.c We can now also get rid of intercept_handler_t. This allows the compiler to make the right decision depending on the situation (e.g. avoid jump-tables for thunks). Signed-off-by: Christian Borntraeger Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/intercept.c | 51 +++++++++++++++++++++++++++-------------------- arch/s390/kvm/kvm-s390.h | 2 -- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9c7d707..07c6e81 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,22 +22,6 @@ #include "trace.h" #include "trace-s390.h" - -static const intercept_handler_t instruction_handlers[256] = { - [0x01] = kvm_s390_handle_01, - [0x82] = kvm_s390_handle_lpsw, - [0x83] = kvm_s390_handle_diag, - [0xaa] = kvm_s390_handle_aa, - [0xae] = kvm_s390_handle_sigp, - [0xb2] = kvm_s390_handle_b2, - [0xb6] = kvm_s390_handle_stctl, - [0xb7] = kvm_s390_handle_lctl, - [0xb9] = kvm_s390_handle_b9, - [0xe3] = kvm_s390_handle_e3, - [0xe5] = kvm_s390_handle_e5, - [0xeb] = kvm_s390_handle_eb, -}; - u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; @@ -129,16 +113,39 @@ static int handle_validity(struct kvm_vcpu *vcpu) static int handle_instruction(struct kvm_vcpu *vcpu) { - intercept_handler_t handler; - vcpu->stat.exit_instruction++; trace_kvm_s390_intercept_instruction(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); - handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; + + switch (vcpu->arch.sie_block->ipa >> 8) { + case 0x01: + return kvm_s390_handle_01(vcpu); + case 0x82: + return kvm_s390_handle_lpsw(vcpu); + case 0x83: + return kvm_s390_handle_diag(vcpu); + case 0xaa: + return kvm_s390_handle_aa(vcpu); + case 0xae: + return kvm_s390_handle_sigp(vcpu); + case 0xb2: + return kvm_s390_handle_b2(vcpu); + case 0xb6: + return kvm_s390_handle_stctl(vcpu); + case 0xb7: + return kvm_s390_handle_lctl(vcpu); + case 0xb9: + return kvm_s390_handle_b9(vcpu); + case 0xe3: + return kvm_s390_handle_e3(vcpu); + case 0xe5: + return kvm_s390_handle_e5(vcpu); + case 0xeb: + return kvm_s390_handle_eb(vcpu); + default: + return -EOPNOTSUPP; + } } static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bd31b37..3c0a975 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,8 +19,6 @@ #include #include -typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); - /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 -- cgit v1.1 From baabee67f4135e3de87bc874929ac50637aacb0d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 6 Feb 2018 15:17:43 +0100 Subject: KVM: s390: use switch vs jump table in interrupt.c Just like for the interception handlers, let's also use a switch-case in our interrupt delivery code. Signed-off-by: David Hildenbrand Message-Id: <20180206141743.24497-1-david@redhat.com> Reviewed-by: Cornelia Huck Reviewed-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 84 ++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index e399495..3f2c49b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -187,12 +187,6 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) return kvm_s390_get_cpu_timer(vcpu) >> 63; } -static inline int is_ioirq(unsigned long irq_type) -{ - return ((irq_type >= IRQ_PEND_IO_ISC_7) && - (irq_type <= IRQ_PEND_IO_ISC_0)); -} - static uint64_t isc_to_isc_bits(int isc) { return (0x80 >> isc) << 24; @@ -1016,24 +1010,6 @@ out: return rc; } -typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); - -static const deliver_irq_t deliver_irq_funcs[] = { - [IRQ_PEND_MCHK_EX] = __deliver_machine_check, - [IRQ_PEND_MCHK_REP] = __deliver_machine_check, - [IRQ_PEND_PROG] = __deliver_prog, - [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, - [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, - [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, - [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, - [IRQ_PEND_RESTART] = __deliver_restart, - [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, - [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, - [IRQ_PEND_EXT_SERVICE] = __deliver_service, - [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, - [IRQ_PEND_VIRTIO] = __deliver_virtio, -}; - /* Check whether an external call is pending (deliverable or not) */ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -1197,7 +1173,6 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - deliver_irq_t func; int rc = 0; unsigned long irq_type; unsigned long irqs; @@ -1217,16 +1192,57 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) while ((irqs = deliverable_irqs(vcpu)) && !rc) { /* bits are in the reverse order of interrupt priority */ irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); - if (is_ioirq(irq_type)) { + switch (irq_type) { + case IRQ_PEND_IO_ISC_0: + case IRQ_PEND_IO_ISC_1: + case IRQ_PEND_IO_ISC_2: + case IRQ_PEND_IO_ISC_3: + case IRQ_PEND_IO_ISC_4: + case IRQ_PEND_IO_ISC_5: + case IRQ_PEND_IO_ISC_6: + case IRQ_PEND_IO_ISC_7: rc = __deliver_io(vcpu, irq_type); - } else { - func = deliver_irq_funcs[irq_type]; - if (!func) { - WARN_ON_ONCE(func == NULL); - clear_bit(irq_type, &li->pending_irqs); - continue; - } - rc = func(vcpu); + break; + case IRQ_PEND_MCHK_EX: + case IRQ_PEND_MCHK_REP: + rc = __deliver_machine_check(vcpu); + break; + case IRQ_PEND_PROG: + rc = __deliver_prog(vcpu); + break; + case IRQ_PEND_EXT_EMERGENCY: + rc = __deliver_emergency_signal(vcpu); + break; + case IRQ_PEND_EXT_EXTERNAL: + rc = __deliver_external_call(vcpu); + break; + case IRQ_PEND_EXT_CLOCK_COMP: + rc = __deliver_ckc(vcpu); + break; + case IRQ_PEND_EXT_CPU_TIMER: + rc = __deliver_cpu_timer(vcpu); + break; + case IRQ_PEND_RESTART: + rc = __deliver_restart(vcpu); + break; + case IRQ_PEND_SET_PREFIX: + rc = __deliver_set_prefix(vcpu); + break; + case IRQ_PEND_PFAULT_INIT: + rc = __deliver_pfault_init(vcpu); + break; + case IRQ_PEND_EXT_SERVICE: + rc = __deliver_service(vcpu); + break; + case IRQ_PEND_PFAULT_DONE: + rc = __deliver_pfault_done(vcpu); + break; + case IRQ_PEND_VIRTIO: + rc = __deliver_virtio(vcpu); + break; + default: + WARN_ONCE(1, "Unknown pending irq type %ld", irq_type); + clear_bit(irq_type, &li->pending_irqs); } } -- cgit v1.1 From 7fc17e909edfb9bf421ee04e981d3d474175c7c7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Feb 2018 22:17:34 +0800 Subject: bpf: cpumap: use GFP_KERNEL instead of GFP_ATOMIC in __cpu_map_entry_alloc() There're several implications after commit 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") with the using of vmalloc() since can't allow GFP_ATOMIC but mandate GFP_KERNEL. This will lead a WARN since cpumap try to call with GFP_ATOMIC. Fortunately, entry allocation of cpumap can only be done through syscall path which means GFP_ATOMIC is not necessary, so fixing this by replacing GFP_ATOMIC with GFP_KERNEL. Reported-by: syzbot+1a240cdb1f4cc88819df@syzkaller.appspotmail.com Fixes: 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") Cc: Michal Hocko Cc: Daniel Borkmann Cc: Matthew Wilcox Cc: Jesper Dangaard Brouer Cc: akpm@linux-foundation.org Cc: dhowells@redhat.com Cc: hannes@cmpxchg.org Signed-off-by: Jason Wang Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index fbfdada6..a4bb0b3 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data) static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) { - gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; struct bpf_cpu_map_entry *rcpu; int numa, err; -- cgit v1.1 From c5489f9fc053c744c609f34b32efca395cc2fdad Mon Sep 17 00:00:00 2001 From: Michal Oleszczyk Date: Fri, 2 Feb 2018 13:10:29 +0100 Subject: sgtl5000: change digital_mute policy Current implementation mute codec in global way (DAC block). That means when user routes sound not from I2S but from AUX source (LINE_IN) it also will be muted by alsa core. This should not happen. Signed-off-by: Michal Oleszczyk Signed-off-by: Mark Brown --- sound/soc/codecs/sgtl5000.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index e1ab553..c445a07 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -529,10 +529,15 @@ static const struct snd_kcontrol_new sgtl5000_snd_controls[] = { static int sgtl5000_digital_mute(struct snd_soc_dai *codec_dai, int mute) { struct snd_soc_codec *codec = codec_dai->codec; - u16 adcdac_ctrl = SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT; + u16 i2s_pwr = SGTL5000_I2S_IN_POWERUP; - snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL, - adcdac_ctrl, mute ? adcdac_ctrl : 0); + /* + * During 'digital mute' do not mute DAC + * because LINE_IN would be muted aswell. We want to mute + * only I2S block - this can be done by powering it off + */ + snd_soc_update_bits(codec, SGTL5000_CHIP_DIG_POWER, + i2s_pwr, mute ? 0 : i2s_pwr); return 0; } @@ -1237,6 +1242,10 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) */ snd_soc_write(codec, SGTL5000_DAP_CTRL, 0); + /* Unmute DAC after start */ + snd_soc_update_bits(codec, SGTL5000_CHIP_ADCDAC_CTRL, + SGTL5000_DAC_MUTE_LEFT | SGTL5000_DAC_MUTE_RIGHT, 0); + return 0; err: -- cgit v1.1 From db35340c536f1af0108ec9a0b2126a05d358d14a Mon Sep 17 00:00:00 2001 From: Qi Hou Date: Thu, 11 Jan 2018 12:54:43 +0800 Subject: ARM: OMAP2+: timer: fix a kmemleak caused in omap_get_timer_dt When more than one GP timers are used as kernel system timers and the corresponding nodes in device-tree are marked with the same "disabled" property, then the "attr" field of the property will be initialized more than once as the property being added to sys file system via __of_add_property_sysfs(). In __of_add_property_sysfs(), the "name" field of pp->attr.attr is set directly to the return value of safe_name(), without taking care of whether it's already a valid pointer to a memory block. If it is, its old value will always be overwritten by the new one and the memory block allocated before will a "ghost", then a kmemleak happened. That the same "disabled" property being added to different nodes of device tree would cause that kind of kmemleak overhead, at least once. To fix it, allocate the property dynamically, and delete static one. Signed-off-by: Qi Hou Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/timer.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index ece09c9..d61fbd7 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = { .tick_resume = omap2_gp_timer_shutdown, }; -static struct property device_disabled = { - .name = "status", - .length = sizeof("disabled"), - .value = "disabled", -}; - static const struct of_device_id omap_timer_match[] __initconst = { { .compatible = "ti,omap2420-timer", }, { .compatible = "ti,omap3430-timer", }, @@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id * of_get_property(np, "ti,timer-secure", NULL))) continue; - if (!of_device_is_compatible(np, "ti,omap-counter32k")) - of_add_property(np, &device_disabled); + if (!of_device_is_compatible(np, "ti,omap-counter32k")) { + struct property *prop; + + prop = kzalloc(sizeof(*prop), GFP_KERNEL); + if (!prop) + return NULL; + prop->name = "status"; + prop->value = "disabled"; + prop->length = strlen(prop->value); + of_add_property(np, prop); + } return np; } -- cgit v1.1 From dbe7d4c6d11999bda20bcea2572263150ff231ef Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 5 Feb 2018 18:05:00 +0100 Subject: ASoC: samsung: Add the DT binding files entry to MAINTAINERS This patch adds missing DT binding files to the Samsung ASoC drivers entry. Signed-off-by: Sylwester Nawrocki Acked-by: Krzysztof Kozlowski Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260..2161c1d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12091,6 +12091,7 @@ M: Sylwester Nawrocki L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/samsung/ +F: Documentation/devicetree/bindings/sound/samsung* SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER M: Krzysztof Kozlowski -- cgit v1.1 From d3be6d2a08bd26580562d9714d3d97ea9ba22c73 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 9 Feb 2018 08:15:53 -0800 Subject: ARM: OMAP3: Fix prm wake interrupt for resume For platform_suspend_ops, the finish call is too late to re-enable wake irqs and we need re-enable wake irqs on wake call instead. Otherwise noirq resume for devices has already happened. And then dev_pm_disarm_wake_irq() has already disabled the dedicated wake irqs when the interrupt triggers and the wake irq is never handled. For devices that are already in PM runtime suspended state when we enter suspend this means that a possible wake irq will never trigger. And this can lead into a situation where a device has a pending padconf wake irq, and the device will stay unresponsive to any further wake irqs. This issue can be easily reproduced by setting serial console log level to zero, letting the serial console idle, and suspend the system from an ssh terminal. Then try to wake up the system by typing to the serial console. Note that this affects only omap3 PRM interrupt as that's currently the only omap variant that does anything in omap_pm_wake(). In general, for the wake irqs to work, the interrupt must have either IRQF_NO_SUSPEND or IRQF_EARLY_RESUME set for it to trigger before dev_pm_disarm_wake_irq() disables the wake irqs. Reported-by: Grygorii Strashko Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 366158a..6f68576 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -186,7 +186,7 @@ static void omap_pm_end(void) cpu_idle_poll_ctrl(false); } -static void omap_pm_finish(void) +static void omap_pm_wake(void) { if (soc_is_omap34xx()) omap_prcm_irq_complete(); @@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = { .begin = omap_pm_begin, .end = omap_pm_end, .enter = omap_pm_enter, - .finish = omap_pm_finish, + .wake = omap_pm_wake, .valid = suspend_valid_only_mem, }; -- cgit v1.1 From fe27f16794f313f5fc16f6d2f42d8c2b2f4d70cc Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 9 Feb 2018 09:35:56 -0800 Subject: ARM: OMAP2+: Fix sar_base inititalization for HS omaps HS omaps use irq_save_secure_context() instead of irq_save_context() so sar_base will never get initialized and irq_sar_clear() gets called with a wrong address for HS omaps from irq_restore_context(). Starting with commit f4b9f40ae95b ("ARM: OMAP4+: Initialize SAR RAM base early for proper CPU1 reset for kexec") we have it available, and this ideally would been fixed with that commit already. Fixes: f4b9f40ae95b ("ARM: OMAP4+: Initialize SAR RAM base early for proper CPU1 reset for kexec") Cc: Andrew F. Davis Cc: Dave Gerlach Cc: Keerthy Cc: Santosh Shilimkar Cc: Tero Kristo Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-wakeupgen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 4bb6751..fc5fb77 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -299,8 +299,6 @@ static void irq_save_context(void) if (soc_is_dra7xx()) return; - if (!sar_base) - sar_base = omap4_get_sar_ram_base(); if (wakeupgen_ops && wakeupgen_ops->save_context) wakeupgen_ops->save_context(); } @@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node, irq_hotplug_init(); irq_pm_init(); + sar_base = omap4_get_sar_ram_base(); + return 0; } IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init); -- cgit v1.1 From 8cbbf1745dcde7ba7e423dc70619d223de90fd43 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 2 Jan 2018 16:25:35 +0100 Subject: ARM: OMAP1: clock: Fix debugfs_create_*() usage When exposing data access through debugfs, the correct debugfs_create_*() functions must be used, depending on data type. Remove all casts from data pointers passed to debugfs_create_*() functions, as such casts prevent the compiler from flagging bugs. Correct all wrong usage: - clk.rate is unsigned long, not u32, - clk.flags is u8, not u32, which exposed the successive clk.rate_offset and clk.src_offset fields. Signed-off-by: Geert Uytterhoeven Acked-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/clock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 43e3e18..fa51241 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c) return -ENOMEM; c->dent = d; - d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount); + d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount); if (!d) { err = -ENOMEM; goto err_out; } - d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate); + d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate); if (!d) { err = -ENOMEM; goto err_out; } - d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags); + d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags); if (!d) { err = -ENOMEM; goto err_out; -- cgit v1.1 From 64116257144bfbe744dd5b41bb0d160ccf65e339 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Feb 2018 15:15:33 +0100 Subject: ARM: dts: OMAP5: uevm: Fix "debounce-interval" property misspelling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "debounce_interval" was never supported. Signed-off-by: Geert Uytterhoeven Cc: Benoît Cousson Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-uevm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index ec2c8ba..592e17f 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -47,7 +47,7 @@ gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 */ wakeup-source; autorepeat; - debounce_interval = <50>; + debounce-interval = <50>; }; }; -- cgit v1.1 From 74402055a2d3ec998a1ded599e86185a27d9bbf4 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 25 Jan 2018 14:10:37 -0600 Subject: ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux The pinmuxing was missing for I2C1 which was causing intermittent issues with the PMIC which is connected to I2C1. The bootloader did not quite configure the I2C1 either, so when running at 2.6MHz, it was generating errors at time. This correctly sets the I2C1 pinmuxing so it can operate at 2.6MHz Fixes: 687c27676151 ("ARM: dts: Add minimal support for LogicPD Torpedo DM3730 devkit") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index b50b796..4791544 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -66,6 +66,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <2600000>; twl: twl@48 { @@ -136,6 +138,12 @@ OMAP3_CORE1_IOPAD(0x21b8, PIN_INPUT | MUX_MODE0) /* hsusb0_data7.hsusb0_data7 */ >; }; + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */ + >; + }; }; &uart2 { -- cgit v1.1 From 84c7efd607e7fb6933920322086db64654f669b2 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 27 Jan 2018 15:27:05 -0600 Subject: ARM: dts: LogicPD SOM-LV: Fix I2C1 pinmux The pinmuxing was missing for I2C1 which was causing intermittent issues with the PMIC which is connected to I2C1. The bootloader did not quite configure the I2C1 either, so when running at 2.6MHz, it was generating errors at times. This correctly sets the I2C1 pinmuxing so it can operate at 2.6MHz Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index c1aa7a4..a30ee9f 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -71,6 +71,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <2600000>; twl: twl@48 { @@ -189,7 +191,12 @@ >; }; - + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */ + >; + }; }; &omap3_pmx_wkup { -- cgit v1.1 From 9c481b908b011398b1491752271cd1e2c9ad5758 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 14 Feb 2018 15:31:00 +0100 Subject: bpf: fix bpf_prog_array_copy_to_user warning from perf event prog query syzkaller tried to perform a prog query in perf_event_query_prog_array() where struct perf_event_query_bpf had an ids_len of 1,073,741,353 and thus causing a warning due to failed kcalloc() allocation out of the bpf_prog_array_copy_to_user() helper. Given we cannot attach more than 64 programs to a perf event, there's no point in allowing huge ids_len. Therefore, allow a buffer that would fix the maximum number of ids and also add a __GFP_NOWARN to the temporary ids buffer. Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") Fixes: 0911287ce32b ("bpf: fix bpf_prog_array_copy_to_user() issues") Reported-by: syzbot+cab5816b0edbabf598b3@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 2 +- kernel/trace/bpf_trace.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 29ca920..d315b39 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, * so always copy 'cnt' prog_ids to the user. * In a rare race the user will see zero prog_ids */ - ids = kcalloc(cnt, sizeof(u32), GFP_USER); + ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); if (!ids) return -ENOMEM; rcu_read_lock(); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index fc2838a..c0a9e31 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -872,6 +872,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) return -EINVAL; if (copy_from_user(&query, uquery, sizeof(query))) return -EFAULT; + if (query.ids_len > BPF_TRACE_MAX_PROGS) + return -E2BIG; mutex_lock(&bpf_event_mutex); ret = bpf_prog_array_copy_info(event->tp_event->prog_array, -- cgit v1.1 From ac5b70198adc25c73fba28de4f78adcee8f6be0b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Feb 2018 21:35:31 -0800 Subject: net: fix race on decreasing number of TX queues netif_set_real_num_tx_queues() can be called when netdev is up. That usually happens when user requests change of number of channels/rings with ethtool -L. The procedure for changing the number of queues involves resetting the qdiscs and setting dev->num_tx_queues to the new value. When the new value is lower than the old one, extra care has to be taken to ensure ordering of accesses to the number of queues vs qdisc reset. Currently the queues are reset before new dev->num_tx_queues is assigned, leaving a window of time where packets can be enqueued onto the queues going down, leading to a likely crash in the drivers, since most drivers don't check if TX skbs are assigned to an active queue. Fixes: e6484930d7c7 ("net: allocate tx queues in register_netdevice") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/dev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index dda9d7b..d4362be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2382,8 +2382,11 @@ EXPORT_SYMBOL(netdev_set_num_tc); */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + bool disabling; int rc; + disabling = txq < dev->real_num_tx_queues; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; @@ -2399,15 +2402,19 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); - if (txq < dev->real_num_tx_queues) { + dev->real_num_tx_queues = txq; + + if (disabling) { + synchronize_net(); qdisc_reset_all_tx_gt(dev, txq); #ifdef CONFIG_XPS netif_reset_xps_queues_gt(dev, txq); #endif } + } else { + dev->real_num_tx_queues = txq; } - dev->real_num_tx_queues = txq; return 0; } EXPORT_SYMBOL(netif_set_real_num_tx_queues); -- cgit v1.1 From fae8b6f4a6be42372f8b7ffda39c3ca2cd951dc1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 13 Feb 2018 19:29:13 +0800 Subject: sctp: fix some copy-paste errors for file comments This patch is to fix the file comments in stream.c and stream_interleave.c v1->v2: rephrase the comment for stream.c according to Neil's suggestion. Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf") Fixes: 0c3f6f655487 ("sctp: implement make_datafrag for sctp_stream_interleave") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 +- net/sctp/stream_interleave.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index cedf672..f799043 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -6,7 +6,7 @@ * * This file is part of the SCTP kernel implementation * - * These functions manipulate sctp tsn mapping array. + * This file contains sctp stream maniuplation primitives and helpers. * * This SCTP implementation is free software; * you can redistribute it and/or modify it under the terms of diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 86c26ec..65ac03b 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -3,7 +3,8 @@ * * This file is part of the SCTP kernel implementation * - * These functions manipulate sctp stream queue/scheduling. + * These functions implement sctp stream message interleaving, mostly + * including I-DATA and I-FORWARD-TSN chunks process. * * This SCTP implementation is free software; * you can redistribute it and/or modify it under the terms of -- cgit v1.1 From e6dbe9397ea754e80f59d852a74fc289fa8b0f3a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 13 Feb 2018 17:59:22 +0100 Subject: Revert "net: thunderx: Add support for xdp redirect" This reverts commit aa136d0c82fcd6af14535853c30e219e02b2692d. As I previously[1] pointed out this implementation of XDP_REDIRECT is wrong. XDP_REDIRECT is a facility that must work between different NIC drivers. Another NIC driver can call ndo_xdp_xmit/nicvf_xdp_xmit, but your driver patch assumes payload data (at top of page) will contain a queue index and a DMA addr, this is not true and worse will likely contain garbage. Given you have not fixed this in due time (just reached v4.16-rc1), the only option I see is a revert. [1] http://lkml.kernel.org/r/20171211130902.482513d3@redhat.com Cc: Sunil Goutham Cc: Christina Jacob Cc: Aleksey Makarov Fixes: aa136d0c82fc ("net: thunderx: Add support for xdp redirect") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 110 +++++---------------- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 11 +-- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 4 - 3 files changed, 31 insertions(+), 94 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index b68cde9..7d9c5ff 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -67,11 +67,6 @@ module_param(cpi_alg, int, S_IRUGO); MODULE_PARM_DESC(cpi_alg, "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); -struct nicvf_xdp_tx { - u64 dma_addr; - u8 qidx; -}; - static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) { if (nic->sqs_mode) @@ -507,29 +502,14 @@ static int nicvf_init_resources(struct nicvf *nic) return 0; } -static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr) -{ - /* Check if it's a recycled page, if not unmap the DMA mapping. - * Recycled page holds an extra reference. - */ - if (page_ref_count(page) == 1) { - dma_addr &= PAGE_MASK; - dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, - RCV_FRAG_LEN + XDP_HEADROOM, - DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - } -} - static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, struct cqe_rx_t *cqe_rx, struct snd_queue *sq, struct rcv_queue *rq, struct sk_buff **skb) { struct xdp_buff xdp; struct page *page; - struct nicvf_xdp_tx *xdp_tx = NULL; u32 action; - u16 len, err, offset = 0; + u16 len, offset = 0; u64 dma_addr, cpu_addr; void *orig_data; @@ -543,7 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, cpu_addr = (u64)phys_to_virt(cpu_addr); page = virt_to_page((void *)cpu_addr); - xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM; + xdp.data_hard_start = page_address(page); xdp.data = (void *)cpu_addr; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; @@ -563,7 +543,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, switch (action) { case XDP_PASS: - nicvf_unmap_page(nic, page, dma_addr); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } /* Build SKB and pass on packet to network stack */ *skb = build_skb(xdp.data, @@ -576,20 +567,6 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, case XDP_TX: nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); return true; - case XDP_REDIRECT: - /* Save DMA address for use while transmitting */ - xdp_tx = (struct nicvf_xdp_tx *)page_address(page); - xdp_tx->dma_addr = dma_addr; - xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx); - - err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog); - if (!err) - return true; - - /* Free the page on error */ - nicvf_unmap_page(nic, page, dma_addr); - put_page(page); - break; default: bpf_warn_invalid_xdp_action(action); /* fall through */ @@ -597,7 +574,18 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, trace_xdp_exception(nic->netdev, prog, action); /* fall through */ case XDP_DROP: - nicvf_unmap_page(nic, page, dma_addr); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } put_page(page); return true; } @@ -1864,50 +1852,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) } } -static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp) -{ - struct nicvf *nic = netdev_priv(netdev); - struct nicvf *snic = nic; - struct nicvf_xdp_tx *xdp_tx; - struct snd_queue *sq; - struct page *page; - int err, qidx; - - if (!netif_running(netdev) || !nic->xdp_prog) - return -EINVAL; - - page = virt_to_page(xdp->data); - xdp_tx = (struct nicvf_xdp_tx *)page_address(page); - qidx = xdp_tx->qidx; - - if (xdp_tx->qidx >= nic->xdp_tx_queues) - return -EINVAL; - - /* Get secondary Qset's info */ - if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) { - qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS; - snic = (struct nicvf *)nic->snicvf[qidx - 1]; - if (!snic) - return -EINVAL; - qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS; - } - - sq = &snic->qs->sq[qidx]; - err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data, - xdp_tx->dma_addr, - xdp->data_end - xdp->data); - if (err) - return -ENOMEM; - - nicvf_xdp_sq_doorbell(snic, sq, qidx); - return 0; -} - -static void nicvf_xdp_flush(struct net_device *dev) -{ - return; -} - static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) { struct hwtstamp_config config; @@ -1986,8 +1930,6 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, .ndo_bpf = nicvf_xdp, - .ndo_xdp_xmit = nicvf_xdp_xmit, - .ndo_xdp_flush = nicvf_xdp_flush, .ndo_do_ioctl = nicvf_ioctl, }; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 3eae9ff..d42704d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -204,7 +204,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, /* Reserve space for header modifications by BPF program */ if (rbdr->is_xdp) - buf_len += XDP_HEADROOM; + buf_len += XDP_PACKET_HEADROOM; /* Check if it's recycled */ if (pgcache) @@ -224,9 +224,8 @@ ret: nic->rb_page = NULL; return -ENOMEM; } - if (pgcache) - pgcache->dma_addr = *rbuf + XDP_HEADROOM; + pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM; nic->rb_page_offset += buf_len; } @@ -1244,7 +1243,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, int qentry; if (subdesc_cnt > sq->xdp_free_cnt) - return -1; + return 0; qentry = nicvf_get_sq_desc(sq, subdesc_cnt); @@ -1255,7 +1254,7 @@ int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, sq->xdp_desc_cnt += subdesc_cnt; - return 0; + return 1; } /* Calculate no of SQ subdescriptors needed to transmit all @@ -1656,7 +1655,7 @@ static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr, if (page_ref_count(page) != 1) return; - len += XDP_HEADROOM; + len += XDP_PACKET_HEADROOM; /* Receive buffers in XDP mode are mapped from page start */ dma_addr &= PAGE_MASK; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index ce1eed7..5e9a03c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -11,7 +11,6 @@ #include #include -#include #include #include "q_struct.h" @@ -94,9 +93,6 @@ #define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -#define RCV_BUF_HEADROOM 128 /* To store dma address for XDP redirect */ -#define XDP_HEADROOM (XDP_PACKET_HEADROOM + RCV_BUF_HEADROOM) - #define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ MAX_CQE_PER_PKT_XMIT) -- cgit v1.1 From cc85c02edfe48a34865ae00f7d22298a3fdd17aa Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 13 Feb 2018 15:32:50 -0600 Subject: ibmvnic: Wait until reset is complete to set carrier on Pushes back setting the carrier on until the end of the reset code. This resolves a bug where a watchdog timer was detecting that a TX queue had stalled before the adapter reset was complete. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2744726..1a2d8d6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1670,8 +1670,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } - netif_carrier_on(netdev); - /* kick napi */ for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); @@ -1679,6 +1677,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (adapter->reset_reason != VNIC_RESET_FAILOVER) netdev_notify_peers(netdev); + netif_carrier_on(netdev); + return 0; } -- cgit v1.1 From 34f0f4e3f48810b0ba080bf2a65370b0cc179c51 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 13 Feb 2018 18:23:40 -0600 Subject: ibmvnic: Fix login buffer memory leaks During device bringup, the driver exchanges login buffers with firmware. These buffers contain information such number of TX and RX queues alloted to the device, RX buffer size, etc. These buffers weren't being properly freed on device reset or close. We can free the buffer we send to firmware as soon as we get a response. There is information in the response buffer that the driver needs for normal operation so retain it until the next reset or removal. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1a2d8d6..8625f5e 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -791,6 +791,18 @@ static int ibmvnic_login(struct net_device *netdev) return 0; } +static void release_login_buffer(struct ibmvnic_adapter *adapter) +{ + kfree(adapter->login_buf); + adapter->login_buf = NULL; +} + +static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter) +{ + kfree(adapter->login_rsp_buf); + adapter->login_rsp_buf = NULL; +} + static void release_resources(struct ibmvnic_adapter *adapter) { int i; @@ -813,6 +825,8 @@ static void release_resources(struct ibmvnic_adapter *adapter) } } } + + release_login_rsp_buffer(adapter); } static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) @@ -3013,6 +3027,7 @@ static void send_login(struct ibmvnic_adapter *adapter) struct vnic_login_client_data *vlcd; int i; + release_login_rsp_buffer(adapter); client_data_len = vnic_client_data_len(adapter); buffer_size = @@ -3708,6 +3723,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_BIDIRECTIONAL); + release_login_buffer(adapter); dma_unmap_single(dev, adapter->login_rsp_buf_token, adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL); -- cgit v1.1 From 6e4842ddfc2b08931ebd6c0bc95322dd56e5232b Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 13 Feb 2018 18:23:41 -0600 Subject: ibmvnic: Fix NAPI structures memory leak This memory is allocated during initialization but never freed, so do that now. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 8625f5e..23e0b42 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -825,6 +825,8 @@ static void release_resources(struct ibmvnic_adapter *adapter) } } } + kfree(adapter->napi); + adapter->napi = NULL; release_login_rsp_buffer(adapter); } -- cgit v1.1 From 4b9b0f01350500173f17e2b2e65beb4df4ef99c7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 13 Feb 2018 18:23:42 -0600 Subject: ibmvnic: Free RX socket buffer in case of adapter error If a RX buffer is returned to the client driver with an error, free the corresponding socket buffer before continuing. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 23e0b42..bc93fa2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1869,6 +1869,7 @@ restart_poll: be16_to_cpu(next->rx_comp.rc)); /* free the entry */ next->rx_comp.first = 0; + dev_kfree_skb_any(rx_buff->skb); remove_buff_from_pool(adapter, rx_buff); continue; } -- cgit v1.1 From d0869c0071e40c4407d1a4d7c9497653cf47253b Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 13 Feb 2018 18:23:43 -0600 Subject: ibmvnic: Clean RX pool buffers during device close During device close or reset, there were some cases of outstanding RX socket buffers not being freed. Include a function similar to the one that already exists to clean TX socket buffers in this case. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bc93fa2..996f475 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1073,6 +1073,35 @@ static int ibmvnic_open(struct net_device *netdev) return rc; } +static void clean_rx_pools(struct ibmvnic_adapter *adapter) +{ + struct ibmvnic_rx_pool *rx_pool; + u64 rx_entries; + int rx_scrqs; + int i, j; + + if (!adapter->rx_pool) + return; + + rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + rx_entries = adapter->req_rx_add_entries_per_subcrq; + + /* Free any remaining skbs in the rx buffer pools */ + for (i = 0; i < rx_scrqs; i++) { + rx_pool = &adapter->rx_pool[i]; + if (!rx_pool) + continue; + + netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i); + for (j = 0; j < rx_entries; j++) { + if (rx_pool->rx_buff[j].skb) { + dev_kfree_skb_any(rx_pool->rx_buff[j].skb); + rx_pool->rx_buff[j].skb = NULL; + } + } + } +} + static void clean_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; @@ -1150,7 +1179,7 @@ static int __ibmvnic_close(struct net_device *netdev) } } } - + clean_rx_pools(adapter); clean_tx_pools(adapter); adapter->state = VNIC_CLOSED; return rc; -- cgit v1.1 From 01ea306f2ac2baff98d472da719193e738759d93 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2018 12:19:00 +0100 Subject: netfilter: drop outermost socket lock in getsockopt() The Syzbot reported a possible deadlock in the netfilter area caused by rtnl lock, xt lock and socket lock being acquired with a different order on different code paths, leading to the following backtrace: Reviewed-by: Xin Long ====================================================== WARNING: possible circular locking dependency detected 4.15.0+ #301 Not tainted ------------------------------------------------------ syzkaller233489/4179 is trying to acquire lock: (rtnl_mutex){+.+.}, at: [<0000000048e996fd>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 but task is already holding lock: (&xt[i].mutex){+.+.}, at: [<00000000328553a2>] xt_find_table_lock+0x3e/0x3e0 net/netfilter/x_tables.c:1041 which lock already depends on the new lock. === Since commit 3f34cfae1230 ("netfilter: on sockopt() acquire sock lock only in the required scope"), we already acquire the socket lock in the innermost scope, where needed. In such commit I forgot to remove the outer-most socket lock from the getsockopt() path, this commit addresses the issues dropping it now. v1 -> v2: fix bad subj, added relavant 'fixes' tag Fixes: 22265a5c3c10 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Fixes: 202f59afd441 ("netfilter: ipt_CLUSTERIP: do not hold dev") Fixes: 3f34cfae1230 ("netfilter: on sockopt() acquire sock lock only in the required scope") Reported-by: syzbot+ddde1c7b7ff7442d7f2d@syzkaller.appspotmail.com Suggested-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_sockglue.c | 7 +------ net/ipv6/ipv6_sockglue.c | 10 ++-------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 008be04..9c41a0c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1567,10 +1567,7 @@ int ip_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = nf_getsockopt(sk, PF_INET, optname, optval, - &len); - release_sock(sk); + err = nf_getsockopt(sk, PF_INET, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); return err; @@ -1602,9 +1599,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); - release_sock(sk); if (err >= 0) err = put_user(len, optlen); return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d78d41f..2453516 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1367,10 +1367,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = nf_getsockopt(sk, PF_INET6, optname, optval, - &len); - release_sock(sk); + err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } @@ -1409,10 +1406,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = compat_nf_getsockopt(sk, PF_INET6, - optname, optval, &len); - release_sock(sk); + err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } -- cgit v1.1 From e5d1a1eec0f4b51d0a7a6457d0b1b99b34f3e901 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:37:58 +0800 Subject: tipc: Refactor __tipc_nl_compat_doit As preparation for adding RTNL to make (*cmd->transcode)() and (*cmd->transcode)() constantly protected by RTNL lock, we move out of memory allocations existing between them as many as possible so that the time of holding RTNL can be minimized in __tipc_nl_compat_doit(). Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e48f0b2..9741690 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -285,10 +285,6 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (!trans_buf) return -ENOMEM; - err = (*cmd->transcode)(cmd, trans_buf, msg); - if (err) - goto trans_out; - attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * sizeof(struct nlattr *), GFP_KERNEL); if (!attrbuf) { @@ -296,27 +292,32 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, goto trans_out; } - err = nla_parse(attrbuf, tipc_genl_family.maxattr, - (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL, NULL); - if (err) - goto parse_out; - doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!doit_buf) { err = -ENOMEM; - goto parse_out; + goto attrbuf_out; } - doit_buf->sk = msg->dst_sk; - memset(&info, 0, sizeof(info)); info.attrs = attrbuf; + err = (*cmd->transcode)(cmd, trans_buf, msg); + if (err) + goto doit_out; + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); + if (err) + goto doit_out; + + doit_buf->sk = msg->dst_sk; + err = (*cmd->doit)(doit_buf, &info); +doit_out: kfree_skb(doit_buf); -parse_out: +attrbuf_out: kfree(attrbuf); trans_out: kfree_skb(trans_buf); -- cgit v1.1 From d59d8b77abf4308e9c6809298341e275eac38404 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:37:59 +0800 Subject: tipc: Introduce __tipc_nl_bearer_disable Introduce __tipc_nl_bearer_disable() which doesn't hold RTNL lock. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 19 +++++++++++++------ net/tipc/bearer.h | 1 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index c800147..61b6625 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -813,7 +813,7 @@ err_out: return err; } -int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; char *name; @@ -835,19 +835,26 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); - rtnl_lock(); bearer = tipc_bearer_find(net, name); - if (!bearer) { - rtnl_unlock(); + if (!bearer) return -EINVAL; - } bearer_disable(net, bearer); - rtnl_unlock(); return 0; } +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_disable(skb, info); + rtnl_unlock(); + + return err; +} + int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 42d6eee..bcc6d5f 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -188,6 +188,7 @@ extern struct tipc_media udp_media_info; #endif int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); -- cgit v1.1 From 45cf7edfbc07b2208d7b4a79d4a36aeddf16aefd Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:38:00 +0800 Subject: tipc: Introduce __tipc_nl_bearer_enable Introduce __tipc_nl_bearer_enable() which doesn't hold RTNL lock. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 17 ++++++++++------- net/tipc/bearer.h | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 61b6625..faf8fa0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -855,7 +855,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) return err; } -int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; char *bearer; @@ -897,15 +897,18 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } + return tipc_enable_bearer(net, bearer, domain, prio, attrs); +} + +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + rtnl_lock(); - err = tipc_enable_bearer(net, bearer, domain, prio, attrs); - if (err) { - rtnl_unlock(); - return err; - } + err = __tipc_nl_bearer_enable(skb, info); rtnl_unlock(); - return 0; + return err; } int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index bcc6d5f..fc81150 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -190,6 +190,7 @@ extern struct tipc_media udp_media_info; int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); -- cgit v1.1 From 93532bb1d436984dac60c92d1a93eecda4fecb29 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:38:01 +0800 Subject: tipc: Introduce __tipc_nl_bearer_set Introduce __tipc_nl_bearer_set() which doesn't holding RTNL lock. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 23 ++++++++++++++--------- net/tipc/bearer.h | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index faf8fa0..f92c9c5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -954,7 +954,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) return 0; } -int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; @@ -975,22 +975,17 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); - rtnl_lock(); b = tipc_bearer_find(net, name); - if (!b) { - rtnl_unlock(); + if (!b) return -EINVAL; - } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); - if (err) { - rtnl_unlock(); + if (err) return err; - } if (props[TIPC_NLA_PROP_TOL]) b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); @@ -999,11 +994,21 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); } - rtnl_unlock(); return 0; } +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_set(skb, info); + rtnl_unlock(); + + return err; +} + static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media, int nlflags) { diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index fc81150..cc0f529 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -194,6 +194,7 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.1 From 07ffb22357323c7189921935b24d68018e1a2b68 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:38:02 +0800 Subject: tipc: Introduce __tipc_nl_media_set Introduce __tipc_nl_media_set() which doesn't hold RTNL lock. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 23 ++++++++++++++--------- net/tipc/bearer.h | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f92c9c5..3e3dce3 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1130,7 +1130,7 @@ err_out: return err; } -int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; @@ -1148,22 +1148,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); - rtnl_lock(); m = tipc_media_find(name); - if (!m) { - rtnl_unlock(); + if (!m) return -EINVAL; - } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], props); - if (err) { - rtnl_unlock(); + if (err) return err; - } if (props[TIPC_NLA_PROP_TOL]) m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); @@ -1172,7 +1167,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); } - rtnl_unlock(); return 0; } + +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_media_set(skb, info); + rtnl_unlock(); + + return err; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index cc0f529..a53613d 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -200,6 +200,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); -- cgit v1.1 From 5631f65decf390ae480d157838c0c393a991328e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:38:03 +0800 Subject: tipc: Introduce __tipc_nl_net_set Introduce __tipc_nl_net_set() which doesn't hold RTNL lock. Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/net.c | 15 ++++++++++++--- net/tipc/net.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/net/tipc/net.c b/net/tipc/net.c index 719c592..1a2fde0 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -200,7 +200,7 @@ out: return skb->len; } -int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -241,10 +241,19 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (!tipc_addr_node_valid(addr)) return -EINVAL; - rtnl_lock(); tipc_net_start(net, addr); - rtnl_unlock(); } return 0; } + +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_net_set(skb, info); + rtnl_unlock(); + + return err; +} diff --git a/net/tipc/net.h b/net/tipc/net.h index c7c2549..c0306aa 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); #endif -- cgit v1.1 From ed4ffdfec26dfe1bb02435afd1e01f61426f7212 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 14 Feb 2018 13:38:04 +0800 Subject: tipc: Fix missing RTNL lock protection during setting link properties Currently when user changes link properties, TIPC first checks if user's command message contains media name or bearer name through tipc_media_find() or tipc_bearer_find() which is protected by RTNL lock. But when tipc_nl_compat_link_set() conducts the checking with the two functions, it doesn't hold RTNL lock at all, as a result, the following complaints were reported: audit: type=1400 audit(1514679888.244:9): avc: denied { write } for pid=3194 comm="syzkaller021477" path="socket:[11143]" dev="sockfs" ino=11143 scontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023 tcontext=unconfined_u:system_r:insmod_t:s0-s0:c0.c1023 tclass=netlink_generic_socket permissive=1 Reviewed-by: Kirill Tkhai ============================= WARNING: suspicious RCU usage 4.15.0-rc5+ #152 Not tainted ----------------------------- net/tipc/bearer.c:177 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by syzkaller021477/3194: #0: (cb_lock){++++}, at: [<00000000d20133ea>] genl_rcv+0x19/0x40 net/netlink/genetlink.c:634 #1: (genl_mutex){+.+.}, at: [<00000000fcc5d1bc>] genl_lock net/netlink/genetlink.c:33 [inline] #1: (genl_mutex){+.+.}, at: [<00000000fcc5d1bc>] genl_rcv_msg+0x115/0x140 net/netlink/genetlink.c:622 stack backtrace: CPU: 1 PID: 3194 Comm: syzkaller021477 Not tainted 4.15.0-rc5+ #152 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4585 tipc_bearer_find+0x2b4/0x3b0 net/tipc/bearer.c:177 tipc_nl_compat_link_set+0x329/0x9f0 net/tipc/netlink_compat.c:729 __tipc_nl_compat_doit net/tipc/netlink_compat.c:288 [inline] tipc_nl_compat_doit+0x15b/0x660 net/tipc/netlink_compat.c:335 tipc_nl_compat_handle net/tipc/netlink_compat.c:1119 [inline] tipc_nl_compat_recv+0x112f/0x18f0 net/tipc/netlink_compat.c:1201 genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:599 genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:624 netlink_rcv_skb+0x21e/0x460 net/netlink/af_netlink.c:2408 genl_rcv+0x28/0x40 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1275 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1301 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1864 sock_sendmsg_nosec net/socket.c:636 [inline] sock_sendmsg+0xca/0x110 net/socket.c:646 sock_write_iter+0x31a/0x5d0 net/socket.c:915 call_write_iter include/linux/fs.h:1772 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x684/0x970 fs/read_write.c:482 vfs_write+0x189/0x510 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0xef/0x220 fs/read_write.c:581 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x54/0x63 arch/x86/entry/entry_64_compat.S:129 In order to correct the mistake, __tipc_nl_compat_doit() has been protected by RTNL lock, which means the whole operation of setting bearer/media properties is under RTNL protection. Signed-off-by: Ying Xue Reported-by: syzbot Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 9741690..4492cda 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -301,6 +301,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, memset(&info, 0, sizeof(info)); info.attrs = attrbuf; + rtnl_lock(); err = (*cmd->transcode)(cmd, trans_buf, msg); if (err) goto doit_out; @@ -315,6 +316,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, err = (*cmd->doit)(doit_buf, &info); doit_out: + rtnl_unlock(); kfree_skb(doit_buf); attrbuf_out: @@ -723,13 +725,13 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, media = tipc_media_find(lc->name); if (media) { - cmd->doit = &tipc_nl_media_set; + cmd->doit = &__tipc_nl_media_set; return tipc_nl_compat_media_set(skb, msg); } bearer = tipc_bearer_find(msg->net, lc->name); if (bearer) { - cmd->doit = &tipc_nl_bearer_set; + cmd->doit = &__tipc_nl_bearer_set; return tipc_nl_compat_bearer_set(skb, msg); } @@ -1090,12 +1092,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_ENABLE_BEARER: msg->req_type = TIPC_TLV_BEARER_CONFIG; - doit.doit = tipc_nl_bearer_enable; + doit.doit = __tipc_nl_bearer_enable; doit.transcode = tipc_nl_compat_bearer_enable; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_DISABLE_BEARER: msg->req_type = TIPC_TLV_BEARER_NAME; - doit.doit = tipc_nl_bearer_disable; + doit.doit = __tipc_nl_bearer_disable; doit.transcode = tipc_nl_compat_bearer_disable; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SHOW_LINK_STATS: @@ -1149,12 +1151,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_SET_NODE_ADDR: msg->req_type = TIPC_TLV_NET_ADDR; - doit.doit = tipc_nl_net_set; + doit.doit = __tipc_nl_net_set; doit.transcode = tipc_nl_compat_net_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SET_NETID: msg->req_type = TIPC_TLV_UNSIGNED; - doit.doit = tipc_nl_net_set; + doit.doit = __tipc_nl_net_set; doit.transcode = tipc_nl_compat_net_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_GET_NETID: -- cgit v1.1 From 57ebd808a97d7c5b1e1afb937c2db22beba3c1f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2018 13:46:25 +0100 Subject: netfilter: add back stackpointer size checks The rationale for removing the check is only correct for rulesets generated by ip(6)tables. In iptables, a jump can only occur to a user-defined chain, i.e. because we size the stack based on number of user-defined chains we cannot exceed stack size. However, the underlying binary format has no such restriction, and the validation step only ensures that the jump target is a valid rule start point. IOW, its possible to build a rule blob that has no user-defined chains but does contain a jump. If this happens, no jump stack gets allocated and crash occurs because no jumpstack was allocated. Fixes: 7814b6ec6d0d6 ("netfilter: xtables: don't save/restore jumpstack offset") Reported-by: syzbot+e783f671527912cd9403@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 7 ++++++- net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4ffe302..e3e420f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -252,6 +252,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, } if (table_base + v != arpt_next_entry(e)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9a71f31..e38395a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -330,8 +330,13 @@ ipt_do_table(struct sk_buff *skb, continue; } if (table_base + v != ipt_next_entry(e) && - !(e->ip.flags & IPT_F_GOTO)) + !(e->ip.flags & IPT_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; + } e = get_entry(table_base, v); continue; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index af4c917..62358b9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -352,6 +352,10 @@ ip6t_do_table(struct sk_buff *skb, } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; } -- cgit v1.1 From a65820e6956782af6c5330749ae37222350d8d3f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 14 Feb 2018 18:05:31 +1100 Subject: docs: segmentation-offloads.txt: update for UFO depreciation UFO is deprecated except for tuntap and packet per 0c19f846d582, ("net: accept UFO datagrams from tuntap and packet"). Update UFO docs to reflect this. Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index 2f09455..2cda12a 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -49,6 +49,10 @@ datagram into multiple IPv4 fragments. Many of the requirements for UDP fragmentation offload are the same as TSO. However the IPv4 ID for fragments should not increment as a single IPv4 datagram is fragmented. +UFO is deprecated: modern kernels will no longer generate UFO skbs, but can +still receive them from tuntap and similar devices. Offload of UDP-based +tunnel protocols is still supported. + IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads ======================================================== -- cgit v1.1 From bc3c2431d4173816240679a02fd4d74685e94bc8 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 14 Feb 2018 18:05:32 +1100 Subject: docs: segmentation-offloads.txt: Fix ref to SKB_GSO_TUNNEL_REMCSUM The doc originally called it SKB_GSO_REMCSUM. Fix it. Fixes: f7a6272bf3cb ("Documentation: Add documentation for TSO and GSO features") Signed-off-by: Daniel Axtens Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index 2cda12a..b247471 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -87,10 +87,10 @@ SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the fact that the outer header also requests to have a non-zero checksum included in the outer header. -Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header -has requested a remote checksum offload. In this case the inner headers -will be left with a partial checksum and only the outer header checksum -will be computed. +Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel +header has requested a remote checksum offload. In this case the inner +headers will be left with a partial checksum and only the outer header +checksum will be computed. Generic Segmentation Offload ============================ -- cgit v1.1 From a677088922831d94d292ca3891b148a8ba0b5fa1 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 14 Feb 2018 18:05:33 +1100 Subject: docs: segmentation-offloads.txt: add SCTP info Most of this is extracted from 90017accff61 ("sctp: Add GSO support"), with some extra text about GSO_BY_FRAGS and the need to check for it. Cc: Marcelo Ricardo Leitner Signed-off-by: Daniel Axtens Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- Documentation/networking/segmentation-offloads.txt | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt index b247471..d47480b 100644 --- a/Documentation/networking/segmentation-offloads.txt +++ b/Documentation/networking/segmentation-offloads.txt @@ -13,6 +13,7 @@ The following technologies are described: * Generic Segmentation Offload - GSO * Generic Receive Offload - GRO * Partial Generic Segmentation Offload - GSO_PARTIAL + * SCTP accelleration with GSO - GSO_BY_FRAGS TCP Segmentation Offload ======================== @@ -132,3 +133,28 @@ values for if the header was simply duplicated. The one exception to this is the outer IPv4 ID field. It is up to the device drivers to guarantee that the IPv4 ID field is incremented in the case that a given header does not have the DF bit set. + +SCTP accelleration with GSO +=========================== + +SCTP - despite the lack of hardware support - can still take advantage of +GSO to pass one large packet through the network stack, rather than +multiple small packets. + +This requires a different approach to other offloads, as SCTP packets +cannot be just segmented to (P)MTU. Rather, the chunks must be contained in +IP segments, padding respected. So unlike regular GSO, SCTP can't just +generate a big skb, set gso_size to the fragmentation point and deliver it +to IP layer. + +Instead, the SCTP protocol layer builds an skb with the segments correctly +padded and stored as chained skbs, and skb_segment() splits based on those. +To signal this, gso_size is set to the special value GSO_BY_FRAGS. + +Therefore, any code in the core networking stack must be aware of the +possibility that gso_size will be GSO_BY_FRAGS and handle that case +appropriately. (For size checks, the skb_gso_validate_*_len family of +helpers do this automatically.) + +This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits +set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE. -- cgit v1.1 From a1dfa6812b682eef750412dd5a90e7d38d7af068 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 14 Feb 2018 10:46:06 +0200 Subject: tls: retrun the correct IV in getsockopt Current code returns four bytes of salt followed by four bytes of IV. This patch returns all eight bytes of IV. fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Boris Pismenny Signed-off-by: David S. Miller --- net/tls/tls_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b0d5fce..a6c3702 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -308,7 +308,8 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, goto out; } lock_sock(sk); - memcpy(crypto_info_aes_gcm_128->iv, ctx->iv, + memcpy(crypto_info_aes_gcm_128->iv, + ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); release_sock(sk); if (copy_to_user(optval, -- cgit v1.1 From 257082e6ae23e92898440f6bcb2857555bf7957c Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 14 Feb 2018 10:46:07 +0200 Subject: tls: reset the crypto info if copy_from_user fails copy_from_user could copy some partial information, as a result TLS_CRYPTO_INFO_READY(crypto_info) could be true while crypto_info is using uninitialzed data. This patch resets crypto_info when copy_from_user fails. fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Boris Pismenny Signed-off-by: David S. Miller --- net/tls/tls_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a6c3702..c105f86 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -376,7 +376,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); if (rc) { rc = -EFAULT; - goto out; + goto err_crypto_info; } /* check version */ -- cgit v1.1 From c410c1966fe6fcfb23bcac0924aaa6a6e7449829 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 14 Feb 2018 10:46:08 +0200 Subject: tls: getsockopt return record sequence number Return the TLS record sequence number in getsockopt. Signed-off-by: Boris Pismenny Signed-off-by: David S. Miller --- net/tls/tls_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index c105f86..e9b4b53 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -311,6 +311,8 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, memcpy(crypto_info_aes_gcm_128->iv, ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq, + TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); release_sock(sk); if (copy_to_user(optval, crypto_info_aes_gcm_128, -- cgit v1.1 From db93a3632b0f8773a3899e04a3a3e0aa7a26eb46 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 8 Feb 2018 13:53:52 -0800 Subject: netfilter: ipt_CLUSTERIP: fix a refcount bug in clusterip_config_find_get() In clusterip_config_find_get() we hold RCU read lock so it could run concurrently with clusterip_config_entry_put(), as a result, the refcnt could go back to 1 from 0, which leads to a double list_del()... Just replace refcount_inc() with refcount_inc_not_zero(), as for c->refcount. Fixes: d73f33b16883 ("netfilter: CLUSTERIP: RCU conversion") Cc: Eric Dumazet Cc: Pablo Neira Ayuso Cc: Florian Westphal Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1ff72b8..4b02ab3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -154,8 +154,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) #endif if (unlikely(!refcount_inc_not_zero(&c->refcount))) c = NULL; - else if (entry) - refcount_inc(&c->entries); + else if (entry) { + if (unlikely(!refcount_inc_not_zero(&c->entries))) { + clusterip_config_put(c); + c = NULL; + } + } } rcu_read_unlock_bh(); -- cgit v1.1 From 0cc9501f94592125b2012452c57054b8215bcf33 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:51:59 +0100 Subject: netfilter: x_tables: remove pr_info where possible remove several pr_info messages that cannot be triggered with iptables, the check is only to ensure input is sane. iptables(8) already prints error messages in these cases. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ECN.c | 10 ++++------ net/netfilter/xt_CHECKSUM.c | 5 ++--- net/netfilter/xt_DSCP.c | 4 +--- net/netfilter/xt_HL.c | 13 +++---------- net/netfilter/xt_HMARK.c | 10 ++++------ net/netfilter/xt_LED.c | 4 +--- net/netfilter/xt_dscp.c | 4 +--- 7 files changed, 16 insertions(+), 34 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 2707652..39ff167 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -98,14 +98,12 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) const struct ipt_ECN_info *einfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; - if (einfo->operation & IPT_ECN_OP_MASK) { - pr_info("unsupported ECN operation %x\n", einfo->operation); + if (einfo->operation & IPT_ECN_OP_MASK) return -EINVAL; - } - if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { - pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); + + if (einfo->ip_ect & ~IPT_ECN_IP_MASK) return -EINVAL; - } + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { pr_info("cannot use TCP operations on a non-tcp rule\n"); diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index 0f642ef..ea3c570 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -39,10 +39,9 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); return -EINVAL; } - if (!einfo->operation) { - pr_info("no CHECKSUM operation enabled\n"); + if (!einfo->operation) return -EINVAL; - } + return 0; } diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 3f83d38..098ed85 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,10 +66,8 @@ static int dscp_tg_check(const struct xt_tgchk_param *par) { const struct xt_DSCP_info *info = par->targinfo; - if (info->dscp > XT_DSCP_MAX) { - pr_info("dscp %x out of range\n", info->dscp); + if (info->dscp > XT_DSCP_MAX) return -EDOM; - } return 0; } diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 1535e87..4653b07 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -105,10 +105,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par) { const struct ipt_TTL_info *info = par->targinfo; - if (info->mode > IPT_TTL_MAXMODE) { - pr_info("TTL: invalid or unknown mode %u\n", info->mode); + if (info->mode > IPT_TTL_MAXMODE) return -EINVAL; - } if (info->mode != IPT_TTL_SET && info->ttl == 0) return -EINVAL; return 0; @@ -118,15 +116,10 @@ static int hl_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_HL_info *info = par->targinfo; - if (info->mode > IP6T_HL_MAXMODE) { - pr_info("invalid or unknown mode %u\n", info->mode); + if (info->mode > IP6T_HL_MAXMODE) return -EINVAL; - } - if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - pr_info("increment/decrement does not " - "make sense with value 0\n"); + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) return -EINVAL; - } return 0; } diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 60e6dbe..dd08cc1 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -313,10 +313,9 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_hmark_info *info = par->targinfo; - if (!info->hmodulus) { - pr_info("xt_HMARK: hash modulus can't be zero\n"); + if (!info->hmodulus) return -EINVAL; - } + if (info->proto_mask && (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); @@ -324,10 +323,9 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) } if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | - XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) { - pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n"); + XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) return -EINVAL; - } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 1dcad89..ece311c 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -111,10 +111,8 @@ static int led_tg_check(const struct xt_tgchk_param *par) struct xt_led_info_internal *ledinternal; int err; - if (ledinfo->id[0] == '\0') { - pr_info("No 'id' parameter given.\n"); + if (ledinfo->id[0] == '\0') return -EINVAL; - } mutex_lock(&xt_led_mutex); diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 236ac80..a4c2b86 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -46,10 +46,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dscp_info *info = par->matchinfo; - if (info->dscp > XT_DSCP_MAX) { - pr_info("dscp %x out of range\n", info->dscp); + if (info->dscp > XT_DSCP_MAX) return -EDOM; - } return 0; } -- cgit v1.1 From 1b6cd67191e16a66f69c9881d878204c3143f03f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:00 +0100 Subject: netfilter: x_tables: use pr ratelimiting in xt core most messages are converted to info, since they occur in response to wrong usage. Size mismatch however is a real error (xtables ABI bug) that should not occur. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 70 +++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 97e06a0..fa1655a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -434,36 +434,35 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ - pr_err("%s_tables: %s.%u match: invalid size " - "%u (kernel) != (user) %u\n", - xt_prefix[par->family], par->match->name, - par->match->revision, - XT_ALIGN(par->match->matchsize), size); + pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n", + xt_prefix[par->family], par->match->name, + par->match->revision, + XT_ALIGN(par->match->matchsize), size); return -EINVAL; } if (par->match->table != NULL && strcmp(par->match->table, par->table) != 0) { - pr_err("%s_tables: %s match: only valid in %s table, not %s\n", - xt_prefix[par->family], par->match->name, - par->match->table, par->table); + pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n", + xt_prefix[par->family], par->match->name, + par->match->table, par->table); return -EINVAL; } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; - pr_err("%s_tables: %s match: used from hooks %s, but only " - "valid from %s\n", - xt_prefix[par->family], par->match->name, - textify_hooks(used, sizeof(used), par->hook_mask, - par->family), - textify_hooks(allow, sizeof(allow), par->match->hooks, - par->family)); + pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n", + xt_prefix[par->family], par->match->name, + textify_hooks(used, sizeof(used), + par->hook_mask, par->family), + textify_hooks(allow, sizeof(allow), + par->match->hooks, + par->family)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { - pr_err("%s_tables: %s match: only valid for protocol %u\n", - xt_prefix[par->family], par->match->name, - par->match->proto); + pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n", + xt_prefix[par->family], par->match->name, + par->match->proto); return -EINVAL; } if (par->match->checkentry != NULL) { @@ -814,36 +813,35 @@ int xt_check_target(struct xt_tgchk_param *par, int ret; if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s.%u target: invalid size " - "%u (kernel) != (user) %u\n", - xt_prefix[par->family], par->target->name, - par->target->revision, - XT_ALIGN(par->target->targetsize), size); + pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", + xt_prefix[par->family], par->target->name, + par->target->revision, + XT_ALIGN(par->target->targetsize), size); return -EINVAL; } if (par->target->table != NULL && strcmp(par->target->table, par->table) != 0) { - pr_err("%s_tables: %s target: only valid in %s table, not %s\n", - xt_prefix[par->family], par->target->name, - par->target->table, par->table); + pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n", + xt_prefix[par->family], par->target->name, + par->target->table, par->table); return -EINVAL; } if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; - pr_err("%s_tables: %s target: used from hooks %s, but only " - "usable from %s\n", - xt_prefix[par->family], par->target->name, - textify_hooks(used, sizeof(used), par->hook_mask, - par->family), - textify_hooks(allow, sizeof(allow), par->target->hooks, - par->family)); + pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n", + xt_prefix[par->family], par->target->name, + textify_hooks(used, sizeof(used), + par->hook_mask, par->family), + textify_hooks(allow, sizeof(allow), + par->target->hooks, + par->family)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { - pr_err("%s_tables: %s target: only valid for protocol %u\n", - xt_prefix[par->family], par->target->name, - par->target->proto); + pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n", + xt_prefix[par->family], par->target->name, + par->target->proto); return -EINVAL; } if (par->target->checkentry != NULL) { -- cgit v1.1 From 11f7aee2326f37f9d3abba27bb61d92ec09fbfde Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:01 +0100 Subject: netfilter: xt_CT: use pr ratelimiting checkpatch complains about line > 80 but this would require splitting "literal" over two lines which is worse. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 5a152e2..8790190 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -82,15 +82,14 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, proto = xt_ct_find_proto(par); if (!proto) { - pr_info("You must specify a L4 protocol, and not use " - "inversions on it.\n"); + pr_info_ratelimited("You must specify a L4 protocol and not use inversions on it\n"); return -ENOENT; } helper = nf_conntrack_helper_try_module_get(helper_name, par->family, proto); if (helper == NULL) { - pr_info("No such helper \"%s\"\n", helper_name); + pr_info_ratelimited("No such helper \"%s\"\n", helper_name); return -ENOENT; } @@ -124,6 +123,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; + const char *errmsg = NULL; int ret = 0; u8 proto; @@ -131,29 +131,29 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); if (timeout_find_get == NULL) { ret = -ENOENT; - pr_info("Timeout policy base is empty\n"); + errmsg = "Timeout policy base is empty"; goto out; } proto = xt_ct_find_proto(par); if (!proto) { ret = -EINVAL; - pr_info("You must specify a L4 protocol, and not use " - "inversions on it.\n"); + errmsg = "You must specify a L4 protocol and not use inversions on it"; goto out; } timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; - pr_info("No such timeout policy \"%s\"\n", timeout_name); + pr_info_ratelimited("No such timeout policy \"%s\"\n", + timeout_name); goto out; } if (timeout->l3num != par->family) { ret = -EINVAL; - pr_info("Timeout policy `%s' can only be used by L3 protocol " - "number %d\n", timeout_name, timeout->l3num); + pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", + timeout_name, 3, timeout->l3num); goto err_put_timeout; } /* Make sure the timeout policy matches any existing protocol tracker, @@ -162,9 +162,8 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, l4proto = __nf_ct_l4proto_find(par->family, proto); if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; - pr_info("Timeout policy `%s' can only be used by L4 protocol " - "number %d\n", - timeout_name, timeout->l4proto->l4proto); + pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", + timeout_name, 4, timeout->l4proto->l4proto); goto err_put_timeout; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); @@ -180,6 +179,8 @@ err_put_timeout: __xt_ct_tg_timeout_put(timeout); out: rcu_read_unlock(); + if (errmsg) + pr_info_ratelimited("%s\n", errmsg); return ret; #else return -EOPNOTSUPP; -- cgit v1.1 From e016c5e43db51875c2b541b59bd217494d213174 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:02 +0100 Subject: netfilter: xt_NFQUEUE: use pr ratelimiting switch this to info, since these aren't really errors. We only use printk because we cannot report meaningful errors in the xtables framework. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_NFQUEUE.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index a360b99..a9aca80 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -8,6 +8,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -67,13 +69,13 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) init_hashrandom(&jhash_initval); if (info->queues_total == 0) { - pr_err("NFQUEUE: number of total queues is 0\n"); + pr_info_ratelimited("number of total queues is 0\n"); return -EINVAL; } maxid = info->queues_total - 1 + info->queuenum; if (maxid > 0xffff) { - pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", - info->queues_total, maxid); + pr_info_ratelimited("number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); return -ERANGE; } if (par->target->revision == 2 && info->flags > 1) -- cgit v1.1 From c82b31c5f5608f7f069c584ac169f5691a92d3f5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:03 +0100 Subject: netfilter: xt_set: use pr ratelimiting also convert this to info for consistency. These errors are informational message to user, given iptables doesn't have netlink extack equivalent. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_set.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 16b6b11..6f4c521 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -92,12 +92,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_info_ratelimited("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { - pr_warn("Protocol error: set match dimension is over the limit!\n"); + pr_info_ratelimited("set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -143,12 +143,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_info_ratelimited("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: set match dimension is over the limit!\n"); + pr_info_ratelimited("set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -241,8 +241,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -250,8 +250,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -259,7 +259,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -316,8 +316,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -325,8 +325,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -334,7 +334,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -444,8 +444,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -454,8 +454,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -465,7 +465,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->map_set.index != IPSET_INVALID_ID) { if (strncmp(par->table, "mangle", 7)) { - pr_warn("--map-set only usable from mangle table\n"); + pr_info_ratelimited("--map-set only usable from mangle table\n"); return -EINVAL; } if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | @@ -473,14 +473,14 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find map_set index %u as target\n", - info->map_set.index); + pr_info_ratelimited("Cannot find map_set index %u as target\n", + info->map_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -494,7 +494,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) -- cgit v1.1 From 7ecbf1033521194e544477377ff7e837d25f1ef3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:04 +0100 Subject: netfilter: bridge: use pr ratelimiting ebt_among still uses pr_err -- these errors indicate ebtables tool bug, not a usage error. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_among.c | 10 +++++----- net/bridge/netfilter/ebt_limit.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 279527f..ce7152a 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -187,17 +187,17 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - pr_info("wrong size: %d against expected %d, rounded to %zd\n", - em->match_size, expected_length, - EBT_ALIGN(expected_length)); + pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n", + em->match_size, expected_length, + EBT_ALIGN(expected_length)); return -EINVAL; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { - pr_info("dst integrity fail: %x\n", -err); + pr_err_ratelimited("dst integrity fail: %x\n", -err); return -EINVAL; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { - pr_info("src integrity fail: %x\n", -err); + pr_err_ratelimited("src integrity fail: %x\n", -err); return -EINVAL; } return 0; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 61a9f1b..165b9d6 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -72,8 +72,8 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->avg, info->burst); + pr_info_ratelimited("overflow, try lower: %u/%u\n", + info->avg, info->burst); return -EINVAL; } -- cgit v1.1 From cc48baefdfff83e3774811f69eb181b8850bd8af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:05 +0100 Subject: netfilter: x_tables: rate-limit table mismatch warnings Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 4 ++-- net/ipv6/netfilter/ip6t_rpfilter.c | 4 ++-- net/netfilter/xt_CONNSECMARK.c | 4 ++-- net/netfilter/xt_SECMARK.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 37fb955..5d107dd 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -111,8 +111,8 @@ static int rpfilter_check(const struct xt_mtchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { - pr_info("match only valid in the \'raw\' " - "or \'mangle\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index b12e61b..ddf3111 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -109,8 +109,8 @@ static int rpfilter_check(const struct xt_mtchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { - pr_info("match only valid in the \'raw\' " - "or \'mangle\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index da56c06..6f30cd3 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -91,8 +91,8 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - pr_info("target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 9faf5e0..5c5cd78 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -86,8 +86,8 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - pr_info("target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", + par->table); return -EINVAL; } -- cgit v1.1 From c08e5e1ee6d65917af2bb12c2c568d637a682c44 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:06 +0100 Subject: netfilter: x_tables: use pr ratelimiting in matches/targets all of these print simple error message - use single pr_ratelimit call. checkpatch complains about lines > 80 but this would require splitting several "literals" over multiple lines which is worse. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_HMARK.c | 17 +++++++++++------ net/netfilter/xt_addrtype.c | 33 ++++++++++++++++----------------- net/netfilter/xt_policy.c | 23 +++++++++++++---------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index dd08cc1..9c75f41 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -9,6 +9,8 @@ * the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -312,15 +314,15 @@ hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) static int hmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_hmark_info *info = par->targinfo; + const char *errmsg = "proto mask must be zero with L3 mode"; if (!info->hmodulus) return -EINVAL; if (info->proto_mask && - (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { - pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); - return -EINVAL; - } + (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) + goto err; + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) @@ -329,10 +331,13 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { - pr_info("xt_HMARK: spi-set and port-set can't be combined\n"); - return -EINVAL; + errmsg = "spi-set and port-set can't be combined"; + goto err; } return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_target hmark_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 911a7c0..89e281b 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -164,48 +164,47 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("both incoming and outgoing " - "interface limitation cannot be selected\n"); - return -EINVAL; - } + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("output interface limitation " - "not valid in PREROUTING and INPUT\n"); - return -EINVAL; + errmsg = "output interface limitation not valid in PREROUTING and INPUT"; + goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { - pr_info("input interface limitation " - "not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; + errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT"; + goto err; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { - pr_err("ipv6 BLACKHOLE matching not supported\n"); - return -EINVAL; + errmsg = "ipv6 BLACKHOLE matching not supported"; + goto err; } if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { - pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n"); - return -EINVAL; + errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported"; + goto err; } if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { - pr_err("ipv6 does not support BROADCAST matching\n"); - return -EINVAL; + errmsg = "ipv6 does not support BROADCAST matching"; + goto err; } } #endif return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_match addrtype_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 5639fb0..13f8ccf 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -132,26 +132,29 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) static int policy_mt_check(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { - pr_info("neither incoming nor outgoing policy selected\n"); - return -EINVAL; - } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { - pr_info("output policy not valid in PREROUTING and INPUT\n"); - return -EINVAL; + errmsg = "output policy not valid in PREROUTING and INPUT"; + goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { - pr_info("input policy not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; + errmsg = "input policy not valid in POSTROUTING and OUTPUT"; + goto err; } if (info->len > XT_POLICY_MAX_ELEM) { - pr_info("too many policy elements\n"); - return -EINVAL; + errmsg = "too many policy elements"; + goto err; } return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_match policy_mt_reg[] __read_mostly = { -- cgit v1.1 From b26066447bb8599b393b2dd2bbeb68767e09ba07 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:07 +0100 Subject: netfilter: x_tables: use pr ratelimiting in all remaining spots Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 4 ++-- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 4 ++-- net/ipv6/netfilter/ip6t_rpfilter.c | 2 +- net/ipv6/netfilter/ip6t_srh.c | 6 ++++-- net/netfilter/xt_AUDIT.c | 4 ++-- net/netfilter/xt_CHECKSUM.c | 3 ++- net/netfilter/xt_CONNSECMARK.c | 6 +++--- net/netfilter/xt_LED.c | 2 +- net/netfilter/xt_SECMARK.c | 14 ++++++++------ net/netfilter/xt_TCPMSS.c | 10 ++++------ net/netfilter/xt_TPROXY.c | 6 ++---- net/netfilter/xt_bpf.c | 4 +++- net/netfilter/xt_cgroup.c | 8 +++++--- net/netfilter/xt_cluster.c | 8 +++----- net/netfilter/xt_connbytes.c | 4 ++-- net/netfilter/xt_connlabel.c | 7 ++++--- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 4 ++-- net/netfilter/xt_ecn.c | 4 ++-- net/netfilter/xt_hashlimit.c | 24 +++++++++++++----------- net/netfilter/xt_helper.c | 4 ++-- net/netfilter/xt_ipcomp.c | 2 +- net/netfilter/xt_ipvs.c | 3 ++- net/netfilter/xt_l2tp.c | 22 +++++++++++++--------- net/netfilter/xt_limit.c | 4 ++-- net/netfilter/xt_nat.c | 5 +++-- net/netfilter/xt_nfacct.c | 6 ++++-- net/netfilter/xt_physdev.c | 4 +--- net/netfilter/xt_recent.c | 14 ++++++-------- net/netfilter/xt_socket.c | 10 ++++++---- net/netfilter/xt_state.c | 4 ++-- net/netfilter/xt_time.c | 6 +++--- 34 files changed, 116 insertions(+), 104 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 39ff167..aaaf9a8 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -106,7 +106,7 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { - pr_info("cannot use TCP operations on a non-tcp rule\n"); + pr_info_ratelimited("cannot use operation on non-tcp rule\n"); return -EINVAL; } return 0; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 8bd0d7b..e8bed33 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -74,13 +74,13 @@ static int reject_tg_check(const struct xt_tgchk_param *par) const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - pr_info("ECHOREPLY no longer supported.\n"); + pr_info_ratelimited("ECHOREPLY no longer supported.\n"); return -EINVAL; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { - pr_info("TCP_RESET invalid for non-tcp\n"); + pr_info_ratelimited("TCP_RESET invalid for non-tcp\n"); return -EINVAL; } } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 5d107dd..fd01f13 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -105,7 +105,7 @@ static int rpfilter_check(const struct xt_mtchk_param *par) const struct xt_rpfilter_info *info = par->matchinfo; unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { - pr_info("unknown options encountered"); + pr_info_ratelimited("unknown options\n"); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fa51a20..38dea8f 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -85,14 +85,14 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { - pr_info("ECHOREPLY is not supported.\n"); + pr_info_ratelimited("ECHOREPLY is not supported\n"); return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { - pr_info("TCP_RESET illegal for non-tcp\n"); + pr_info_ratelimited("TCP_RESET illegal for non-tcp\n"); return -EINVAL; } } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index ddf3111..94deb69 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -103,7 +103,7 @@ static int rpfilter_check(const struct xt_mtchk_param *par) unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { - pr_info("unknown options encountered"); + pr_info_ratelimited("unknown options\n"); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 9642164..33719d5 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -122,12 +122,14 @@ static int srh_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_srh *srhinfo = par->matchinfo; if (srhinfo->mt_flags & ~IP6T_SRH_MASK) { - pr_err("unknown srh match flags %X\n", srhinfo->mt_flags); + pr_info_ratelimited("unknown srh match flags %X\n", + srhinfo->mt_flags); return -EINVAL; } if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) { - pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags); + pr_info_ratelimited("unknown srh invflags %X\n", + srhinfo->mt_invflags); return -EINVAL; } diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index c502419..f368ee6 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -120,8 +120,8 @@ static int audit_tg_check(const struct xt_tgchk_param *par) const struct xt_audit_info *info = par->targinfo; if (info->type > XT_AUDIT_TYPE_MAX) { - pr_info("Audit type out of range (valid range: 0..%hhu)\n", - XT_AUDIT_TYPE_MAX); + pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n", + XT_AUDIT_TYPE_MAX); return -ERANGE; } diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index ea3c570..9f4151e 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -36,7 +36,8 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) const struct xt_CHECKSUM_info *einfo = par->targinfo; if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { - pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + pr_info_ratelimited("unsupported CHECKSUM operation %x\n", + einfo->operation); return -EINVAL; } if (!einfo->operation) diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 6f30cd3..f3f1caa 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -102,14 +102,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) break; default: - pr_info("invalid mode: %hu\n", info->mode); + pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index ece311c..4472424 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -136,7 +136,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { - pr_err("Trigger name is already in use.\n"); + pr_info_ratelimited("Trigger name is already in use.\n"); goto exit_alloc; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 5c5cd78..4ad5fe2 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -60,18 +60,20 @@ static int checkentry_lsm(struct xt_secmark_target_info *info) &info->secid); if (err) { if (err == -EINVAL) - pr_info("invalid security context \'%s\'\n", info->secctx); + pr_info_ratelimited("invalid security context \'%s\'\n", + info->secctx); return err; } if (!info->secid) { - pr_info("unable to map security context \'%s\'\n", info->secctx); + pr_info_ratelimited("unable to map security context \'%s\'\n", + info->secctx); return -ENOENT; } err = security_secmark_relabel_packet(info->secid); if (err) { - pr_info("unable to obtain relabeling permission\n"); + pr_info_ratelimited("unable to obtain relabeling permission\n"); return err; } @@ -92,8 +94,8 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) } if (mode && mode != info->mode) { - pr_info("mode already set to %hu cannot mix with " - "rules for mode %hu\n", mode, info->mode); + pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n", + mode, info->mode); return -EINVAL; } @@ -101,7 +103,7 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) case SECMARK_MODE_SEL: break; default: - pr_info("invalid mode: %hu\n", info->mode); + pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 99bb8e4..98efb20 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -273,8 +273,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info("path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } if (par->nft_compat) @@ -283,7 +282,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; - pr_info("Only works on TCP SYN packets\n"); + pr_info_ratelimited("Only works on TCP SYN packets\n"); return -EINVAL; } @@ -298,8 +297,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info("path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } if (par->nft_compat) @@ -308,7 +306,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; - pr_info("Only works on TCP SYN packets\n"); + pr_info_ratelimited("Only works on TCP SYN packets\n"); return -EINVAL; } #endif diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 17d7705..8c89323 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -540,8 +540,7 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) !(i->invflags & IP6T_INV_PROTO)) return 0; - pr_info("Can be used only in combination with " - "either -p tcp or -p udp\n"); + pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } #endif @@ -559,8 +558,7 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par) && !(i->invflags & IPT_INV_PROTO)) return 0; - pr_info("Can be used only in combination with " - "either -p tcp or -p udp\n"); + pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 06b090d..a2cf8a62 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -7,6 +7,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -34,7 +36,7 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, program.filter = insns; if (bpf_prog_create(ret, &program)) { - pr_info("bpf: check failed: parse error\n"); + pr_info_ratelimited("check failed: parse error\n"); return -EINVAL; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 891f4e7..7df2dec 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -12,6 +12,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -48,7 +50,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) } if (info->has_path && info->has_classid) { - pr_info("xt_cgroup: both path and classid specified\n"); + pr_info_ratelimited("path and classid specified\n"); return -EINVAL; } @@ -56,8 +58,8 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) if (info->has_path) { cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { - pr_info("xt_cgroup: invalid path, errno=%ld\n", - PTR_ERR(cgrp)); + pr_info_ratelimited("invalid path, errno=%ld\n", + PTR_ERR(cgrp)); return -EINVAL; } info->priv = cgrp; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 57ef175..00686889 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,14 +135,12 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) struct xt_cluster_match_info *info = par->matchinfo; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { - pr_info("you have exceeded the maximum " - "number of cluster nodes (%u > %u)\n", - info->total_nodes, XT_CLUSTER_NODES_MAX); + pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { - pr_info("this node mask cannot be " - "higher than the total number of nodes\n"); + pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } return 0; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index cad0b7b..93cb018 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -112,8 +112,8 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); /* * This filter cannot function correctly unless connection tracking diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 2337287..4fa4efd 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -57,14 +57,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) int ret; if (info->options & ~options) { - pr_err("Unknown options in mask %x\n", info->options); + pr_info_ratelimited("Unknown options in mask %x\n", + info->options); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index ec377cc..809639c 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -79,8 +79,8 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } @@ -109,8 +109,8 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39cf1d0..df80fe7 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -272,8 +272,8 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index 3c831a8..c7ad4af 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c @@ -97,7 +97,7 @@ static int ecn_mt_check4(const struct xt_mtchk_param *par) if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } @@ -139,7 +139,7 @@ static int ecn_mt_check6(const struct xt_mtchk_param *par) if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index ca68474..aa96027 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -523,7 +523,8 @@ static u64 user2rate(u64 user) if (user != 0) { return div64_u64(XT_HASHLIMIT_SCALE_v2, user); } else { - pr_warn("invalid rate from userspace: %llu\n", user); + pr_info_ratelimited("invalid rate from userspace: %llu\n", + user); return 0; } } @@ -865,33 +866,34 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, } if (cfg->mode & ~XT_HASHLIMIT_ALL) { - pr_info("Unknown mode mask %X, kernel too old?\n", - cfg->mode); + pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n", + cfg->mode); return -EINVAL; } /* Check for overflow. */ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) { if (cfg->avg == 0 || cfg->avg > U32_MAX) { - pr_info("hashlimit invalid rate\n"); + pr_info_ratelimited("invalid rate\n"); return -ERANGE; } if (cfg->interval == 0) { - pr_info("hashlimit invalid interval\n"); + pr_info_ratelimited("invalid interval\n"); return -EINVAL; } } else if (cfg->mode & XT_HASHLIMIT_BYTES) { if (user2credits_byte(cfg->avg) == 0) { - pr_info("overflow, rate too high: %llu\n", cfg->avg); + pr_info_ratelimited("overflow, rate too high: %llu\n", + cfg->avg); return -EINVAL; } } else if (cfg->burst == 0 || - user2credits(cfg->avg * cfg->burst, revision) < - user2credits(cfg->avg, revision)) { - pr_info("overflow, try lower: %llu/%llu\n", - cfg->avg, cfg->burst); - return -ERANGE; + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info_ratelimited("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); + return -ERANGE; } mutex_lock(&hashlimit_mutex); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 38a7815..fd077ae 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -61,8 +61,8 @@ static int helper_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } info->name[sizeof(info->name) - 1] = '\0'; diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 7ca64a5..57f1df5 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -72,7 +72,7 @@ static int comp_mt_check(const struct xt_mtchk_param *par) /* Must specify no unknown invflags */ if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { - pr_err("unknown flags %X\n", compinfo->invflags); + pr_info_ratelimited("unknown flags %X\n", compinfo->invflags); return -EINVAL; } return 0; diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 42540d2..1d950a6 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -158,7 +158,8 @@ static int ipvs_mt_check(const struct xt_mtchk_param *par) && par->family != NFPROTO_IPV6 #endif ) { - pr_info("protocol family %u not supported\n", par->family); + pr_info_ratelimited("protocol family %u not supported\n", + par->family); return -EINVAL; } diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c index 8aee572..c43482b 100644 --- a/net/netfilter/xt_l2tp.c +++ b/net/netfilter/xt_l2tp.c @@ -216,7 +216,7 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) /* Check for invalid flags */ if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION | XT_L2TP_TYPE)) { - pr_info("unknown flags: %x\n", info->flags); + pr_info_ratelimited("unknown flags: %x\n", info->flags); return -EINVAL; } @@ -225,7 +225,8 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) (!(info->flags & XT_L2TP_SID)) && ((!(info->flags & XT_L2TP_TYPE)) || (info->type != XT_L2TP_TYPE_CONTROL))) { - pr_info("invalid flags combination: %x\n", info->flags); + pr_info_ratelimited("invalid flags combination: %x\n", + info->flags); return -EINVAL; } @@ -234,19 +235,22 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) */ if (info->flags & XT_L2TP_VERSION) { if ((info->version < 2) || (info->version > 3)) { - pr_info("wrong L2TP version: %u\n", info->version); + pr_info_ratelimited("wrong L2TP version: %u\n", + info->version); return -EINVAL; } if (info->version == 2) { if ((info->flags & XT_L2TP_TID) && (info->tid > 0xffff)) { - pr_info("v2 tid > 0xffff: %u\n", info->tid); + pr_info_ratelimited("v2 tid > 0xffff: %u\n", + info->tid); return -EINVAL; } if ((info->flags & XT_L2TP_SID) && (info->sid > 0xffff)) { - pr_info("v2 sid > 0xffff: %u\n", info->sid); + pr_info_ratelimited("v2 sid > 0xffff: %u\n", + info->sid); return -EINVAL; } } @@ -268,13 +272,13 @@ static int l2tp_mt_check4(const struct xt_mtchk_param *par) if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { - pr_info("missing protocol rule (udp|l2tpip)\n"); + pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { - pr_info("v2 doesn't support IP mode\n"); + pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } @@ -295,13 +299,13 @@ static int l2tp_mt_check6(const struct xt_mtchk_param *par) if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { - pr_info("missing protocol rule (udp|l2tpip)\n"); + pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { - pr_info("v2 doesn't support IP mode\n"); + pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 61403b7..55d18cd 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -106,8 +106,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (r->burst == 0 || user2credits(r->avg * r->burst) < user2credits(r->avg)) { - pr_info("Overflow, try lower: %u/%u\n", - r->avg, r->burst); + pr_info_ratelimited("Overflow, try lower: %u/%u\n", + r->avg, r->burst); return -ERANGE; } diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 0fd14d1..bdb689c 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -19,8 +21,7 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; if (mr->rangesize != 1) { - pr_info("%s: multiple ranges no longer supported\n", - par->target->name); + pr_info_ratelimited("multiple ranges no longer supported\n"); return -EINVAL; } return nf_ct_netns_get(par->net, par->family); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 6f92d25..c8674de 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -6,6 +6,8 @@ * it under the terms of the GNU General Public License version 2 (or any * later at your option) as published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -39,8 +41,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { - pr_info("xt_nfacct: accounting object with name `%s' " - "does not exists\n", info->name); + pr_info_ratelimited("accounting object `%s' does not exists\n", + info->name); return -ENOENT; } info->nfacct = nfacct; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index bb33598..9d6d67b 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,9 +107,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out and --physdev-is-out are only " - "supported in the FORWARD and POSTROUTING chains with " - "bridged traffic.\n"); + pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 245fa35..6d232d1 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -342,8 +342,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, net_get_random_once(&hash_rnd, sizeof(hash_rnd)); if (info->check_set & ~XT_RECENT_VALID_FLAGS) { - pr_info("Unsupported user space flags (%08x)\n", - info->check_set); + pr_info_ratelimited("Unsupported userspace flags (%08x)\n", + info->check_set); return -EINVAL; } if (hweight8(info->check_set & @@ -357,8 +357,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { - pr_info("hitcount (%u) is larger than allowed maximum (%u)\n", - info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); + pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n", + info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } if (info->name[0] == '\0' || @@ -587,7 +587,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, add = true; break; default: - pr_info("Need \"+ip\", \"-ip\" or \"/\"\n"); + pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n"); return -EINVAL; } @@ -601,10 +601,8 @@ recent_mt_proc_write(struct file *file, const char __user *input, succ = in4_pton(c, size, (void *)&addr, '\n', NULL); } - if (!succ) { - pr_info("illegal address written to procfs\n"); + if (!succ) return -EINVAL; - } spin_lock_bh(&recent_lock); e = recent_entry_lookup(t, &addr, family, 0); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 575d215..2ac7f67 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -171,7 +171,8 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par) return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V1); return -EINVAL; } return 0; @@ -187,7 +188,8 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par) return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V2); return -EINVAL; } return 0; @@ -203,8 +205,8 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par) if (err) return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { - pr_info("unknown flags 0x%x\n", - info->flags & ~XT_SOCKET_FLAGS_V3); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V3); return -EINVAL; } return 0; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5fbd791..0b41c0b 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -44,8 +44,8 @@ static int state_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 1b01eec..0160f50 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -235,13 +235,13 @@ static int time_mt_check(const struct xt_mtchk_param *par) if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { - pr_info("invalid argument - start or " - "stop time greater than 23:59:59\n"); + pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n"); return -EDOM; } if (info->flags & ~XT_TIME_ALL_FLAGS) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_TIME_ALL_FLAGS); return -EINVAL; } -- cgit v1.1 From d682026dd3c548a408415cd75882e5d081147f5b Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 12 Feb 2018 21:45:42 +0800 Subject: .gitignore: ignore ASN.1 auto generated files when build kernel with default configure, files: generatenet/ipv4/netfilter/nf_nat_snmp_basic-asn1.c net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h will be automatically generated by ASN.1 compiler, so No need to track them in git, it's better to ignore them. Signed-off-by: Zhu Lingshan Signed-off-by: Pablo Neira Ayuso --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 705e099..1be78fd 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ all.config # Kdevelop4 *.kdev4 + +#Automatically generated by ASN.1 compiler +net/ipv4/netfilter/nf_nat_snmp_basic-asn1.c +net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h -- cgit v1.1 From 10414014bc085aac9f787a5890b33b5605fbcfc4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2018 18:49:39 +0100 Subject: netfilter: x_tables: fix missing timer initialization in xt_LED syzbot reported that xt_LED may try to use the ledinternal->timer without previously initializing it: ------------[ cut here ]------------ kernel BUG at kernel/time/timer.c:958! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 1826 Comm: kworker/1:2 Not tainted 4.15.0+ #306 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:__mod_timer kernel/time/timer.c:958 [inline] RIP: 0010:mod_timer+0x7d6/0x13c0 kernel/time/timer.c:1102 RSP: 0018:ffff8801d24fe9f8 EFLAGS: 00010293 RAX: ffff8801d25246c0 RBX: ffff8801aec6cb50 RCX: ffffffff816052c6 RDX: 0000000000000000 RSI: 00000000fffbd14b RDI: ffff8801aec6cb68 RBP: ffff8801d24fec98 R08: 0000000000000000 R09: 1ffff1003a49fd6c R10: ffff8801d24feb28 R11: 0000000000000005 R12: dffffc0000000000 R13: ffff8801d24fec70 R14: 00000000fffbd14b R15: ffff8801af608f90 FS: 0000000000000000(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000206d6fd0 CR3: 0000000006a22001 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: led_tg+0x1db/0x2e0 net/netfilter/xt_LED.c:75 ip6t_do_table+0xc2a/0x1a30 net/ipv6/netfilter/ip6_tables.c:365 ip6table_raw_hook+0x65/0x80 net/ipv6/netfilter/ip6table_raw.c:42 nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline] nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483 nf_hook.constprop.27+0x3f6/0x830 include/linux/netfilter.h:243 NF_HOOK include/linux/netfilter.h:286 [inline] ndisc_send_skb+0xa51/0x1370 net/ipv6/ndisc.c:491 ndisc_send_ns+0x38a/0x870 net/ipv6/ndisc.c:633 addrconf_dad_work+0xb9e/0x1320 net/ipv6/addrconf.c:4008 process_one_work+0xbbf/0x1af0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x33c/0x400 kernel/kthread.c:238 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:429 Code: 85 2a 0b 00 00 4d 8b 3c 24 4d 85 ff 75 9f 4c 8b bd 60 fd ff ff e8 bb 57 10 00 65 ff 0d 94 9a a1 7e e9 d9 fc ff ff e8 aa 57 10 00 <0f> 0b e8 a3 57 10 00 e9 14 fb ff ff e8 99 57 10 00 4c 89 bd 70 RIP: __mod_timer kernel/time/timer.c:958 [inline] RSP: ffff8801d24fe9f8 RIP: mod_timer+0x7d6/0x13c0 kernel/time/timer.c:1102 RSP: ffff8801d24fe9f8 ---[ end trace f661ab06f5dd8b3d ]--- The ledinternal struct can be shared between several different xt_LED targets, but the related timer is currently initialized only if the first target requires it. Fix it by unconditionally initializing the timer struct. v1 -> v2: call del_timer_sync() unconditionally, too. Fixes: 268cb38e1802 ("netfilter: x_tables: add LED trigger target") Reported-by: syzbot+10c98dc5725c6c8fc7fb@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_LED.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 4472424..1984644 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -140,9 +140,10 @@ static int led_tg_check(const struct xt_tgchk_param *par) goto exit_alloc; } - /* See if we need to set up a timer */ - if (ledinfo->delay > 0) - timer_setup(&ledinternal->timer, led_timeout_callback, 0); + /* Since the letinternal timer can be shared between multiple targets, + * always set it up, even if the current target does not need it + */ + timer_setup(&ledinternal->timer, led_timeout_callback, 0); list_add_tail(&ledinternal->list, &xt_led_triggers); @@ -179,8 +180,7 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par) list_del(&ledinternal->list); - if (ledinfo->delay > 0) - del_timer_sync(&ledinternal->timer); + del_timer_sync(&ledinternal->timer); led_trigger_unregister(&ledinternal->netfilter_led_trigger); -- cgit v1.1 From db57ccf0f2f4624b4c4758379f8165277504fbd7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 14 Feb 2018 17:21:19 +0100 Subject: netfilter: nat: cope with negative port range syzbot reported a division by 0 bug in the netfilter nat code: divide error: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4168 Comm: syzkaller034710 Not tainted 4.16.0-rc1+ #309 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:nf_nat_l4proto_unique_tuple+0x291/0x530 net/netfilter/nf_nat_proto_common.c:88 RSP: 0018:ffff8801b2466778 EFLAGS: 00010246 RAX: 000000000000f153 RBX: ffff8801b2466dd8 RCX: ffff8801b2466c7c RDX: 0000000000000000 RSI: ffff8801b2466c58 RDI: ffff8801db5293ac RBP: ffff8801b24667d8 R08: ffff8801b8ba6dc0 R09: ffffffff88af5900 R10: ffff8801b24666f0 R11: 0000000000000000 R12: 000000002990f153 R13: 0000000000000001 R14: 0000000000000000 R15: ffff8801b2466c7c FS: 00000000017e3880(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000208fdfe4 CR3: 00000001b5340002 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dccp_unique_tuple+0x40/0x50 net/netfilter/nf_nat_proto_dccp.c:30 get_unique_tuple+0xc28/0x1c10 net/netfilter/nf_nat_core.c:362 nf_nat_setup_info+0x1c2/0xe00 net/netfilter/nf_nat_core.c:406 nf_nat_redirect_ipv6+0x306/0x730 net/netfilter/nf_nat_redirect.c:124 redirect_tg6+0x7f/0xb0 net/netfilter/xt_REDIRECT.c:34 ip6t_do_table+0xc2a/0x1a30 net/ipv6/netfilter/ip6_tables.c:365 ip6table_nat_do_chain+0x65/0x80 net/ipv6/netfilter/ip6table_nat.c:41 nf_nat_ipv6_fn+0x594/0xa80 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:302 nf_nat_ipv6_local_fn+0x33/0x5d0 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:407 ip6table_nat_local_fn+0x2c/0x40 net/ipv6/netfilter/ip6table_nat.c:69 nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline] nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483 nf_hook include/linux/netfilter.h:243 [inline] NF_HOOK include/linux/netfilter.h:286 [inline] ip6_xmit+0x10ec/0x2260 net/ipv6/ip6_output.c:277 inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139 dccp_transmit_skb+0x9ac/0x10f0 net/dccp/output.c:142 dccp_connect+0x369/0x670 net/dccp/output.c:564 dccp_v6_connect+0xe17/0x1bf0 net/dccp/ipv6.c:946 __inet_stream_connect+0x2d4/0xf00 net/ipv4/af_inet.c:620 inet_stream_connect+0x58/0xa0 net/ipv4/af_inet.c:684 SYSC_connect+0x213/0x4a0 net/socket.c:1639 SyS_connect+0x24/0x30 net/socket.c:1620 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x441c69 RSP: 002b:00007ffe50cc0be8 EFLAGS: 00000217 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 0000000000441c69 RDX: 000000000000001c RSI: 00000000208fdfe4 RDI: 0000000000000003 RBP: 00000000006cc018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000538 R11: 0000000000000217 R12: 0000000000403590 R13: 0000000000403620 R14: 0000000000000000 R15: 0000000000000000 Code: 48 89 f0 83 e0 07 83 c0 01 38 d0 7c 08 84 d2 0f 85 46 02 00 00 48 8b 45 c8 44 0f b7 20 e8 88 97 04 fd 31 d2 41 0f b7 c4 4c 89 f9 <41> f7 f6 48 c1 e9 03 48 b8 00 00 00 00 00 fc ff df 0f b6 0c 01 RIP: nf_nat_l4proto_unique_tuple+0x291/0x530 net/netfilter/nf_nat_proto_common.c:88 RSP: ffff8801b2466778 The problem is that currently we don't have any check on the configured port range. A port range == -1 triggers the bug, while other negative values may require a very long time to complete the following loop. This commit addresses the issue swapping the two ends on negative ranges. The check is performed in nf_nat_l4proto_unique_tuple() since the nft nat loads the port values from nft registers at runtime. v1 -> v2: use the correct 'Fixes' tag v2 -> v3: update commit message, drop unneeded READ_ONCE() Fixes: 5b1158e909ec ("[NETFILTER]: Add NAT support for nf_conntrack") Reported-by: syzbot+8012e198bd037f4871e5@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index fbce552..7d7466d 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -41,7 +41,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, const struct nf_conn *ct, u16 *rover) { - unsigned int range_size, min, i; + unsigned int range_size, min, max, i; __be16 *portptr; u_int16_t off; @@ -71,7 +71,10 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, } } else { min = ntohs(range->min_proto.all); - range_size = ntohs(range->max_proto.all) - min + 1; + max = ntohs(range->max_proto.all); + if (unlikely(max < min)) + swap(max, min); + range_size = max - min + 1; } if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { -- cgit v1.1 From 52c84d36b7e2f8197a9a6174d6f901a7c7afb850 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 14 Feb 2018 22:42:54 -0800 Subject: tools: bpftool: preserve JSON for batch mode when dumping insns to file Print a "null" JSON object to standard output when bpftool is used to print program instructions to a file, so as to avoid breaking JSON output on batch mode. This null object was added for most commands in a previous commit, but this specific case had been omitted. Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e8e2baa..e549e32 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -774,6 +774,9 @@ static int do_dump(int argc, char **argv) n < 0 ? strerror(errno) : "short write"); goto err_free; } + + if (json_output) + jsonw_null(json_wtr); } else { if (member_len == &info.jited_prog_len) { const char *name = NULL; -- cgit v1.1 From 9be6d411b0c473d31f756993b8b41bb16b0679c1 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 14 Feb 2018 22:42:55 -0800 Subject: tools: bpftool: preserve JSON output on errors on batch file parsing Before this patch, perror() function is used in some cases when bpftool fails to parse its input file in batch mode. This function does not integrate well with the rest of the output when JSON is used, so we replace it by something that is compliant. Most calls to perror() had already been replaced in a previous patch, this one is a leftover. Fixes: d319c8e101c5 ("tools: bpftool: preserve JSON output on errors on batch file parsing") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 3a0396d..185acfa 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -244,7 +244,7 @@ static int do_batch(int argc, char **argv) } if (errno && errno != ENOENT) { - perror("reading batch file failed"); + p_err("reading batch file failed: %s", strerror(errno)); err = -1; } else { p_info("processed %d lines", lines); -- cgit v1.1 From 5ce0bad4ccd04c8a989e94d3c89e4e796ac22e48 Mon Sep 17 00:00:00 2001 From: Daniel Schultz Date: Tue, 13 Feb 2018 10:44:32 +0100 Subject: ARM: dts: rockchip: Remove 1.8 GHz operation point from phycore som Rockchip recommends to run the CPU cores only with operations points of 1.6 GHz or lower. Removed the cpu0 node with too high operation points and use the default values instead. Fixes: 903d31e34628 ("ARM: dts: rockchip: Add support for phyCORE-RK3288 SoM") Cc: stable@vger.kernel.org Signed-off-by: Daniel Schultz Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-phycore-som.dtsi | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/arm/boot/dts/rk3288-phycore-som.dtsi b/arch/arm/boot/dts/rk3288-phycore-som.dtsi index 99cfae8..5eae477 100644 --- a/arch/arm/boot/dts/rk3288-phycore-som.dtsi +++ b/arch/arm/boot/dts/rk3288-phycore-som.dtsi @@ -110,26 +110,6 @@ }; }; -&cpu0 { - cpu0-supply = <&vdd_cpu>; - operating-points = < - /* KHz uV */ - 1800000 1400000 - 1608000 1350000 - 1512000 1300000 - 1416000 1200000 - 1200000 1100000 - 1008000 1050000 - 816000 1000000 - 696000 950000 - 600000 900000 - 408000 900000 - 312000 900000 - 216000 900000 - 126000 900000 - >; -}; - &emmc { status = "okay"; bus-width = <8>; -- cgit v1.1 From c927b080c67e3e97193c81fc1d27f4251bf4e036 Mon Sep 17 00:00:00 2001 From: Kamil Konieczny Date: Wed, 7 Feb 2018 16:52:09 +0100 Subject: crypto: s5p-sss - Fix kernel Oops in AES-ECB mode In AES-ECB mode crypt is done with key only, so any use of IV can cause kernel Oops. Use IV only in AES-CBC and AES-CTR. Signed-off-by: Kamil Konieczny Reported-by: Anand Moon Reviewed-by: Krzysztof Kozlowski Tested-by: Anand Moon Cc: stable@vger.kernel.org # can be applied after commit 8f9702aad138 Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 188f44b..5d64c08 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -1922,15 +1922,21 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) uint32_t aes_control; unsigned long flags; int err; + u8 *iv; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) aes_control |= SSS_AES_MODE_DECRYPT; - if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) + if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CBC) { aes_control |= SSS_AES_CHAIN_MODE_CBC; - else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) + iv = req->info; + } else if ((mode & FLAGS_AES_MODE_MASK) == FLAGS_AES_CTR) { aes_control |= SSS_AES_CHAIN_MODE_CTR; + iv = req->info; + } else { + iv = NULL; /* AES_ECB */ + } if (dev->ctx->keylen == AES_KEYSIZE_192) aes_control |= SSS_AES_KEY_SIZE_192; @@ -1961,7 +1967,7 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) goto outdata_error; SSS_AES_WRITE(dev, AES_CONTROL, aes_control); - s5p_set_aes(dev, dev->ctx->aes_key, req->info, dev->ctx->keylen); + s5p_set_aes(dev, dev->ctx->aes_key, iv, dev->ctx->keylen); s5p_set_dma_indata(dev, dev->sg_src); s5p_set_dma_outdata(dev, dev->sg_dst); -- cgit v1.1 From 8874ae5f15f3feef3b4a415b9aed51edcf449aa1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Jan 2018 09:35:14 +0000 Subject: USB: gadget: udc: Add missing platform_device_put() on error in bdc_pci_probe() Add the missing platform_device_put() before return from bdc_pci_probe() in the platform_device_add_resources() error handling case. Fixes: efed421a94e6 ("usb: gadget: Add UDC driver for Broadcom USB3.0 device controller IP BDC") Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/bdc/bdc_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/bdc/bdc_pci.c b/drivers/usb/gadget/udc/bdc/bdc_pci.c index 1e940f0..6dbc489 100644 --- a/drivers/usb/gadget/udc/bdc/bdc_pci.c +++ b/drivers/usb/gadget/udc/bdc/bdc_pci.c @@ -77,6 +77,7 @@ static int bdc_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (ret) { dev_err(&pci->dev, "couldn't add resources to bdc device\n"); + platform_device_put(bdc); return ret; } -- cgit v1.1 From 98112041bcca164676367e261c8c1073ef70cb51 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 12 Feb 2018 15:30:08 +0200 Subject: usb: dwc3: core: Fix ULPI PHYs and prevent phy_get/ulpi_init during suspend/resume In order for ULPI PHYs to work, dwc3_phy_setup() and dwc3_ulpi_init() must be doene before dwc3_core_get_phy(). commit 541768b08a40 ("usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys") broke this. The other issue is that dwc3_core_get_phy() and dwc3_ulpi_init() should be called only once during the life cycle of the driver. However, as dwc3_core_init() is called during system suspend/resume it will result in multiple calls to dwc3_core_get_phy() and dwc3_ulpi_init() which is wrong. Fix this by moving dwc3_ulpi_init() out of dwc3_phy_setup() into dwc3_core_ulpi_init(). Use a flag 'ulpi_ready' to ensure that dwc3_core_ulpi_init() is called only once from dwc3_core_init(). Use another flag 'phys_ready' to call dwc3_core_get_phy() only once from dwc3_core_init(). Fixes: 541768b08a40 ("usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys") Fixes: f54edb539c11 ("usb: dwc3: core: initialize ULPI before trying to get the PHY") Cc: linux-stable # >= v4.13 Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 47 ++++++++++++++++++++++++++++++++++++----------- drivers/usb/dwc3/core.h | 5 +++++ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 59511f2..f1d838a 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -486,6 +486,22 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc) parms->hwparams8 = dwc3_readl(dwc->regs, DWC3_GHWPARAMS8); } +static int dwc3_core_ulpi_init(struct dwc3 *dwc) +{ + int intf; + int ret = 0; + + intf = DWC3_GHWPARAMS3_HSPHY_IFC(dwc->hwparams.hwparams3); + + if (intf == DWC3_GHWPARAMS3_HSPHY_IFC_ULPI || + (intf == DWC3_GHWPARAMS3_HSPHY_IFC_UTMI_ULPI && + dwc->hsphy_interface && + !strncmp(dwc->hsphy_interface, "ulpi", 4))) + ret = dwc3_ulpi_init(dwc); + + return ret; +} + /** * dwc3_phy_setup - Configure USB PHY Interface of DWC3 Core * @dwc: Pointer to our controller context structure @@ -497,7 +513,6 @@ static void dwc3_cache_hwparams(struct dwc3 *dwc) static int dwc3_phy_setup(struct dwc3 *dwc) { u32 reg; - int ret; reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); @@ -568,9 +583,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc) } /* FALLTHROUGH */ case DWC3_GHWPARAMS3_HSPHY_IFC_ULPI: - ret = dwc3_ulpi_init(dwc); - if (ret) - return ret; /* FALLTHROUGH */ default: break; @@ -727,6 +739,7 @@ static void dwc3_core_setup_global_control(struct dwc3 *dwc) } static int dwc3_core_get_phy(struct dwc3 *dwc); +static int dwc3_core_ulpi_init(struct dwc3 *dwc); /** * dwc3_core_init - Low-level initialization of DWC3 Core @@ -758,17 +771,27 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc->maximum_speed = USB_SPEED_HIGH; } - ret = dwc3_core_get_phy(dwc); + ret = dwc3_phy_setup(dwc); if (ret) goto err0; - ret = dwc3_core_soft_reset(dwc); - if (ret) - goto err0; + if (!dwc->ulpi_ready) { + ret = dwc3_core_ulpi_init(dwc); + if (ret) + goto err0; + dwc->ulpi_ready = true; + } - ret = dwc3_phy_setup(dwc); + if (!dwc->phys_ready) { + ret = dwc3_core_get_phy(dwc); + if (ret) + goto err0a; + dwc->phys_ready = true; + } + + ret = dwc3_core_soft_reset(dwc); if (ret) - goto err0; + goto err0a; dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); @@ -841,6 +864,9 @@ err1: phy_exit(dwc->usb2_generic_phy); phy_exit(dwc->usb3_generic_phy); +err0a: + dwc3_ulpi_exit(dwc); + err0: return ret; } @@ -1235,7 +1261,6 @@ err4: err3: dwc3_free_event_buffers(dwc); - dwc3_ulpi_exit(dwc); err2: pm_runtime_allow(&pdev->dev); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 185b960..860d2bc 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -797,7 +797,9 @@ struct dwc3_scratchpad_array { * @usb3_phy: pointer to USB3 PHY * @usb2_generic_phy: pointer to USB2 PHY * @usb3_generic_phy: pointer to USB3 PHY + * @phys_ready: flag to indicate that PHYs are ready * @ulpi: pointer to ulpi interface + * @ulpi_ready: flag to indicate that ULPI is initialized * @u2sel: parameter from Set SEL request. * @u2pel: parameter from Set SEL request. * @u1sel: parameter from Set SEL request. @@ -895,7 +897,10 @@ struct dwc3 { struct phy *usb2_generic_phy; struct phy *usb3_generic_phy; + bool phys_ready; + struct ulpi *ulpi; + bool ulpi_ready; void __iomem *regs; size_t regs_size; -- cgit v1.1 From d39b6ea4f8c90e9e5f03a06b6a4fd4af11e2f617 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 15 Feb 2018 09:18:55 -0800 Subject: bus: ti-sysc: Fix checking of no-reset-on-init quirk We are currently only checking for the first entry in the table while we should check them all. Usual no-idle-on-init is together with no-reset-on-init, so this has gone unnoticed. Fixes: 566a9b05e1fa ("bus: ti-sysc: Handle module quirks based dts configuration") Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 4d46003..cdaeeea 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -630,7 +630,7 @@ static int sysc_init_dts_quirks(struct sysc *ddata) for (i = 0; i < ARRAY_SIZE(sysc_dts_quirks); i++) { prop = of_get_property(np, sysc_dts_quirks[i].name, &len); if (!prop) - break; + continue; ddata->cfg.quirks |= sysc_dts_quirks[i].mask; } -- cgit v1.1 From bde0716d1f076e4c913c7946bcc858f71243c7a0 Mon Sep 17 00:00:00 2001 From: Joe Lee Date: Mon, 12 Feb 2018 14:24:46 +0200 Subject: xhci: workaround for AMD Promontory disabled ports wakeup For AMD Promontory xHCI host, although you can disable USB ports in BIOS settings, those ports will be enabled anyway after you remove a device on that port and re-plug it in again. It's a known limitation of the chip. As a workaround we can clear the PORT_WAKE_BITS. [commit and code comment rephrasing -Mathias] Signed-off-by: Joe Lee Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 109 ++++++++++++++++++++++++++++++++++++++++++ drivers/usb/host/pci-quirks.h | 5 ++ drivers/usb/host/xhci-hub.c | 7 +++ drivers/usb/host/xhci-pci.c | 11 +++++ drivers/usb/host/xhci.h | 2 +- 5 files changed, 133 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 1615367..67ad4bb 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -66,6 +66,23 @@ #define AX_INDXC 0x30 #define AX_DATAC 0x34 +#define PT_ADDR_INDX 0xE8 +#define PT_READ_INDX 0xE4 +#define PT_SIG_1_ADDR 0xA520 +#define PT_SIG_2_ADDR 0xA521 +#define PT_SIG_3_ADDR 0xA522 +#define PT_SIG_4_ADDR 0xA523 +#define PT_SIG_1_DATA 0x78 +#define PT_SIG_2_DATA 0x56 +#define PT_SIG_3_DATA 0x34 +#define PT_SIG_4_DATA 0x12 +#define PT4_P1_REG 0xB521 +#define PT4_P2_REG 0xB522 +#define PT2_P1_REG 0xD520 +#define PT2_P2_REG 0xD521 +#define PT1_P1_REG 0xD522 +#define PT1_P2_REG 0xD523 + #define NB_PCIE_INDX_ADDR 0xe0 #define NB_PCIE_INDX_DATA 0xe4 #define PCIE_P_CNTL 0x10040 @@ -513,6 +530,98 @@ void usb_amd_dev_put(void) EXPORT_SYMBOL_GPL(usb_amd_dev_put); /* + * Check if port is disabled in BIOS on AMD Promontory host. + * BIOS Disabled ports may wake on connect/disconnect and need + * driver workaround to keep them disabled. + * Returns true if port is marked disabled. + */ +bool usb_amd_pt_check_port(struct device *device, int port) +{ + unsigned char value, port_shift; + struct pci_dev *pdev; + u16 reg; + + pdev = to_pci_dev(device); + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_1_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_1_DATA) + return false; + + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_2_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_2_DATA) + return false; + + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_3_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_3_DATA) + return false; + + pci_write_config_word(pdev, PT_ADDR_INDX, PT_SIG_4_ADDR); + + pci_read_config_byte(pdev, PT_READ_INDX, &value); + if (value != PT_SIG_4_DATA) + return false; + + /* Check disabled port setting, if bit is set port is enabled */ + switch (pdev->device) { + case 0x43b9: + case 0x43ba: + /* + * device is AMD_PROMONTORYA_4(0x43b9) or PROMONTORYA_3(0x43ba) + * PT4_P1_REG bits[7..1] represents USB2.0 ports 6 to 0 + * PT4_P2_REG bits[6..0] represents ports 13 to 7 + */ + if (port > 6) { + reg = PT4_P2_REG; + port_shift = port - 7; + } else { + reg = PT4_P1_REG; + port_shift = port + 1; + } + break; + case 0x43bb: + /* + * device is AMD_PROMONTORYA_2(0x43bb) + * PT2_P1_REG bits[7..5] represents USB2.0 ports 2 to 0 + * PT2_P2_REG bits[5..0] represents ports 9 to 3 + */ + if (port > 2) { + reg = PT2_P2_REG; + port_shift = port - 3; + } else { + reg = PT2_P1_REG; + port_shift = port + 5; + } + break; + case 0x43bc: + /* + * device is AMD_PROMONTORYA_1(0x43bc) + * PT1_P1_REG[7..4] represents USB2.0 ports 3 to 0 + * PT1_P2_REG[5..0] represents ports 9 to 4 + */ + if (port > 3) { + reg = PT1_P2_REG; + port_shift = port - 4; + } else { + reg = PT1_P1_REG; + port_shift = port + 4; + } + break; + default: + return false; + } + pci_write_config_word(pdev, PT_ADDR_INDX, reg); + pci_read_config_byte(pdev, PT_READ_INDX, &value); + + return !(value & BIT(port_shift)); +} +EXPORT_SYMBOL_GPL(usb_amd_pt_check_port); + +/* * Make sure the controller is completely inactive, unable to * generate interrupts or do DMA. */ diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index b68dcb5..4ca0d9b 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -17,6 +17,7 @@ void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); void sb800_prefetch(struct device *dev, int on); bool usb_xhci_needs_pci_reset(struct pci_dev *pdev); +bool usb_amd_pt_check_port(struct device *device, int port); #else struct pci_dev; static inline void usb_amd_quirk_pll_disable(void) {} @@ -25,6 +26,10 @@ static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {} static inline void usb_amd_dev_put(void) {} static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {} static inline void sb800_prefetch(struct device *dev, int on) {} +static inline bool usb_amd_pt_check_port(struct device *device, int port) +{ + return false; +} #endif /* CONFIG_USB_PCI */ #endif /* __LINUX_USB_PCI_QUIRKS_H */ diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 46d5e08..1df0c36 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1522,6 +1522,13 @@ int xhci_bus_suspend(struct usb_hcd *hcd) t2 |= PORT_WKOC_E | PORT_WKCONN_E; t2 &= ~PORT_WKDISC_E; } + + if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) && + (hcd->speed < HCD_USB3)) { + if (usb_amd_pt_check_port(hcd->self.controller, + port_index)) + t2 &= ~PORT_WAKE_BITS; + } } else t2 &= ~PORT_WAKE_BITS; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6c79037..5262fa5 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -42,6 +42,10 @@ #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 +#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 +#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba +#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb +#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 static const char hcd_name[] = "xhci_hcd"; @@ -125,6 +129,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if ((pdev->vendor == PCI_VENDOR_ID_AMD) && + ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) || + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) || + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) || + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1))) + xhci->quirks |= XHCI_U2_DISABLE_WAKE; + if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 96099a2..e4d7d3d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1822,7 +1822,7 @@ struct xhci_hcd { /* For controller with a broken Port Disable implementation */ #define XHCI_BROKEN_PORT_PED (1 << 25) #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) -/* Reserved. It was XHCI_U2_DISABLE_WAKE */ +#define XHCI_U2_DISABLE_WAKE (1 << 27) #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) #define XHCI_HW_LPM_DISABLE (1 << 29) -- cgit v1.1 From 1208d8a84fdcae6b395c57911cdf907450d30e70 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 12 Feb 2018 14:24:47 +0200 Subject: xhci: Don't print a warning when setting link state for disabled ports When disabling a USB3 port the hub driver will set the port link state to U3 to prevent "ejected" or "safely removed" devices that are still physically connected from immediately re-enumerating. If the device was really unplugged, then error messages were printed as the hub tries to set the U3 link state for a port that is no longer enabled. xhci-hcd ee000000.usb: Cannot set link state. usb usb8-port1: cannot disable (err = -32) Don't print error message in xhci-hub if hub tries to set port link state for a disabled port. Return -ENODEV instead which also silences hub driver. Signed-off-by: Mathias Nyman Tested-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 1df0c36..72ebbc9 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1224,17 +1224,17 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, temp = readl(port_array[wIndex]); break; } - - /* Software should not attempt to set - * port link state above '3' (U3) and the port - * must be enabled. - */ - if ((temp & PORT_PE) == 0 || - (link_state > USB_SS_PORT_LS_U3)) { - xhci_warn(xhci, "Cannot set link state.\n"); + /* Port must be enabled */ + if (!(temp & PORT_PE)) { + retval = -ENODEV; + break; + } + /* Can't set port link state above '3' (U3) */ + if (link_state > USB_SS_PORT_LS_U3) { + xhci_warn(xhci, "Cannot set port %d link state %d\n", + wIndex, link_state); goto error; } - if (link_state == USB_SS_PORT_LS_U3) { slot_id = xhci_find_slot_id_by_port(hcd, xhci, wIndex + 1); -- cgit v1.1 From fa2dfd0ec22e0069c84dfae162972cbbc7c75488 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:48 +0200 Subject: xhci: Fix NULL pointer in xhci debugfs Commit dde634057da7 ("xhci: Fix use-after-free in xhci debugfs") causes a null pointer dereference while fixing xhci-debugfs usage of ring pointers that were freed during hibernate. The fix passed addresses to ring pointers instead, but forgot to do this change for the xhci_ring_trb_show function. The address of the ring pointer passed to xhci-debugfs was of a temporary ring pointer "new_ring" instead of the actual ring "ring" pointer. The temporary new_ring pointer will be set to NULL later causing the NULL pointer dereference. This issue was seen when reading xhci related files in debugfs: cat /sys/kernel/debug/usb/xhci/*/devices/*/ep*/trbs [ 184.604861] BUG: unable to handle kernel NULL pointer dereference at (null) [ 184.613776] IP: xhci_ring_trb_show+0x3a/0x890 [ 184.618733] PGD 264193067 P4D 264193067 PUD 263238067 PMD 0 [ 184.625184] Oops: 0000 [#1] SMP [ 184.726410] RIP: 0010:xhci_ring_trb_show+0x3a/0x890 [ 184.731944] RSP: 0018:ffffba8243c0fd90 EFLAGS: 00010246 [ 184.737880] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000295d6 [ 184.746020] RDX: 00000000000295d5 RSI: 0000000000000001 RDI: ffff971a6418d400 [ 184.754121] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 184.762222] R10: ffff971a64c98a80 R11: ffff971a62a00e40 R12: ffff971a62a85500 [ 184.770325] R13: 0000000000020000 R14: ffff971a6418d400 R15: ffff971a6418d400 [ 184.778448] FS: 00007fe725a79700(0000) GS:ffff971a6ec00000(0000) knlGS:0000000000000000 [ 184.787644] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 184.794168] CR2: 0000000000000000 CR3: 000000025f365005 CR4: 00000000003606f0 [ 184.802318] Call Trace: [ 184.805094] ? seq_read+0x281/0x3b0 [ 184.809068] seq_read+0xeb/0x3b0 [ 184.812735] full_proxy_read+0x4d/0x70 [ 184.817007] __vfs_read+0x23/0x120 [ 184.820870] vfs_read+0x91/0x130 [ 184.824538] SyS_read+0x42/0x90 [ 184.828106] entry_SYSCALL_64_fastpath+0x1a/0x7d Fixes: dde634057da7 ("xhci: Fix use-after-free in xhci debugfs") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index e26e685..5851052 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -211,7 +211,7 @@ static void xhci_ring_dump_segment(struct seq_file *s, static int xhci_ring_trb_show(struct seq_file *s, void *unused) { int i; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; struct xhci_segment *seg = ring->first_seg; for (i = 0; i < ring->num_segs; i++) { @@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci, snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index); epriv->root = xhci_debugfs_create_ring_dir(xhci, - &dev->eps[ep_index].new_ring, + &dev->eps[ep_index].ring, epriv->name, spriv->root); spriv->eps[ep_index] = epriv; -- cgit v1.1 From d91676717261578f429d3577dbe9154b26e8abf7 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:49 +0200 Subject: xhci: Fix xhci debugfs devices node disappearance after hibernation During system resume from hibernation, xhci host is reset, all the nodes in devices folder are removed in xhci_mem_cleanup function. Later nodes in /sys/kernel/debug/usb/xhci/* are created again in function xhci_run, but the nodes already exist, so the nodes still keep the old ones, finally device nodes in xhci debugfs folder /sys/kernel/debug/usb/xhci/*/devices/* are disappeared. This fix removed xhci debugfs nodes before the nodes are re-created, so all the nodes in xhci debugfs can be re-created successfully. Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1eeb339..b01bd64 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1014,6 +1014,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "cleaning up memory\n"); xhci_mem_cleanup(xhci); + xhci_debugfs_exit(xhci); xhci_dbg(xhci, "xhci_stop completed - status = %x\n", readl(&xhci->op_regs->status)); -- cgit v1.1 From 8c5a93ebf7ac56d47f879b3c7c2f8c83b40c2cdb Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:50 +0200 Subject: xhci: xhci debugfs device nodes weren't removed after device plugged out There is a bug after plugged out USB device, the device and its ep00 nodes are still kept, we need to remove the nodes in xhci_free_dev when USB device is plugged out. Fixes: 052f71e25a7e ("xhci: Fix xhci debugfs NULL pointer dereference in resume from hibernate") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index b01bd64..4adb6da 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3545,12 +3545,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING; del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } - + xhci_debugfs_remove_slot(xhci, udev->slot_id); ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) { - xhci_debugfs_remove_slot(xhci, udev->slot_id); + if (ret) xhci_free_virt_device(xhci, udev->slot_id); - } } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) -- cgit v1.1 From 11cd764dc9a030991880ad4d51db93918afa5822 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Mon, 12 Feb 2018 14:24:51 +0200 Subject: xhci: fix xhci debugfs errors in xhci_stop In function xhci_stop, xhci_debugfs_exit called before xhci_mem_cleanup. xhci_debugfs_exit removed the xhci debugfs root nodes, xhci_mem_cleanup called function xhci_free_virt_devices_depth_first which in turn called function xhci_debugfs_remove_slot. Function xhci_debugfs_remove_slot removed the nodes for devices, the nodes folders are sub folder of xhci debugfs. It is unreasonable to remove xhci debugfs root folder before xhci debugfs sub folder. Function xhci_mem_cleanup should be called before function xhci_debugfs_exit. Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Cc: # v4.15 Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 4adb6da..25d4b748 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -646,8 +646,6 @@ static void xhci_stop(struct usb_hcd *hcd) return; } - xhci_debugfs_exit(xhci); - xhci_dbc_exit(xhci); spin_lock_irq(&xhci->lock); @@ -680,6 +678,7 @@ static void xhci_stop(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "cleaning up memory"); xhci_mem_cleanup(xhci); + xhci_debugfs_exit(xhci); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "xhci_stop completed - status = %x", readl(&xhci->op_regs->status)); -- cgit v1.1 From 71a0483d56e784b1e11f38f10d7e22d265dbe244 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 1 Feb 2018 10:32:32 +0100 Subject: USB: serial: option: Add support for Quectel EP06 The Quectel EP06 is a Cat. 6 LTE modem, and the interface mapping is as follows: 0: Diag 1: NMEA 2: AT 3: Modem Interface 4 is QMI and interface 5 is ADB, so they are blacklisted. This patch should also be considered for -stable. The QMI-patch for this modem is already in the -stable-queue. v1->v2: * Updated commit prefix (thanks Johan Hovold) * Updated commit message slightly. Signed-off-by: Kristian Evensen Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5db8ed5..2d8d915 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -241,6 +241,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_BG96 0x0296 +#define QUECTEL_PRODUCT_EP06 0x0306 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -689,6 +690,10 @@ static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { .reserved = BIT(1) | BIT(4), }; +static const struct option_blacklist_info quectel_ep06_blacklist = { + .reserved = BIT(4) | BIT(5), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1203,6 +1208,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06), + .driver_info = (kernel_ulong_t)&quectel_ep06_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), -- cgit v1.1 From b2685bdacdaab065c172b97b55ab46c6be77a037 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 2 Feb 2018 13:51:39 +0800 Subject: ohci-hcd: Fix race condition caused by ohci_urb_enqueue() and io_watchdog_func() Running io_watchdog_func() while ohci_urb_enqueue() is running can cause a race condition where ohci->prev_frame_no is corrupted and the watchdog can mis-detect following error: ohci-platform 664a0800.usb: frame counter not updating; disabled ohci-platform 664a0800.usb: HC died; cleaning up Specifically, following scenario causes a race condition: 1. ohci_urb_enqueue() calls spin_lock_irqsave(&ohci->lock, flags) and enters the critical section 2. ohci_urb_enqueue() calls timer_pending(&ohci->io_watchdog) and it returns false 3. ohci_urb_enqueue() sets ohci->prev_frame_no to a frame number read by ohci_frame_no(ohci) 4. ohci_urb_enqueue() schedules io_watchdog_func() with mod_timer() 5. ohci_urb_enqueue() calls spin_unlock_irqrestore(&ohci->lock, flags) and exits the critical section 6. Later, ohci_urb_enqueue() is called 7. ohci_urb_enqueue() calls spin_lock_irqsave(&ohci->lock, flags) and enters the critical section 8. The timer scheduled on step 4 expires and io_watchdog_func() runs 9. io_watchdog_func() calls spin_lock_irqsave(&ohci->lock, flags) and waits on it because ohci_urb_enqueue() is already in the critical section on step 7 10. ohci_urb_enqueue() calls timer_pending(&ohci->io_watchdog) and it returns false 11. ohci_urb_enqueue() sets ohci->prev_frame_no to new frame number read by ohci_frame_no(ohci) because the frame number proceeded between step 3 and 6 12. ohci_urb_enqueue() schedules io_watchdog_func() with mod_timer() 13. ohci_urb_enqueue() calls spin_unlock_irqrestore(&ohci->lock, flags) and exits the critical section, then wake up io_watchdog_func() which is waiting on step 9 14. io_watchdog_func() enters the critical section 15. io_watchdog_func() calls ohci_frame_no(ohci) and set frame_no variable to the frame number 16. io_watchdog_func() compares frame_no and ohci->prev_frame_no On step 16, because this calling of io_watchdog_func() is scheduled on step 4, the frame number set in ohci->prev_frame_no is expected to the number set on step 3. However, ohci->prev_frame_no is overwritten on step 11. Because step 16 is executed soon after step 11, the frame number might not proceed, so ohci->prev_frame_no must equals to frame_no. To address above scenario, this patch introduces a special sentinel value IO_WATCHDOG_OFF and set this value to ohci->prev_frame_no when the watchdog is not pending or running. When ohci_urb_enqueue() schedules the watchdog (step 4 and 12 above), it compares ohci->prev_frame_no to IO_WATCHDOG_OFF so that ohci->prev_frame_no is not overwritten while io_watchdog_func() is running. Signed-off-by: Shigeru Yoshida Signed-off-by: Haiqing Bai Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 10 +++++++--- drivers/usb/host/ohci-hub.c | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index ee96763..84f88fa 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -74,6 +74,7 @@ static const char hcd_name [] = "ohci_hcd"; #define STATECHANGE_DELAY msecs_to_jiffies(300) #define IO_WATCHDOG_DELAY msecs_to_jiffies(275) +#define IO_WATCHDOG_OFF 0xffffff00 #include "ohci.h" #include "pci-quirks.h" @@ -231,7 +232,7 @@ static int ohci_urb_enqueue ( } /* Start up the I/O watchdog timer, if it's not running */ - if (!timer_pending(&ohci->io_watchdog) && + if (ohci->prev_frame_no == IO_WATCHDOG_OFF && list_empty(&ohci->eds_in_use) && !(ohci->flags & OHCI_QUIRK_QEMU)) { ohci->prev_frame_no = ohci_frame_no(ohci); @@ -501,6 +502,7 @@ static int ohci_init (struct ohci_hcd *ohci) return 0; timer_setup(&ohci->io_watchdog, io_watchdog_func, 0); + ohci->prev_frame_no = IO_WATCHDOG_OFF; ohci->hcca = dma_alloc_coherent (hcd->self.controller, sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL); @@ -730,7 +732,7 @@ static void io_watchdog_func(struct timer_list *t) u32 head; struct ed *ed; struct td *td, *td_start, *td_next; - unsigned frame_no; + unsigned frame_no, prev_frame_no = IO_WATCHDOG_OFF; unsigned long flags; spin_lock_irqsave(&ohci->lock, flags); @@ -835,7 +837,7 @@ static void io_watchdog_func(struct timer_list *t) } } if (!list_empty(&ohci->eds_in_use)) { - ohci->prev_frame_no = frame_no; + prev_frame_no = frame_no; ohci->prev_wdh_cnt = ohci->wdh_cnt; ohci->prev_donehead = ohci_readl(ohci, &ohci->regs->donehead); @@ -845,6 +847,7 @@ static void io_watchdog_func(struct timer_list *t) } done: + ohci->prev_frame_no = prev_frame_no; spin_unlock_irqrestore(&ohci->lock, flags); } @@ -973,6 +976,7 @@ static void ohci_stop (struct usb_hcd *hcd) if (quirk_nec(ohci)) flush_work(&ohci->nec_work); del_timer_sync(&ohci->io_watchdog); + ohci->prev_frame_no = IO_WATCHDOG_OFF; ohci_writel (ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); ohci_usb_reset(ohci); diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index fb7aaa3..634f3c7 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -311,8 +311,10 @@ static int ohci_bus_suspend (struct usb_hcd *hcd) rc = ohci_rh_suspend (ohci, 0); spin_unlock_irq (&ohci->lock); - if (rc == 0) + if (rc == 0) { del_timer_sync(&ohci->io_watchdog); + ohci->prev_frame_no = IO_WATCHDOG_OFF; + } return rc; } -- cgit v1.1 From 009f41aed4b3e11e6dc1e3c07377a10c20f1a5ed Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 26 Jan 2018 11:56:50 -0700 Subject: usbip: keep usbip_device sockfd state in sync with tcp_socket Keep usbip_device sockfd state in sync with tcp_socket. When tcp_socket is reset to null, reset sockfd to -1 to keep it in sync. Signed-off-by: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 3 +++ drivers/usb/usbip/vhci_hcd.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 49e5524..dd8ef36 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -73,6 +73,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a goto err; sdev->ud.tcp_socket = socket; + sdev->ud.sockfd = sockfd; spin_unlock_irq(&sdev->ud.lock); @@ -172,6 +173,7 @@ static void stub_shutdown_connection(struct usbip_device *ud) if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); ud->tcp_socket = NULL; + ud->sockfd = -1; } /* 3. free used data */ @@ -266,6 +268,7 @@ static struct stub_device *stub_device_alloc(struct usb_device *udev) sdev->ud.status = SDEV_ST_AVAILABLE; spin_lock_init(&sdev->ud.lock); sdev->ud.tcp_socket = NULL; + sdev->ud.sockfd = -1; INIT_LIST_HEAD(&sdev->priv_init); INIT_LIST_HEAD(&sdev->priv_tx); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index c3e1008..20e3d46 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -984,6 +984,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) if (vdev->ud.tcp_socket) { sockfd_put(vdev->ud.tcp_socket); vdev->ud.tcp_socket = NULL; + vdev->ud.sockfd = -1; } pr_info("release socket\n"); @@ -1030,6 +1031,7 @@ static void vhci_device_reset(struct usbip_device *ud) if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); ud->tcp_socket = NULL; + ud->sockfd = -1; } ud->status = VDEV_ST_NULL; -- cgit v1.1 From 02a10f061a3f8bca1b37332672f50a107198adbe Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 1 Feb 2018 12:26:43 +0800 Subject: usb: host: ehci: use correct device pointer for dma ops commit a8c06e407ef9 ("usb: separate out sysdev pointer from usb_bus") converted to use hcd->self.sysdev for DMA operations instead of hcd->self.controller, but forgot to do it for hcd test mode. Replace the correct one in this commit. Fixes: a8c06e407ef9 ("usb: separate out sysdev pointer from usb_bus") Signed-off-by: Peter Chen Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index facafdf..d7641cb 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -774,12 +774,12 @@ static struct urb *request_single_step_set_feature_urb( atomic_inc(&urb->use_count); atomic_inc(&urb->dev->urbnum); urb->setup_dma = dma_map_single( - hcd->self.controller, + hcd->self.sysdev, urb->setup_packet, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); urb->transfer_dma = dma_map_single( - hcd->self.controller, + hcd->self.sysdev, urb->transfer_buffer, urb->transfer_buffer_length, DMA_FROM_DEVICE); -- cgit v1.1 From d6efa938ac366fe8cb92d6157f74d43cc35f1c67 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 5 Feb 2018 17:12:35 +0900 Subject: usb: renesas_usbhs: missed the "running" flag in usb_dmac with rx path This fixes an issue that a gadget driver (usb_f_fs) is possible to stop rx transactions after the usb-dmac is used because the following functions missed to set/check the "running" flag. - usbhsf_dma_prepare_pop_with_usb_dmac() - usbhsf_dma_pop_done_with_usb_dmac() So, if next transaction uses pio, the usbhsf_prepare_pop() can not start the transaction because the "running" flag is 0. Fixes: 8355b2b3082d ("usb: renesas_usbhs: fix the behavior of some usbhs_pkt_handle") Cc: # v3.19+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/fifo.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 5925d11..39fa2fc 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -982,6 +982,10 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt, if ((uintptr_t)pkt->buf & (USBHS_USB_DMAC_XFER_SIZE - 1)) goto usbhsf_pio_prepare_pop; + /* return at this time if the pipe is running */ + if (usbhs_pipe_is_running(pipe)) + return 0; + usbhs_pipe_config_change_bfre(pipe, 1); ret = usbhsf_fifo_select(pipe, fifo, 0); @@ -1172,6 +1176,7 @@ static int usbhsf_dma_pop_done_with_usb_dmac(struct usbhs_pkt *pkt, usbhsf_fifo_clear(pipe, fifo); pkt->actual = usbhs_dma_calc_received_size(pkt, chan, rcv_len); + usbhs_pipe_running(pipe, 0); usbhsf_dma_stop(pipe, fifo); usbhsf_dma_unmap(pkt); usbhsf_fifo_unselect(pipe, pipe->fifo); -- cgit v1.1 From 52ad2bd8918158266fc88a05f95429b56b6a33c5 Mon Sep 17 00:00:00 2001 From: Karsten Koop Date: Fri, 9 Feb 2018 09:12:06 +0000 Subject: usb: ldusb: add PIDs for new CASSY devices supported by this driver This patch adds support for new CASSY devices to the ldusb driver. The PIDs are also added to the ignore list in hid-quirks. Signed-off-by: Karsten Koop Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 3 +++ drivers/usb/misc/ldusb.c | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 43ddcdf..9454ac1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -645,6 +645,9 @@ #define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 #define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 +#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 +#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 +#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 #define USB_DEVICE_ID_LD_JWM 0x1080 #define USB_DEVICE_ID_LD_DMMP 0x1081 #define USB_DEVICE_ID_LD_UMIP 0x1090 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 5f6035a..e92b77f 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -809,6 +809,9 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) }, { HID_USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) }, diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 63b9e85..236a60f 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -42,6 +42,9 @@ #define USB_DEVICE_ID_LD_MICROCASSYTIME 0x1033 /* USB Product ID of Micro-CASSY Time (reserved) */ #define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE 0x1035 /* USB Product ID of Micro-CASSY Temperature */ #define USB_DEVICE_ID_LD_MICROCASSYPH 0x1038 /* USB Product ID of Micro-CASSY pH */ +#define USB_DEVICE_ID_LD_POWERANALYSERCASSY 0x1040 /* USB Product ID of Power Analyser CASSY */ +#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY 0x1042 /* USB Product ID of Converter Controller CASSY */ +#define USB_DEVICE_ID_LD_MACHINETESTCASSY 0x1043 /* USB Product ID of Machine Test CASSY */ #define USB_DEVICE_ID_LD_JWM 0x1080 /* USB Product ID of Joule and Wattmeter */ #define USB_DEVICE_ID_LD_DMMP 0x1081 /* USB Product ID of Digital Multimeter P (reserved) */ #define USB_DEVICE_ID_LD_UMIP 0x1090 /* USB Product ID of UMI P */ @@ -84,6 +87,9 @@ static const struct usb_device_id ld_usb_table[] = { { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) }, + { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) }, + { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) }, + { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) }, { USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) }, -- cgit v1.1 From 91b119359c1c3033a6621909d3c5dbbdf201d6b4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 5 Feb 2018 11:50:56 +0800 Subject: usb: host: ehci: always enable interrupt for qtd completion at test mode At former code, the SETUP stage does not enable interrupt for qtd completion, it relies on IAA watchdog to complete interrupt, then the transcation would be considered timeout if the flag need_io_watchdog is cleared by platform code. In this commit, we always add enable interrupt for qtd completion, then the qtd completion can be notified by hardware interrupt. Signed-off-by: Peter Chen Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-q.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 8815832..3276304 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -1188,10 +1188,10 @@ static int submit_single_step_set_feature( * 15 secs after the setup */ if (is_setup) { - /* SETUP pid */ + /* SETUP pid, and interrupt after SETUP completion */ qtd_fill(ehci, qtd, urb->setup_dma, sizeof(struct usb_ctrlrequest), - token | (2 /* "setup" */ << 8), 8); + QTD_IOC | token | (2 /* "setup" */ << 8), 8); submit_async(ehci, urb, &qtd_list, GFP_ATOMIC); return 0; /*Return now; we shall come back after 15 seconds*/ @@ -1228,12 +1228,8 @@ static int submit_single_step_set_feature( qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma); list_add_tail(&qtd->qtd_list, head); - /* dont fill any data in such packets */ - qtd_fill(ehci, qtd, 0, 0, token, 0); - - /* by default, enable interrupt on urb completion */ - if (likely(!(urb->transfer_flags & URB_NO_INTERRUPT))) - qtd->hw_token |= cpu_to_hc32(ehci, QTD_IOC); + /* Interrupt after STATUS completion */ + qtd_fill(ehci, qtd, 0, 0, token | QTD_IOC, 0); submit_async(ehci, urb, &qtd_list, GFP_KERNEL); -- cgit v1.1 From 46408ea558df13b110e0866b99624384a33bdeba Mon Sep 17 00:00:00 2001 From: AMAN DEEP Date: Thu, 8 Feb 2018 11:55:01 +0800 Subject: usb: ohci: Proper handling of ed_rm_list to handle race condition between usb_kill_urb() and finish_unlinks() There is a race condition between finish_unlinks->finish_urb() function and usb_kill_urb() in ohci controller case. The finish_urb calls spin_unlock(&ohci->lock) before usb_hcd_giveback_urb() function call, then if during this time, usb_kill_urb is called for another endpoint, then new ed will be added to ed_rm_list at beginning for unlink, and ed_rm_list will point to newly added. When finish_urb() is completed in finish_unlinks() and ed->td_list becomes empty as in below code (in finish_unlinks() function): if (list_empty(&ed->td_list)) { *last = ed->ed_next; ed->ed_next = NULL; } else if (ohci->rh_state == OHCI_RH_RUNNING) { *last = ed->ed_next; ed->ed_next = NULL; ed_schedule(ohci, ed); } The *last = ed->ed_next will make ed_rm_list to point to ed->ed_next and previously added ed by usb_kill_urb will be left unreferenced by ed_rm_list. This causes usb_kill_urb() hang forever waiting for finish_unlink to remove added ed from ed_rm_list. The main reason for hang in this race condtion is addition and removal of ed from ed_rm_list in the beginning during usb_kill_urb and later last* is modified in finish_unlinks(). As suggested by Alan Stern, the solution for proper handling of ohci->ed_rm_list is to remove ed from the ed_rm_list before finishing any URBs. Then at the end, we can add ed back to the list if necessary. This properly handle the updated ohci->ed_rm_list in usb_kill_urb(). Fixes: 977dcfdc6031 ("USB: OHCI: don't lose track of EDs when a controller dies") Acked-by: Alan Stern CC: Signed-off-by: Aman Deep Signed-off-by: Jeffy Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-q.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index b2ec8c3..4ccb85a 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -1019,6 +1019,8 @@ skip_ed: * have modified this list. normally it's just prepending * entries (which we'd ignore), but paranoia won't hurt. */ + *last = ed->ed_next; + ed->ed_next = NULL; modified = 0; /* unlink urbs as requested, but rescan the list after @@ -1077,21 +1079,22 @@ rescan_this: goto rescan_this; /* - * If no TDs are queued, take ED off the ed_rm_list. + * If no TDs are queued, ED is now idle. * Otherwise, if the HC is running, reschedule. - * If not, leave it on the list for further dequeues. + * If the HC isn't running, add ED back to the + * start of the list for later processing. */ if (list_empty(&ed->td_list)) { - *last = ed->ed_next; - ed->ed_next = NULL; ed->state = ED_IDLE; list_del(&ed->in_use_list); } else if (ohci->rh_state == OHCI_RH_RUNNING) { - *last = ed->ed_next; - ed->ed_next = NULL; ed_schedule(ohci, ed); } else { - last = &ed->ed_next; + ed->ed_next = ohci->ed_rm_list; + ohci->ed_rm_list = ed; + /* Don't loop on the same ED */ + if (last == &ohci->ed_rm_list) + last = &ed->ed_next; } if (modified) -- cgit v1.1 From 6ac5a11dc674bc5016ea716e8082fff61f524dc1 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 13 Feb 2018 15:31:05 -0800 Subject: xtensa: fix high memory/reserved memory collision Xtensa memory initialization code frees high memory pages without checking whether they are in the reserved memory regions or not. That results in invalid value of totalram_pages and duplicate page usage by CMA and highmem. It produces a bunch of BUGs at startup looking like this: BUG: Bad page state in process swapper pfn:70800 page:be60c000 count:0 mapcount:-127 mapping: (null) index:0x1 flags: 0x80000000() raw: 80000000 00000000 00000001 ffffff80 00000000 be60c014 be60c014 0000000a page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 1 Comm: swapper Tainted: G B 4.16.0-rc1-00015-g7928b2cbe55b-dirty #23 Stack: bd839d33 00000000 00000018 ba97b64c a106578c bd839d70 be60c000 00000000 a1378054 bd86a000 00000003 ba97b64c a1066166 bd839da0 be60c000 ffe00000 a1066b58 bd839dc0 be504000 00000000 000002f4 bd838000 00000000 0000001e Call Trace: [] bad_page+0xac/0xd0 [] free_pages_check_bad+0x34/0x4c [] __free_pages_ok+0xae/0x14c [] __free_pages+0x30/0x64 [] init_cma_reserved_pageblock+0x35/0x44 [] cma_init_reserved_areas+0xf4/0x148 [] do_one_initcall+0x80/0xf8 [] kernel_init_freeable+0xda/0x13c [] kernel_init+0x9/0xd0 [] ret_from_kernel_thread+0xc/0x18 Only free high memory pages that are not reserved. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/mm/init.c | 70 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index d776ec0..34aead7 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -79,19 +79,75 @@ void __init zones_init(void) free_area_init_node(0, zones_size, ARCH_PFN_OFFSET, NULL); } +#ifdef CONFIG_HIGHMEM +static void __init free_area_high(unsigned long pfn, unsigned long end) +{ + for (; pfn < end; pfn++) + free_highmem_page(pfn_to_page(pfn)); +} + +static void __init free_highpages(void) +{ + unsigned long max_low = max_low_pfn; + struct memblock_region *mem, *res; + + reset_all_zones_managed_pages(); + /* set highmem page free */ + for_each_memblock(memory, mem) { + unsigned long start = memblock_region_memory_base_pfn(mem); + unsigned long end = memblock_region_memory_end_pfn(mem); + + /* Ignore complete lowmem entries */ + if (end <= max_low) + continue; + + if (memblock_is_nomap(mem)) + continue; + + /* Truncate partial highmem entries */ + if (start < max_low) + start = max_low; + + /* Find and exclude any reserved regions */ + for_each_memblock(reserved, res) { + unsigned long res_start, res_end; + + res_start = memblock_region_reserved_base_pfn(res); + res_end = memblock_region_reserved_end_pfn(res); + + if (res_end < start) + continue; + if (res_start < start) + res_start = start; + if (res_start > end) + res_start = end; + if (res_end > end) + res_end = end; + if (res_start != start) + free_area_high(start, res_start); + start = res_end; + if (start == end) + break; + } + + /* And now free anything which remains */ + if (start < end) + free_area_high(start, end); + } +} +#else +static void __init free_highpages(void) +{ +} +#endif + /* * Initialize memory pages. */ void __init mem_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - reset_all_zones_managed_pages(); - for (tmp = max_low_pfn; tmp < max_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); -#endif + free_highpages(); max_mapnr = max_pfn - ARCH_PFN_OFFSET; high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); -- cgit v1.1 From 7a1646d922577b5b48c0d222e03831141664bb59 Mon Sep 17 00:00:00 2001 From: Jack Stocker Date: Thu, 15 Feb 2018 18:24:10 +0000 Subject: Add delay-init quirk for Corsair K70 RGB keyboards Following on from this patch: https://lkml.org/lkml/2017/11/3/516, Corsair K70 RGB keyboards also require the DELAY_INIT quirk to start correctly at boot. Device ids found here: usb 3-3: New USB device found, idVendor=1b1c, idProduct=1b13 usb 3-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 3-3: Product: Corsair K70 RGB Gaming Keyboard Signed-off-by: Jack Stocker Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 4024926..f4a5484 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -226,6 +226,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1a0a, 0x0200), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Corsair K70 RGB */ + { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, -- cgit v1.1 From d60d8b64280c8b36c085eda7821585c1ce911795 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 26 Jan 2018 16:06:51 +0100 Subject: KVM: arm/arm64: Fix arch timers with userspace irqchips When introducing support for irqchip in userspace we needed a way to mask the timer signal to prevent the guest continuously exiting due to a screaming timer. We did this by disabling the corresponding percpu interrupt on the host interrupt controller, because we cannot rely on the host system having a GIC, and therefore cannot make any assumptions about having an active state to hide the timer signal. Unfortunately, when introducing this feature, it became entirely possible that a VCPU which belongs to a VM that has a userspace irqchip can disable the vtimer irq on the host on some physical CPU, and then go away without ever enabling the vtimer irq on that physical CPU again. This means that using irqchips in userspace on a system that also supports running VMs with an in-kernel GIC can prevent forward progress from in-kernel GIC VMs. Later on, when we started taking virtual timer interrupts in the arch timer code, we would also leave this timer state active for userspace irqchip VMs, because we leave it up to a VGIC-enabled guest to deactivate the hardware IRQ using the HW bit in the LR. Both issues are solved by only using the enable/disable trick on systems that do not have a host GIC which supports the active state, because all VMs on such systems must use irqchips in userspace. Systems that have a working GIC with support for an active state use the active state to mask the timer signal for both userspace and in-kernel irqchips. Cc: Alexander Graf Reviewed-by: Marc Zyngier Cc: # v4.12+ Fixes: d9e139778376 ("KVM: arm/arm64: Support arch timers with a userspace gic") Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 116 +++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 52 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70268c0..70f4c30 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -36,6 +36,8 @@ static struct timecounter *timecounter; static unsigned int host_vtimer_irq; static u32 host_vtimer_irq_flags; +static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); + static const struct kvm_irq_level default_ptimer_irq = { .irq = 30, .level = 1, @@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } +static inline bool userspace_irqchip(struct kvm *kvm) +{ + return static_branch_unlikely(&userspace_irqchip_in_use) && + unlikely(!irqchip_in_kernel(kvm)); +} + static void soft_timer_start(struct hrtimer *hrt, u64 ns) { hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), @@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work) cancel_work_sync(work); } -static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu) -{ - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - - /* - * When using a userspace irqchip with the architected timers, we must - * prevent continuously exiting from the guest, and therefore mask the - * physical interrupt by disabling it on the host interrupt controller - * when the virtual level is high, such that the guest can make - * forward progress. Once we detect the output level being - * de-asserted, we unmask the interrupt again so that we exit from the - * guest when the timer fires. - */ - if (vtimer->irq.level) - disable_percpu_irq(host_vtimer_irq); - else - enable_percpu_irq(host_vtimer_irq, 0); -} - static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; @@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) if (kvm_timer_should_fire(vtimer)) kvm_timer_update_irq(vcpu, true, vtimer); - if (static_branch_unlikely(&userspace_irqchip_in_use) && - unlikely(!irqchip_in_kernel(vcpu->kvm))) - kvm_vtimer_update_mask_user(vcpu); + if (userspace_irqchip(vcpu->kvm) && + !static_branch_unlikely(&has_gic_active_state)) + disable_percpu_irq(host_vtimer_irq); return IRQ_HANDLED; } @@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - if (!static_branch_unlikely(&userspace_irqchip_in_use) || - likely(irqchip_in_kernel(vcpu->kvm))) { + if (!userspace_irqchip(vcpu->kvm)) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level, @@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) phys_timer_emulate(vcpu); } -static void __timer_snapshot_state(struct arch_timer_context *timer) -{ - timer->cnt_ctl = read_sysreg_el0(cntv_ctl); - timer->cnt_cval = read_sysreg_el0(cntv_cval); -} - static void vtimer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) if (!vtimer->loaded) goto out; - if (timer->enabled) - __timer_snapshot_state(vtimer); + if (timer->enabled) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); + } /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); @@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); } -static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) +static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active) +{ + int r; + r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active); + WARN_ON(r); +} + +static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; - int ret; - phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); - - ret = irq_set_irqchip_state(host_vtimer_irq, - IRQCHIP_STATE_ACTIVE, - phys_active); - WARN_ON(ret); + if (irqchip_in_kernel(vcpu->kvm)) + phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); + else + phys_active = vtimer->irq.level; + set_vtimer_irq_phys_active(vcpu, phys_active); } -static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu) +static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) { - kvm_vtimer_update_mask_user(vcpu); + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * When using a userspace irqchip with the architected timers and a + * host interrupt controller that doesn't support an active state, we + * must still prevent continuously exiting from the guest, and + * therefore mask the physical interrupt by disabling it on the host + * interrupt controller when the virtual level is high, such that the + * guest can make forward progress. Once we detect the output level + * being de-asserted, we unmask the interrupt again so that we exit + * from the guest when the timer fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) @@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) if (unlikely(!timer->enabled)) return; - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - kvm_timer_vcpu_load_user(vcpu); + if (static_branch_likely(&has_gic_active_state)) + kvm_timer_vcpu_load_gic(vcpu); else - kvm_timer_vcpu_load_vgic(vcpu); + kvm_timer_vcpu_load_nogic(vcpu); set_cntvoff(vtimer->cntvoff); @@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { - __timer_snapshot_state(vtimer); - if (!kvm_timer_should_fire(vtimer)) { - kvm_timer_update_irq(vcpu, false, vtimer); - kvm_vtimer_update_mask_user(vcpu); - } + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + if (static_branch_likely(&has_gic_active_state)) + set_vtimer_irq_phys_active(vcpu, false); + else + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } } void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { - unmask_vtimer_irq_user(vcpu); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + if (unlikely(!timer->enabled)) + return; + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + unmask_vtimer_irq_user(vcpu); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) @@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic) kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); goto out_free_irq; } + + static_branch_enable(&has_gic_active_state); } kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); -- cgit v1.1 From 67870eb1204223598ea6d8a4467b482e9f5875b5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 16:07:34 +0100 Subject: ARM: kvm: fix building with gcc-8 In banked-sr.c, we use a top-level '__asm__(".arch_extension virt")' statement to allow compilation of a multi-CPU kernel for ARMv6 and older ARMv7-A that don't normally support access to the banked registers. This is considered to be a programming error by the gcc developers and will no longer work in gcc-8, where we now get a build error: /tmp/cc4Qy7GR.s:34: Error: Banked registers are not available with this architecture. -- `mrs r3,SP_usr' /tmp/cc4Qy7GR.s:41: Error: Banked registers are not available with this architecture. -- `mrs r3,ELR_hyp' /tmp/cc4Qy7GR.s:55: Error: Banked registers are not available with this architecture. -- `mrs r3,SP_svc' /tmp/cc4Qy7GR.s:62: Error: Banked registers are not available with this architecture. -- `mrs r3,LR_svc' /tmp/cc4Qy7GR.s:69: Error: Banked registers are not available with this architecture. -- `mrs r3,SPSR_svc' /tmp/cc4Qy7GR.s:76: Error: Banked registers are not available with this architecture. -- `mrs r3,SP_abt' Passign the '-march-armv7ve' flag to gcc works, and is ok here, because we know the functions won't ever be called on pre-ARMv7VE machines. Unfortunately, older compiler versions (4.8 and earlier) do not understand that flag, so we still need to keep the asm around. Backporting to stable kernels (4.6+) is needed to allow those to be built with future compilers as well. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84129 Fixes: 33280b4cd1dc ("ARM: KVM: Add banked registers save/restore") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Christoffer Dall --- arch/arm/kvm/hyp/Makefile | 5 +++++ arch/arm/kvm/hyp/banked-sr.c | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 5638ce0..63d6b40 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -7,6 +7,8 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING KVM=../../../../virt/kvm +CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) + obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o @@ -15,7 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vfp.o obj-$(CONFIG_KVM_ARM_HOST) += banked-sr.o +CFLAGS_banked-sr.o += $(CFLAGS_ARMV7VE) + obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o +CFLAGS_switch.o += $(CFLAGS_ARMV7VE) obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o diff --git a/arch/arm/kvm/hyp/banked-sr.c b/arch/arm/kvm/hyp/banked-sr.c index 111bda8..be4b8b0 100644 --- a/arch/arm/kvm/hyp/banked-sr.c +++ b/arch/arm/kvm/hyp/banked-sr.c @@ -20,6 +20,10 @@ #include +/* + * gcc before 4.9 doesn't understand -march=armv7ve, so we have to + * trick the assembler. + */ __asm__(".arch_extension virt"); void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt) -- cgit v1.1 From 14fa91e0fef8e4d6feb8b1fa2a807828e0abe815 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Tue, 13 Feb 2018 12:18:27 +0200 Subject: IB/ipoib: Do not warn if IPoIB debugfs doesn't exist netdev_wait_allrefs() could rebroadcast NETDEV_UNREGISTER event multiple times until all refs are gone, which will result in calling ipoib_delete_debug_files multiple times and printing a warning. Remove the WARN_ONCE since checks of NULL pointers before calling debugfs_remove are not needed. Fixes: 771a52584096 ("IB/IPoIB: ibX: failed to create mcg debug file") Signed-off-by: Alaa Hleihel Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_fs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 11f74cb..ea302b0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -281,8 +281,6 @@ void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n"); - WARN_ONCE(!priv->path_dentry, "null path debug file\n"); debugfs_remove(priv->mcg_dentry); debugfs_remove(priv->path_dentry); priv->mcg_dentry = priv->path_dentry = NULL; -- cgit v1.1 From 415bb699d793f7ad9c67c04a766d1e655fa6b203 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Feb 2018 12:18:28 +0200 Subject: RDMA/restrack: Remove unimplemented XRCD object Resource tracking of XRCD objects is not implemented in current version of restrack and hence can be removed. Fixes: 02d8883f520e ("RDMA/restrack: Add general infrastructure to track RDMA resources") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/restrack.c | 5 ----- include/rdma/restrack.h | 4 ---- 2 files changed, 9 deletions(-) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 857637b..d8dc709 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -63,7 +63,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) { enum rdma_restrack_type type = res->type; struct ib_device *dev; - struct ib_xrcd *xrcd; struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; @@ -81,10 +80,6 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) qp = container_of(res, struct ib_qp, res); dev = qp->device; break; - case RDMA_RESTRACK_XRCD: - xrcd = container_of(res, struct ib_xrcd, res); - dev = xrcd->device; - break; default: WARN_ONCE(true, "Wrong resource tracking type %u\n", type); return NULL; diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index c2d8116..2cdf8dc 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -29,10 +29,6 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_QP, /** - * @RDMA_RESTRACK_XRCD: XRC domain (XRCD) - */ - RDMA_RESTRACK_XRCD, - /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX -- cgit v1.1 From 89d9e8d3f14d807bbd7725f8f6f5eeb7f6f5c42f Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:29 +0200 Subject: IB/uverbs: Always use the attribute size provided by the user This fixes several bugs around the copy_to/from user path: - copy_to used the user provided size of the attribute and could copy data beyond the end of the kernel buffer into userspace. - copy_from didn't know the size of the kernel buffer and could have left kernel memory unexpectedly un-initialized. - copy_from did not use the user length to determine if the attribute data is inlined or not. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 5 +++-- include/rdma/uverbs_ioctl.h | 35 ++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index cab0ac3..c6502c7 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -323,7 +323,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev, cq->res.type = RDMA_RESTRACK_CQ; rdma_restrack_add(&cq->res); - ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe); + ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe, + sizeof(cq->cqe)); if (ret) goto err_cq; @@ -375,7 +376,7 @@ static int uverbs_destroy_cq_handler(struct ib_device *ib_dev, resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp); + return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp)); } static DECLARE_UVERBS_METHOD( diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 6da4407..32cb147 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -351,29 +351,50 @@ static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, - size_t idx, const void *from) + size_t idx, const void *from, size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); u16 flags; + size_t min_size; if (IS_ERR(attr)) return PTR_ERR(attr); + min_size = min_t(size_t, attr->ptr_attr.len, size); + if (copy_to_user(attr->ptr_attr.ptr, from, min_size)) + return -EFAULT; + flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; - return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) && - !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT; + if (put_user(flags, &attr->uattr->flags)) + return -EFAULT; + + return 0; } -static inline int _uverbs_copy_from(void *to, size_t to_size, +static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) +{ + return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); +} + +static inline int _uverbs_copy_from(void *to, const struct uverbs_attr_bundle *attrs_bundle, - size_t idx) + size_t idx, + size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); - if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data)) + /* + * Validation ensures attr->ptr_attr.len >= size. If the caller is + * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with + * the right size. + */ + if (unlikely(size < attr->ptr_attr.len)) + return -EINVAL; + + if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) return -EFAULT; @@ -382,7 +403,7 @@ static inline int _uverbs_copy_from(void *to, size_t to_size, } #define uverbs_copy_from(to, attrs_bundle, idx) \ - _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx) + _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to)) /* ================================================= * Definitions -> Specs infrastructure -- cgit v1.1 From 6c976c30ad1c205bd6e34182c5ba9a1267d752ca Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:30 +0200 Subject: IB/uverbs: Use inline data transfer for UHW_IN The rule for the API is pointers less than 8 bytes are inlined into the .data field of the attribute. Fix the creation of the driver udata struct to follow this rule and point to the .data itself when the size is less than 8 bytes. Otherwise if the UHW struct is less than 8 bytes the driver will get EFAULT during copy_from_user. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index c6502c7..7b0e4d7 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -234,8 +234,11 @@ static void create_udata(struct uverbs_attr_bundle *ctx, uverbs_attr_get(ctx, UVERBS_UHW_OUT); if (!IS_ERR(uhw_in)) { - udata->inbuf = uhw_in->ptr_attr.ptr; udata->inlen = uhw_in->ptr_attr.len; + if (uverbs_attr_ptr_is_inline(uhw_in)) + udata->inbuf = &uhw_in->uattr->data; + else + udata->inbuf = uhw_in->ptr_attr.ptr; } else { udata->inbuf = NULL; udata->inlen = 0; -- cgit v1.1 From 2f36028ce98ef8e9c04809cc20b9dc498cc1a508 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:31 +0200 Subject: IB/uverbs: Use u64_to_user_ptr() not a union The union approach will get the endianness wrong sometimes if the kernel's pointer size is 32 bits resulting in EFAULTs when trying to copy to/from user. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types.c | 4 ++-- include/rdma/uverbs_ioctl.h | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 7b0e4d7..df1360e 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -238,14 +238,14 @@ static void create_udata(struct uverbs_attr_bundle *ctx, if (uverbs_attr_ptr_is_inline(uhw_in)) udata->inbuf = &uhw_in->uattr->data; else - udata->inbuf = uhw_in->ptr_attr.ptr; + udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data); } else { udata->inbuf = NULL; udata->inlen = 0; } if (!IS_ERR(uhw_out)) { - udata->outbuf = uhw_out->ptr_attr.ptr; + udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data); udata->outlen = uhw_out->ptr_attr.len; } else { udata->outbuf = NULL; diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 32cb147..38287d9 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -276,10 +276,7 @@ struct uverbs_object_tree_def { */ struct uverbs_ptr_attr { - union { - u64 data; - void __user *ptr; - }; + u64 data; u16 len; /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ u16 flags; @@ -361,7 +358,7 @@ static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, return PTR_ERR(attr); min_size = min_t(size_t, attr->ptr_attr.len, size); - if (copy_to_user(attr->ptr_attr.ptr, from, min_size)) + if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) return -EFAULT; flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; @@ -396,7 +393,8 @@ static inline int _uverbs_copy_from(void *to, if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); - else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) + else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), + attr->ptr_attr.len)) return -EFAULT; return 0; -- cgit v1.1 From 3d89459e2ef92cc0e5a50dde868780ccda9786c1 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:32 +0200 Subject: IB/uverbs: Fix method merging in uverbs_ioctl_merge Fix a bug in uverbs_ioctl_merge that looked at the object's iterator number instead of the method's iterator number when merging methods. While we're at it, make the uverbs_ioctl_merge code a bit more clear and faster. Fixes: 118620d3686b ('IB/core: Add uverbs merge trees functionality') Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl_merge.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c index 062485f..62e1eb1 100644 --- a/drivers/infiniband/core/uverbs_ioctl_merge.c +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -114,6 +114,7 @@ static size_t get_elements_above_id(const void **iters, short min = SHRT_MAX; const void *elem; int i, j, last_stored = -1; + unsigned int equal_min = 0; for_each_element(elem, i, j, elements, num_elements, num_offset, data_offset) { @@ -136,6 +137,10 @@ static size_t get_elements_above_id(const void **iters, */ iters[last_stored == i ? num_iters - 1 : num_iters++] = elem; last_stored = i; + if (min == GET_ID(id)) + equal_min++; + else + equal_min = 1; min = GET_ID(id); } @@ -146,15 +151,10 @@ static size_t get_elements_above_id(const void **iters, * Therefore, we need to clean the beginning of the array to make sure * all ids of final elements are equal to min. */ - for (i = num_iters - 1; i >= 0 && - GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--) - ; - - num_iters -= i + 1; - memmove(iters, iters + i + 1, sizeof(*iters) * num_iters); + memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min); *min_id = min; - return num_iters; + return equal_min; } #define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ @@ -322,7 +322,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me hash = kzalloc(sizeof(*hash) + ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1), sizeof(long)) + - BITS_TO_LONGS(attr_max_bucket) * sizeof(long), + BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long), GFP_KERNEL); if (!hash) { res = -ENOMEM; @@ -509,7 +509,7 @@ static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_ * first handler which != NULL. This also defines the * set of flags used for this handler. */ - for (i = num_object_defs - 1; + for (i = num_method_defs - 1; i >= 0 && !method_defs[i]->handler; i--) ; hash->methods[min_id++] = method; -- cgit v1.1 From 5d2beb576d32ef2cd047db8914e3602e99a12763 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:33 +0200 Subject: IB/uverbs: Use __aligned_u64 for uapi headers This has no impact on the structure layout since these structs already have their u64s already properly aligned, but it does document that we have this requirement for 32 bit compatibility. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/rdma_user_ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index 03557b5..46de088 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -65,7 +65,7 @@ struct ib_uverbs_attr { __u16 len; /* only for pointers */ __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ __u16 reserved; - __u64 data; /* ptr to command, inline data or idr/fd */ + __aligned_u64 data; /* ptr to command, inline data or idr/fd */ }; struct ib_uverbs_ioctl_hdr { @@ -73,7 +73,7 @@ struct ib_uverbs_ioctl_hdr { __u16 object_id; __u16 method_id; __u16 num_attrs; - __u64 reserved; + __aligned_u64 reserved; struct ib_uverbs_attr attrs[0]; }; -- cgit v1.1 From 9dfb2ff400f6c0a52f63014b5331b64ee7bd5c19 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:34 +0200 Subject: IB/uverbs: Add ioctl support for 32bit processes 32 bit processes running on a 64 bit kernel call compat_ioctl so that implementations can revise any structure layout issues. Point compat_ioctl at our normal ioctl because: - All our structures are designed to be the same on 32 and 64 bit, ie we use __aligned_u64 when required and are careful to manage padding. - Any pointers are stored in u64's and userspace is expected to prepare them properly. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 395a3b0..cd72555 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -942,6 +942,7 @@ static const struct file_operations uverbs_fops = { .llseek = no_llseek, #if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) .unlocked_ioctl = ib_uverbs_ioctl, + .compat_ioctl = ib_uverbs_ioctl, #endif }; @@ -954,6 +955,7 @@ static const struct file_operations uverbs_mmap_fops = { .llseek = no_llseek, #if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) .unlocked_ioctl = ib_uverbs_ioctl, + .compat_ioctl = ib_uverbs_ioctl, #endif }; -- cgit v1.1 From 4d39a959bc1f3d164b5a54147fdeb19f84b1ed58 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 13 Feb 2018 12:18:35 +0200 Subject: IB/uverbs: Fix possible oops with duplicate ioctl attributes If the same attribute is listed twice by the user in the ioctl attribute list then error unwind can cause the kernel to deref garbage. This happens when an object with WRITE access is sent twice. The second parse properly fails but corrupts the state required for the error unwind it triggers. Fixing this by making duplicates in the attribute list invalid. This is not something we need to support. The ioctl interface is currently recommended to be disabled in kConfig. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index d96dc1d..339b851 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -59,6 +59,9 @@ static int uverbs_process_attr(struct ib_device *ibdev, return 0; } + if (test_bit(attr_id, attr_bundle_h->valid_bitmap)) + return -EINVAL; + spec = &attr_spec_bucket->attrs[attr_id]; e = &elements[attr_id]; e->uattr = uattr_ptr; -- cgit v1.1 From d9dc7a3500a58de9bf3861d1a96ffeab42624b4f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:36 +0200 Subject: IB/uverbs: Hold the uobj write lock after allocate This clarifies the design intention that time between allocate and commit has the uobj exclusive to the caller. We already guarantee this by delaying publishing the uobj pointer via idr_insert, fd_install, list_add, etc. Additionally holding the usecnt lock during this period provides extra clarity and more protection against future mistakes. Fixes: 3832125624b7 ("IB/core: Add support for idr types") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 85b5ee4..3fe6035 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -141,7 +141,12 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, */ uobj->context = context; uobj->type = type; - atomic_set(&uobj->usecnt, 0); + /* + * Allocated objects start out as write locked to deny any other + * syscalls from accessing them until they are committed. See + * rdma_alloc_commit_uobject + */ + atomic_set(&uobj->usecnt, -1); kref_init(&uobj->ref); return uobj; @@ -527,6 +532,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) return ret; } + /* matches atomic_set(-1) in alloc_uobj */ + lockdep_check(uobj, true); + atomic_set(&uobj->usecnt, 0); + uobj->type->type_class->alloc_commit(uobj); up_read(&uobj->context->cleanup_rwsem); -- cgit v1.1 From 6623e3e3cd78020016d3fa42555763178e94ab64 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Feb 2018 12:18:37 +0200 Subject: RDMA/uverbs: Protect from races between lookup and destroy of uobjects The race is between lookup_get_idr_uobject and uverbs_idr_remove_uobj -> uverbs_uobject_put. We deliberately do not call sychronize_rcu after the idr_remove in uverbs_idr_remove_uobj for performance reasons, instead we call kfree_rcu() during uverbs_uobject_put. However, this means we can obtain pointers to uobj's that have already been released and must protect against krefing them using kref_get_unless_zero. ================================================================== BUG: KASAN: use-after-free in copy_ah_attr_from_uverbs.isra.2+0x860/0xa00 Read of size 4 at addr ffff88005fda1ac8 by task syz-executor2/441 CPU: 1 PID: 441 Comm: syz-executor2 Not tainted 4.15.0-rc2+ #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0x8d/0xd4 print_address_description+0x73/0x290 kasan_report+0x25c/0x370 ? copy_ah_attr_from_uverbs.isra.2+0x860/0xa00 copy_ah_attr_from_uverbs.isra.2+0x860/0xa00 ? uverbs_try_lock_object+0x68/0xc0 ? modify_qp.isra.7+0xdc4/0x10e0 modify_qp.isra.7+0xdc4/0x10e0 ib_uverbs_modify_qp+0xfe/0x170 ? ib_uverbs_query_qp+0x970/0x970 ? __lock_acquire+0xa11/0x1da0 ib_uverbs_write+0x55a/0xad0 ? ib_uverbs_query_qp+0x970/0x970 ? ib_uverbs_query_qp+0x970/0x970 ? ib_uverbs_open+0x760/0x760 ? futex_wake+0x147/0x410 ? sched_clock_cpu+0x18/0x180 ? check_prev_add+0x1680/0x1680 ? do_futex+0x3b6/0xa30 ? sched_clock_cpu+0x18/0x180 __vfs_write+0xf7/0x5c0 ? ib_uverbs_open+0x760/0x760 ? kernel_read+0x110/0x110 ? lock_acquire+0x370/0x370 ? __fget+0x264/0x3b0 vfs_write+0x18a/0x460 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0x85 RIP: 0033:0x448e29 RSP: 002b:00007f443fee0c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f443fee16bc RCX: 0000000000448e29 RDX: 0000000000000078 RSI: 00000000209f8000 RDI: 0000000000000012 RBP: 000000000070bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000008e98 R14: 00000000006ebf38 R15: 0000000000000000 Allocated by task 1: kmem_cache_alloc_trace+0x16c/0x2f0 mlx5_alloc_cmd_msg+0x12e/0x670 cmd_exec+0x419/0x1810 mlx5_cmd_exec+0x40/0x70 mlx5_core_mad_ifc+0x187/0x220 mlx5_MAD_IFC+0xd7/0x1b0 mlx5_query_mad_ifc_gids+0x1f3/0x650 mlx5_ib_query_gid+0xa4/0xc0 ib_query_gid+0x152/0x1a0 ib_query_port+0x21e/0x290 mlx5_port_immutable+0x30f/0x490 ib_register_device+0x5dd/0x1130 mlx5_ib_add+0x3e7/0x700 mlx5_add_device+0x124/0x510 mlx5_register_interface+0x11f/0x1c0 mlx5_ib_init+0x56/0x61 do_one_initcall+0xa3/0x250 kernel_init_freeable+0x309/0x3b8 kernel_init+0x14/0x180 ret_from_fork+0x24/0x30 Freed by task 1: kfree+0xeb/0x2f0 mlx5_free_cmd_msg+0xcd/0x140 cmd_exec+0xeba/0x1810 mlx5_cmd_exec+0x40/0x70 mlx5_core_mad_ifc+0x187/0x220 mlx5_MAD_IFC+0xd7/0x1b0 mlx5_query_mad_ifc_gids+0x1f3/0x650 mlx5_ib_query_gid+0xa4/0xc0 ib_query_gid+0x152/0x1a0 ib_query_port+0x21e/0x290 mlx5_port_immutable+0x30f/0x490 ib_register_device+0x5dd/0x1130 mlx5_ib_add+0x3e7/0x700 mlx5_add_device+0x124/0x510 mlx5_register_interface+0x11f/0x1c0 mlx5_ib_init+0x56/0x61 do_one_initcall+0xa3/0x250 kernel_init_freeable+0x309/0x3b8 kernel_init+0x14/0x180 ret_from_fork+0x24/0x30 The buggy address belongs to the object at ffff88005fda1ab0 which belongs to the cache kmalloc-32 of size 32 The buggy address is located 24 bytes inside of 32-byte region [ffff88005fda1ab0, ffff88005fda1ad0) The buggy address belongs to the page: page:00000000d5655c19 count:1 mapcount:0 mapping: (null) index:0xffff88005fda1fc0 flags: 0x4000000000000100(slab) raw: 4000000000000100 0000000000000000 ffff88005fda1fc0 0000000180550008 raw: ffffea00017f6780 0000000400000004 ffff88006c803980 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88005fda1980: fc fc fb fb fb fb fc fc fb fb fb fb fc fc fb fb ffff88005fda1a00: fb fb fc fc fb fb fb fb fc fc 00 00 00 00 fc fc ffff88005fda1a80: fb fb fb fb fc fc fb fb fb fb fc fc fb fb fb fb ffff88005fda1b00: fc fc 00 00 00 00 fc fc fb fb fb fb fc fc fb fb ffff88005fda1b80: fb fb fc fc fb fb fb fb fc fc fb fb fb fb fc fc ==================================================================@ Cc: syzkaller Cc: # 4.11 Fixes: 3832125624b7 ("IB/core: Add support for idr types") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 3fe6035..f1f805a 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -201,7 +201,15 @@ static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *t goto free; } - uverbs_uobject_get(uobj); + /* + * The idr_find is guaranteed to return a pointer to something that + * isn't freed yet, or NULL, as the free after idr_remove goes through + * kfree_rcu(). However the object may still have been released and + * kfree() could be called at any time. + */ + if (!kref_get_unless_zero(&uobj->ref)) + uobj = ERR_PTR(-ENOENT); + free: rcu_read_unlock(); return uobj; -- cgit v1.1 From 104f268d439b3c21c83708e52946a4d8d37f3d0f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:38 +0200 Subject: IB/uverbs: Improve lockdep_check This is really being used as an assert that the expected usecnt is being held and implicitly that the usecnt is valid. Rename it to assert_uverbs_usecnt and tighten the checks to only accept valid values of usecnt (eg 0 and < -1 are invalid). The tigher checkes make the assertion cover more cases and is more likely to find bugs via syzkaller/etc. Fixes: 3832125624b7 ("IB/core: Add support for idr types") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index f1f805a..cfd257e 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -412,13 +412,13 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return ret; } -static void lockdep_check(struct ib_uobject *uobj, bool exclusive) +static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) { #ifdef CONFIG_LOCKDEP if (exclusive) - WARN_ON(atomic_read(&uobj->usecnt) > 0); + WARN_ON(atomic_read(&uobj->usecnt) != -1); else - WARN_ON(atomic_read(&uobj->usecnt) == -1); + WARN_ON(atomic_read(&uobj->usecnt) <= 0); #endif } @@ -457,7 +457,7 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } - lockdep_check(uobj, true); + assert_uverbs_usecnt(uobj, true); ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); up_read(&ucontext->cleanup_rwsem); @@ -487,7 +487,7 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); return 0; } - lockdep_check(uobject, true); + assert_uverbs_usecnt(uobject, true); ret = uobject->type->type_class->remove_commit(uobject, RDMA_REMOVE_DESTROY); if (ret) @@ -541,7 +541,7 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj) } /* matches atomic_set(-1) in alloc_uobj */ - lockdep_check(uobj, true); + assert_uverbs_usecnt(uobj, true); atomic_set(&uobj->usecnt, 0); uobj->type->type_class->alloc_commit(uobj); @@ -578,7 +578,7 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) { - lockdep_check(uobj, exclusive); + assert_uverbs_usecnt(uobj, exclusive); uobj->type->type_class->lookup_put(uobj, exclusive); /* * In order to unlock an object, either decrease its usecnt for -- cgit v1.1 From ec6f8401c48a86809237e86878a6fac6b281118f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 13 Feb 2018 12:18:40 +0200 Subject: IB/uverbs: Fix unbalanced unlock on error path for rdma_explicit_destroy If remove_commit fails then the lock is left locked while the uobj still exists. Eventually the kernel will deadlock. lockdep detects this and says: test/4221 is leaving the kernel with locks still held! 1 lock held by test/4221: #0: (&ucontext->cleanup_rwsem){.+.+}, at: [<000000001e5c7523>] rdma_explicit_destroy+0x37/0x120 [ib_uverbs] Fixes: 4da70da23e9b ("IB/core: Explicitly destroy an object while keeping uobject") Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index cfd257e..d8eead5 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -491,12 +491,13 @@ int rdma_explicit_destroy(struct ib_uobject *uobject) ret = uobject->type->type_class->remove_commit(uobject, RDMA_REMOVE_DESTROY); if (ret) - return ret; + goto out; uobject->type = &null_obj_type; +out: up_read(&ucontext->cleanup_rwsem); - return 0; + return ret; } static void alloc_commit_idr_uobject(struct ib_uobject *uobj) -- cgit v1.1 From 3f802b162dbf4a558ff98986449eddc717826209 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 13 Feb 2018 12:18:41 +0200 Subject: RDMA/uverbs: Protect from command mask overflow The command number is not bounds checked against the command mask before it is shifted, resulting in an ubsan hit. This does not cause malfunction since the command number is eventually bounds checked, but we can make this ubsan clean by moving the bounds check to before the mask check. ================================================================================ UBSAN: Undefined behaviour in drivers/infiniband/core/uverbs_main.c:647:21 shift exponent 207 is too large for 64-bit type 'long long unsigned int' CPU: 0 PID: 446 Comm: syz-executor3 Not tainted 4.15.0-rc2+ #61 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ubsan_epilogue+0xe/0x81 __ubsan_handle_shift_out_of_bounds+0x293/0x2f7 ? debug_check_no_locks_freed+0x340/0x340 ? __ubsan_handle_load_invalid_value+0x19b/0x19b ? lock_acquire+0x440/0x440 ? lock_acquire+0x19d/0x440 ? __might_fault+0xf4/0x240 ? ib_uverbs_write+0x68d/0xe20 ib_uverbs_write+0x68d/0xe20 ? __lock_acquire+0xcf7/0x3940 ? uverbs_devnode+0x110/0x110 ? cyc2ns_read_end+0x10/0x10 ? sched_clock_cpu+0x18/0x200 ? sched_clock_cpu+0x18/0x200 __vfs_write+0x10d/0x700 ? uverbs_devnode+0x110/0x110 ? kernel_read+0x170/0x170 ? __fget+0x35b/0x5d0 ? security_file_permission+0x93/0x260 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0x85 RIP: 0033:0x448e29 RSP: 002b:00007f033f567c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007f033f5686bc RCX: 0000000000448e29 RDX: 0000000000000060 RSI: 0000000020001000 RDI: 0000000000000012 RBP: 000000000070bea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000056a0 R14: 00000000006e8740 R15: 0000000000000000 ================================================================================ Cc: syzkaller Cc: # 4.5 Fixes: 2dbd5186a39c ("IB/core: IB/core: Allow legacy verbs through extended interfaces") Reported-by: Noa Osherovich Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index cd72555..b1ca223 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -650,12 +650,21 @@ static int verify_command_mask(struct ib_device *ib_dev, __u32 command) return -1; } +static bool verify_command_idx(u32 command, bool extended) +{ + if (extended) + return command < ARRAY_SIZE(uverbs_ex_cmd_table); + + return command < ARRAY_SIZE(uverbs_cmd_table); +} + static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; + bool extended_command; __u32 command; __u32 flags; int srcu_key; @@ -688,6 +697,15 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + flags = (hdr.command & + IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; + + extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED; + if (!verify_command_idx(command, extended_command)) { + ret = -EINVAL; + goto out; + } + if (verify_command_mask(ib_dev, command)) { ret = -EOPNOTSUPP; goto out; @@ -699,12 +717,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } - flags = (hdr.command & - IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; - if (!flags) { - if (command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[command]) { + if (!uverbs_cmd_table[command]) { ret = -EINVAL; goto out; } @@ -725,8 +739,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, struct ib_udata uhw; size_t written_count = count; - if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || - !uverbs_ex_cmd_table[command]) { + if (!uverbs_ex_cmd_table[command]) { ret = -ENOSYS; goto out; } -- cgit v1.1 From 0cba0efcc7238d47a045a8d7a4079f6a22993546 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:37 +0200 Subject: RDMA/restrack: Increment CQ restrack object before committing Once the uobj is committed it is immediately possible another thread could destroy it, which worst case, can result in a use-after-free of the restrack objects. Cc: syzkaller Fixes: 08f294a1524b ("RDMA/core: Add resource tracking for create and destroy CQs") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 256934d..4e55f83 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1030,14 +1030,14 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); + cq->res.type = RDMA_RESTRACK_CQ; + rdma_restrack_add(&cq->res); + ret = cb(file, obj, &resp, ucore, context); if (ret) goto err_cb; uobj_alloc_commit(&obj->uobject); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_add(&cq->res); - return obj; err_cb: -- cgit v1.1 From 5c2e1c4f926856717f3fd31932e926dc3fe77ebd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:38 +0200 Subject: RDMA/uverbs: Fix bad unlock balance in ib_uverbs_close_xrcd There is no matching lock for this mutex. Git history suggests this is just a missed remnant from an earlier version of the function before this locking was moved into uverbs_free_xrcd. Originally this lock was protecting the xrcd_table_delete() ===================================== WARNING: bad unlock balance detected! 4.15.0+ #87 Not tainted ------------------------------------- syzkaller223405/269 is trying to release lock (&uverbs_dev->xrcd_tree_mutex) at: [<00000000b8703372>] ib_uverbs_close_xrcd+0x195/0x1f0 but there are no more locks to release! other info that might help us debug this: 1 lock held by syzkaller223405/269: #0: (&uverbs_dev->disassociate_srcu){....}, at: [<000000005af3b960>] ib_uverbs_write+0x265/0xef0 stack backtrace: CPU: 0 PID: 269 Comm: syzkaller223405 Not tainted 4.15.0+ #87 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ? ib_uverbs_write+0x265/0xef0 ? console_unlock+0x502/0xbd0 ? ib_uverbs_close_xrcd+0x195/0x1f0 print_unlock_imbalance_bug+0x131/0x160 lock_release+0x59d/0x1100 ? ib_uverbs_close_xrcd+0x195/0x1f0 ? lock_acquire+0x440/0x440 ? lock_acquire+0x440/0x440 __mutex_unlock_slowpath+0x88/0x670 ? wait_for_completion+0x4c0/0x4c0 ? rdma_lookup_get_uobject+0x145/0x2f0 ib_uverbs_close_xrcd+0x195/0x1f0 ? ib_uverbs_open_xrcd+0xdd0/0xdd0 ib_uverbs_write+0x7f9/0xef0 ? cyc2ns_read_end+0x10/0x10 ? ib_uverbs_open_xrcd+0xdd0/0xdd0 ? uverbs_devnode+0x110/0x110 ? cyc2ns_read_end+0x10/0x10 ? cyc2ns_read_end+0x10/0x10 ? sched_clock_cpu+0x18/0x200 __vfs_write+0x10d/0x700 ? uverbs_devnode+0x110/0x110 ? kernel_read+0x170/0x170 ? __fget+0x358/0x5d0 ? security_file_permission+0x93/0x260 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x1e/0x8b RIP: 0033:0x4335c9 Cc: syzkaller Cc: # 4.11 Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4e55f83..1187b75 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -603,10 +603,8 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle, file->ucontext); - if (IS_ERR(uobj)) { - mutex_unlock(&file->device->xrcd_tree_mutex); + if (IS_ERR(uobj)) return PTR_ERR(uobj); - } ret = uobj_remove_commit(uobj); return ret ?: in_len; -- cgit v1.1 From 1ff5325c3ca1843228a86549318bbd3b414b9207 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:39 +0200 Subject: RDMA/uverbs: Fix circular locking dependency Avoid circular locking dependency by calling to uobj_alloc_commit() outside of xrcd_tree_mutex lock. ====================================================== WARNING: possible circular locking dependency detected 4.15.0+ #87 Not tainted ------------------------------------------------------ syzkaller401056/269 is trying to acquire lock: (&uverbs_dev->xrcd_tree_mutex){+.+.}, at: [<000000006c12d2cd>] uverbs_free_xrcd+0xd2/0x360 but task is already holding lock: (&ucontext->uobjects_lock){+.+.}, at: [<00000000da010f09>] uverbs_cleanup_ucontext+0x168/0x730 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&ucontext->uobjects_lock){+.+.}: __mutex_lock+0x111/0x1720 rdma_alloc_commit_uobject+0x22c/0x600 ib_uverbs_open_xrcd+0x61a/0xdd0 ib_uverbs_write+0x7f9/0xef0 __vfs_write+0x10d/0x700 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 entry_SYSCALL_64_fastpath+0x1e/0x8b -> #0 (&uverbs_dev->xrcd_tree_mutex){+.+.}: lock_acquire+0x19d/0x440 __mutex_lock+0x111/0x1720 uverbs_free_xrcd+0xd2/0x360 remove_commit_idr_uobject+0x6d/0x110 uverbs_cleanup_ucontext+0x2f0/0x730 ib_uverbs_cleanup_ucontext.constprop.3+0x52/0x120 ib_uverbs_close+0xf2/0x570 __fput+0x2cd/0x8d0 task_work_run+0xec/0x1d0 do_exit+0x6a1/0x1520 do_group_exit+0xe8/0x380 SyS_exit_group+0x1e/0x20 entry_SYSCALL_64_fastpath+0x1e/0x8b other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ucontext->uobjects_lock); lock(&uverbs_dev->xrcd_tree_mutex); lock(&ucontext->uobjects_lock); lock(&uverbs_dev->xrcd_tree_mutex); *** DEADLOCK *** 3 locks held by syzkaller401056/269: #0: (&file->cleanup_mutex){+.+.}, at: [<00000000c9f0c252>] ib_uverbs_close+0xac/0x570 #1: (&ucontext->cleanup_rwsem){++++}, at: [<00000000b6994d49>] uverbs_cleanup_ucontext+0xf6/0x730 #2: (&ucontext->uobjects_lock){+.+.}, at: [<00000000da010f09>] uverbs_cleanup_ucontext+0x168/0x730 stack backtrace: CPU: 0 PID: 269 Comm: syzkaller401056 Not tainted 4.15.0+ #87 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ? uverbs_cleanup_ucontext+0x168/0x730 ? console_unlock+0x502/0xbd0 print_circular_bug.isra.24+0x35e/0x396 ? print_circular_bug_header+0x12e/0x12e ? find_usage_backwards+0x30/0x30 ? entry_SYSCALL_64_fastpath+0x1e/0x8b validate_chain.isra.28+0x25d1/0x40c0 ? check_usage+0xb70/0xb70 ? graph_lock+0x160/0x160 ? find_usage_backwards+0x30/0x30 ? cyc2ns_read_end+0x10/0x10 ? print_irqtrace_events+0x280/0x280 ? __lock_acquire+0x93d/0x1630 __lock_acquire+0x93d/0x1630 lock_acquire+0x19d/0x440 ? uverbs_free_xrcd+0xd2/0x360 __mutex_lock+0x111/0x1720 ? uverbs_free_xrcd+0xd2/0x360 ? uverbs_free_xrcd+0xd2/0x360 ? __mutex_lock+0x828/0x1720 ? mutex_lock_io_nested+0x1550/0x1550 ? uverbs_cleanup_ucontext+0x168/0x730 ? __lock_acquire+0x9a9/0x1630 ? mutex_lock_io_nested+0x1550/0x1550 ? uverbs_cleanup_ucontext+0xf6/0x730 ? lock_contended+0x11a0/0x11a0 ? uverbs_free_xrcd+0xd2/0x360 uverbs_free_xrcd+0xd2/0x360 remove_commit_idr_uobject+0x6d/0x110 uverbs_cleanup_ucontext+0x2f0/0x730 ? sched_clock_cpu+0x18/0x200 ? uverbs_close_fd+0x1c0/0x1c0 ib_uverbs_cleanup_ucontext.constprop.3+0x52/0x120 ib_uverbs_close+0xf2/0x570 ? ib_uverbs_remove_one+0xb50/0xb50 ? ib_uverbs_remove_one+0xb50/0xb50 __fput+0x2cd/0x8d0 task_work_run+0xec/0x1d0 do_exit+0x6a1/0x1520 ? fsnotify_first_mark+0x220/0x220 ? exit_notify+0x9f0/0x9f0 ? entry_SYSCALL_64_fastpath+0x5/0x8b ? entry_SYSCALL_64_fastpath+0x5/0x8b ? trace_hardirqs_on_thunk+0x1a/0x1c ? time_hardirqs_on+0x27/0x670 ? time_hardirqs_off+0x27/0x490 ? syscall_return_slowpath+0x6c/0x460 ? entry_SYSCALL_64_fastpath+0x5/0x8b do_group_exit+0xe8/0x380 SyS_exit_group+0x1e/0x20 entry_SYSCALL_64_fastpath+0x1e/0x8b RIP: 0033:0x431ce9 Cc: syzkaller Cc: # 4.11 Fixes: fd3c7904db6e ("IB/core: Change idr objects to use the new schema") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1187b75..6941faa 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -562,9 +562,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (f.file) fdput(f); + mutex_unlock(&file->device->xrcd_tree_mutex); + uobj_alloc_commit(&obj->uobject); - mutex_unlock(&file->device->xrcd_tree_mutex); return in_len; err_copy: -- cgit v1.1 From 5d4c05c3ee36f67ddc107ab5ea0898af01a62cc1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 12:35:40 +0200 Subject: RDMA/uverbs: Sanitize user entered port numbers prior to access it ================================================================== BUG: KASAN: use-after-free in copy_ah_attr_from_uverbs+0x6f2/0x8c0 Read of size 4 at addr ffff88006476a198 by task syzkaller697701/265 CPU: 0 PID: 265 Comm: syzkaller697701 Not tainted 4.15.0+ #90 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xde/0x164 ? dma_virt_map_sg+0x22c/0x22c ? show_regs_print_info+0x17/0x17 ? lock_contended+0x11a0/0x11a0 print_address_description+0x83/0x3e0 kasan_report+0x18c/0x4b0 ? copy_ah_attr_from_uverbs+0x6f2/0x8c0 ? copy_ah_attr_from_uverbs+0x6f2/0x8c0 ? lookup_get_idr_uobject+0x120/0x200 ? copy_ah_attr_from_uverbs+0x6f2/0x8c0 copy_ah_attr_from_uverbs+0x6f2/0x8c0 ? modify_qp+0xd0e/0x1350 modify_qp+0xd0e/0x1350 ib_uverbs_modify_qp+0xf9/0x170 ? ib_uverbs_query_qp+0xa70/0xa70 ib_uverbs_write+0x7f9/0xef0 ? attach_entity_load_avg+0x8b0/0x8b0 ? ib_uverbs_query_qp+0xa70/0xa70 ? uverbs_devnode+0x110/0x110 ? cyc2ns_read_end+0x10/0x10 ? print_irqtrace_events+0x280/0x280 ? sched_clock_cpu+0x18/0x200 ? _raw_spin_unlock_irq+0x29/0x40 ? _raw_spin_unlock_irq+0x29/0x40 ? _raw_spin_unlock_irq+0x29/0x40 ? time_hardirqs_on+0x27/0x670 __vfs_write+0x10d/0x700 ? uverbs_devnode+0x110/0x110 ? kernel_read+0x170/0x170 ? _raw_spin_unlock_irq+0x29/0x40 ? finish_task_switch+0x1bd/0x7a0 ? finish_task_switch+0x194/0x7a0 ? prandom_u32_state+0xe/0x180 ? rcu_read_unlock+0x80/0x80 ? security_file_permission+0x93/0x260 vfs_write+0x1b0/0x550 SyS_write+0xc7/0x1a0 ? SyS_read+0x1a0/0x1a0 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x1e/0x8b RIP: 0033:0x433c29 RSP: 002b:00007ffcf2be82a8 EFLAGS: 00000217 Allocated by task 62: kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc+0x141/0x480 dup_fd+0x101/0xcc0 copy_process.part.62+0x166f/0x4390 _do_fork+0x1cb/0xe90 kernel_thread+0x34/0x40 call_usermodehelper_exec_work+0x112/0x260 process_one_work+0x929/0x1aa0 worker_thread+0x5c6/0x12a0 kthread+0x346/0x510 ret_from_fork+0x3a/0x50 Freed by task 259: kasan_slab_free+0x71/0xc0 kmem_cache_free+0xf3/0x4c0 put_files_struct+0x225/0x2c0 exit_files+0x88/0xc0 do_exit+0x67c/0x1520 do_group_exit+0xe8/0x380 SyS_exit_group+0x1e/0x20 entry_SYSCALL_64_fastpath+0x1e/0x8b The buggy address belongs to the object at ffff88006476a000 which belongs to the cache files_cache of size 832 The buggy address is located 408 bytes inside of 832-byte region [ffff88006476a000, ffff88006476a340) The buggy address belongs to the page: page:ffffea000191da80 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 0000000000000000 0000000000000000 0000000100080008 raw: 0000000000000000 0000000100000001 ffff88006bcf7a80 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88006476a080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88006476a100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88006476a180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88006476a200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88006476a280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Cc: syzkaller Cc: # 4.11 Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6941faa..cd9fbd7 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1970,8 +1970,15 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } + if ((cmd->base.attr_mask & IB_QP_AV) && + !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && - !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) { + (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { ret = -EINVAL; goto release_qp; } -- cgit v1.1 From 1f5a6c47aabc4606f91ad2e6ef71a1ff1924101c Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Thu, 15 Feb 2018 12:36:46 -0800 Subject: RDMA/vmw_pvrdma: Fix usage of user response structures in ABI file This ensures that we return the right structures back to userspace. Otherwise, it looks like the reserved fields in the response structures in userspace might have uninitialized data in them. Fixes: 8b10ba783c9d ("RDMA/vmw_pvrdma: Add shared receive queue support") Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Suggested-by: Jason Gunthorpe Reviewed-by: Bryan Tan Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Adit Ranadive Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 4 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 4 +++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index faa9478..f95b976 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -114,6 +114,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_cq *cmd = &req.create_cq; struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp; + struct pvrdma_create_cq_resp cq_resp = {0}; struct pvrdma_create_cq ucmd; BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64); @@ -197,6 +198,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, cq->ibcq.cqe = resp->cqe; cq->cq_handle = resp->cq_handle; + cq_resp.cqn = resp->cq_handle; spin_lock_irqsave(&dev->cq_tbl_lock, flags); dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq; spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); @@ -205,7 +207,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, cq->uar = &(to_vucontext(context)->uar); /* Copy udata back. */ - if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); pvrdma_destroy_cq(&cq->ibcq); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 5acebb1..af23596 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -113,6 +113,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_srq *cmd = &req.create_srq; struct pvrdma_cmd_create_srq_resp *resp = &rsp.create_srq_resp; + struct pvrdma_create_srq_resp srq_resp = {0}; struct pvrdma_create_srq ucmd; unsigned long flags; int ret; @@ -204,12 +205,13 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, } srq->srq_handle = resp->srqn; + srq_resp.srqn = resp->srqn; spin_lock_irqsave(&dev->srq_tbl_lock, flags); dev->srq_tbl[srq->srq_handle % dev->dsr->caps.max_srq] = srq; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); /* Copy udata back. */ - if (ib_copy_to_udata(udata, &srq->srq_handle, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &srq_resp, sizeof(srq_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back udata\n"); pvrdma_destroy_srq(&srq->ibsrq); return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 16b9661..a51463c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -447,6 +447,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_pd *cmd = &req.create_pd; struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp; + struct pvrdma_alloc_pd_resp pd_resp = {0}; int ret; void *ptr; @@ -475,9 +476,10 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev, pd->privileged = !context; pd->pd_handle = resp->pd_handle; pd->pdn = resp->pd_handle; + pd_resp.pdn = resp->pd_handle; if (context) { - if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) { + if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) { dev_warn(&dev->pdev->dev, "failed to copy back protection domain\n"); pvrdma_dealloc_pd(&pd->ibpd); -- cgit v1.1 From 9ff97fa8db94caeab59a3c5401e975df468b4d8e Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Wed, 14 Feb 2018 00:10:52 -0800 Subject: scsi: megaraid_sas: Do not use 32-bit atomic request descriptor for Ventura controllers Problem Statement: Sending I/O through 32 bit descriptors to Ventura series of controller results in IO timeout on certain conditions. This error only occurs on systems with high I/O activity on Ventura series controllers. Changes in this patch will prevent driver from using 32 bit descriptor and use 64 bit Descriptors. Cc: Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 42 ++++++++++------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 073ced0..dc8e850 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -216,36 +216,30 @@ inline void megasas_return_cmd_fusion(struct megasas_instance *instance, /** * megasas_fire_cmd_fusion - Sends command to the FW * @instance: Adapter soft state - * @req_desc: 32bit or 64bit Request descriptor + * @req_desc: 64bit Request descriptor * - * Perform PCI Write. Ventura supports 32 bit Descriptor. - * Prior to Ventura (12G) MR controller supports 64 bit Descriptor. + * Perform PCI Write. */ static void megasas_fire_cmd_fusion(struct megasas_instance *instance, union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc) { - if (instance->adapter_type == VENTURA_SERIES) - writel(le32_to_cpu(req_desc->u.low), - &instance->reg_set->inbound_single_queue_port); - else { #if defined(writeq) && defined(CONFIG_64BIT) - u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) | - le32_to_cpu(req_desc->u.low)); + u64 req_data = (((u64)le32_to_cpu(req_desc->u.high) << 32) | + le32_to_cpu(req_desc->u.low)); - writeq(req_data, &instance->reg_set->inbound_low_queue_port); + writeq(req_data, &instance->reg_set->inbound_low_queue_port); #else - unsigned long flags; - spin_lock_irqsave(&instance->hba_lock, flags); - writel(le32_to_cpu(req_desc->u.low), - &instance->reg_set->inbound_low_queue_port); - writel(le32_to_cpu(req_desc->u.high), - &instance->reg_set->inbound_high_queue_port); - mmiowb(); - spin_unlock_irqrestore(&instance->hba_lock, flags); + unsigned long flags; + spin_lock_irqsave(&instance->hba_lock, flags); + writel(le32_to_cpu(req_desc->u.low), + &instance->reg_set->inbound_low_queue_port); + writel(le32_to_cpu(req_desc->u.high), + &instance->reg_set->inbound_high_queue_port); + mmiowb(); + spin_unlock_irqrestore(&instance->hba_lock, flags); #endif - } } /** @@ -982,7 +976,6 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) const char *sys_info; MFI_CAPABILITIES *drv_ops; u32 scratch_pad_2; - unsigned long flags; ktime_t time; bool cur_fw_64bit_dma_capable; @@ -1121,14 +1114,7 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) break; } - /* For Ventura also IOC INIT required 64 bit Descriptor write. */ - spin_lock_irqsave(&instance->hba_lock, flags); - writel(le32_to_cpu(req_desc.u.low), - &instance->reg_set->inbound_low_queue_port); - writel(le32_to_cpu(req_desc.u.high), - &instance->reg_set->inbound_high_queue_port); - mmiowb(); - spin_unlock_irqrestore(&instance->hba_lock, flags); + megasas_fire_cmd_fusion(instance, &req_desc); wait_and_poll(instance, cmd, MFI_POLL_TIMEOUT_SECS); -- cgit v1.1 From 9c2d63b843a5c8a8d0559cc067b5398aa5ec3ffc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Feb 2018 01:10:29 +0100 Subject: bpf: fix mlock precharge on arraymaps syzkaller recently triggered OOM during percpu map allocation; while there is work in progress by Dennis Zhou to add __GFP_NORETRY semantics for percpu allocator under pressure, there seems also a missing bpf_map_precharge_memlock() check in array map allocation. Given today the actual bpf_map_charge_memlock() happens after the find_and_alloc_map() in syscall path, the bpf_map_precharge_memlock() is there to bail out early before we go and do the map setup work when we find that we hit the limits anyway. Therefore add this for array map as well. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map") Reported-by: syzbot+adb03f3f0bb57ce3acda@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Dennis Zhou Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index b1f6648..a364c40 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -73,11 +73,11 @@ static int array_map_alloc_check(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; - int numa_node = bpf_map_attr_numa_node(attr); + int ret, numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); + u64 cost, array_size, mask64; struct bpf_array *array; - u64 array_size, mask64; elem_size = round_up(attr->value_size, 8); @@ -109,8 +109,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ - if (array_size >= U32_MAX - PAGE_SIZE) + cost = array_size; + if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); + if (percpu) { + cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-ENOMEM); + } + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + ret = bpf_map_precharge_memlock(cost); + if (ret < 0) + return ERR_PTR(ret); /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); @@ -121,20 +132,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); + array->map.pages = cost; array->elem_size = elem_size; - if (!percpu) - goto out; - - array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); - - if (array_size >= U32_MAX - PAGE_SIZE || - bpf_array_alloc_percpu(array)) { + if (percpu && bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } -out: - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; return &array->map; } -- cgit v1.1 From b86b8eb6fecb5a4bac1ed0ca925c4082a61ea6e9 Mon Sep 17 00:00:00 2001 From: Dominik Bozek Date: Thu, 15 Feb 2018 21:27:48 -0800 Subject: usb: cdc_acm: prevent race at write to acm while system resumes ACM driver may accept data to transmit while system is not fully resumed. In this case ACM driver buffers data and prepare URBs on usb anchor list. There is a little chance that two tasks put a char and initiate acm_tty_flush_chars(). In such a case, driver will put one URB twice on usb anchor list. This patch also reset length of data before resue of a buffer. This not only prevent sending rubbish, but also lower risc of race. Without this patch we hit following kernel panic in one of our stabilty/stress tests. [ 46.884442] *list_add double add*: new=ffff9b2ab7289330, prev=ffff9b2ab7289330, next=ffff9b2ab81e28e0. [ 46.884476] Modules linked in: hci_uart btbcm bluetooth rfkill_gpio igb_avb(O) cfg80211 snd_soc_sst_bxt_tdf8532 snd_soc_skl snd_soc_skl_ipc snd_soc_sst_ipc snd_soc_sst_dsp snd_soc_sst_acpi snd_soc_sst_match snd_hda_ext_core snd_hda_core trusty_timer trusty_wall trusty_log trusty_virtio trusty_ipc trusty_mem trusty_irq trusty virtio_ring virtio intel_ipu4_mmu_bxtB0 lib2600_mod_bxtB0 intel_ipu4_isys_mod_bxtB0 lib2600psys_mod_bxtB0 intel_ipu4_psys_mod_bxtB0 intel_ipu4_mod_bxtB0 intel_ipu4_wrapper_bxtB0 intel_ipu4_acpi videobuf2_dma_contig as3638 dw9714 lm3643 crlmodule smiapp smiapp_pll [ 46.884480] CPU: 1 PID: 33 Comm: kworker/u8:1 Tainted: G U W O 4.9.56-quilt-2e5dc0ac-g618ed69ced6e-dirty #4 [ 46.884489] Workqueue: events_unbound flush_to_ldisc [ 46.884494] ffffb98ac012bb08 ffffffffad3e82e5 ffffb98ac012bb58 0000000000000000 [ 46.884497] ffffb98ac012bb48 ffffffffad0a23d1 00000024ad6374dd ffff9b2ab7289330 [ 46.884500] ffff9b2ab81e28e0 ffff9b2ab7289330 0000000000000002 0000000000000000 [ 46.884501] Call Trace: [ 46.884507] [] dump_stack+0x67/0x92 [ 46.884511] [] __warn+0xd1/0xf0 [ 46.884513] [] warn_slowpath_fmt+0x5f/0x80 [ 46.884516] [] __list_add+0xb3/0xc0 [ 46.884521] [] *usb_anchor_urb*+0x4c/0xa0 [ 46.884524] [] *acm_tty_flush_chars*+0x8f/0xb0 [ 46.884527] [] *acm_tty_put_char*+0x41/0x100 [ 46.884530] [] tty_put_char+0x24/0x40 [ 46.884533] [] do_output_char+0xa5/0x200 [ 46.884535] [] __process_echoes+0x148/0x290 [ 46.884538] [] n_tty_receive_buf_common+0x57c/0xb00 [ 46.884541] [] n_tty_receive_buf2+0x14/0x20 [ 46.884543] [] tty_ldisc_receive_buf+0x22/0x50 [ 46.884545] [] flush_to_ldisc+0xc5/0xe0 [ 46.884549] [] process_one_work+0x148/0x440 [ 46.884551] [] worker_thread+0x69/0x4a0 [ 46.884554] [] ? max_active_store+0x80/0x80 [ 46.884556] [] kthread+0x110/0x130 [ 46.884559] [] ? kthread_park+0x60/0x60 [ 46.884563] [] ret_from_fork+0x27/0x40 [ 46.884566] ---[ end trace 3bd599058b8a9eb3 ]--- Signed-off-by: Dominik Bozek Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 06b3b54..7b366a6 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -174,6 +174,7 @@ static int acm_wb_alloc(struct acm *acm) wb = &acm->wb[wbn]; if (!wb->use) { wb->use = 1; + wb->len = 0; return wbn; } wbn = (wbn + 1) % ACM_NW; @@ -805,16 +806,18 @@ static int acm_tty_write(struct tty_struct *tty, static void acm_tty_flush_chars(struct tty_struct *tty) { struct acm *acm = tty->driver_data; - struct acm_wb *cur = acm->putbuffer; + struct acm_wb *cur; int err; unsigned long flags; + spin_lock_irqsave(&acm->write_lock, flags); + + cur = acm->putbuffer; if (!cur) /* nothing to do */ - return; + goto out; acm->putbuffer = NULL; err = usb_autopm_get_interface_async(acm->control); - spin_lock_irqsave(&acm->write_lock, flags); if (err < 0) { cur->use = 0; acm->putbuffer = cur; -- cgit v1.1 From ca9eee95a2decc6f60bed65b5b836a26bff825c1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 14:05:53 +0000 Subject: arm64: dts: rockchip: Fix DWMMC clocks Trying to boot an RK3328 box with an HS200-capable eMMC, I see said eMMC fail to initialise as it can't run its tuning procedure, because the sample clock is missing. Upon closer inspection, whilst the clock is present in the DT, its name is subtly incorrect per the binding, so __of_clk_get_by_name() never finds it. By inspection, the drive clock suffers from a similar problem, so has never worked properly either. Fix up all instances of the incorrect clock names across the 64-bit DTs. Fixes: d717f7352ec6 ("arm64: dts: rockchip: add sdmmc/sdio/emmc nodes for RK3328 SoCs") Fixes: b790c2cab5ca ("arm64: dts: add Rockchip rk3368 core dtsi and board dts for the r88 board") Signed-off-by: Robin Murphy Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 6 +++--- arch/arm64/boot/dts/rockchip/rk3368.dtsi | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index a037ee5..cae3415 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -730,7 +730,7 @@ interrupts = ; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; @@ -741,7 +741,7 @@ interrupts = ; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; @@ -752,7 +752,7 @@ interrupts = ; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index aa4d070..03458ac 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -257,7 +257,7 @@ max-frequency = <150000000>; clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>, <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = ; resets = <&cru SRST_SDIO0>; -- cgit v1.1 From e78c637127ee7683d606737f2e62b5da6fd7b1c3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Feb 2018 14:05:54 +0000 Subject: ARM: dts: rockchip: Fix DWMMC clocks Trying to boot an RK3328 box with an HS200-capable eMMC, I see said eMMC fail to initialise as it can't run its tuning procedure, because the sample clock is missing. Upon closer inspection, whilst the clock is present in the DT, its name is subtly incorrect per the binding, so __of_clk_get_by_name() never finds it. By inspection, the drive clock suffers from a similar problem, so has never worked properly either. This error has propagated across the 32-bit DTs too, so fix those up. Fixes: 187d7967a5ee ("ARM: dts: rockchip: add the sdio/sdmmc node for rk3036") Fixes: faea098e1808 ("ARM: dts: rockchip: add core rk3036 dtsi") Fixes: 9848ebeb952d ("ARM: dts: rockchip: add core rk3228 dtsi") Signed-off-by: Robin Murphy Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3036.dtsi | 4 ++-- arch/arm/boot/dts/rk322x.dtsi | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 3b704cf..a974581 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -280,7 +280,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = ; resets = <&cru SRST_SDIO>; @@ -298,7 +298,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; default-sample-phase = <158>; disable-wp; dmas = <&pdma 12>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 780ec3a..341deaf 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -621,7 +621,7 @@ interrupts = ; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>; @@ -634,7 +634,7 @@ interrupts = ; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; pinctrl-names = "default"; pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>; @@ -649,7 +649,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; bus-width = <8>; default-sample-phase = <158>; fifo-depth = <0x100>; -- cgit v1.1 From f88982679f54f75daa5b8eff3da72508f1e7422f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 30 Jan 2018 23:11:24 -0800 Subject: binder: check for binder_thread allocation failure in binder_poll() If the kzalloc() in binder_get_thread() fails, binder_poll() dereferences the resulting NULL pointer. Fix it by returning POLLERR if the memory allocation failed. This bug was found by syzkaller using fault injection. Reported-by: syzbot Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 15e3d3c..ad5e662 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4391,6 +4391,8 @@ static __poll_t binder_poll(struct file *filp, bool wait_for_proc_work; thread = binder_get_thread(proc); + if (!thread) + return POLLERR; binder_inner_proc_lock(thread->proc); thread->looper |= BINDER_LOOPER_STATE_POLL; -- cgit v1.1 From e46a3b3ba7509cb7fda0e07bc7c63a2cd90f579b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 7 Feb 2018 12:38:47 -0800 Subject: ANDROID: binder: remove WARN() for redundant txn error binder_send_failed_reply() is called when a synchronous transaction fails. It reports an error to the thread that is waiting for the completion. Given that the transaction is synchronous, there should never be more than 1 error response to that thread -- this was being asserted with a WARN(). However, when exercising the driver with syzbot tests, cases were observed where multiple "synchronous" requests were sent without waiting for responses, so it is possible that multiple errors would be reported to the thread. This testing was conducted with panic_on_warn set which forced the crash. This is easily reproduced by sending back-to-back "synchronous" transactions without checking for any response (eg, set read_size to 0): bwr.write_buffer = (uintptr_t)&bc1; bwr.write_size = sizeof(bc1); bwr.read_buffer = (uintptr_t)&br; bwr.read_size = 0; ioctl(fd, BINDER_WRITE_READ, &bwr); sleep(1); bwr2.write_buffer = (uintptr_t)&bc2; bwr2.write_size = sizeof(bc2); bwr2.read_buffer = (uintptr_t)&br; bwr2.read_size = 0; ioctl(fd, BINDER_WRITE_READ, &bwr2); sleep(1); The first transaction is sent to the servicemanager and the reply fails because no VMA is set up by this client. After binder_send_failed_reply() is called, the BINDER_WORK_RETURN_ERROR is sitting on the thread's todo list since the read_size was 0 and the client is not waiting for a response. The 2nd transaction is sent and the BINDER_WORK_RETURN_ERROR has not been consumed, so the thread's reply_error.cmd is still set (normally cleared when the BINDER_WORK_RETURN_ERROR is handled). Therefore when the servicemanager attempts to reply to the 2nd failed transaction, the error is already set and it triggers this warning. This is a user error since it is not waiting for the synchronous transaction to complete. If it ever does check, it will see an error. Changed the WARN() to a pr_warn(). Signed-off-by: Todd Kjos Reported-by: syzbot Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ad5e662..31322e9 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1991,8 +1991,14 @@ static void binder_send_failed_reply(struct binder_transaction *t, &target_thread->reply_error.work); wake_up_interruptible(&target_thread->wait); } else { - WARN(1, "Unexpected reply error: %u\n", - target_thread->reply_error.cmd); + /* + * Cannot get here for normal operation, but + * we can if multiple synchronous transactions + * are sent without blocking for responses. + * Just ignore the 2nd error in this case. + */ + pr_warn("Unexpected reply error: %u\n", + target_thread->reply_error.cmd); } binder_inner_proc_unlock(target_thread->proc); binder_thread_dec_tmpref(target_thread); -- cgit v1.1 From 8ca86f1639ec5890d400fff9211aca22d0a392eb Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 7 Feb 2018 13:57:37 -0800 Subject: binder: replace "%p" with "%pK" The format specifier "%p" can leak kernel addresses. Use "%pK" instead. There were 4 remaining cases in binder.c. Signed-off-by: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 31322e9..a85f903 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2199,7 +2199,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, int debug_id = buffer->debug_id; binder_debug(BINDER_DEBUG_TRANSACTION, - "%d buffer release %d, size %zd-%zd, failed at %p\n", + "%d buffer release %d, size %zd-%zd, failed at %pK\n", proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, failed_at); @@ -3711,7 +3711,7 @@ static int binder_thread_write(struct binder_proc *proc, } } binder_debug(BINDER_DEBUG_DEAD_BINDER, - "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n", + "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n", proc->pid, thread->pid, (u64)cookie, death); if (death == NULL) { @@ -5042,7 +5042,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, spin_lock(&t->lock); to_proc = t->to_proc; seq_printf(m, - "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d", + "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d", prefix, t->debug_id, t, t->from ? t->from->proc->pid : 0, t->from ? t->from->pid : 0, @@ -5066,7 +5066,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, } if (buffer->target_node) seq_printf(m, " node %d", buffer->target_node->debug_id); - seq_printf(m, " size %zd:%zd data %p\n", + seq_printf(m, " size %zd:%zd data %pK\n", buffer->data_size, buffer->offsets_size, buffer->data); } -- cgit v1.1 From 5eeb2ca02a2f6084fc57ae5c244a38baab07033a Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 16 Feb 2018 09:47:15 +0100 Subject: ANDROID: binder: synchronize_rcu() when using POLLFREE. To prevent races with ep_remove_waitqueue() removing the waitqueue at the same time. Reported-by: syzbot+a2a3c4909716e271487e@syzkaller.appspotmail.com Signed-off-by: Martijn Coenen Cc: stable # 4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a85f903..764b63a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4382,6 +4382,15 @@ static int binder_thread_release(struct binder_proc *proc, binder_inner_proc_unlock(thread->proc); + /* + * This is needed to avoid races between wake_up_poll() above and + * and ep_remove_waitqueue() called for other reasons (eg the epoll file + * descriptor being closed); ep_remove_waitqueue() holds an RCU read + * lock, so we can be sure it's done after calling synchronize_rcu(). + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL) + synchronize_rcu(); + if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); binder_release_work(proc, &thread->todo); -- cgit v1.1 From 7ae079aca59f560d2a44b65d45dffdefed6bd17a Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 14 Feb 2018 14:03:29 +0200 Subject: mei: set device client to the disconnected state upon suspend. This fixes regression introduced by commit 8d52af6795c0 ("mei: speed up the power down flow") In mei_cldev_disable during device power down flow, such as suspend or system power off, it jumps over disconnecting function to speed up the power down process, however, because the client is unlinked from the file_list (mei_cl_unlink) mei_cl_set_disconnected is not called from mei_cl_all_disconnect leaving resource leaking. The most visible is reference counter on underlying HW module is not decreased preventing to remove modules after suspend/resume cycles. Signed-off-by: Tomas Winkler Fixes: 8d52af6795c0 ("mei: speed up the power down flow") Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 6 ------ drivers/misc/mei/client.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 3e5eabd..772d029 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -548,12 +548,6 @@ int mei_cldev_disable(struct mei_cl_device *cldev) goto out; } - if (bus->dev_state == MEI_DEV_POWER_DOWN) { - dev_dbg(bus->dev, "Device is powering down, don't bother with disconnection\n"); - err = 0; - goto out; - } - err = mei_cl_disconnect(cl); if (err < 0) dev_err(bus->dev, "Could not disconnect from the ME client\n"); diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index be64969..7e60c18 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -945,6 +945,12 @@ int mei_cl_disconnect(struct mei_cl *cl) return 0; } + if (dev->dev_state == MEI_DEV_POWER_DOWN) { + cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n"); + mei_cl_set_disconnected(cl); + return 0; + } + rets = pm_runtime_get(dev->dev); if (rets < 0 && rets != -EINPROGRESS) { pm_runtime_put_noidle(dev->dev); -- cgit v1.1 From ac66b8347bbad5913df098e5281fa6e2c7fc796e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 14 Feb 2018 18:45:59 +0000 Subject: gpu: ipu-v3: make const arrays int_reg static, shrinks object size Don't populate the const read-only arrays int_reg on the stack but instead make them static. Makes the object code smaller by over 80 bytes: Before: text data bss dec hex filename 28024 8936 192 37152 9120 drivers/gpu/ipu-v3/ipu-common.o After: text data bss dec hex filename 27794 9080 192 37066 90ca drivers/gpu/ipu-v3/ipu-common.o (gcc version 7.2.0 x86_64) Signed-off-by: Colin Ian King Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 658fa2d..48685cd 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1089,7 +1089,7 @@ static void ipu_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14}; + static const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14}; chained_irq_enter(chip, desc); @@ -1102,7 +1102,7 @@ static void ipu_err_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - const int int_reg[] = { 4, 5, 8, 9}; + static const int int_reg[] = { 4, 5, 8, 9}; chained_irq_enter(chip, desc); -- cgit v1.1 From 6d36b7fec60e6f74a15ce4781d30b2aecce85dfc Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 22 Jan 2018 16:06:16 +0100 Subject: gpu: ipu-cpmem: add 8-bit grayscale support to ipu_cpmem_set_image Add the missing offset calculation for grayscale images. Since the IPU only supports capturing greyscale in raw passthrough mode, it is the same as 8-bit bayer formats. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-cpmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index bb9c087..ef32377 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -788,6 +788,7 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) case V4L2_PIX_FMT_SGBRG8: case V4L2_PIX_FMT_SGRBG8: case V4L2_PIX_FMT_SRGGB8: + case V4L2_PIX_FMT_GREY: offset = image->rect.left + image->rect.top * pix->bytesperline; break; case V4L2_PIX_FMT_SBGGR16: -- cgit v1.1 From de526f401284e1638d4c97cb5a4c292ac3f37655 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Feb 2018 08:11:48 -0800 Subject: netfilter: xt_hashlimit: fix lock imbalance syszkaller found that rcu was not held in hashlimit_mt_common() We only need to enable BH at this point. Fixes: bea74641e378 ("netfilter: xt_hashlimit: add rate match mode") Signed-off-by: Eric Dumazet Reported-by: syzkaller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index aa96027..66f5aca 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -775,7 +775,7 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, if (!dh->rateinfo.prev_window && (dh->rateinfo.current_rate <= dh->rateinfo.burst)) { spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); return !(cfg->mode & XT_HASHLIMIT_INVERT); } else { goto overlimit; -- cgit v1.1 From 764baba80168ad3adafb521d2ab483ccbc49e344 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 4 Feb 2018 15:35:09 +0200 Subject: ovl: hash non-dir by lower inode for fsnotify Commit 31747eda41ef ("ovl: hash directory inodes for fsnotify") fixed an issue of inotify watch on directory that stops getting events after dropping dentry caches. A similar issue exists for non-dir non-upper files, for example: $ mkdir -p lower upper work merged $ touch lower/foo $ mount -t overlay -o lowerdir=lower,workdir=work,upperdir=upper none merged $ inotifywait merged/foo & $ echo 2 > /proc/sys/vm/drop_caches $ cat merged/foo inotifywait doesn't get the OPEN event, because ovl_lookup() called from 'cat' allocates a new overlay inode and does not reuse the watched inode. Fix this by hashing non-dir overlay inodes by lower real inode in the following cases that were not hashed before this change: - A non-upper overlay mount - A lower non-hardlink when index=off A helper ovl_hash_bylower() was added to put all the logic and documentation about which real inode an overlay inode is hashed by into one place. The issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13. Cc: #v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 58 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index fcd97b7..3b1bd46 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -669,38 +669,59 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, return inode; } +/* + * Does overlay inode need to be hashed by lower inode? + */ +static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, + struct dentry *lower, struct dentry *index) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + /* No, if pure upper */ + if (!lower) + return false; + + /* Yes, if already indexed */ + if (index) + return true; + + /* Yes, if won't be copied up */ + if (!ofs->upper_mnt) + return true; + + /* No, if lower hardlink is or will be broken on copy up */ + if ((upper || !ovl_indexdir(sb)) && + !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) + return false; + + /* No, if non-indexed upper with NFS export */ + if (sb->s_export_op && upper) + return false; + + /* Otherwise, hash by lower inode for fsnotify */ + return true; +} + struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, struct dentry *lowerdentry, struct dentry *index, unsigned int numlower) { - struct ovl_fs *ofs = sb->s_fs_info; struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; - /* Already indexed or could be indexed on copy up? */ - bool indexed = (index || (ovl_indexdir(sb) && !upperdentry)); - struct dentry *origin = indexed ? lowerdentry : NULL; + bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index); bool is_dir; - if (WARN_ON(upperdentry && indexed && !lowerdentry)) - return ERR_PTR(-EIO); - if (!realinode) realinode = d_inode(lowerdentry); /* - * Copy up origin (lower) may exist for non-indexed non-dir upper, but - * we must not use lower as hash key in that case. - * Hash non-dir that is or could be indexed by origin inode. - * Hash dir that is or could be merged by origin inode. - * Hash pure upper and non-indexed non-dir by upper inode. - * Hash non-indexed dir by upper inode for NFS export. + * Copy up origin (lower) may exist for non-indexed upper, but we must + * not use lower as hash key if this is a broken hardlink. */ is_dir = S_ISDIR(realinode->i_mode); - if (is_dir && (indexed || !sb->s_export_op || !ofs->upper_mnt)) - origin = lowerdentry; - - if (upperdentry || origin) { - struct inode *key = d_inode(origin ?: upperdentry); + if (upperdentry || bylower) { + struct inode *key = d_inode(bylower ? lowerdentry : + upperdentry); unsigned int nlink = is_dir ? 1 : realinode->i_nlink; inode = iget5_locked(sb, (unsigned long) key, @@ -728,6 +749,7 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { + /* Lower hardlink that will be broken on copy up */ inode = new_inode(sb); if (!inode) goto out_nomem; -- cgit v1.1 From 2ca3c148a06244d46dcfc95c5965644c83a30b37 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Jan 2018 13:31:09 +0200 Subject: ovl: check lower ancestry on encode of lower dir file handle This change relaxes copy up on encode of merge dir with lower layer > 1 and handles the case of encoding a merge dir with lower layer 1, where an ancestor is a non-indexed merge dir. In that case, decode of the lower file handle will not have been possible if the non-indexed ancestor is redirected before or after encode. Before encoding a non-upper directory file handle from real layer N, we need to check if it will be possible to reconnect an overlay dentry from the real lower decoded dentry. This is done by following the overlay ancestry up to a "layer N connected" ancestor and verifying that all parents along the way are "layer N connectable". If an ancestor that is NOT "layer N connectable" is found, we need to copy up an ancestor, which is "layer N connectable", thus making that ancestor "layer N connected". For example: layer 1: /a layer 2: /a/b/c The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is copied up and renamed, upper dir /a will be indexed by lower dir /a from layer 1. The dir /a from layer 2 will never be indexed, so the algorithm in ovl_lookup_real_ancestor() (*) will not be able to lookup a connected overlay dentry from the connected lower dentry /a/b/c. To avoid this problem on decode time, we need to copy up an ancestor of /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected" and when the time comes to decode the file handle from lower dentry /a/b/c, ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding a connected overlay dentry will be accomplished. (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an entry /a in the lower layers above layer N and find the indexed dir /a from layer 1. If that improvement is made, then the check for "layer N connected" will need to verify there are no redirects in lower layers above layer N. In the example above, /a will be "layer 2 connectable". However, if layer 2 dir /a is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable": layer 1: /A (redirect = /a) layer 2: /a/b/c Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 210 +++++++++++++++++++++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/super.c | 1 + 3 files changed, 168 insertions(+), 44 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index bb94ce9..9df455c 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -19,6 +19,142 @@ #include #include "overlayfs.h" +static int ovl_encode_maybe_copy_up(struct dentry *dentry) +{ + int err; + + if (ovl_dentry_upper(dentry)) + return 0; + + err = ovl_want_write(dentry); + if (!err) { + err = ovl_copy_up(dentry); + ovl_drop_write(dentry); + } + + if (err) { + pr_warn_ratelimited("overlayfs: failed to copy up on encode (%pd2, err=%i)\n", + dentry, err); + } + + return err; +} + +/* + * Before encoding a non-upper directory file handle from real layer N, we need + * to check if it will be possible to reconnect an overlay dentry from the real + * lower decoded dentry. This is done by following the overlay ancestry up to a + * "layer N connected" ancestor and verifying that all parents along the way are + * "layer N connectable". If an ancestor that is NOT "layer N connectable" is + * found, we need to copy up an ancestor, which is "layer N connectable", thus + * making that ancestor "layer N connected". For example: + * + * layer 1: /a + * layer 2: /a/b/c + * + * The overlay dentry /a is NOT "layer 2 connectable", because if dir /a is + * copied up and renamed, upper dir /a will be indexed by lower dir /a from + * layer 1. The dir /a from layer 2 will never be indexed, so the algorithm (*) + * in ovl_lookup_real_ancestor() will not be able to lookup a connected overlay + * dentry from the connected lower dentry /a/b/c. + * + * To avoid this problem on decode time, we need to copy up an ancestor of + * /a/b/c, which is "layer 2 connectable", on encode time. That ancestor is + * /a/b. After copy up (and index) of /a/b, it will become "layer 2 connected" + * and when the time comes to decode the file handle from lower dentry /a/b/c, + * ovl_lookup_real_ancestor() will find the indexed ancestor /a/b and decoding + * a connected overlay dentry will be accomplished. + * + * (*) the algorithm in ovl_lookup_real_ancestor() can be improved to lookup an + * entry /a in the lower layers above layer N and find the indexed dir /a from + * layer 1. If that improvement is made, then the check for "layer N connected" + * will need to verify there are no redirects in lower layers above N. In the + * example above, /a will be "layer 2 connectable". However, if layer 2 dir /a + * is a target of a layer 1 redirect, then /a will NOT be "layer 2 connectable": + * + * layer 1: /A (redirect = /a) + * layer 2: /a/b/c + */ + +/* Return the lowest layer for encoding a connectable file handle */ +static int ovl_connectable_layer(struct dentry *dentry) +{ + struct ovl_entry *oe = OVL_E(dentry); + + /* We can get overlay root from root of any layer */ + if (dentry == dentry->d_sb->s_root) + return oe->numlower; + + /* + * If it's an unindexed merge dir, then it's not connectable with any + * lower layer + */ + if (ovl_dentry_upper(dentry) && + !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + return 0; + + /* We can get upper/overlay path from indexed/lower dentry */ + return oe->lowerstack[0].layer->idx; +} + +/* + * @dentry is "connected" if all ancestors up to root or a "connected" ancestor + * have the same uppermost lower layer as the origin's layer. We may need to + * copy up a "connectable" ancestor to make it "connected". A "connected" dentry + * cannot become non "connected", so cache positive result in dentry flags. + * + * Return the connected origin layer or < 0 on error. + */ +static int ovl_connect_layer(struct dentry *dentry) +{ + struct dentry *next, *parent = NULL; + int origin_layer; + int err = 0; + + if (WARN_ON(dentry == dentry->d_sb->s_root) || + WARN_ON(!ovl_dentry_lower(dentry))) + return -EIO; + + origin_layer = OVL_E(dentry)->lowerstack[0].layer->idx; + if (ovl_dentry_test_flag(OVL_E_CONNECTED, dentry)) + return origin_layer; + + /* Find the topmost origin layer connectable ancestor of @dentry */ + next = dget(dentry); + for (;;) { + parent = dget_parent(next); + if (WARN_ON(parent == next)) { + err = -EIO; + break; + } + + /* + * If @parent is not origin layer connectable, then copy up + * @next which is origin layer connectable and we are done. + */ + if (ovl_connectable_layer(parent) < origin_layer) { + err = ovl_encode_maybe_copy_up(next); + break; + } + + /* If @parent is connected or indexed we are done */ + if (ovl_dentry_test_flag(OVL_E_CONNECTED, parent) || + ovl_test_flag(OVL_INDEX, d_inode(parent))) + break; + + dput(next); + next = parent; + } + + dput(parent); + dput(next); + + if (!err) + ovl_dentry_set_flag(OVL_E_CONNECTED, dentry); + + return err ?: origin_layer; +} + /* * We only need to encode origin if there is a chance that the same object was * encoded pre copy up and then we need to stay consistent with the same @@ -41,73 +177,59 @@ * L = lower file handle * * (*) Connecting an overlay dir from real lower dentry is not always - * possible when there are redirects in lower layers. To mitigate this case, - * we copy up the lower dir first and then encode an upper dir file handle. + * possible when there are redirects in lower layers and non-indexed merge dirs. + * To mitigate those case, we may copy up the lower dir ancestor before encode + * a lower dir file handle. + * + * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error. */ -static bool ovl_should_encode_origin(struct dentry *dentry) +static int ovl_check_encode_origin(struct dentry *dentry) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + /* Upper file handle for pure upper */ if (!ovl_dentry_lower(dentry)) - return false; + return 0; /* - * Decoding a merge dir, whose origin's parent is under a redirected - * lower dir is not always possible. As a simple aproximation, we do - * not encode lower dir file handles when overlay has multiple lower - * layers and origin is below the topmost lower layer. + * Upper file handle for non-indexed upper. * - * TODO: copy up only the parent that is under redirected lower. + * Root is never indexed, so if there's an upper layer, encode upper for + * root. */ - if (d_is_dir(dentry) && ofs->upper_mnt && - OVL_E(dentry)->lowerstack[0].layer->idx > 1) - return false; - - /* Decoding a non-indexed upper from origin is not implemented */ if (ovl_dentry_upper(dentry) && !ovl_test_flag(OVL_INDEX, d_inode(dentry))) - return false; - - return true; -} - -static int ovl_encode_maybe_copy_up(struct dentry *dentry) -{ - int err; - - if (ovl_dentry_upper(dentry)) return 0; - err = ovl_want_write(dentry); - if (err) - return err; - - err = ovl_copy_up(dentry); + /* + * Decoding a merge dir, whose origin's ancestor is under a redirected + * lower dir or under a non-indexed upper is not always possible. + * ovl_connect_layer() will try to make origin's layer "connected" by + * copying up a "connectable" ancestor. + */ + if (d_is_dir(dentry) && ofs->upper_mnt) + return ovl_connect_layer(dentry); - ovl_drop_write(dentry); - return err; + /* Lower file handle for indexed and non-upper dir/non-dir */ + return 1; } static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen) { - struct dentry *origin = ovl_dentry_lower(dentry); struct ovl_fh *fh = NULL; - int err; + int err, enc_lower; /* - * If we should not encode a lower dir file handle, copy up and encode - * an upper dir file handle. + * Check if we should encode a lower or upper file handle and maybe + * copy up an ancestor to make lower file handle connectable. */ - if (!ovl_should_encode_origin(dentry)) { - err = ovl_encode_maybe_copy_up(dentry); - if (err) - goto fail; - - origin = NULL; - } + err = enc_lower = ovl_check_encode_origin(dentry); + if (enc_lower < 0) + goto fail; - /* Encode an upper or origin file handle */ - fh = ovl_encode_fh(origin ?: ovl_dentry_upper(dentry), !origin); + /* Encode an upper or lower file handle */ + fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) : + ovl_dentry_upper(dentry), !enc_lower); err = PTR_ERR(fh); if (IS_ERR(fh)) goto fail; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0df25a9..225ff11 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -40,6 +40,7 @@ enum ovl_inode_flag { enum ovl_entry_flag { OVL_E_UPPER_ALIAS, OVL_E_OPAQUE, + OVL_E_CONNECTED, }; /* diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9ee37c7..7c24619 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1359,6 +1359,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) /* Root is always merge -> can have whiteouts */ ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); + ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); ovl_inode_init(d_inode(root_dentry), upperpath.dentry, ovl_dentry_lower(root_dentry)); -- cgit v1.1 From 7168179fcf25f7812e8541decac686a91359e522 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Jan 2018 14:30:50 +0200 Subject: ovl: check ERR_PTR() return value from ovl_lookup_real() Reported-by: Dan Carpenter Fixes: 061701540349 ("ovl: lookup indexed ancestor of lower dir") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 9df455c..97a916e 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -477,8 +477,8 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb, dput(upper); } - if (!this) - return NULL; + if (IS_ERR_OR_NULL(this)) + return this; if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) { dput(this); -- cgit v1.1 From aba62a9e9a4064c5ea9deb33b5b1392f263cad24 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 16 Feb 2018 12:45:13 -0200 Subject: MAINTAINERS: Add myself as sgtl5000 maintainer I would like helping maintaining and reviewing/testing sgtl5000 related patches. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260..4e283d1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9921,6 +9921,13 @@ F: Documentation/ABI/stable/sysfs-bus-nvmem F: include/linux/nvmem-consumer.h F: include/linux/nvmem-provider.h +NXP SGTL5000 DRIVER +M: Fabio Estevam +L: alsa-devel@alsa-project.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/sound/sgtl5000.txt +F: sound/soc/codecs/sgtl5000* + NXP TDA998X DRM DRIVER M: Russell King S: Supported -- cgit v1.1 From a8992973edbb2555e956b90f6fe97c4bc14d761d Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 16 Feb 2018 11:58:54 -0200 Subject: ASoC: sgtl5000: Fix suspend/resume Commit 8419caa72702 ("ASoC: sgtl5000: Do not disable regulators in SND_SOC_BIAS_OFF") causes the sgtl5000 to fail after a suspend/resume sequence: Playing WAVE '/media/a2002011001-e02.wav' : Signed 16 bit Little Endian, Rate 44100 Hz, Stereo aplay: pcm_write:2051: write error: Input/output error The problem is caused by the fact that the aforementioned commit dropped the cache handling, so re-introduce the register map resync to fix the problem. Suggested-by: Mark Brown Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Cc: --- sound/soc/codecs/sgtl5000.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index c445a07..c5c76ab 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -876,15 +876,26 @@ static int sgtl5000_pcm_hw_params(struct snd_pcm_substream *substream, static int sgtl5000_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { + struct sgtl5000_priv *sgtl = snd_soc_codec_get_drvdata(codec); + int ret; + switch (level) { case SND_SOC_BIAS_ON: case SND_SOC_BIAS_PREPARE: case SND_SOC_BIAS_STANDBY: + regcache_cache_only(sgtl->regmap, false); + ret = regcache_sync(sgtl->regmap); + if (ret) { + regcache_cache_only(sgtl->regmap, true); + return ret; + } + snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER, SGTL5000_REFTOP_POWERUP, SGTL5000_REFTOP_POWERUP); break; case SND_SOC_BIAS_OFF: + regcache_cache_only(sgtl->regmap, true); snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER, SGTL5000_REFTOP_POWERUP, 0); break; -- cgit v1.1 From 2188558621ed475cef55fa94ce535499452f0091 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 14 Feb 2018 14:38:43 +0200 Subject: RDMA/verbs: Check existence of function prior to accessing it Update all the flows to ensure that function pointer exists prior to accessing it. This is much safer than checking the uverbs_ex_mask variable, especially since we know that test isn't working properly and will be removed in -next. This prevents a user triggereable oops. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 +++ drivers/infiniband/core/uverbs_cmd.c | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index c4560d8..c91f9a8 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -309,6 +309,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, { struct ib_qp *qp; + if (!dev->create_qp) + return ERR_PTR(-EOPNOTSUPP); + qp = dev->create_qp(pd, attr, udata); if (IS_ERR(qp)) return qp; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cd9fbd7..dbcfb31 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -978,6 +978,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; + if (!ib_dev->create_cq) + return ERR_PTR(-EOPNOTSUPP); + if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); @@ -2947,6 +2950,11 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, wq_init_attr.create_flags = cmd.create_flags; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); + + if (!pd->device->create_wq) { + err = -EOPNOTSUPP; + goto err_put_cq; + } wq = pd->device->create_wq(pd, &wq_init_attr, uhw); if (IS_ERR(wq)) { err = PTR_ERR(wq); @@ -3090,7 +3098,12 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.flags = cmd.flags; wq_attr.flags_mask = cmd.flags_mask; } + if (!wq->device->modify_wq) { + ret = -EOPNOTSUPP; + goto out; + } ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); +out: uobj_put_obj_read(wq); return ret; } @@ -3187,6 +3200,11 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; + + if (!ib_dev->create_rwq_ind_table) { + err = -EOPNOTSUPP; + goto err_uobj; + } rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); if (IS_ERR(rwq_ind_tbl)) { @@ -3776,6 +3794,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, struct ib_device_attr attr = {0}; int err; + if (!ib_dev->query_device) + return -EOPNOTSUPP; + if (ucore->inlen < sizeof(cmd)) return -EINVAL; -- cgit v1.1 From 02b7b2844c2ffd3b614ec2b9293e8c7f041d60da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Feb 2018 16:36:04 +0100 Subject: staging: fsl-mc: fix build testing on x86 Selecting GENERIC_MSI_IRQ_DOMAIN on x86 causes a compile-time error in some configurations: drivers/base/platform-msi.c:37:19: error: field 'arg' has incomplete type On the other architectures, we are fine, but here we should have an additional dependency on X86_LOCAL_APIC so we can get the PCI_MSI_IRQ_DOMAIN symbol. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-mc/bus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/fsl-mc/bus/Kconfig b/drivers/staging/fsl-mc/bus/Kconfig index 1f910004..b35ef7e 100644 --- a/drivers/staging/fsl-mc/bus/Kconfig +++ b/drivers/staging/fsl-mc/bus/Kconfig @@ -7,7 +7,7 @@ config FSL_MC_BUS bool "QorIQ DPAA2 fsl-mc bus driver" - depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86 || PPC))) + depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86_LOCAL_APIC || PPC))) select GENERIC_MSI_IRQ_DOMAIN help Driver to enable the bus infrastructure for the QorIQ DPAA2 -- cgit v1.1 From ce8a3a9e76d0193e2e8d74a06d275b3c324ca652 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 4 Feb 2018 02:06:27 +0000 Subject: staging: android: ashmem: Fix a race condition in pin ioctls ashmem_pin_unpin() reads asma->file and asma->size before taking the ashmem_mutex, so it can race with other operations that modify them. Build-tested only. Cc: stable@vger.kernel.org Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index bbdc53b..6dbba5a 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -702,30 +702,32 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, size_t pgstart, pgend; int ret = -EINVAL; + mutex_lock(&ashmem_mutex); + if (unlikely(!asma->file)) - return -EINVAL; + goto out_unlock; - if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) - return -EFAULT; + if (unlikely(copy_from_user(&pin, p, sizeof(pin)))) { + ret = -EFAULT; + goto out_unlock; + } /* per custom, you can pass zero for len to mean "everything onward" */ if (!pin.len) pin.len = PAGE_ALIGN(asma->size) - pin.offset; if (unlikely((pin.offset | pin.len) & ~PAGE_MASK)) - return -EINVAL; + goto out_unlock; if (unlikely(((__u32)-1) - pin.offset < pin.len)) - return -EINVAL; + goto out_unlock; if (unlikely(PAGE_ALIGN(asma->size) < pin.offset + pin.len)) - return -EINVAL; + goto out_unlock; pgstart = pin.offset / PAGE_SIZE; pgend = pgstart + (pin.len / PAGE_SIZE) - 1; - mutex_lock(&ashmem_mutex); - switch (cmd) { case ASHMEM_PIN: ret = ashmem_pin(asma, pgstart, pgend); @@ -738,6 +740,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, break; } +out_unlock: mutex_unlock(&ashmem_mutex); return ret; -- cgit v1.1 From 6d79bd5bb6c79a9dba4842040c9adf39e7806330 Mon Sep 17 00:00:00 2001 From: Liam Mark Date: Fri, 26 Jan 2018 09:48:18 -0800 Subject: staging: android: ion: Zero CMA allocated memory Since commit 204f672255c2 ("staging: android: ion: Use CMA APIs directly") the CMA API is now used directly and therefore the allocated memory is no longer automatically zeroed. Explicitly zero CMA allocated memory to ensure that no data is exposed to userspace. Fixes: 204f672255c2 ("staging: android: ion: Use CMA APIs directly") Signed-off-by: Liam Mark Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_cma_heap.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c index 94e0692..49718c9 100644 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ b/drivers/staging/android/ion/ion_cma_heap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ion.h" @@ -42,6 +43,22 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, if (!pages) return -ENOMEM; + if (PageHighMem(pages)) { + unsigned long nr_clear_pages = nr_pages; + struct page *page = pages; + + while (nr_clear_pages > 0) { + void *vaddr = kmap_atomic(page); + + memset(vaddr, 0, PAGE_SIZE); + kunmap_atomic(vaddr); + page++; + nr_clear_pages--; + } + } else { + memset(page_address(pages), 0, size); + } + table = kmalloc(sizeof(*table), GFP_KERNEL); if (!table) goto err; -- cgit v1.1 From 50c330973c0c9f1e300b07bbab78d306dcc6e612 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 16 Feb 2018 16:57:56 +0000 Subject: irqchip/gic-v3-its: Fix misplaced __iomem annotations Save 26 lines worth of Sparse complaints by fixing up this minor mishap. The pointee lies in the __iomem space; the pointer does not. Signed-off-by: Robin Murphy Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 1d3056f..94b7d74 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2495,7 +2495,7 @@ static int its_vpe_set_affinity(struct irq_data *d, static void its_vpe_schedule(struct its_vpe *vpe) { - void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); u64 val; /* Schedule the VPE */ @@ -2527,7 +2527,7 @@ static void its_vpe_schedule(struct its_vpe *vpe) static void its_vpe_deschedule(struct its_vpe *vpe) { - void * __iomem vlpi_base = gic_data_rdist_vlpi_base(); + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); u32 count = 1000000; /* 1s! */ bool clean; u64 val; -- cgit v1.1 From 2f08ee363fe097bc6dc01aac53e1798b16c00986 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 14 Feb 2018 18:43:36 -0800 Subject: RDMA/restrack: don't use uaccess_kernel() uaccess_kernel() isn't sufficient to determine if an rdma resource is user-mode or not. For example, resources allocated in the add_one() function of an ib_client get falsely labeled as user mode, when they are kernel mode allocations. EG: mad qps. The result is that these qps are skipped over during a nldev query because of an erroneous namespace mismatch. So now we determine if the resource is user-mode by looking at the object struct's uobject or similar pointer to know if it was allocated for user mode applications. Fixes: 02d8883f520e ("RDMA/restrack: Add general infrastructure to track RDMA resources") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 4 +++- drivers/infiniband/core/restrack.c | 18 ++++++++++++++++-- drivers/infiniband/core/uverbs_cmd.c | 4 ++-- drivers/infiniband/core/verbs.c | 3 +-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index c91f9a8..25bb178 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -305,7 +305,8 @@ void nldev_exit(void); static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, - struct ib_udata *udata) + struct ib_udata *udata, + struct ib_uobject *uobj) { struct ib_qp *qp; @@ -318,6 +319,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->device = dev; qp->pd = pd; + qp->uobject = uobj; /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index d8dc709..3dbc4e4 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -7,7 +7,6 @@ #include #include #include -#include #include void rdma_restrack_init(struct rdma_restrack_root *res) @@ -88,6 +87,21 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) return dev; } +static bool res_is_user(struct rdma_restrack_entry *res) +{ + switch (res->type) { + case RDMA_RESTRACK_PD: + return container_of(res, struct ib_pd, res)->uobject; + case RDMA_RESTRACK_CQ: + return container_of(res, struct ib_cq, res)->uobject; + case RDMA_RESTRACK_QP: + return container_of(res, struct ib_qp, res)->uobject; + default: + WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type); + return false; + } +} + void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); @@ -95,7 +109,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) if (!dev) return; - if (!uaccess_kernel()) { + if (res_is_user(res)) { get_task_struct(current); res->task = current; res->kern_name = NULL; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index dbcfb31..25a0e0e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1520,7 +1520,8 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = _ib_create_qp(device, pd, &attr, uhw); + qp = _ib_create_qp(device, pd, &attr, uhw, + &obj->uevent.uobject); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1553,7 +1554,6 @@ static int create_qp(struct ib_uverbs_file *file, if (ind_tbl) atomic_inc(&ind_tbl->usecnt); } - qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 16ebc63..93025d2 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -887,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (qp_init_attr->cap.max_rdma_ctxs) rdma_rw_init_qp(device, qp_init_attr); - qp = _ib_create_qp(device, pd, qp_init_attr, NULL); + qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); if (IS_ERR(qp)) return qp; @@ -898,7 +898,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, } qp->real_qp = qp; - qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; -- cgit v1.1 From fe9c842695e26d8116b61b80bfb905356f07834b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 14 Feb 2018 15:45:07 -0800 Subject: NFC: llcp: Limit size of SDP URI The tlv_len is u8, so we need to limit the size of the SDP URI. Enforce this both in the NLA policy and in the code that performs the allocation and copy, to avoid writing past the end of the allocated buffer. Fixes: d9b8d8e19b073 ("NFC: llcp: Service Name Lookup netlink interface") Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/nfc/llcp_commands.c | 4 ++++ net/nfc/netlink.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 367d8c0..2ceefa1 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -149,6 +149,10 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, pr_debug("uri: %s, len: %zu\n", uri, uri_len); + /* sdreq->tlv_len is u8, takes uri_len, + 3 for header, + 1 for NULL */ + if (WARN_ON_ONCE(uri_len > U8_MAX - 4)) + return NULL; + sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); if (sdreq == NULL) return NULL; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index c0b83dc..f018eaf 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -61,7 +61,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { - [NFC_SDP_ATTR_URI] = { .type = NLA_STRING }, + [NFC_SDP_ATTR_URI] = { .type = NLA_STRING, + .len = U8_MAX - 4 }, [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 }, }; -- cgit v1.1 From a8c6db1dfd1b1d18359241372bb204054f2c3174 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 15 Feb 2018 09:46:03 +0100 Subject: fib_semantics: Don't match route with mismatching tclassid In fib_nh_match(), if output interface or gateway are passed in the FIB configuration, we don't have to check next hops of multipath routes to conclude whether we have a match or not. However, we might still have routes with different realms matching the same output interface and gateway configuration, and this needs to cause the match to fail. Otherwise the first route inserted in the FIB will match, regardless of the realms: # ip route add 1.1.1.1 dev eth0 table 1234 realms 1/2 # ip route append 1.1.1.1 dev eth0 table 1234 realms 3/4 # ip route list table 1234 1.1.1.1 dev eth0 scope link realms 1/2 1.1.1.1 dev eth0 scope link realms 3/4 # ip route del 1.1.1.1 dev ens3 table 1234 realms 3/4 # ip route list table 1234 1.1.1.1 dev ens3 scope link realms 3/4 whereas route with realms 3/4 should have been deleted instead. Explicitly check for fc_flow passed in the FIB configuration (this comes from RTA_FLOW extracted by rtm_to_fib_config()) and fail matching if it differs from nh_tclassid. The handling of RTA_FLOW for multipath routes later in fib_nh_match() is still needed, as we can have multiple RTA_FLOW attributes that need to be matched against the tclassid of each next hop. v2: Check that fc_flow is set before discarding the match, so that the user can still select the first matching rule by not specifying any realm, as suggested by David Ahern. Reported-by: Jianlin Shi Signed-off-by: Stefano Brivio Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c586597..7d36a95 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -646,6 +646,11 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, fi->fib_nh, cfg, extack)) return 1; } +#ifdef CONFIG_IP_ROUTE_CLASSID + if (cfg->fc_flow && + cfg->fc_flow != fi->fib_nh->nh_tclassid) + return 1; +#endif if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) return 0; -- cgit v1.1 From c4e43e14cd4617d57babc7a9f251bf3e9ad360a0 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 15 Feb 2018 18:16:57 +0530 Subject: cxgb4: free up resources of pf 0-3 free pf 0-3 resources, commit baf5086840ab ("cxgb4: restructure VF mgmt code") erroneously removed the code which frees the pf 0-3 resources, causing the probe of pf 0-3 to fail in case of driver reload. Fixes: baf5086840ab ("cxgb4: restructure VF mgmt code") Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 56bc626..7b452e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4982,9 +4982,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) pcie_fw = readl(adap->regs + PCIE_FW_A); /* Check if cxgb4 is the MASTER and fw is initialized */ - if (!(pcie_fw & PCIE_FW_INIT_F) || + if (num_vfs && + (!(pcie_fw & PCIE_FW_INIT_F) || !(pcie_fw & PCIE_FW_MASTER_VLD_F) || - PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF) { + PCIE_FW_MASTER_G(pcie_fw) != CXGB4_UNIFIED_PF)) { dev_warn(&pdev->dev, "cxgb4 driver needs to be MASTER to support SRIOV\n"); return -EOPNOTSUPP; @@ -5599,24 +5600,24 @@ static void remove_one(struct pci_dev *pdev) #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); #endif - iounmap(adapter->regs); if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); - pci_disable_pcie_error_reporting(pdev); - if ((adapter->flags & DEV_ENABLED)) { - pci_disable_device(pdev); - adapter->flags &= ~DEV_ENABLED; - } - pci_release_regions(pdev); - kfree(adapter->mbox_log); - synchronize_rcu(); - kfree(adapter); } #ifdef CONFIG_PCI_IOV else { cxgb4_iov_configure(adapter->pdev, 0); } #endif + iounmap(adapter->regs); + pci_disable_pcie_error_reporting(pdev); + if ((adapter->flags & DEV_ENABLED)) { + pci_disable_device(pdev); + adapter->flags &= ~DEV_ENABLED; + } + pci_release_regions(pdev); + kfree(adapter->mbox_log); + synchronize_rcu(); + kfree(adapter); } /* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt -- cgit v1.1 From e6f02a4d57cc438099bc8abfba43ba1400d77b38 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 15 Feb 2018 18:20:01 +0530 Subject: cxgb4: fix trailing zero in CIM LA dump Set correct size of the CIM LA dump for T6. Fixes: 27887bc7cb7f ("cxgb4: collect hardware LA dumps") Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 557fd8b..00a1d2d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -472,7 +472,7 @@ int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, if (is_t6(padap->params.chip)) { size = padap->params.cim_la_size / 10 + 1; - size *= 11 * sizeof(u32); + size *= 10 * sizeof(u32); } else { size = padap->params.cim_la_size / 8; size *= 8 * sizeof(u32); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 30485f9..143686c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -102,7 +102,7 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) case CUDBG_CIM_LA: if (is_t6(adap->params.chip)) { len = adap->params.cim_la_size / 10 + 1; - len *= 11 * sizeof(u32); + len *= 10 * sizeof(u32); } else { len = adap->params.cim_la_size / 8; len *= 8 * sizeof(u32); -- cgit v1.1 From 7dcf688d4c78a18ba9538b2bf1b11dc7a43fe9be Mon Sep 17 00:00:00 2001 From: Casey Leedom Date: Thu, 15 Feb 2018 20:03:18 +0530 Subject: PCI/cxgb4: Extend T3 PCI quirk to T4+ devices We've run into a problem where our device is attached to a Virtual Machine and the use of the new pci_set_vpd_size() API doesn't help. The VM kernel has been informed that the accesses are okay, but all of the actual VPD Capability Accesses are trapped down into the KVM Hypervisor where it goes ahead and imposes the silent denials. The right idea is to follow the kernel.org commit 1c7de2b4ff88 ("PCI: Enable access to non-standard VPD for Chelsio devices (cxgb3)") which Alexey Kardashevskiy authored to establish a PCI Quirk for our T3-based adapters. This commit extends that PCI Quirk to cover Chelsio T4 devices and later. The advantage of this approach is that the VPD Size gets set early in the Base OS/Hypervisor Boot and doesn't require that the cxgb4 driver even be available in the Base OS/Hypervisor. Thus PF4 can be exported to a Virtual Machine and everything should work. Fixes: 67e658794ca1 ("cxgb4: Set VPD size so we can read both VPD structures") Cc: # v4.9+ Signed-off-by: Casey Leedom Signed-off-by: Arjun Vynipadath Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 10 -------- drivers/pci/quirks.c | 39 ++++++++++++++++++------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 047609e..920bccd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2637,7 +2637,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) } #define EEPROM_STAT_ADDR 0x7bfc -#define VPD_SIZE 0x800 #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 @@ -2704,15 +2703,6 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) if (!vpd) return -ENOMEM; - /* We have two VPD data structures stored in the adapter VPD area. - * By default, Linux calculates the size of the VPD area by traversing - * the first VPD area at offset 0x0, so we need to tell the OS what - * our real VPD size is. - */ - ret = pci_set_vpd_size(adapter->pdev, VPD_SIZE); - if (ret < 0) - goto out; - /* Card information normally starts at VPD_BASE but early cards had * it at 0. */ diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index fc73401..8b14bd3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3419,22 +3419,29 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE, static void quirk_chelsio_extend_vpd(struct pci_dev *dev) { - pci_set_vpd_size(dev, 8192); -} - -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x20, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x21, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x22, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x23, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x24, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x25, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x26, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x30, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x31, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x32, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x35, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x36, quirk_chelsio_extend_vpd); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, 0x37, quirk_chelsio_extend_vpd); + int chip = (dev->device & 0xf000) >> 12; + int func = (dev->device & 0x0f00) >> 8; + int prod = (dev->device & 0x00ff) >> 0; + + /* + * If this is a T3-based adapter, there's a 1KB VPD area at offset + * 0xc00 which contains the preferred VPD values. If this is a T4 or + * later based adapter, the special VPD is at offset 0x400 for the + * Physical Functions (the SR-IOV Virtual Functions have no VPD + * Capabilities). The PCI VPD Access core routines will normally + * compute the size of the VPD by parsing the VPD Data Structure at + * offset 0x000. This will result in silent failures when attempting + * to accesses these other VPD areas which are beyond those computed + * limits. + */ + if (chip == 0x0 && prod >= 0x20) + pci_set_vpd_size(dev, 8192); + else if (chip >= 0x4 && func < 0x8) + pci_set_vpd_size(dev, 2048); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, + quirk_chelsio_extend_vpd); #ifdef CONFIG_ACPI /* -- cgit v1.1 From dfec091439bb2acf763497cfc58f2bdfc67c56b7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2018 16:59:49 +0100 Subject: dn_getsockoptdecnet: move nf_{get/set}sockopt outside sock lock After commit 3f34cfae1238 ("netfilter: on sockopt() acquire sock lock only in the required scope"), the caller of nf_{get/set}sockopt() must not hold any lock, but, in such changeset, I forgot to cope with DECnet. This commit addresses the issue moving the nf call outside the lock, in the dn_{get,set}sockopt() with the same schema currently used by ipv4 and ipv6. Also moves the unhandled sockopts of the end of the main switch statements, to improve code readability. Reported-by: Petr Vandrovec BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198791#c2 Fixes: 3f34cfae1238 ("netfilter: on sockopt() acquire sock lock only in the required scope") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 62 +++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 91dd09f..791aff6 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1338,6 +1338,12 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use lock_sock(sk); err = __dn_setsockopt(sock, level, optname, optval, optlen, 0); release_sock(sk); +#ifdef CONFIG_NETFILTER + /* we need to exclude all possible ENOPROTOOPTs except default case */ + if (err == -ENOPROTOOPT && optname != DSO_LINKINFO && + optname != DSO_STREAM && optname != DSO_SEQPACKET) + err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); +#endif return err; } @@ -1445,15 +1451,6 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); break; - default: -#ifdef CONFIG_NETFILTER - return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); -#endif - case DSO_LINKINFO: - case DSO_STREAM: - case DSO_SEQPACKET: - return -ENOPROTOOPT; - case DSO_MAXWINDOW: if (optlen != sizeof(unsigned long)) return -EINVAL; @@ -1501,6 +1498,12 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us return -EINVAL; scp->info_loc = u.info; break; + + case DSO_LINKINFO: + case DSO_STREAM: + case DSO_SEQPACKET: + default: + return -ENOPROTOOPT; } return 0; @@ -1514,6 +1517,20 @@ static int dn_getsockopt(struct socket *sock, int level, int optname, char __use lock_sock(sk); err = __dn_getsockopt(sock, level, optname, optval, optlen, 0); release_sock(sk); +#ifdef CONFIG_NETFILTER + if (err == -ENOPROTOOPT && optname != DSO_STREAM && + optname != DSO_SEQPACKET && optname != DSO_CONACCEPT && + optname != DSO_CONREJECT) { + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); + if (err >= 0) + err = put_user(len, optlen); + } +#endif return err; } @@ -1579,26 +1596,6 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us r_data = &link; break; - default: -#ifdef CONFIG_NETFILTER - { - int ret, len; - - if (get_user(len, optlen)) - return -EFAULT; - - ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); - if (ret >= 0) - ret = put_user(len, optlen); - return ret; - } -#endif - case DSO_STREAM: - case DSO_SEQPACKET: - case DSO_CONACCEPT: - case DSO_CONREJECT: - return -ENOPROTOOPT; - case DSO_MAXWINDOW: if (r_len > sizeof(unsigned long)) r_len = sizeof(unsigned long); @@ -1630,6 +1627,13 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us r_len = sizeof(unsigned char); r_data = &scp->info_rem; break; + + case DSO_STREAM: + case DSO_SEQPACKET: + case DSO_CONACCEPT: + case DSO_CONREJECT: + default: + return -ENOPROTOOPT; } if (r_data) { -- cgit v1.1 From da27988766e338e4a4fe198170497c0920395d4c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Feb 2018 15:52:42 -0500 Subject: skbuff: Fix comment mis-spelling. 'peform' --> 'perform' Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5ebc0f8..c1e66bd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3646,7 +3646,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, return true; } -/* For small packets <= CHECKSUM_BREAK peform checksum complete directly +/* For small packets <= CHECKSUM_BREAK perform checksum complete directly * in checksum_init. */ #define CHECKSUM_BREAK 76 -- cgit v1.1 From 15f35d49c93f4fa9875235e7bf3e3783d2dd7a1b Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 15 Feb 2018 20:18:43 +0300 Subject: udplite: fix partial checksum initialization Since UDP-Lite is always using checksum, the following path is triggered when calculating pseudo header for it: udp4_csum_init() or udp6_csum_init() skb_checksum_init_zero_check() __skb_checksum_validate_complete() The problem can appear if skb->len is less than CHECKSUM_BREAK. In this particular case __skb_checksum_validate_complete() also invokes __skb_checksum_complete(skb). If UDP-Lite is using partial checksum that covers only part of a packet, the function will return bad checksum and the packet will be dropped. It can be fixed if we skip skb_checksum_init_zero_check() and only set the required pseudo header checksum for UDP-Lite with partial checksum before udp4_csum_init()/udp6_csum_init() functions return. Fixes: ed70fcfcee95 ("net: Call skb_checksum_init in IPv4") Fixes: e4f45b7f40bd ("net: Call skb_checksum_init in IPv6") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- include/net/udplite.h | 1 + net/ipv4/udp.c | 5 +++++ net/ipv6/ip6_checksum.c | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/include/net/udplite.h b/include/net/udplite.h index 81bdbf9..9185e45 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -64,6 +64,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) UDP_SKB_CB(skb)->cscov = cscov; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; + skb->csum_valid = 0; } return 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bfaefe5..e5ef7c3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2024,6 +2024,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, err = udplite_checksum_init(skb, uh); if (err) return err; + + if (UDP_SKB_CB(skb)->partial_cov) { + skb->csum = inet_compute_pseudo(skb, proto); + return 0; + } } /* Note, we are only interested in != 0 or == 0, thus the diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index ec43d18..547515e 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -73,6 +73,11 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) err = udplite_checksum_init(skb, uh); if (err) return err; + + if (UDP_SKB_CB(skb)->partial_cov) { + skb->csum = ip6_compute_pseudo(skb, proto); + return 0; + } } /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) -- cgit v1.1 From 43a08e0f58b3f236165029710a4e3b303815253b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2018 14:47:15 -0800 Subject: tun: fix tun_napi_alloc_frags() frag allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While fuzzing arm64 v4.16-rc1 with Syzkaller, I've been hitting a misaligned atomic in __skb_clone:         atomic_inc(&(skb_shinfo(skb)->dataref)); where dataref doesn't have the required natural alignment, and the atomic operation faults. e.g. i often see it aligned to a single byte boundary rather than a four byte boundary. AFAICT, the skb_shared_info is misaligned at the instant it's allocated in __napi_alloc_skb() __napi_alloc_skb() Problem is caused by tun_napi_alloc_frags() using napi_alloc_frag() with user provided seg sizes, leading to other users of this API getting unaligned page fragments. Since we would like to not necessarily add paddings or alignments to the frags that tun_napi_alloc_frags() attaches to the skb, switch to another page frag allocator. As a bonus skb_page_frag_refill() can use GFP_KERNEL allocations, meaning that we can not deplete memory reserves as easily. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Reported-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: David S. Miller --- drivers/net/tun.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 81e6cc9..b52258c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1489,27 +1489,23 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, skb->truesize += skb->data_len; for (i = 1; i < it->nr_segs; i++) { + struct page_frag *pfrag = ¤t->task_frag; size_t fragsz = it->iov[i].iov_len; - unsigned long offset; - struct page *page; - void *data; if (fragsz == 0 || fragsz > PAGE_SIZE) { err = -EINVAL; goto free; } - local_bh_disable(); - data = napi_alloc_frag(fragsz); - local_bh_enable(); - if (!data) { + if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) { err = -ENOMEM; goto free; } - page = virt_to_head_page(data); - offset = data - page_address(page); - skb_fill_page_desc(skb, i - 1, page, offset, fragsz); + skb_fill_page_desc(skb, i - 1, pfrag->page, + pfrag->offset, fragsz); + page_ref_inc(pfrag->page); + pfrag->offset += fragsz; } return skb; -- cgit v1.1 From a16b8d0cf2ec1e626d24bc2a7b9e64ace6f7501d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 15 Feb 2018 22:59:00 +0000 Subject: rxrpc: Work around usercopy check Due to a check recently added to copy_to_user(), it's now not permitted to copy from slab-held data to userspace unless the slab is whitelisted. This affects rxrpc_recvmsg() when it attempts to place an RXRPC_USER_CALL_ID control message in the userspace control message buffer. A warning is generated by usercopy_warn() because the source is the copy of the user_call_ID retained in the rxrpc_call struct. Work around the issue by copying the user_call_ID to a variable on the stack and passing that to put_cmsg(). The warning generated looks like: Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'dmaengine-unmap-128' (offset 680, size 8)! WARNING: CPU: 0 PID: 1401 at mm/usercopy.c:81 usercopy_warn+0x7e/0xa0 ... RIP: 0010:usercopy_warn+0x7e/0xa0 ... Call Trace: __check_object_size+0x9c/0x1a0 put_cmsg+0x98/0x120 rxrpc_recvmsg+0x6fc/0x1010 [rxrpc] ? finish_wait+0x80/0x80 ___sys_recvmsg+0xf8/0x240 ? __clear_rsb+0x25/0x3d ? __clear_rsb+0x15/0x3d ? __clear_rsb+0x25/0x3d ? __clear_rsb+0x15/0x3d ? __clear_rsb+0x25/0x3d ? __clear_rsb+0x15/0x3d ? __clear_rsb+0x25/0x3d ? __clear_rsb+0x15/0x3d ? finish_task_switch+0xa6/0x2b0 ? trace_hardirqs_on_caller+0xed/0x180 ? _raw_spin_unlock_irq+0x29/0x40 ? __sys_recvmsg+0x4e/0x90 __sys_recvmsg+0x4e/0x90 do_syscall_64+0x7a/0x220 entry_SYSCALL_64_after_hwframe+0x26/0x9b Reported-by: Jonathan Billings Signed-off-by: David Howells Acked-by: Kees Cook Tested-by: Jonathan Billings Signed-off-by: David S. Miller --- net/rxrpc/recvmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index cc21e8d..9d45d8b 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -517,9 +517,10 @@ try_again: ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, sizeof(unsigned int), &id32); } else { + unsigned long idl = call->user_call_ID; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - sizeof(unsigned long), - &call->user_call_ID); + sizeof(unsigned long), &idl); } if (ret < 0) goto error_unlock_call; -- cgit v1.1 From 27d4ee03078aba88c5e07dcc4917e8d01d046f38 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: workqueue: Allow retrieval of current task's work struct Introduce a helper to retrieve the current task's work struct if it is a workqueue worker. This allows us to fix a long-standing deadlock in several DRM drivers wherein the ->runtime_suspend callback waits for a specific worker to finish and that worker in turn calls a function which waits for runtime suspend to finish. That function is invoked from multiple call sites and waiting for runtime suspend to finish is the correct thing to do except if it's executing in the context of the worker. Cc: Lai Jiangshan Cc: Dave Airlie Cc: Ben Skeggs Cc: Alex Deucher Acked-by: Tejun Heo Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/2d8f603074131eb87e588d2b803a71765bd3a2fd.1518338788.git.lukas@wunner.de --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4a54ef9..bc0cda1 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -465,6 +465,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 43d18cb..255c20e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4168,6 +4168,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) EXPORT_SYMBOL_GPL(workqueue_set_max_active); /** + * current_work - retrieve %current task's work struct + * + * Determine if %current task is a workqueue worker and what it's working on. + * Useful to find out the context that the %current task is running in. + * + * Return: work struct if %current task is a workqueue worker, %NULL otherwise. + */ +struct work_struct *current_work(void) +{ + struct worker *worker = current_wq_worker(); + + return worker ? worker->current_work : NULL; +} +EXPORT_SYMBOL(current_work); + +/** * current_is_workqueue_rescuer - is %current workqueue rescuer? * * Determine whether %current is a workqueue rescuer. Can be used from -- cgit v1.1 From 25c058ccaf2ebbc3e250ec1e199e161f91fe27d4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 14 Feb 2018 06:41:25 +0100 Subject: drm: Allow determining if current task is output poll worker Introduce a helper to determine if the current task is an output poll worker. This allows us to fix a long-standing deadlock in several DRM drivers wherein the ->runtime_suspend callback waits for the output poll worker to finish and the worker in turn calls a ->detect callback which waits for runtime suspend to finish. The ->detect callback is invoked from multiple call sites and waiting for runtime suspend to finish is the correct thing to do except if it's executing in the context of the worker. v2: Expand kerneldoc to specifically mention deadlock between output poll worker and autosuspend worker as use case. (Lyude) Cc: Dave Airlie Cc: Ben Skeggs Cc: Alex Deucher Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/3549ce32e7f1467102e70d3e9cbf70c46bfe108e.1518593424.git.lukas@wunner.de --- drivers/gpu/drm/drm_probe_helper.c | 20 ++++++++++++++++++++ include/drm/drm_crtc_helper.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 6dc2dde..7a6b2dc 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -655,6 +655,26 @@ out: } /** + * drm_kms_helper_is_poll_worker - is %current task an output poll worker? + * + * Determine if %current task is an output poll worker. This can be used + * to select distinct code paths for output polling versus other contexts. + * + * One use case is to avoid a deadlock between the output poll worker and + * the autosuspend worker wherein the latter waits for polling to finish + * upon calling drm_kms_helper_poll_disable(), while the former waits for + * runtime suspend to finish upon calling pm_runtime_get_sync() in a + * connector ->detect hook. + */ +bool drm_kms_helper_is_poll_worker(void) +{ + struct work_struct *work = current_work(); + + return work && work->func == output_poll_execute; +} +EXPORT_SYMBOL(drm_kms_helper_is_poll_worker); + +/** * drm_kms_helper_poll_disable - disable output polling * @dev: drm_device * diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 76e237b..6914633 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -77,5 +77,6 @@ void drm_kms_helper_hotplug_event(struct drm_device *dev); void drm_kms_helper_poll_disable(struct drm_device *dev); void drm_kms_helper_poll_enable(struct drm_device *dev); +bool drm_kms_helper_is_poll_worker(void); #endif -- cgit v1.1 From d61a5c1063515e855bedb1b81e20e50b0ac3541e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: drm/nouveau: Fix deadlock on runtime suspend nouveau's ->runtime_suspend hook calls drm_kms_helper_poll_disable(), which waits for the output poll worker to finish if it's running. The output poll worker meanwhile calls pm_runtime_get_sync() in nouveau_connector_detect() which waits for the ongoing suspend to finish, causing a deadlock. Fix by not acquiring a runtime PM ref if nouveau_connector_detect() is called in the output poll worker's context. This is safe because the poll worker is only enabled while runtime active and we know that ->runtime_suspend waits for it to finish. Other contexts calling nouveau_connector_detect() do require a runtime PM ref, these comprise: status_store() drm sysfs interface ->fill_modes drm callback drm_fb_helper_probe_connector_modes() drm_mode_getconnector() nouveau_connector_hotplug() nouveau_display_hpd_work() nv17_tv_set_property() Stack trace for posterity: INFO: task kworker/0:1:58 blocked for more than 120 seconds. Workqueue: events output_poll_execute [drm_kms_helper] Call Trace: schedule+0x28/0x80 rpm_resume+0x107/0x6e0 __pm_runtime_resume+0x47/0x70 nouveau_connector_detect+0x7e/0x4a0 [nouveau] nouveau_connector_detect_lvds+0x132/0x180 [nouveau] drm_helper_probe_detect_ctx+0x85/0xd0 [drm_kms_helper] output_poll_execute+0x11e/0x1c0 [drm_kms_helper] process_one_work+0x184/0x380 worker_thread+0x2e/0x390 INFO: task kworker/0:2:252 blocked for more than 120 seconds. Workqueue: pm pm_runtime_work Call Trace: schedule+0x28/0x80 schedule_timeout+0x1e3/0x370 wait_for_completion+0x123/0x190 flush_work+0x142/0x1c0 nouveau_pmops_runtime_suspend+0x7e/0xd0 [nouveau] pci_pm_runtime_suspend+0x5c/0x180 vga_switcheroo_runtime_suspend+0x1e/0xa0 __rpm_callback+0xc1/0x200 rpm_callback+0x1f/0x70 rpm_suspend+0x13c/0x640 pm_runtime_work+0x6e/0x90 process_one_work+0x184/0x380 worker_thread+0x2e/0x390 Bugzilla: https://bugs.archlinux.org/task/53497 Bugzilla: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=870523 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70388#c33 Fixes: 5addcf0a5f0f ("nouveau: add runtime PM support (v0.9)") Cc: stable@vger.kernel.org # v3.12+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct Cc: stable@vger.kernel.org # v3.12+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker Cc: Ben Skeggs Cc: Dave Airlie Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/b7d2cbb609a80f59ccabfdf479b9d5907c603ea1.1518338789.git.lukas@wunner.de --- drivers/gpu/drm/nouveau/nouveau_connector.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 69d6e61..6ed9cb0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -570,9 +570,15 @@ nouveau_connector_detect(struct drm_connector *connector, bool force) nv_connector->edid = NULL; } - ret = pm_runtime_get_sync(connector->dev->dev); - if (ret < 0 && ret != -EACCES) - return conn_status; + /* Outputs are only polled while runtime active, so acquiring a + * runtime PM ref here is unnecessary (and would deadlock upon + * runtime suspend because it waits for polling to finish). + */ + if (!drm_kms_helper_is_poll_worker()) { + ret = pm_runtime_get_sync(connector->dev->dev); + if (ret < 0 && ret != -EACCES) + return conn_status; + } nv_encoder = nouveau_connector_ddc_detect(connector); if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) { @@ -647,8 +653,10 @@ detect_analog: out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return conn_status; } -- cgit v1.1 From 9ab2323ca184168c288f7355fc19ec0838efc20c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 16 Feb 2018 17:18:33 +0800 Subject: sctp: remove the left unnecessary check for chunk in sctp_renege_events Commit fb23403536ea ("sctp: remove the useless check in sctp_renege_events") forgot to remove another check for chunk in sctp_renege_events. Dan found this when doing a static check. This patch is to remove that check, and also to merge two checks into one 'if statement'. Fixes: fb23403536ea ("sctp: remove the useless check in sctp_renege_events") Reported-by: Dan Carpenter Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/stream_interleave.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 65ac03b..d3764c1 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -968,9 +968,8 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, needed); } - if (chunk && freed >= needed) - if (sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0) - sctp_intl_start_pd(ulpq, gfp); + if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0) + sctp_intl_start_pd(ulpq, gfp); sk_mem_reclaim(asoc->base.sk); } -- cgit v1.1 From 15734feff2bdac24aa3266c437cffa42851990e3 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: drm/radeon: Fix deadlock on runtime suspend radeon's ->runtime_suspend hook calls drm_kms_helper_poll_disable(), which waits for the output poll worker to finish if it's running. The output poll worker meanwhile calls pm_runtime_get_sync() in radeon's ->detect hooks, which waits for the ongoing suspend to finish, causing a deadlock. Fix by not acquiring a runtime PM ref if the ->detect hooks are called in the output poll worker's context. This is safe because the poll worker is only enabled while runtime active and we know that ->runtime_suspend waits for it to finish. Stack trace for posterity: INFO: task kworker/0:3:31847 blocked for more than 120 seconds Workqueue: events output_poll_execute [drm_kms_helper] Call Trace: schedule+0x3c/0x90 rpm_resume+0x1e2/0x690 __pm_runtime_resume+0x3f/0x60 radeon_lvds_detect+0x39/0xf0 [radeon] output_poll_execute+0xda/0x1e0 [drm_kms_helper] process_one_work+0x14b/0x440 worker_thread+0x48/0x4a0 INFO: task kworker/2:0:10493 blocked for more than 120 seconds. Workqueue: pm pm_runtime_work Call Trace: schedule+0x3c/0x90 schedule_timeout+0x1b3/0x240 wait_for_common+0xc2/0x180 wait_for_completion+0x1d/0x20 flush_work+0xfc/0x1a0 __cancel_work_timer+0xa5/0x1d0 cancel_delayed_work_sync+0x13/0x20 drm_kms_helper_poll_disable+0x1f/0x30 [drm_kms_helper] radeon_pmops_runtime_suspend+0x3d/0xa0 [radeon] pci_pm_runtime_suspend+0x61/0x1a0 vga_switcheroo_runtime_suspend+0x21/0x70 __rpm_callback+0x32/0x70 rpm_callback+0x24/0x80 rpm_suspend+0x12b/0x640 pm_runtime_work+0x6f/0xb0 process_one_work+0x14b/0x440 worker_thread+0x48/0x4a0 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94147 Fixes: 10ebc0bc0934 ("drm/radeon: add runtime PM support (v2)") Cc: stable@vger.kernel.org # v3.13+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct Cc: stable@vger.kernel.org # v3.13+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker Cc: Ismo Toijala Cc: Alex Deucher Cc: Dave Airlie Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/64ea02c44f91dda19bc563902b97bbc699040392.1518338789.git.lukas@wunner.de --- drivers/gpu/drm/radeon/radeon_connectors.c | 74 ++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 59dcefb..30e1296 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -900,9 +900,11 @@ radeon_lvds_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); @@ -925,8 +927,12 @@ radeon_lvds_detect(struct drm_connector *connector, bool force) /* check acpi lid status ??? */ radeon_connector_update_scratch_regs(connector, ret); - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + return ret; } @@ -1040,9 +1046,11 @@ radeon_vga_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } encoder = radeon_best_single_encoder(connector); if (!encoder) @@ -1109,8 +1117,10 @@ radeon_vga_detect(struct drm_connector *connector, bool force) radeon_connector_update_scratch_regs(connector, ret); out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -1174,9 +1184,11 @@ radeon_tv_detect(struct drm_connector *connector, bool force) if (!radeon_connector->dac_load_detect) return ret; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } encoder = radeon_best_single_encoder(connector); if (!encoder) @@ -1188,8 +1200,12 @@ radeon_tv_detect(struct drm_connector *connector, bool force) if (ret == connector_status_connected) ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, false); radeon_connector_update_scratch_regs(connector, ret); - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + return ret; } @@ -1252,9 +1268,11 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; bool dret = false, broken_edid = false; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (radeon_connector->detected_hpd_without_ddc) { force = true; @@ -1437,8 +1455,10 @@ out: } exit: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -1689,9 +1709,11 @@ radeon_dp_detect(struct drm_connector *connector, bool force) if (radeon_dig_connector->is_mst) return connector_status_disconnected; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (!force && radeon_check_hpd_status_unchanged(connector)) { ret = connector->status; @@ -1778,8 +1800,10 @@ radeon_dp_detect(struct drm_connector *connector, bool force) } out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } -- cgit v1.1 From aa0aad57909eb321746325951d66af88a83bc956 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 Feb 2018 10:38:28 +0100 Subject: drm/amdgpu: Fix deadlock on runtime suspend amdgpu's ->runtime_suspend hook calls drm_kms_helper_poll_disable(), which waits for the output poll worker to finish if it's running. The output poll worker meanwhile calls pm_runtime_get_sync() in amdgpu's ->detect hooks, which waits for the ongoing suspend to finish, causing a deadlock. Fix by not acquiring a runtime PM ref if the ->detect hooks are called in the output poll worker's context. This is safe because the poll worker is only enabled while runtime active and we know that ->runtime_suspend waits for it to finish. Fixes: d38ceaf99ed0 ("drm/amdgpu: add core driver (v4)") Cc: stable@vger.kernel.org # v4.2+: 27d4ee03078a: workqueue: Allow retrieval of current task's work struct Cc: stable@vger.kernel.org # v4.2+: 25c058ccaf2e: drm: Allow determining if current task is output poll worker Cc: Alex Deucher Tested-by: Mike Lothian Reviewed-by: Lyude Paul Signed-off-by: Lukas Wunner Link: https://patchwork.freedesktop.org/patch/msgid/4c9bf72aacae1eef062bd134cd112e0770a7f121.1518338789.git.lukas@wunner.de --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 58 +++++++++++++++++--------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index df9cbc7..21e7ae1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -737,9 +737,11 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (encoder) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -758,8 +760,12 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force) /* check acpi lid status ??? */ amdgpu_connector_update_scratch_regs(connector, ret); - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + return ret; } @@ -869,9 +875,11 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } encoder = amdgpu_connector_best_single_encoder(connector); if (!encoder) @@ -925,8 +933,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -989,9 +999,11 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) enum drm_connector_status ret = connector_status_disconnected; bool dret = false, broken_edid = false; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) { ret = connector->status; @@ -1116,8 +1128,10 @@ out: amdgpu_connector_update_scratch_regs(connector, ret); exit: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } @@ -1360,9 +1374,11 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); int r; - r = pm_runtime_get_sync(connector->dev->dev); - if (r < 0) - return connector_status_disconnected; + if (!drm_kms_helper_is_poll_worker()) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + } if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) { ret = connector->status; @@ -1430,8 +1446,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) amdgpu_connector_update_scratch_regs(connector, ret); out: - pm_runtime_mark_last_busy(connector->dev->dev); - pm_runtime_put_autosuspend(connector->dev->dev); + if (!drm_kms_helper_is_poll_worker()) { + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } return ret; } -- cgit v1.1 From 6137e4166004e2ec383ac05d5ca15831f4668806 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 14 Feb 2018 16:12:54 -0800 Subject: xtensa: support DMA buffers in high memory If a DMA buffer is allocated in high memory and kernel mapping is required use dma_common_contiguous_remap to map buffer to the vmalloc region and dma_common_free_remap to unmap it. Signed-off-by: Max Filippov --- arch/xtensa/kernel/pci-dma.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 623720a..732631c 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -123,7 +124,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { unsigned long ret; - unsigned long uncached = 0; + unsigned long uncached; unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page = NULL; @@ -144,15 +145,27 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, if (!page) return NULL; - ret = (unsigned long)page_address(page); + *handle = phys_to_dma(dev, page_to_phys(page)); - /* We currently don't support coherent memory outside KSEG */ +#ifdef CONFIG_MMU + if (PageHighMem(page)) { + void *p; + p = dma_common_contiguous_remap(page, size, VM_MAP, + pgprot_noncached(PAGE_KERNEL), + __builtin_return_address(0)); + if (!p) { + if (!dma_release_from_contiguous(dev, page, count)) + __free_pages(page, get_order(size)); + } + return p; + } +#endif + ret = (unsigned long)page_address(page); BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR || ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR; - *handle = virt_to_bus((void *)ret); __invalidate_dcache_range(ret, size); return (void *)uncached; @@ -161,13 +174,20 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - unsigned long addr = (unsigned long)vaddr + - XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; - struct page *page = virt_to_page(addr); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR || - addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); + unsigned long addr = (unsigned long)vaddr; + struct page *page; + + if (addr >= XCHAL_KSEG_BYPASS_VADDR && + addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) { + addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; + page = virt_to_page(addr); + } else { +#ifdef CONFIG_MMU + dma_common_free_remap(vaddr, size, VM_MAP); +#endif + page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle))); + } if (!dma_release_from_contiguous(dev, page, count)) __free_pages(page, get_order(size)); -- cgit v1.1 From 1d91c1d2c80cb70e2e553845e278b87a960c04da Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Feb 2018 13:20:42 -0800 Subject: nospec: Kill array_index_nospec_mask_check() There are multiple problems with the dynamic sanity checking in array_index_nospec_mask_check(): * It causes unnecessary overhead in the 32-bit case since integer sized @index values will no longer cause the check to be compiled away like in the 64-bit case. * In the 32-bit case it may trigger with user controllable input when the expectation is that should only trigger during development of new kernel enabling. * The macro reuses the input parameter in multiple locations which is broken if someone passes an expression like 'index++' to array_index_nospec(). Reported-by: Linus Torvalds Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/151881604278.17395.6605847763178076520.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index fbc98e2..d6701e3 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -30,26 +30,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #endif /* - * Warn developers about inappropriate array_index_nospec() usage. - * - * Even if the CPU speculates past the WARN_ONCE branch, the - * sign bit of @index is taken into account when generating the - * mask. - * - * This warning is compiled out when the compiler can infer that - * @index and @size are less than LONG_MAX. - */ -#define array_index_mask_nospec_check(index, size) \ -({ \ - if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ - "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ - _mask = 0; \ - else \ - _mask = array_index_mask_nospec(index, size); \ - _mask; \ -}) - -/* * array_index_nospec - sanitize an array index after a bounds check * * For a code sequence like: @@ -67,7 +47,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, ({ \ typeof(index) _i = (index); \ typeof(size) _s = (size); \ - unsigned long _mask = array_index_mask_nospec_check(_i, _s); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ \ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ -- cgit v1.1 From b98c6a160a057d5686a8c54c79cc6c8c94a7d0c8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 16 Feb 2018 13:20:48 -0800 Subject: nospec: Allow index argument to have const-qualified type The last expression in a statement expression need not be a bare variable, quoting gcc docs The last thing in the compound statement should be an expression followed by a semicolon; the value of this subexpression serves as the value of the entire construct. and we already use that in e.g. the min/max macros which end with a ternary expression. This way, we can allow index to have const-qualified type, which will in some cases avoid the need for introducing a local copy of index of non-const qualified type. That, in turn, can prevent readers not familiar with the internals of array_index_nospec from wondering about the seemingly redundant extra variable, and I think that's worthwhile considering how confusing the whole _nospec business is. The expression _i&_mask has type unsigned long (since that is the type of _mask, and the BUILD_BUG_ONs guarantee that _i will get promoted to that), so in order not to change the type of the whole expression, add a cast back to typeof(_i). Signed-off-by: Rasmus Villemoes Signed-off-by: Dan Williams Acked-by: Linus Torvalds Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/151881604837.17395.10812767547837568328.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index d6701e3..172a19d 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -52,7 +52,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ \ - _i &= _mask; \ - _i; \ + (typeof(_i)) (_i & _mask); \ }) #endif /* _LINUX_NOSPEC_H */ -- cgit v1.1 From eb6174f6d1be16b19cfa43dac296bfed003ce1a6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Feb 2018 13:20:54 -0800 Subject: nospec: Include dependency The nospec.h header expects the per-architecture header file to optionally define array_index_mask_nospec(). Include that dependency to prevent inadvertent fallback to the default array_index_mask_nospec() implementation. The default implementation may not provide a full mitigation on architectures that perform data value speculation. Reported-by: Christian Borntraeger Signed-off-by: Dan Williams Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/151881605404.17395.1341935530792574707.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 172a19d..e791ebc 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -5,6 +5,7 @@ #ifndef _LINUX_NOSPEC_H #define _LINUX_NOSPEC_H +#include /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise -- cgit v1.1 From 3f1f576a195aa266813cbd4ca70291deb61e0129 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:38 +0100 Subject: x86/microcode: Propagate return value from updating functions ... so that callers can know when microcode was updated and act accordingly. Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/microcode.h | 9 +++++++-- arch/x86/kernel/cpu/microcode/amd.c | 10 +++++----- arch/x86/kernel/cpu/microcode/core.c | 33 +++++++++++++++++---------------- arch/x86/kernel/cpu/microcode/intel.c | 10 +++++----- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 55520cec..7fb1047 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -37,7 +37,12 @@ struct cpu_signature { struct device; -enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +enum ucode_state { + UCODE_OK = 0, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -54,7 +59,7 @@ struct microcode_ops { * are being called. * See also the "Synchronization" section in microcode_core.c. */ - int (*apply_microcode) (int cpu); + enum ucode_state (*apply_microcode) (int cpu); int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b846..a998e1a 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu) p = find_patch(cpu); if (!p) - return 0; + return UCODE_NFOUND; mc_amd = p->data; uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu) if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; uci->cpu_sig.rev = rev; - return 0; + return UCODE_OK; } if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); - return -1; + return UCODE_ERROR; } pr_info("CPU%d: new patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; - return 0; + return UCODE_UPDATED; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65..6fdaf7c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -374,7 +374,7 @@ static int collect_cpu_info(int cpu) } struct apply_microcode_ctx { - int err; + enum ucode_state err; }; static void apply_microcode_local(void *arg) @@ -489,31 +489,29 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +static enum ucode_state reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - int err = 0; if (!uci->valid) - return err; + return UCODE_OK; ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + if (ustate != UCODE_OK) + return ustate; + + return apply_microcode_on_target(cpu); } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; unsigned long val; + ssize_t ret = 0; int cpu; - ssize_t ret = 0, tmp_ret; ret = kstrtoul(buf, 0, &val); if (ret) @@ -526,15 +524,18 @@ static ssize_t reload_store(struct device *dev, mutex_lock(µcode_mutex); for_each_online_cpu(cpu) { tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) + if (tmp_ret > UCODE_NFOUND) { pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; + /* set retval for the first encountered reload error */ + if (!ret) + ret = -EINVAL; + } } - if (!ret) + + if (!ret && tmp_ret == UCODE_UPDATED) perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a15db2b..923054a 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -772,7 +772,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; @@ -782,7 +782,7 @@ static int apply_microcode_intel(int cpu) /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) - return -1; + return UCODE_ERROR; uci = ucode_cpu_info + cpu; mc = uci->mc; @@ -790,7 +790,7 @@ static int apply_microcode_intel(int cpu) /* Look for a newer patch in our cache: */ mc = find_patch(uci); if (!mc) - return 0; + return UCODE_NFOUND; } /* write microcode via MSR 0x79 */ @@ -801,7 +801,7 @@ static int apply_microcode_intel(int cpu) if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); - return -1; + return UCODE_ERROR; } if (rev != prev_rev) { @@ -818,7 +818,7 @@ static int apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; - return 0; + return UCODE_UPDATED; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, -- cgit v1.1 From 1008c52c09dcb23d93f8e0ea83a6246265d2cce0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:39 +0100 Subject: x86/CPU: Add a microcode loader callback Add a callback function which the microcode loader calls when microcode has been updated to a newer revision. Do the callback only when no error was encountered during loading. Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 10 ++++++++++ arch/x86/kernel/cpu/microcode/core.c | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1bd9ed8..b0ccd48 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -977,4 +977,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 824aee0..84f1cd8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1749,3 +1749,13 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + perf_check_microcode(); +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6fdaf7c..aa1b9a4 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -509,6 +509,7 @@ static ssize_t reload_store(struct device *dev, const char *buf, size_t size) { enum ucode_state tmp_ret = UCODE_OK; + bool do_callback = false; unsigned long val; ssize_t ret = 0; int cpu; @@ -531,10 +532,13 @@ static ssize_t reload_store(struct device *dev, if (!ret) ret = -EINVAL; } + + if (tmp_ret == UCODE_UPDATED) + do_callback = true; } - if (!ret && tmp_ret == UCODE_UPDATED) - perf_check_microcode(); + if (!ret && do_callback) + microcode_check(); mutex_unlock(µcode_mutex); put_online_cpus(); -- cgit v1.1 From 42ca8082e260dcfd8afa2afa6ec1940b9d41724c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Feb 2018 12:26:40 +0100 Subject: x86/CPU: Check CPU feature bits after microcode upgrade With some microcode upgrades, new CPUID features can become visible on the CPU. Check what the kernel has mirrored now and issue a warning hinting at possible things the user/admin can do to make use of the newly visible features. Originally-by: Ashok Raj Tested-by: Ashok Raj Signed-off-by: Borislav Petkov Reviewed-by: Ashok Raj Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180216112640.11554-4-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 84f1cd8..348cf48 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1757,5 +1757,25 @@ core_initcall(init_cpu_syscore); */ void microcode_check(void) { + struct cpuinfo_x86 info; + perf_check_microcode(); + + /* Reload CPUID max function as it might've changed. */ + info.cpuid_level = cpuid_eax(0); + + /* + * Copy all capability leafs to pick up the synthetic ones so that + * memcmp() below doesn't fail on that. The ones coming from CPUID will + * get overwritten in get_cpu_cap(). + */ + memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + + get_cpu_cap(&info); + + if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + return; + + pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); + pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); } -- cgit v1.1 From 9e809d15d6b692fa061d74be7aaab1c79f6784b8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Feb 2018 18:59:23 +0100 Subject: x86/entry: Reduce the code footprint of the 'idtentry' macro Play a little trick in the generic PUSH_AND_CLEAR_REGS macro to insert the GP registers "above" the original return address. This allows us to (re-)insert the macro in error_entry() and paranoid_entry() and to remove it from the idtentry macro. This reduces the static footprint significantly: text data bss dec hex filename 24307 0 0 24307 5ef3 entry_64.o-orig 20987 0 0 20987 51fb entry_64.o Co-developed-by: Linus Torvalds Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180214175924.23065-2-linux@dominikbrodowski.net [ Small tweaks to comments. ] Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 11 ++++++++++- arch/x86/entry/entry_64.S | 18 ++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index dce7092..196b610 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 /* * Push registers and sanitize registers of values that a * speculation attack might otherwise want to exploit. The @@ -105,8 +105,14 @@ For 32-bit we have the following conventions - kernel is built with * could be put to use in a speculative execution gadget. * Interleave XOR with PUSH for better uop scheduling: */ + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ + movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + .else pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ + .endif pushq \rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ @@ -131,6 +137,9 @@ For 32-bit we have the following conventions - kernel is built with pushq %r15 /* pt_regs->r15 */ xorq %r15, %r15 /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret + pushq %rsi /* return address on top of stack */ + .endif .endm .macro POP_REGS pop_rdi=1 skip_r11rcx=0 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8971bd6..77edc23 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -875,12 +875,8 @@ ENTRY(\sym) pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif - /* Save all registers in pt_regs */ - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER - .if \paranoid < 2 - testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ + testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ jnz .Lfrom_usermode_switch_stack_\@ .endif @@ -1130,13 +1126,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* - * Switch gs if needed. + * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx rdmsr @@ -1181,12 +1179,14 @@ ENTRY(paranoid_exit) END(paranoid_exit) /* - * Switch gs if needed. + * Save all registers in pt_regs, and switch GS if needed. * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) - UNWIND_HINT_REGS offset=8 + UNWIND_HINT_FUNC cld + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1577,8 +1577,6 @@ end_repeat_nmi: * frame to point back to repeat_nmi. */ pushq $-1 /* ORIG_RAX: no syscall to restart */ - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER /* * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit -- cgit v1.1 From ced5d0bf603fa0baee8ea889e1d70971fd210894 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 14 Feb 2018 18:59:24 +0100 Subject: x86/entry/64: Use 'xorl' for faster register clearing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some x86 CPU microarchitectures using 'xorq' to clear general-purpose registers is slower than 'xorl'. As 'xorl' is sufficient to clear all 64 bits of these registers due to zero-extension [*], switch the x86 64-bit entry code to use 'xorl'. No change in functionality and no change in code size. [*] According to Intel 64 and IA-32 Architecture Software Developer's Manual, section 3.4.1.1, the result of 32-bit operands are "zero- extended to a 64-bit result in the destination general-purpose register." The AMD64 Architecture Programmer’s Manual Volume 3, Appendix B.1, describes the same behaviour. Suggested-by: Denys Vlasenko Signed-off-by: Dominik Brodowski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180214175924.23065-3-linux@dominikbrodowski.net [ Improved on the changelog a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 16 ++++++------ arch/x86/entry/entry_64_compat.S | 54 ++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 196b610..5d10b7a 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -117,25 +117,25 @@ For 32-bit we have the following conventions - kernel is built with pushq %rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorq %r11, %r11 /* nospec r11*/ + xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12*/ + xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13*/ + xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14*/ + xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15*/ + xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS .if \save_ret pushq %rsi /* return address on top of stack */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index fd65e01..364ea4a 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* @@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -298,9 +298,9 @@ sysret32_from_system_call: */ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d swapgs sysretl END(entry_SYSCALL_compat) @@ -358,25 +358,25 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* -- cgit v1.1 From f027e0b3a774e10302207e91d304bbf99e3a8b36 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 14 Feb 2018 15:43:00 +0100 Subject: iio: adis_lib: Initialize trigger before requesting interrupt The adis_probe_trigger() creates a new IIO trigger and requests an interrupt associated with the trigger. The interrupt uses the generic iio_trigger_generic_data_rdy_poll() function as its interrupt handler. Currently the driver initializes some fields of the trigger structure after the interrupt has been requested. But an interrupt can fire as soon as it has been requested. This opens up a race condition. iio_trigger_generic_data_rdy_poll() will access the trigger data structure and dereference the ops field. If the ops field is not yet initialized this will result in a NULL pointer deref. It is not expected that the device generates an interrupt at this point, so typically this issue did not surface unless e.g. due to a hardware misconfiguration (wrong interrupt number, wrong polarity, etc.). But some newer devices from the ADIS family start to generate periodic interrupts in their power-on reset configuration and unfortunately the interrupt can not be masked in the device. This makes the race condition much more visible and the following crash has been observed occasionally when booting a system using the ADIS16460. Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = c0004000 [00000008] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-04126-gf9739f0-dirty #257 Hardware name: Xilinx Zynq Platform task: ef04f640 task.stack: ef050000 PC is at iio_trigger_notify_done+0x30/0x68 LR is at iio_trigger_generic_data_rdy_poll+0x18/0x20 pc : [] lr : [] psr: 60000193 sp : ef051bb8 ip : 00000000 fp : ef106400 r10: c081d80a r9 : ef3bfa00 r8 : 00000087 r7 : ef051bec r6 : 00000000 r5 : ef3bfa00 r4 : ee92ab00 r3 : 00000000 r2 : 00000000 r1 : 00000000 r0 : ee97e400 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none Control: 18c5387d Table: 0000404a DAC: 00000051 Process swapper/0 (pid: 1, stack limit = 0xef050210) [] (iio_trigger_notify_done) from [] (__handle_irq_event_percpu+0x88/0x118) [] (__handle_irq_event_percpu) from [] (handle_irq_event_percpu+0x1c/0x58) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x38/0x5c) [] (handle_irq_event) from [] (handle_level_irq+0xa4/0x130) [] (handle_level_irq) from [] (generic_handle_irq+0x24/0x34) [] (generic_handle_irq) from [] (zynq_gpio_irqhandler+0xb8/0x13c) [] (zynq_gpio_irqhandler) from [] (generic_handle_irq+0x24/0x34) [] (generic_handle_irq) from [] (__handle_domain_irq+0x5c/0xb4) [] (__handle_domain_irq) from [] (gic_handle_irq+0x48/0x8c) [] (gic_handle_irq) from [] (__irq_svc+0x6c/0xa8) To fix this make sure that the trigger is fully initialized before requesting the interrupt. Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Reported-by: Robin Getz Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis_trigger.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index 0dd5a38..457372f 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -46,6 +46,10 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) if (adis->trig == NULL) return -ENOMEM; + adis->trig->dev.parent = &adis->spi->dev; + adis->trig->ops = &adis_trigger_ops; + iio_trigger_set_drvdata(adis->trig, adis); + ret = request_irq(adis->spi->irq, &iio_trigger_generic_data_rdy_poll, IRQF_TRIGGER_RISING, @@ -54,9 +58,6 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) if (ret) goto error_free_trig; - adis->trig->dev.parent = &adis->spi->dev; - adis->trig->ops = &adis_trigger_ops; - iio_trigger_set_drvdata(adis->trig, adis); ret = iio_trigger_register(adis->trig); indio_dev->trig = iio_trigger_get(adis->trig); -- cgit v1.1 From 4cd140bda6494543f1c1b0ccceceaa44b676eef6 Mon Sep 17 00:00:00 2001 From: Stefan Windfeldt-Prytz Date: Thu, 15 Feb 2018 15:02:53 +0100 Subject: iio: buffer: check if a buffer has been set up when poll is called If no iio buffer has been set up and poll is called return 0. Without this check there will be a null pointer dereference when calling poll on a iio driver without an iio buffer. Cc: stable@vger.kernel.org Signed-off-by: Stefan Windfeldt-Prytz Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index eda2a0f..c7499c8 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -175,7 +175,7 @@ unsigned int iio_buffer_poll(struct file *filp, struct iio_dev *indio_dev = filp->private_data; struct iio_buffer *rb = indio_dev->buffer; - if (!indio_dev->info) + if (!indio_dev->info || rb == NULL) return 0; poll_wait(filp, &rb->pollq, wait); -- cgit v1.1 From 565e0450129647df5112bff3df3ffd02b0c08e32 Mon Sep 17 00:00:00 2001 From: Aliaksei Karaliou Date: Sat, 23 Dec 2017 21:20:31 +0300 Subject: md/raid5: simplify uninitialization of shrinker Don't use shrinker.nr_deferred to check whether shrinker was initialized or not. Now this check was integrated into unregister_shrinker(), so it is safe to call it against unregistered shrinker. Signed-off-by: Aliaksei Karaliou Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 50d0114..36e0506 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6764,9 +6764,7 @@ static void free_conf(struct r5conf *conf) log_exit(conf); - if (conf->shrinker.nr_deferred) - unregister_shrinker(&conf->shrinker); - + unregister_shrinker(&conf->shrinker); free_thread_groups(conf); shrink_stripes(conf); raid5_free_percpu(conf); -- cgit v1.1 From 56a64c177abd85a01c5881e195c363b7bda448f2 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 17 Jan 2018 13:38:02 +0000 Subject: md/raid1: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b2eae33..f978edd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1108,7 +1108,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, bio_copy_data(behind_bio, bio); skip_copy: - r1_bio->behind_master_bio = behind_bio;; + r1_bio->behind_master_bio = behind_bio; set_bit(R1BIO_BehindIO, &r1_bio->state); return; -- cgit v1.1 From 3acdb7b514198d81ef7efcb2e86f498776ac10a7 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 13 Jan 2018 09:49:03 +0100 Subject: md-multipath: Use seq_putc() in multipath_status() A single character (closing square bracket) should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Shaohua Li --- drivers/md/md-multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index e40065b..0a7e99d 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev) seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); } rcu_read_unlock(); - seq_printf (seq, "]"); + seq_putc(seq, ']'); } static int multipath_congested(struct mddev *mddev, int bits) -- cgit v1.1 From 4b242e97d74192bbc5decd808c058cbc347af016 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 19 Jan 2018 11:37:56 +0800 Subject: raid10: change the size of resync window for clustered raid To align with raid1's resync window, we need to set the resync window of raid10 to 32M as well. Fixes: 8db87912c9a8 ("md-cluster: Use a small window for raid10 resync") Reported-by: Zhilong Liu Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 99c9207..8d7ddc9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data) #define RESYNC_WINDOW (1024*1024) /* maximum number of concurrent requests, memory permitting */ #define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE) -#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW) +#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW) #define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9) /* -- cgit v1.1 From b126194cbb799f9980b92a77e58db6ad794c8082 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 24 Jan 2018 12:17:38 +0800 Subject: MD: Free bioset when md_run fails Signed-off-by: Xiao Ni Acked-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index bc67ab6..bcf4ab9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5497,8 +5497,10 @@ int md_run(struct mddev *mddev) } if (mddev->sync_set == NULL) { mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); - if (!mddev->sync_set) - return -ENOMEM; + if (!mddev->sync_set) { + err = -ENOMEM; + goto abort; + } } spin_lock(&pers_lock); @@ -5511,7 +5513,8 @@ int md_run(struct mddev *mddev) else pr_warn("md: personality for level %s is not loaded!\n", mddev->clevel); - return -EINVAL; + err = -EINVAL; + goto abort; } spin_unlock(&pers_lock); if (mddev->level != pers->level) { @@ -5524,7 +5527,8 @@ int md_run(struct mddev *mddev) pers->start_reshape == NULL) { /* This personality cannot handle reshaping... */ module_put(pers->owner); - return -EINVAL; + err = -EINVAL; + goto abort; } if (pers->sync_request) { @@ -5593,7 +5597,7 @@ int md_run(struct mddev *mddev) mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); - return err; + goto abort; } if (mddev->queue) { bool nonrot = true; @@ -5655,6 +5659,18 @@ int md_run(struct mddev *mddev) sysfs_notify_dirent_safe(mddev->sysfs_action); sysfs_notify(&mddev->kobj, NULL, "degraded"); return 0; + +abort: + if (mddev->bio_set) { + bioset_free(mddev->bio_set); + mddev->bio_set = NULL; + } + if (mddev->sync_set) { + bioset_free(mddev->sync_set); + mddev->sync_set = NULL; + } + + return err; } EXPORT_SYMBOL_GPL(md_run); -- cgit v1.1 From 4b6c1060eaa6495aa5b0032e8f2d51dd936b1257 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Fri, 2 Feb 2018 23:13:19 +0100 Subject: md: fix md_write_start() deadlock w/o metadata devices If no metadata devices are configured on raid1/4/5/6/10 (e.g. via dm-raid), md_write_start() unconditionally waits for superblocks to be written thus deadlocking. Fix introduces mddev->has_superblocks bool, defines it in md_run() and checks for it in md_write_start() to conditionally avoid waiting. Once on it, check for non-existing superblocks in md_super_write(). Link: https://bugzilla.kernel.org/show_bug.cgi?id=198647 Fixes: cc27b0c78c796 ("md: fix deadlock between mddev_suspend() and md_write_start()") Signed-off-by: Heinz Mauelshagen Signed-off-by: Shaohua Li --- drivers/md/md.c | 10 ++++++++++ drivers/md/md.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index bcf4ab9..9b73cf1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio; int ff = 0; + if (!page) + return; + if (test_bit(Faulty, &rdev->flags)) return; @@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev) * the only valid external interface is through the md * device. */ + mddev->has_superblocks = false; rdev_for_each(rdev, mddev) { if (test_bit(Faulty, &rdev->flags)) continue; @@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev) set_disk_ro(mddev->gendisk, 1); } + if (rdev->sb_page) + mddev->has_superblocks = true; + /* perform some consistency tests on the device. * We don't want the data to overlap the metadata, * Internal Bitmap issues have been handled elsewhere. @@ -8065,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync); bool md_write_start(struct mddev *mddev, struct bio *bi) { int did_change = 0; + if (bio_data_dir(bi) != WRITE) return true; @@ -8097,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) rcu_read_unlock(); if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); + if (!mddev->has_superblocks) + return true; wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) || mddev->suspended); diff --git a/drivers/md/md.h b/drivers/md/md.h index 58cd20a..fbc925c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -468,6 +468,8 @@ struct mddev { void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; unsigned int good_device_nr; /* good device num within cluster raid */ + + bool has_superblocks:1; }; enum recovery_flags { -- cgit v1.1 From f2785b527cda46314805123ddcbc871655b7c4c4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 3 Feb 2018 09:19:30 +1100 Subject: md: document lifetime of internal rdev pointer. The rdev pointer kept in the local 'config' for each for raid1, raid10, raid4/5/6 has non-obvious lifetime rules. Sometimes RCU is needed, sometimes a lock, something nothing. Add documentation to explain this. Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid1.h | 12 ++++++++++++ drivers/md/raid10.h | 13 +++++++++++++ drivers/md/raid5.h | 12 ++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index c7294e7..eb84bc6 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -26,6 +26,18 @@ #define BARRIER_BUCKETS_NR_BITS (PAGE_SHIFT - ilog2(sizeof(atomic_t))) #define BARRIER_BUCKETS_NR (1<reconfig_mutex + * 2/ when resync/recovery is known to be happening - i.e. in code that is + * called as part of performing resync/recovery. + * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer + * and if it is non-NULL, increment rdev->nr_pending before dropping the + * RCU lock. + * When .rdev is set to NULL, the nr_pending count checked again and if it has + * been incremented, the pointer is put back in .rdev. + */ + struct raid1_info { struct md_rdev *rdev; sector_t head_position; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index db2ac22..e2e8840 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -2,6 +2,19 @@ #ifndef _RAID10_H #define _RAID10_H +/* Note: raid10_info.rdev can be set to NULL asynchronously by + * raid10_remove_disk. + * There are three safe ways to access raid10_info.rdev. + * 1/ when holding mddev->reconfig_mutex + * 2/ when resync/recovery/reshape is known to be happening - i.e. in code + * that is called as part of performing resync/recovery/reshape. + * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer + * and if it is non-NULL, increment rdev->nr_pending before dropping the + * RCU lock. + * When .rdev is set to NULL, the nr_pending count checked again and if it has + * been incremented, the pointer is put back in .rdev. + */ + struct raid10_info { struct md_rdev *rdev, *replacement; sector_t head_position; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 2e61238..3f8da26 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -450,6 +450,18 @@ enum { * HANDLE gets cleared if stripe_handle leaves nothing locked. */ +/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk. + * There are three safe ways to access disk_info.rdev. + * 1/ when holding mddev->reconfig_mutex + * 2/ when resync/recovery/reshape is known to be happening - i.e. in code that + * is called as part of performing resync/recovery/reshape. + * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer + * and if it is non-NULL, increment rdev->nr_pending before dropping the RCU + * lock. + * When .rdev is set to NULL, the nr_pending count checked again and if + * it has been incremented, the pointer is put back in .rdev. + */ + struct disk_info { struct md_rdev *rdev, *replacement; struct page *extra_page; /* extra page to use in prexor */ -- cgit v1.1 From d40ade43e3cd55c6d83859dffc0f9d428a954c63 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Feb 2018 15:15:27 +0100 Subject: dt-bindings: power: Fix "debounce-interval" property misspelling "debounce_interval" was never supported. Signed-off-by: Geert Uytterhoeven Cc: Rob Herring Cc: Mark Rutland Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/power/wakeup-source.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/power/wakeup-source.txt b/Documentation/devicetree/bindings/power/wakeup-source.txt index 3c81f78..5d254ab 100644 --- a/Documentation/devicetree/bindings/power/wakeup-source.txt +++ b/Documentation/devicetree/bindings/power/wakeup-source.txt @@ -60,7 +60,7 @@ Examples #size-cells = <0>; button@1 { - debounce_interval = <50>; + debounce-interval = <50>; wakeup-source; linux,code = <116>; label = "POWER"; -- cgit v1.1 From 9487cfd3430d07366801886bdf185799a2b6f066 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 7 Feb 2018 17:39:14 +0100 Subject: s390/dasd: fix handling of internal requests Internal DASD device driver I/O such as query host access count or path verification is started using the _sleep_on() function. To mark a request as started or ended the callback_data is set to either DASD_SLEEPON_START_TAG or DASD_SLEEPON_END_TAG. In cases where the request has to be stopped unconditionally the status is set to DASD_SLEEPON_END_TAG as well which leads to immediate clearing of the request. But the request might still be on a device request queue for normal operation which might lead to a panic because of a BUG() statement in __dasd_device_process_final_queue() or a list corruption of the device request queue. Fix by removing the setting of DASD_SLEEPON_END_TAG in the dasd_cancel_req() and dasd_generic_requeue_all_requests() functions and ensure that the request is not deleted in the requeue function. Trigger the device tasklet in the requeue function and let the normal processing cleanup the request. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a7c15f0..ecef8e7 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2581,8 +2581,6 @@ int dasd_cancel_req(struct dasd_ccw_req *cqr) case DASD_CQR_QUEUED: /* request was not started - just set to cleared */ cqr->status = DASD_CQR_CLEARED; - if (cqr->callback_data == DASD_SLEEPON_START_TAG) - cqr->callback_data = DASD_SLEEPON_END_TAG; break; case DASD_CQR_IN_IO: /* request in IO - terminate IO and release again */ @@ -3902,9 +3900,12 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device) wait_event(dasd_flush_wq, (cqr->status != DASD_CQR_CLEAR_PENDING)); - /* mark sleepon requests as ended */ - if (cqr->callback_data == DASD_SLEEPON_START_TAG) - cqr->callback_data = DASD_SLEEPON_END_TAG; + /* + * requeue requests to blocklayer will only work + * for block device requests + */ + if (_dasd_requeue_request(cqr)) + continue; /* remove requests from device and block queue */ list_del_init(&cqr->devlist); @@ -3917,13 +3918,6 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device) cqr = refers; } - /* - * requeue requests to blocklayer will only work - * for block device requests - */ - if (_dasd_requeue_request(cqr)) - continue; - if (cqr->block) list_del_init(&cqr->blocklist); cqr->block->base->discipline->free_cp( @@ -3940,8 +3934,7 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device) list_splice_tail(&requeue_queue, &device->ccw_queue); spin_unlock_irq(get_ccwdev_lock(device->cdev)); } - /* wake up generic waitqueue for eventually ended sleepon requests */ - wake_up(&generic_waitq); + dasd_schedule_device_bh(device); return rc; } -- cgit v1.1 From 2cb370d615e9fbed9e95ed222c2c8f337181aa90 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Sun, 18 Feb 2018 00:10:29 +0100 Subject: s390: Replace IS_ENABLED(EXPOLINE_*) with IS_ENABLED(CONFIG_EXPOLINE_*) I've accidentally stumbled upon the IS_ENABLED(EXPOLINE_*) lines, which obviously always evaluate to false. Fix this. Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") Signed-off-by: Eugeniu Rosca Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nospec-branch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 69d7fcf..9aff72d 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -2,8 +2,8 @@ #include #include -int nospec_call_disable = IS_ENABLED(EXPOLINE_OFF); -int nospec_return_disable = !IS_ENABLED(EXPOLINE_FULL); +int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); +int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL); static int __init nospectre_v2_setup_early(char *str) { -- cgit v1.1 From 2d30e9494f1ea320aaaad0cff9ddd92c87eac355 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 18 Feb 2018 23:01:44 +0100 Subject: ASoC: rt5651: Fix regcache sync errors on resume The ALC5651 does not like multi-write accesses, avoid them. This fixes: rt5651 i2c-10EC5651:00: Unable to sync registers 0x27-0x28. -121 Errors on resume (and all registers after the registers in the error not being synced). Signed-off-by: Hans de Goede Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5651.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c index 831b297..45a7304 100644 --- a/sound/soc/codecs/rt5651.c +++ b/sound/soc/codecs/rt5651.c @@ -1722,6 +1722,7 @@ static const struct regmap_config rt5651_regmap = { .num_reg_defaults = ARRAY_SIZE(rt5651_reg), .ranges = rt5651_ranges, .num_ranges = ARRAY_SIZE(rt5651_ranges), + .use_single_rw = true, }; #if defined(CONFIG_OF) -- cgit v1.1 From bee92d06157fc39d5d7836a061c7d41289a55797 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 16:31:23 +0100 Subject: cfg80211: fix cfg80211_beacon_dup gcc-8 warns about some obviously incorrect code: net/mac80211/cfg.c: In function 'cfg80211_beacon_dup': net/mac80211/cfg.c:2896:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] From the context, I conclude that we want to copy from beacon into new_beacon, as we do in the rest of the function. Cc: stable@vger.kernel.org Fixes: 73da7d5bab79 ("mac80211: add channel switch command and beacon callbacks") Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb15d3b9..84f757c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2863,7 +2863,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; - beacon->probe_resp = pos; + new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } -- cgit v1.1 From ce162bfbc0b601841886965baba14877127c7c7c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Jan 2018 08:40:51 +0100 Subject: mac80211_hwsim: don't use WQ_MEM_RECLAIM We're obviously not part of a memory reclaim path, so don't set the flag. This also causes a warning in check_flush_dependency() since we end up in a code path that flushes a non-reclaim workqueue, and we shouldn't do that if we were really part of reclaim. Reported-by: syzbot+41cdaf4232c50e658934@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index f6d4a50f..829ac22 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3455,7 +3455,7 @@ static int __init init_mac80211_hwsim(void) spin_lock_init(&hwsim_radio_lock); - hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0); + hwsim_wq = alloc_workqueue("hwsim_wq", 0, 0); if (!hwsim_wq) return -ENOMEM; -- cgit v1.1 From 04c4927359b1f09310bfee92e7187c9022be3e00 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Mon, 19 Feb 2018 12:09:54 +0530 Subject: arm64: Fix compilation error while accessing MPIDR_HWID_BITMASK from .S files Since commit e1a50de37860 (arm64: cputype: Silence Sparse warnings), compilation of arm64 architecture is broken with the following error messages: AR arch/arm64/kernel/built-in.o arch/arm64/kernel/head.S: Assembler messages: arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')' arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')' arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')' arch/arm64/kernel/head.S:677: Error: junk at end of line, first unrecognized character is `L' arch/arm64/kernel/head.S:677: Error: unexpected characters following instruction at operand 2 -- `movz x1,:abs_g1_s:0xff00ffffffUL' arch/arm64/kernel/head.S:677: Error: unexpected characters following instruction at operand 2 -- `movk x1,:abs_g0_nc:0xff00ffffffUL' This patch fixes the same by using the UL() macro correctly for assigning the MPIDR_HWID_BITMASK macro value. Fixes: e1a50de37860 ("arm64: cputype: Silence Sparse warnings") Acked-by: Arnd Bergmann Acked-by: Robin Murphy Signed-off-by: Bhupesh Sharma Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index eda8c5f..350c76a 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -20,7 +20,7 @@ #define MPIDR_UP_BITMASK (0x1 << 30) #define MPIDR_MT_BITMASK (0x1 << 24) -#define MPIDR_HWID_BITMASK 0xff00ffffffUL +#define MPIDR_HWID_BITMASK UL(0xff00ffffff) #define MPIDR_LEVEL_BITS_SHIFT 3 #define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) -- cgit v1.1 From 651b9920d7a694ffb1f885aef2bbb068a25d9d66 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 10 Feb 2018 13:20:34 +0100 Subject: mac80211: round IEEE80211_TX_STATUS_HEADROOM up to multiple of 4 This ensures that mac80211 allocated management frames are properly aligned, which makes copying them more efficient. For instance, mt76 uses iowrite32_copy to copy beacon frames to beacon template memory on the chip. Misaligned 32-bit accesses cause CPU exceptions on MIPS and should be avoided. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index eec143c..c9077a83 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4141,7 +4141,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); * The TX headroom reserved by mac80211 for its own tx_status functions. * This is enough for the radiotap header. */ -#define IEEE80211_TX_STATUS_HEADROOM 14 +#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4) /** * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames -- cgit v1.1 From d78d9ee9d40aca4781d2c5334972544601a4c3a2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:35 +0200 Subject: mac80211: fix a possible leak of station stats If sta_info_alloc fails after allocating the per CPU statistics, they are not properly freed. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0c5627f..8d7e373 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -433,6 +433,7 @@ free_txq: if (sta->sta.txq[0]) kfree(to_txq_info(sta->sta.txq[0])); free: + free_percpu(sta->pcpu_rx_stats); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif -- cgit v1.1 From 95f3ce6a77893ac828ba841df44421620de4314b Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:37 +0200 Subject: mac80211: fix calling sleeping function in atomic context sta_info_alloc can be called from atomic paths (such as RX path) so we need to call pcpu_alloc with the correct gfp. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8d7e373..af0b608 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -314,7 +314,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (ieee80211_hw_check(hw, USES_RSS)) { sta->pcpu_rx_stats = - alloc_percpu(struct ieee80211_sta_rx_stats); + alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); if (!sta->pcpu_rx_stats) goto free; } -- cgit v1.1 From 3027a8e799b20fc922496a12f8ad2f9f36a8a696 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 19 Feb 2018 14:48:38 +0200 Subject: cfg80211: clear wep keys after disconnection When a low level driver calls cfg80211_disconnected(), wep keys are not cleared. As a result, following connection requests will fail since cfg80211 internal state shows a connection is still in progress. Fix this by clearing the wep keys when disconnecting. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index fdb3646..701cfd7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1032,6 +1032,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->current_bss = NULL; wdev->ssid_len = 0; wdev->conn_owner_nlportid = 0; + kzfree(wdev->connect_keys); + wdev->connect_keys = NULL; nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); -- cgit v1.1 From 191da271ac260700db3e5b4bb982a17ca78769d6 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:42 +0200 Subject: mac80211: Do not disconnect on invalid operating class Some APs include a non global operating class in their extended channel switch information element. In such a case, as the operating class is not known, mac80211 would decide to disconnect. However the specification states that the operating class needs to be taken from Annex E, but it does not specify from which table it should be taken, so it is valid for an AP to use a non global operating class. To avoid possibly unneeded disconnection, in such a case ignore the operating class and assume that the current band is used, and if the resulting channel and band configuration is invalid disconnect. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index ee01817..0293348 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -27,7 +28,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) { - enum nl80211_band new_band; + enum nl80211_band new_band = current_band; int new_freq; u8 new_chan_no; struct ieee80211_channel *new_chan; @@ -55,15 +56,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, elems->ext_chansw_ie->new_operating_class, &new_band)) { sdata_info(sdata, - "cannot understand ECSA IE operating class %d, disconnecting\n", + "cannot understand ECSA IE operating class, %d, ignoring\n", elems->ext_chansw_ie->new_operating_class); - return -EINVAL; } new_chan_no = elems->ext_chansw_ie->new_ch_num; csa_ie->count = elems->ext_chansw_ie->count; csa_ie->mode = elems->ext_chansw_ie->mode; } else if (elems->ch_switch_ie) { - new_band = current_band; new_chan_no = elems->ch_switch_ie->new_ch_num; csa_ie->count = elems->ch_switch_ie->count; csa_ie->mode = elems->ch_switch_ie->mode; -- cgit v1.1 From 3b07029729e347f288c70227cfe3c66b085d6b0b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:43 +0200 Subject: mac80211: Fix sending ADDBA response for an ongoing session In case an ADDBA request is received while there is already an ongoing BA sessions with the same parameters, i.e., update flow, an ADBBA response with decline status was sent twice. Fix it. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index d444752..d643033 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,9 +316,6 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, * driver so reject the timeout update. */ status = WLAN_STATUS_REQUEST_DECLINED; - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, - tid, dialog_token, status, - 1, buf_size, timeout); goto end; } -- cgit v1.1 From 9085b34d0e8361595a7d19034c550d5d15044556 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 19 Feb 2018 13:38:00 +0000 Subject: arm64: uaccess: Formalise types for access_ok() In converting __range_ok() into a static inline, I inadvertently made it more type-safe, but without considering the ordering of the relevant conversions. This leads to quite a lot of Sparse noise about the fact that we use __chk_user_ptr() after addr has already been converted from a user pointer to an unsigned long. Rather than just adding another cast for the sake of shutting Sparse up, it seems reasonable to rework the types to make logical sense (although the resulting codegen for __range_ok() remains identical). The only callers this affects directly are our compat traps where the inferred "user-pointer-ness" of a register value now warrants explicit casting. Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 12 ++++++------ arch/arm64/kernel/armv8_deprecated.c | 4 +++- arch/arm64/kernel/sys_compat.c | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 543e11f..e66b0fc 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -72,15 +72,15 @@ static inline void set_fs(mm_segment_t fs) * This is equivalent to the following test: * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 */ -static inline unsigned long __range_ok(unsigned long addr, unsigned long size) +static inline unsigned long __range_ok(const void __user *addr, unsigned long size) { - unsigned long limit = current_thread_info()->addr_limit; + unsigned long ret, limit = current_thread_info()->addr_limit; __chk_user_ptr(addr); asm volatile( // A + B <= C + 1 for all A,B,C, in four easy steps: // 1: X = A + B; X' = X % 2^64 - " adds %0, %0, %2\n" + " adds %0, %3, %2\n" // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4 " csel %1, xzr, %1, hi\n" // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X' @@ -92,9 +92,9 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) // testing X' - C == 0, subject to the previous adjustments. " sbcs xzr, %0, %1\n" " cset %0, ls\n" - : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc"); + : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc"); - return addr; + return ret; } /* @@ -104,7 +104,7 @@ static inline unsigned long __range_ok(unsigned long addr, unsigned long size) */ #define untagged_addr(addr) sign_extend64(addr, 55) -#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size) +#define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c33b5e4..68450e9 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -370,6 +370,7 @@ static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) static int swp_handler(struct pt_regs *regs, u32 instr) { u32 destreg, data, type, address = 0; + const void __user *user_ptr; int rn, rt2, res = 0; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); @@ -401,7 +402,8 @@ static int swp_handler(struct pt_regs *regs, u32 instr) aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ - if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + user_ptr = (const void __user *)(unsigned long)(address & ~3); + if (!access_ok(VERIFY_WRITE, user_ptr, 4)) { pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n", address); goto fault; diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 8b8bbd3..a382b2a 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -57,7 +57,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; - if (!access_ok(VERIFY_READ, start, end - start)) + if (!access_ok(VERIFY_READ, (const void __user *)start, end - start)) return -EFAULT; return __do_compat_cache_op(start, end); -- cgit v1.1 From c795f3052b60b01e80485fad98c53e5e67d093c9 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 15 Feb 2018 15:34:55 +0100 Subject: gpu: ipu-v3: pre: fix device node leak in ipu_pre_lookup_by_phandle Before returning, call of_node_put() for the device node returned by of_parse_phandle(). Fixes: d2a34232580a ("gpu: ipu-v3: add driver for Prefetch Resolve Engine") Signed-off-by: Tobias Jordan Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-pre.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index f1cec3d..0f70e88 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -129,11 +129,14 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index) if (pre_node == pre->dev->of_node) { mutex_unlock(&ipu_pre_list_mutex); device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE); + of_node_put(pre_node); return pre; } } mutex_unlock(&ipu_pre_list_mutex); + of_node_put(pre_node); + return NULL; } -- cgit v1.1 From 3addaba8141bc6a4f649a48f46e552af32922147 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 15 Feb 2018 15:35:30 +0100 Subject: gpu: ipu-v3: prg: fix device node leak in ipu_prg_lookup_by_phandle Before returning, call of_node_put() for the device node returned by of_parse_phandle(). Fixes: ea9c260514c1 ("gpu: ipu-v3: add driver for Prefetch Resolve Gasket") Signed-off-by: Tobias Jordan Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index 067365c..97b9950 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -102,11 +102,14 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id) mutex_unlock(&ipu_prg_list_mutex); device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE); prg->id = ipu_id; + of_node_put(prg_node); return prg; } } mutex_unlock(&ipu_prg_list_mutex); + of_node_put(prg_node); + return NULL; } -- cgit v1.1 From 58a22fc44539ad7fd4c07c9fcc156cad1e3340ea Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 25 Jan 2018 10:37:52 +0100 Subject: gpu: ipu-cpmem: add 16-bit grayscale support to ipu_cpmem_set_image Add the missing offset calculation for 16-bit grayscale images. Since the IPU only supports capturing greyscale in raw passthrough mode, it is the same as 16-bit bayer formats. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-cpmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index ef32377..9f2d9ec 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -795,6 +795,7 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) case V4L2_PIX_FMT_SGBRG16: case V4L2_PIX_FMT_SGRBG16: case V4L2_PIX_FMT_SRGGB16: + case V4L2_PIX_FMT_Y16: offset = image->rect.left * 2 + image->rect.top * pix->bytesperline; break; -- cgit v1.1 From 50b0f0aee839b5a9995fe7964a678634f75a0518 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Tue, 13 Feb 2018 18:35:36 +0100 Subject: gpu: ipu-csi: add 10/12-bit grayscale support to mbus_code_to_bus_cfg The 10/12-bit config used for bayer formats is used for grayscale as well. Signed-off-by: Jan Luebbe Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-csi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c index 24e12b8..caa05b0 100644 --- a/drivers/gpu/ipu-v3/ipu-csi.c +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -288,6 +288,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) case MEDIA_BUS_FMT_SGBRG10_1X10: case MEDIA_BUS_FMT_SGRBG10_1X10: case MEDIA_BUS_FMT_SRGGB10_1X10: + case MEDIA_BUS_FMT_Y10_1X10: cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; cfg->mipi_dt = MIPI_DT_RAW10; cfg->data_width = IPU_CSI_DATA_WIDTH_10; @@ -296,6 +297,7 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) case MEDIA_BUS_FMT_SGBRG12_1X12: case MEDIA_BUS_FMT_SGRBG12_1X12: case MEDIA_BUS_FMT_SRGGB12_1X12: + case MEDIA_BUS_FMT_Y12_1X12: cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; cfg->mipi_dt = MIPI_DT_RAW12; cfg->data_width = IPU_CSI_DATA_WIDTH_12; -- cgit v1.1 From 06998a756a3865817b87a129a7e5d5bb66dc1ec3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 18 Feb 2018 16:53:59 +0800 Subject: drm/edid: Add 6 bpc quirk for CPT panel in Asus UX303LA Similar to commit e10aec652f31 ("drm/edid: Add 6 bpc quirk for display AEO model 0."), the EDID reports "DFP 1.x compliant TMDS" but it support 6bpc instead of 8 bpc. Hence, use 6 bpc quirk for this panel. Fixes: 196f954e2509 ("drm/i915/dp: Revert "drm/i915/dp: fall back to 18 bpp when sink capability is unknown"") BugLink: https://bugs.launchpad.net/bugs/1749420 Signed-off-by: Kai-Heng Feng Reviewed-by: Mario Kleiner Cc: # v4.8+ Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180218085359.7817-1-kai.heng.feng@canonical.com --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index cb48714..16fb76b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -113,6 +113,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ { "AEO", 0, EDID_QUIRK_FORCE_6BPC }, + /* CPT panel of Asus UX303LA reports 8 bpc, but is a 6 bpc panel */ + { "CPT", 0x17df, EDID_QUIRK_FORCE_6BPC }, + /* Belinea 10 15 55 */ { "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 }, { "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 }, -- cgit v1.1 From b37f78f234bf4fd98979d6c3ccc0f85e508f978f Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 16 Feb 2018 15:56:37 -0700 Subject: net: qualcomm: rmnet: Fix crash on real dev unregistration With CONFIG_DEBUG_PREEMPT enabled, a crash with the following call stack was observed when removing a real dev which had rmnet devices attached to it. To fix this, remove the netdev_upper link APIs and instead use the existing information in rmnet_port and rmnet_priv to get the association between real and rmnet devs. BUG: sleeping function called from invalid context in_atomic(): 0, irqs_disabled(): 0, pid: 5762, name: ip Preemption disabled at: [] debug_object_active_state+0xa4/0x16c Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: PC is at ___might_sleep+0x13c/0x180 LR is at ___might_sleep+0x17c/0x180 [] ___might_sleep+0x13c/0x180 [] __might_sleep+0x58/0x8c [] mutex_lock+0x2c/0x48 [] kernfs_remove_by_name_ns+0x48/0xa8 [] sysfs_remove_link+0x30/0x58 [] __netdev_adjacent_dev_remove+0x14c/0x1e0 [] __netdev_adjacent_dev_unlink_lists+0x40/0x68 [] netdev_upper_dev_unlink+0xb4/0x1fc [] rmnet_dev_walk_unreg+0x6c/0xc8 [] netdev_walk_all_lower_dev_rcu+0x58/0xb4 [] rmnet_config_notify_cb+0xf4/0x134 [] raw_notifier_call_chain+0x58/0x78 [] call_netdevice_notifiers_info+0x48/0x78 [] rollback_registered_many+0x230/0x3c8 [] unregister_netdevice_many+0x38/0x94 [] rtnl_delete_link+0x58/0x88 [] rtnl_dellink+0xbc/0x1cc [] rtnetlink_rcv_msg+0xb0/0x244 [] netlink_rcv_skb+0xb4/0xdc [] rtnetlink_rcv+0x34/0x44 [] netlink_unicast+0x1ec/0x294 [] netlink_sendmsg+0x320/0x390 [] sock_sendmsg+0x54/0x60 [] ___sys_sendmsg+0x298/0x2b0 [] SyS_sendmsg+0xb4/0xf0 [] el0_svc_naked+0x24/0x28 Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 68 +++++----------------- 1 file changed, 14 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 7e7704d..c494918 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -43,12 +43,6 @@ /* Local Definitions and Declarations */ -struct rmnet_walk_data { - struct net_device *real_dev; - struct list_head *head; - struct rmnet_port *port; -}; - static int rmnet_is_real_dev_registered(const struct net_device *real_dev) { return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler; @@ -112,17 +106,14 @@ static int rmnet_register_real_device(struct net_device *real_dev) static void rmnet_unregister_bridge(struct net_device *dev, struct rmnet_port *port) { - struct net_device *rmnet_dev, *bridge_dev; struct rmnet_port *bridge_port; + struct net_device *bridge_dev; if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) return; /* bridge slave handling */ if (!port->nr_rmnet_devs) { - rmnet_dev = netdev_master_upper_dev_get_rcu(dev); - netdev_upper_dev_unlink(dev, rmnet_dev); - bridge_dev = port->bridge_ep; bridge_port = rmnet_get_port_rtnl(bridge_dev); @@ -132,9 +123,6 @@ static void rmnet_unregister_bridge(struct net_device *dev, bridge_dev = port->bridge_ep; bridge_port = rmnet_get_port_rtnl(bridge_dev); - rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev); - netdev_upper_dev_unlink(bridge_dev, rmnet_dev); - rmnet_unregister_real_device(bridge_dev, bridge_port); } } @@ -173,10 +161,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1; - err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack); - if (err) - goto err2; - port->rmnet_mode = mode; hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -193,8 +177,6 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, return 0; -err2: - rmnet_vnd_dellink(mux_id, port, ep); err1: rmnet_unregister_real_device(real_dev, port); err0: @@ -204,14 +186,13 @@ err0: static void rmnet_dellink(struct net_device *dev, struct list_head *head) { + struct rmnet_priv *priv = netdev_priv(dev); struct net_device *real_dev; struct rmnet_endpoint *ep; struct rmnet_port *port; u8 mux_id; - rcu_read_lock(); - real_dev = netdev_master_upper_dev_get_rcu(dev); - rcu_read_unlock(); + real_dev = priv->real_dev; if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) return; @@ -219,7 +200,6 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) port = rmnet_get_port_rtnl(real_dev); mux_id = rmnet_vnd_get_mux(dev); - netdev_upper_dev_unlink(dev, real_dev); ep = rmnet_get_endpoint(port, mux_id); if (ep) { @@ -233,30 +213,13 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) unregister_netdevice_queue(dev, head); } -static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data) -{ - struct rmnet_walk_data *d = data; - struct rmnet_endpoint *ep; - u8 mux_id; - - mux_id = rmnet_vnd_get_mux(rmnet_dev); - ep = rmnet_get_endpoint(d->port, mux_id); - if (ep) { - hlist_del_init_rcu(&ep->hlnode); - rmnet_vnd_dellink(mux_id, d->port, ep); - kfree(ep); - } - netdev_upper_dev_unlink(rmnet_dev, d->real_dev); - unregister_netdevice_queue(rmnet_dev, d->head); - - return 0; -} - static void rmnet_force_unassociate_device(struct net_device *dev) { struct net_device *real_dev = dev; - struct rmnet_walk_data d; + struct hlist_node *tmp_ep; + struct rmnet_endpoint *ep; struct rmnet_port *port; + unsigned long bkt_ep; LIST_HEAD(list); if (!rmnet_is_real_dev_registered(real_dev)) @@ -264,16 +227,19 @@ static void rmnet_force_unassociate_device(struct net_device *dev) ASSERT_RTNL(); - d.real_dev = real_dev; - d.head = &list; - port = rmnet_get_port_rtnl(dev); - d.port = port; rcu_read_lock(); rmnet_unregister_bridge(dev, port); - netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d); + hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + unregister_netdevice_queue(ep->egress_dev, &list); + rmnet_vnd_dellink(ep->mux_id, port, ep); + + hlist_del_init_rcu(&ep->hlnode); + kfree(ep); + } + rcu_read_unlock(); unregister_netdevice_many(&list); @@ -422,11 +388,6 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY; - err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, - extack); - if (err) - return -EINVAL; - slave_port = rmnet_get_port(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; @@ -449,7 +410,6 @@ int rmnet_del_bridge(struct net_device *rmnet_dev, port->rmnet_mode = RMNET_EPMODE_VND; port->bridge_ep = NULL; - netdev_upper_dev_unlink(slave_dev, rmnet_dev); slave_port = rmnet_get_port(slave_dev); rmnet_unregister_real_device(slave_dev, slave_port); -- cgit v1.1 From 4dba8bbce94541c560940ac65ca9cd563fd43348 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 16 Feb 2018 15:56:38 -0700 Subject: net: qualcomm: rmnet: Fix warning seen with 64 bit stats With CONFIG_DEBUG_PREEMPT enabled, a warning was seen on device creation. This occurs due to the incorrect cpu API usage in ndo_get_stats64 handler. BUG: using smp_processor_id() in preemptible [00000000] code: rmnetcli/5743 caller is debug_smp_processor_id+0x1c/0x24 Call trace: [] dump_backtrace+0x0/0x2a8 [] show_stack+0x20/0x28 [] dump_stack+0xa8/0xe0 [] check_preemption_disabled+0x104/0x108 [] debug_smp_processor_id+0x1c/0x24 [] rmnet_get_stats64+0x64/0x13c [] dev_get_stats+0x68/0xd8 [] rtnl_fill_stats+0x54/0x140 [] rtnl_fill_ifinfo+0x428/0x9cc [] rtmsg_ifinfo_build_skb+0x80/0xf4 [] rtnetlink_event+0x88/0xb4 [] raw_notifier_call_chain+0x58/0x78 [] call_netdevice_notifiers_info+0x48/0x78 [] __netdev_upper_dev_link+0x290/0x5e8 [] netdev_master_upper_dev_link+0x3c/0x48 [] rmnet_newlink+0xf0/0x1c8 [] rtnl_newlink+0x57c/0x6c8 [] rtnetlink_rcv_msg+0xb0/0x244 [] netlink_rcv_skb+0xb4/0xdc [] rtnetlink_rcv+0x34/0x44 [] netlink_unicast+0x1ec/0x294 [] netlink_sendmsg+0x320/0x390 [] sock_sendmsg+0x54/0x60 [] SyS_sendto+0x1a0/0x1e4 [] el0_svc_naked+0x24/0x28 Fixes: 192c4b5d48f2 ("net: qualcomm: rmnet: Add support for 64 bit stats") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 570a227..346d310 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -121,7 +121,7 @@ static void rmnet_get_stats64(struct net_device *dev, memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); for_each_possible_cpu(cpu) { - pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu); do { start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); -- cgit v1.1 From f57bbaae7271a47dc6486d489c503faeb248b6d5 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 16 Feb 2018 15:56:39 -0700 Subject: net: qualcomm: rmnet: Fix possible null dereference in command processing If a command packet with invalid mux id is received, the packet would not have a valid endpoint. This invalid endpoint maybe dereferenced leading to a crash. Identified by manual code inspection. Fixes: 3352e6c45760 ("net: qualcomm: rmnet: Convert the muxed endpoint to hlist") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c index 6bc328f..b0dbca0 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c @@ -38,6 +38,11 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb, } ep = rmnet_get_endpoint(port, mux_id); + if (!ep) { + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + } + vnd = ep->egress_dev; ip_family = cmd->flow_control.ip_family; -- cgit v1.1 From d1c95af366961101819f07e3c64d44f3be7f0367 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 17 Feb 2018 00:30:44 +0100 Subject: mlxsw: spectrum_router: Do not unconditionally clear route offload indication When mlxsw replaces (or deletes) a route it removes the offload indication from the replaced route. This is problematic for IPv4 routes, as the offload indication is stored in the fib_info which is usually shared between multiple routes. Instead of unconditionally clearing the offload indication, only clear it if no other route is using the fib_info. Fixes: 3984d1a89fe7 ("mlxsw: spectrum_router: Provide offload indication using nexthop flags") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index dcc6305..f7948e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3794,6 +3794,9 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; int i; + if (!list_is_singular(&nh_grp->fib_list)) + return; + for (i = 0; i < nh_grp->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; -- cgit v1.1 From 20e6bb17facdc2a078baa12136910bab2c315519 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 17:10:33 +0100 Subject: watchdog: rave-sp: add NVMEM dependency We can build this driver with or without NVMEM, but not built-in when NVMEM is a loadable module: drivers/watchdog/rave-sp-wdt.o: In function `rave_sp_wdt_probe': rave-sp-wdt.c:(.text+0x27c): undefined reference to `nvmem_cell_get' rave-sp-wdt.c:(.text+0x290): undefined reference to `nvmem_cell_read' rave-sp-wdt.c:(.text+0x2c4): undefined reference to `nvmem_cell_put' This adds a Kconfig dependency to enforce that. Fixes: c3bb33345721 ("watchdog: Add RAVE SP watchdog driver") Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index aff773b..a16bad7 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -226,6 +226,7 @@ config ZIIRAVE_WATCHDOG config RAVE_SP_WATCHDOG tristate "RAVE SP Watchdog timer" depends on RAVE_SP_CORE + depends on NVMEM || !NVMEM select WATCHDOG_CORE help Support for the watchdog on RAVE SP device. -- cgit v1.1 From 7e2e5158e700f2196dff3b68ac07405575e09c22 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 19 Feb 2018 02:01:05 +0100 Subject: watchdog: i6300esb: fix build failure i6300esb uses fuctions defined in watchdog_core.c, and when CONFIG_WATCHDOG_CORE is not set we have this build error: drivers/watchdog/i6300esb.o: In function `esb_remove': i6300esb.c:(.text+0xcc): undefined reference to `watchdog_unregister_device' drivers/watchdog/i6300esb.o: In function `esb_probe': i6300esb.c:(.text+0x2a1): undefined reference to `watchdog_init_timeout' i6300esb.c:(.text+0x388): undefined reference to `watchdog_register_device' make: *** [Makefile:1029: vmlinux] Error 1 Fix this by selecting CONFIG_WATCHDOG_CORE when I6300ESB_WDT is set. Fixes: 7af4ac8772a8f ("watchdog: i6300esb: use the watchdog subsystem") Signed-off-by: Matteo Croce Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index a16bad7..56f9e20 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1009,6 +1009,7 @@ config WAFER_WDT config I6300ESB_WDT tristate "Intel 6300ESB Timer/Watchdog" depends on PCI + select WATCHDOG_CORE ---help--- Hardware driver for the watchdog timer built into the Intel 6300ESB controller hub. -- cgit v1.1 From 4cd6764495f2b6c2d3dc4fdd339f78764f7995d5 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Mon, 19 Feb 2018 14:38:51 +0000 Subject: watchdog: xen_wdt: fix potential build failure xen_wdt uses watchdog core functions (from watchdog_core.c) and, when compiled without CONFIG_WATCHDOG_CORE being set, it produces the following build error: ERROR: "devm_watchdog_register_device" [drivers/watchdog/xen_wdt.ko] undefined! ERROR: "watchdog_init_timeout" [drivers/watchdog/xen_wdt.ko] undefined! Fix this by selecting CONFIG_WATCHDOG_CORE when CONFIG_XEN_WDT is set. Fixes: 18cffd68e0c4 ("watchdog: xen_wdt: use the watchdog subsystem") Signed-off-by: Radu Rendec Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 56f9e20..3d984cd 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1839,6 +1839,7 @@ config WATCHDOG_SUN4V config XEN_WDT tristate "Xen Watchdog support" depends on XEN + select WATCHDOG_CORE help Say Y here to support the hypervisor watchdog capability provided by Xen 4.0 and newer. The watchdog timeout period is normally one -- cgit v1.1 From a17f4f032b61abd998a1f81b206a4517e2e3db2f Mon Sep 17 00:00:00 2001 From: Wim Van Sebroeck Date: Mon, 19 Feb 2018 17:04:33 +0100 Subject: watchdog: sp5100_tco.c: fix potential build failure isp5100_tco.c uses watchdog core functions (from watchdog_core.c) and, when compiled without CONFIG_WATCHDOG_CORE being set, it produces the following build error: ERROR: "devm_watchdog_register_device" [drivers/watchdog/sp5100_tco.ko] undefined! ERROR: "watchdog_init_timeout" [drivers/watchdog/sp5100_tco.ko] undefined! Fix this by selecting CONFIG_WATCHDOG_CORE. Fixes: 7cd9d5fff792 ("watchdog: sp5100_tco: Convert to use watchdog subsystem") Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 3d984cd..37460cd 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -904,6 +904,7 @@ config F71808E_WDT config SP5100_TCO tristate "AMD/ATI SP5100 TCO Timer/Watchdog" depends on X86 && PCI + select WATCHDOG_CORE ---help--- Hardware watchdog driver for the AMD/ATI SP5100 chipset. The TCO (Total Cost of Ownership) timer is a watchdog timer that will reboot -- cgit v1.1 From be68a8aaf925aaf35574260bf820bb09d2f9e07f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 14:41:44 +0000 Subject: arm64: cpufeature: Fix CTR_EL0 field definitions Our field definitions for CTR_EL0 suffer from a number of problems: - The IDC and DIC fields are missing, which causes us to enable CTR trapping on CPUs with either of these returning non-zero values. - The ERG is FTR_LOWER_SAFE, whereas it should be treated like CWG as FTR_HIGHER_SAFE so that applications can use it to avoid false sharing. - [nit] A RES1 field is described as "RAO" This patch updates the CTR_EL0 field definitions to fix these issues. Cc: Cc: Shanker Donthineni Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 29b1f87..2985a06 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -199,9 +199,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { }; static const struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will -- cgit v1.1 From 5ee39a71fd89ab7240c5339d04161c44a8e03269 Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Thu, 1 Feb 2018 23:13:38 +0100 Subject: arm64: Disable unhandled signal log messages by default aarch64 unhandled signal kernel messages are very verbose, suggesting them to be more of a debugging aid: sigsegv[33]: unhandled level 2 translation fault (11) at 0x00000000, esr 0x92000046, in sigsegv[400000+71000] CPU: 1 PID: 33 Comm: sigsegv Tainted: G W 4.15.0-rc3+ #3 Hardware name: linux,dummy-virt (DT) pstate: 60000000 (nZCv daif -PAN -UAO) pc : 0x4003f4 lr : 0x4006bc sp : 0000fffffe94a060 x29: 0000fffffe94a070 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 00000000004001b0 x23: 0000000000486ac8 x22: 00000000004001c8 x21: 0000000000000000 x20: 0000000000400be8 x19: 0000000000400b30 x18: 0000000000484728 x17: 000000000865ffc8 x16: 000000000000270f x15: 00000000000000b0 x14: 0000000000000002 x13: 0000000000000001 x12: 0000000000000000 x11: 0000000000000000 x10: 0008000020008008 x9 : 000000000000000f x8 : ffffffffffffffff x7 : 0004000000000000 x6 : ffffffffffffffff x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000004003e4 x2 : 0000fffffe94a1e8 x1 : 000000000000000a x0 : 0000000000000000 Disable them by default, so they can be enabled using /proc/sys/debug/exception-trace. Cc: Signed-off-by: Michael Weiser Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bbb0fde..c8639f9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -57,7 +57,7 @@ static const char *handler[]= { "Error" }; -int show_unhandled_signals = 1; +int show_unhandled_signals = 0; static void dump_backtrace_entry(unsigned long where) { -- cgit v1.1 From 1962682d2b2fbe6cfa995a85c53c069fadda473e Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Thu, 1 Feb 2018 23:13:36 +0100 Subject: arm64: Remove unimplemented syscall log message Stop printing a (ratelimited) kernel message for each instance of an unimplemented syscall being called. Userland making an unimplemented syscall is not necessarily misbehaviour and to be expected with a current userland running on an older kernel. Also, the current message looks scary to users but does not actually indicate a real problem nor help them narrow down the cause. Just rely on sys_ni_syscall() to return -ENOSYS. Cc: Acked-by: Will Deacon Signed-off-by: Michael Weiser Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c8639f9..eb2d151 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -526,14 +526,6 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) } #endif - if (show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), regs->syscallno); - dump_instr("", regs); - if (user_mode(regs)) - __show_regs(regs); - } - return sys_ni_syscall(); } -- cgit v1.1 From a06f818a70de21b4b3b4186816094208fc7accf9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 16:46:57 +0000 Subject: arm64: __show_regs: Only resolve kernel symbols when running at EL1 __show_regs pretty prints PC and LR by attempting to map them to kernel function names to improve the utility of crash reports. Unfortunately, this mapping is applied even when the pt_regs corresponds to user mode, resulting in a KASLR oracle. Avoid this issue by only looking up the function symbols when the register state indicates that we're actually running at EL1. Cc: Reported-by: NCSC Security Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ad8aeb0..c0da6ef 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -220,8 +220,15 @@ void __show_regs(struct pt_regs *regs) show_regs_print_info(KERN_DEFAULT); print_pstate(regs); - printk("pc : %pS\n", (void *)regs->pc); - printk("lr : %pS\n", (void *)lr); + + if (!user_mode(regs)) { + printk("pc : %pS\n", (void *)regs->pc); + printk("lr : %pS\n", (void *)lr); + } else { + printk("pc : %016llx\n", regs->pc); + printk("lr : %016llx\n", lr); + } + printk("sp : %016llx\n", sp); i = top_reg; -- cgit v1.1 From cfc2c740533368b96e2be5e0a4e8c3cace7d9814 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Feb 2018 19:36:28 -0800 Subject: netfilter: IDLETIMER: be syzkaller friendly We had one report from syzkaller [1] First issue is that INIT_WORK() should be done before mod_timer() or we risk timer being fired too soon, even with a 1 second timer. Second issue is that we need to reject too big info->timeout to avoid overflows in msecs_to_jiffies(info->timeout * 1000), or risk looping, if result after overflow is 0. [1] WARNING: CPU: 1 PID: 5129 at kernel/workqueue.c:1444 __queue_work+0xdf4/0x1230 kernel/workqueue.c:1444 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 5129 Comm: syzkaller159866 Not tainted 4.16.0-rc1+ #230 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1dc/0x200 kernel/panic.c:547 report_bug+0x211/0x2d0 lib/bug.c:184 fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178 fixup_bug arch/x86/kernel/traps.c:247 [inline] do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315 invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:988 RIP: 0010:__queue_work+0xdf4/0x1230 kernel/workqueue.c:1444 RSP: 0018:ffff8801db507538 EFLAGS: 00010006 RAX: ffff8801aeb46080 RBX: ffff8801db530200 RCX: ffffffff81481404 RDX: 0000000000000100 RSI: ffffffff86b42640 RDI: 0000000000000082 RBP: ffff8801db507758 R08: 1ffff1003b6a0de5 R09: 000000000000000c R10: ffff8801db5073f0 R11: 0000000000000020 R12: 1ffff1003b6a0eb6 R13: ffff8801b1067ae0 R14: 00000000000001f8 R15: dffffc0000000000 queue_work_on+0x16a/0x1c0 kernel/workqueue.c:1488 queue_work include/linux/workqueue.h:488 [inline] schedule_work include/linux/workqueue.h:546 [inline] idletimer_tg_expired+0x44/0x60 net/netfilter/xt_IDLETIMER.c:116 call_timer_fn+0x228/0x820 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:541 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:829 RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:777 [inline] RIP: 0010:__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] RIP: 0010:_raw_spin_unlock_irqrestore+0x5e/0xba kernel/locking/spinlock.c:184 RSP: 0018:ffff8801c20173c8 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff12 RAX: dffffc0000000000 RBX: 0000000000000282 RCX: 0000000000000006 RDX: 1ffffffff0d592cd RSI: 1ffff10035d68d23 RDI: 0000000000000282 RBP: ffff8801c20173d8 R08: 1ffff10038402e47 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff8820e5c8 R13: ffff8801b1067ad8 R14: ffff8801aea7c268 R15: ffff8801aea7c278 __debug_object_init+0x235/0x1040 lib/debugobjects.c:378 debug_object_init+0x17/0x20 lib/debugobjects.c:391 __init_work+0x2b/0x60 kernel/workqueue.c:506 idletimer_tg_create net/netfilter/xt_IDLETIMER.c:152 [inline] idletimer_tg_checkentry+0x691/0xb00 net/netfilter/xt_IDLETIMER.c:213 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:850 check_target net/ipv6/netfilter/ip6_tables.c:533 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:575 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:744 do_replace net/ipv6/netfilter/ip6_tables.c:1160 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1686 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x10b/0x130 net/ipv6/ipv6_sockglue.c:927 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2976 SYSC_setsockopt net/socket.c:1850 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1829 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") Signed-off-by: Eric Dumazet Reported-by: syzkaller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 6c2482b..1ac6600 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -146,11 +146,11 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) timer_setup(&info->timer->timer, idletimer_tg_expired, 0); info->timer->refcnt = 1; + INIT_WORK(&info->timer->work, idletimer_tg_work); + mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); - INIT_WORK(&info->timer->work, idletimer_tg_work); - return 0; out_free_attr: @@ -191,7 +191,10 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) pr_debug("timeout value is zero\n"); return -EINVAL; } - + if (info->timeout >= INT_MAX / 1000) { + pr_debug("timeout value is too big\n"); + return -EINVAL; + } if (info->label[0] == '\0' || strnlen(info->label, MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { -- cgit v1.1 From 39772f0a7be3b3dc26c74ea13fe7847fd1522c8b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 3 Feb 2018 09:19:30 +1100 Subject: md: only allow remove_and_add_spares when no sync_thread running. The locking protocols in md assume that a device will never be removed from an array during resync/recovery/reshape. When that isn't happening, rcu or reconfig_mutex is needed to protect an rdev pointer while taking a refcount. When it is happening, that protection isn't needed. Unfortunately there are cases were remove_and_add_spares() is called when recovery might be happening: is state_store(), slot_store() and hot_remove_disk(). In each case, this is just an optimization, to try to expedite removal from the personality so the device can be removed from the array. If resync etc is happening, we just have to wait for md_check_recover to find a suitable time to call remove_and_add_spares(). This optimization and not essential so it doesn't matter if it fails. So change remove_and_add_spares() to abort early if resync/recovery/reshape is happening, unless it is called from md_check_recovery() as part of a newly started recovery. The parameter "this" is only NULL when called from md_check_recovery() so when it is NULL, there is no need to abort. As this can result in a NULL dereference, the fix is suitable for -stable. cc: yuyufen Cc: Tomasz Majchrzak Fixes: 8430e7e0af9a ("md: disconnect device from personality before trying to remove it.") Cc: stable@ver.kernel.org (v4.8+) Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9b73cf1..ba152dd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8595,6 +8595,10 @@ static int remove_and_add_spares(struct mddev *mddev, int removed = 0; bool remove_some = false; + if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + /* Mustn't remove devices when resync thread is running */ + return 0; + rdev_for_each(rdev, mddev) { if ((this == NULL || rdev == this) && rdev->raid_disk >= 0 && -- cgit v1.1 From 01a69cab01c184d3786af09e9339311123d63d22 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 6 Feb 2018 17:39:15 +0800 Subject: md raid10: fix NULL deference in handle_write_completed() In the case of 'recover', an r10bio with R10BIO_WriteError & R10BIO_IsRecover will be progressed by handle_write_completed(). This function traverses all r10bio->devs[copies]. If devs[m].repl_bio != NULL, it thinks conf->mirrors[dev].replacement is also not NULL. However, this is not always true. When there is an rdev of raid10 has replacement, then each r10bio ->devs[m].repl_bio != NULL in conf->r10buf_pool. However, in 'recover', even if corresponded replacement is NULL, it doesn't clear r10bio ->devs[m].repl_bio, resulting in replacement NULL deference. This bug was introduced when replacement support for raid10 was added in Linux 3.3. As NeilBrown suggested: Elsewhere the determination of "is this device part of the resync/recovery" is made by resting bio->bi_end_io. If this is end_sync_write, then we tried to write here. If it is NULL, then we didn't try to write. Fixes: 9ad1aefc8ae8 ("md/raid10: Handle replacement devices during resync.") Cc: stable (V3.3+) Suggested-by: NeilBrown Signed-off-by: Yufen Yu Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8d7ddc9..9e9441f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) for (m = 0; m < conf->copies; m++) { int dev = r10_bio->devs[m].devnum; rdev = conf->mirrors[dev].rdev; - if (r10_bio->devs[m].bio == NULL) + if (r10_bio->devs[m].bio == NULL || + r10_bio->devs[m].bio->bi_end_io == NULL) continue; if (!r10_bio->devs[m].bio->bi_status) { rdev_clear_badblocks( @@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) md_error(conf->mddev, rdev); } rdev = conf->mirrors[dev].replacement; - if (r10_bio->devs[m].repl_bio == NULL) + if (r10_bio->devs[m].repl_bio == NULL || + r10_bio->devs[m].repl_bio->bi_end_io == NULL) continue; if (!r10_bio->devs[m].repl_bio->bi_status) { -- cgit v1.1 From 506b0a395f26e52b3f18827e0de1be051acb77ab Mon Sep 17 00:00:00 2001 From: Prashant Sreedharan Date: Mon, 19 Feb 2018 12:27:04 +0530 Subject: tg3: APE heartbeat changes In ungraceful host shutdown or driver crash case BMC connectivity is lost. APE firmware is missing the driver state in this case to keep the BMC connectivity alive. This patch has below change to address this issue. Heartbeat mechanism with APE firmware. This heartbeat mechanism is needed to notify the APE firmware about driver state. This patch also has the change in wait time for APE event from 1ms to 20ms as there can be some delay in getting response. v2: Drop inline keyword as per David suggestion. Signed-off-by: Prashant Sreedharan Signed-off-by: Satish Baddipadige Signed-off-by: Siva Reddy Kallam Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 35 ++++++++++++++++++++++++----------- drivers/net/ethernet/broadcom/tg3.h | 5 +++++ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a77ee2f..c1841db 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -820,7 +820,7 @@ static int tg3_ape_event_lock(struct tg3 *tp, u32 timeout_us) tg3_ape_unlock(tp, TG3_APE_LOCK_MEM); - udelay(10); + usleep_range(10, 20); timeout_us -= (timeout_us > 10) ? 10 : timeout_us; } @@ -922,8 +922,8 @@ static int tg3_ape_send_event(struct tg3 *tp, u32 event) if (!(apedata & APE_FW_STATUS_READY)) return -EAGAIN; - /* Wait for up to 1 millisecond for APE to service previous event. */ - err = tg3_ape_event_lock(tp, 1000); + /* Wait for up to 20 millisecond for APE to service previous event. */ + err = tg3_ape_event_lock(tp, 20000); if (err) return err; @@ -946,6 +946,7 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind) switch (kind) { case RESET_KIND_INIT: + tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++); tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, APE_HOST_SEG_SIG_MAGIC); tg3_ape_write32(tp, TG3_APE_HOST_SEG_LEN, @@ -962,13 +963,6 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind) event = APE_EVENT_STATUS_STATE_START; break; case RESET_KIND_SHUTDOWN: - /* With the interface we are currently using, - * APE does not track driver state. Wiping - * out the HOST SEGMENT SIGNATURE forces - * the APE to assume OS absent status. - */ - tg3_ape_write32(tp, TG3_APE_HOST_SEG_SIG, 0x0); - if (device_may_wakeup(&tp->pdev->dev) && tg3_flag(tp, WOL_ENABLE)) { tg3_ape_write32(tp, TG3_APE_HOST_WOL_SPEED, @@ -990,6 +984,18 @@ static void tg3_ape_driver_state_change(struct tg3 *tp, int kind) tg3_ape_send_event(tp, event); } +static void tg3_send_ape_heartbeat(struct tg3 *tp, + unsigned long interval) +{ + /* Check if hb interval has exceeded */ + if (!tg3_flag(tp, ENABLE_APE) || + time_before(jiffies, tp->ape_hb_jiffies + interval)) + return; + + tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_COUNT, tp->ape_hb++); + tp->ape_hb_jiffies = jiffies; +} + static void tg3_disable_ints(struct tg3 *tp) { int i; @@ -7262,6 +7268,7 @@ static int tg3_poll_msix(struct napi_struct *napi, int budget) } } + tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1); return work_done; tx_recovery: @@ -7344,6 +7351,7 @@ static int tg3_poll(struct napi_struct *napi, int budget) } } + tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL << 1); return work_done; tx_recovery: @@ -10732,7 +10740,7 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) if (tg3_flag(tp, ENABLE_APE)) /* Write our heartbeat update interval to APE. */ tg3_ape_write32(tp, TG3_APE_HOST_HEARTBEAT_INT_MS, - APE_HOST_HEARTBEAT_INT_DISABLE); + APE_HOST_HEARTBEAT_INT_5SEC); tg3_write_sig_post_reset(tp, RESET_KIND_INIT); @@ -11077,6 +11085,9 @@ static void tg3_timer(struct timer_list *t) tp->asf_counter = tp->asf_multiplier; } + /* Update the APE heartbeat every 5 seconds.*/ + tg3_send_ape_heartbeat(tp, TG3_APE_HB_INTERVAL); + spin_unlock(&tp->lock); restart_timer: @@ -16653,6 +16664,8 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent) pci_state_reg); tg3_ape_lock_init(tp); + tp->ape_hb_interval = + msecs_to_jiffies(APE_HOST_HEARTBEAT_INT_5SEC); } /* Set up tp->grc_local_ctrl before calling diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 47f51cc..1d61aa3 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2508,6 +2508,7 @@ #define TG3_APE_LOCK_PHY3 5 #define TG3_APE_LOCK_GPIO 7 +#define TG3_APE_HB_INTERVAL (tp->ape_hb_interval) #define TG3_EEPROM_SB_F1R2_MBA_OFF 0x10 @@ -3423,6 +3424,10 @@ struct tg3 { struct device *hwmon_dev; bool link_up; bool pcierr_recovery; + + u32 ape_hb; + unsigned long ape_hb_interval; + unsigned long ape_hb_jiffies; }; /* Accessor macros for chip and asic attributes -- cgit v1.1 From a988681dbbca01c64d86455c0153899870d7a63c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Sun, 18 Feb 2018 21:11:25 +0100 Subject: MAINTAINERS: Remove Richard Purdie from LED maintainers Richard has been inactive on the linux-leds list for a long time. After email discussion we agreed on removing him from the LED maintainers, which will better reflect the actual status. Acked-by: Richard Purdie Signed-off-by: Jacek Anaszewski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9a7f76e..93a12af 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7909,7 +7909,6 @@ S: Maintained F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Richard Purdie M: Jacek Anaszewski M: Pavel Machek L: linux-leds@vger.kernel.org -- cgit v1.1 From a588a8bb7b25a3fb4f7fed00feb7aec541fc2632 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Jan 2018 18:01:21 +0100 Subject: drm/exynos: g2d: use monotonic timestamps The exynos DRM driver uses real-time 'struct timeval' values for exporting its timestamps to user space. This has multiple problems: 1. signed seconds overflow in y2038 2. the 'struct timeval' definition is deprecated in the kernel 3. time may jump or go backwards after a 'settimeofday()' syscall 4. other DRM timestamps are in CLOCK_MONOTONIC domain, so they can't be compared 5. exporting microseconds requires a division by 1000, which may be slow on some architectures. The code existed in two places before, but the IPP portion was removed in 8ded59413ccc ("drm/exynos: ipp: Remove Exynos DRM IPP subsystem"), so we no longer need to worry about it. Ideally timestamps should just use 64-bit nanoseconds instead, but of course we can't change that now. Instead, this tries to address the first four points above by using monotonic 'timespec' values. According to Tobias Jakobi, user space doesn't care about the timestamp at the moment, so we can change the format. Even if there is something looking at them, it will work just fine with monotonic times as long as the application only looks at the relative values between two events. Link: https://patchwork.kernel.org/patch/10038593/ Cc: Tobias Jakobi Signed-off-by: Arnd Bergmann Reviewed-by: Tobias Jakobi Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 2b8bf2d..9effe40 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -926,7 +926,7 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no) struct drm_device *drm_dev = g2d->subdrv.drm_dev; struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node; struct drm_exynos_pending_g2d_event *e; - struct timeval now; + struct timespec64 now; if (list_empty(&runqueue_node->event_list)) return; @@ -934,9 +934,9 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no) e = list_first_entry(&runqueue_node->event_list, struct drm_exynos_pending_g2d_event, base.link); - do_gettimeofday(&now); + ktime_get_ts64(&now); e->event.tv_sec = now.tv_sec; - e->event.tv_usec = now.tv_usec; + e->event.tv_usec = now.tv_nsec / NSEC_PER_USEC; e->event.cmdlist_no = cmdlist_no; drm_send_event(drm_dev, &e->base); -- cgit v1.1 From 1293b6191010672c0c9dacae8f71c6f3e4d70cbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 5 Feb 2018 21:09:59 +0100 Subject: drm/exynos: fix comparison to bitshift when dealing with a mask Due to a typo, the mask was destroyed by a comparison instead of a bit shift. Signed-off-by: Wolfram Sang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/regs-fimc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/regs-fimc.h b/drivers/gpu/drm/exynos/regs-fimc.h index 3049613..d7cbe53 100644 --- a/drivers/gpu/drm/exynos/regs-fimc.h +++ b/drivers/gpu/drm/exynos/regs-fimc.h @@ -569,7 +569,7 @@ #define EXYNOS_CIIMGEFF_FIN_EMBOSSING (4 << 26) #define EXYNOS_CIIMGEFF_FIN_SILHOUETTE (5 << 26) #define EXYNOS_CIIMGEFF_FIN_MASK (7 << 26) -#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff < 13) | (0xff < 0)) +#define EXYNOS_CIIMGEFF_PAT_CBCR_MASK ((0xff << 13) | (0xff << 0)) /* Real input DMA size register */ #define EXYNOS_CIREAL_ISIZE_AUTOLOAD_ENABLE (1 << 31) -- cgit v1.1 From 6f0a60298bbbea43ab5e3955913ab19c153076f3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 8 Feb 2018 18:42:51 +0100 Subject: drm/exynos: g2d: Delete an error message for a failed memory allocation in two functions Omit an extra message for a memory allocation failure in these functions. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 9effe40..f68ef1b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -286,7 +286,6 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) node = kcalloc(G2D_CMDLIST_NUM, sizeof(*node), GFP_KERNEL); if (!node) { - dev_err(dev, "failed to allocate memory\n"); ret = -ENOMEM; goto err; } @@ -1358,10 +1357,9 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data, return -EFAULT; runqueue_node = kmem_cache_alloc(g2d->runqueue_slab, GFP_KERNEL); - if (!runqueue_node) { - dev_err(dev, "failed to allocate memory\n"); + if (!runqueue_node) return -ENOMEM; - } + run_cmdlist = &runqueue_node->run_cmdlist; event_list = &runqueue_node->event_list; INIT_LIST_HEAD(run_cmdlist); -- cgit v1.1 From b701a1436a5b177dc2240ba7e8f2ff7106bc8d84 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 15 Feb 2018 08:23:15 +0000 Subject: drm/exynos: remove exynos_drm_rotator.h Since its inclusion in 2012 via commit bea8a429d91a ("drm/exynos: add rotator ipp driver") this header is not used by any source files and is empty. Lets just remove it. Signed-off-by: Corentin Labbe Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 drivers/gpu/drm/exynos/exynos_drm_rotator.h diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.h b/drivers/gpu/drm/exynos/exynos_drm_rotator.h deleted file mode 100644 index 71a0b4c..0000000 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * YoungJun Cho - * Eunchul Kim - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_ROTATOR_H_ -#define _EXYNOS_DRM_ROTATOR_H_ - -/* TODO */ - -#endif -- cgit v1.1 From c84b66f8aa3f879dbf41353f677d87875f5fc6c9 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 14 Feb 2018 18:23:56 +0100 Subject: drm: exynos: Use proper macro definition for HDMI_I2S_PIN_SEL_1 Bit field [2:0] of HDMI_I2S_PIN_SEL_1 corresponds to SDATA_0, not SDATA_2. This patch removes redefinition of HDMI_I2S_SEL_DATA2 constant and adds missing HDMI_I2S_SEL_DATA0. The value of bit field selecting SDATA_1 (pin_sel_3) is also changed, so it is 3 as suggested in the Exynos TRMs. Signed-off-by: Sylwester Nawrocki Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 7 +++++-- drivers/gpu/drm/exynos/regs-hdmi.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index a4b75a4..abd84cb 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1068,10 +1068,13 @@ static void hdmi_audio_config(struct hdmi_context *hdata) /* Configuration I2S input ports. Configure I2S_PIN_SEL_0~4 */ hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_0, HDMI_I2S_SEL_SCLK(5) | HDMI_I2S_SEL_LRCK(6)); - hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(1) - | HDMI_I2S_SEL_SDATA2(4)); + + hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_1, HDMI_I2S_SEL_SDATA1(3) + | HDMI_I2S_SEL_SDATA0(4)); + hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_2, HDMI_I2S_SEL_SDATA3(1) | HDMI_I2S_SEL_SDATA2(2)); + hdmi_reg_writeb(hdata, HDMI_I2S_PIN_SEL_3, HDMI_I2S_SEL_DSD(0)); /* I2S_CON_1 & 2 */ diff --git a/drivers/gpu/drm/exynos/regs-hdmi.h b/drivers/gpu/drm/exynos/regs-hdmi.h index 04be0f7e..4420c20 100644 --- a/drivers/gpu/drm/exynos/regs-hdmi.h +++ b/drivers/gpu/drm/exynos/regs-hdmi.h @@ -464,7 +464,7 @@ /* I2S_PIN_SEL_1 */ #define HDMI_I2S_SEL_SDATA1(x) (((x) & 0x7) << 4) -#define HDMI_I2S_SEL_SDATA2(x) ((x) & 0x7) +#define HDMI_I2S_SEL_SDATA0(x) ((x) & 0x7) /* I2S_PIN_SEL_2 */ #define HDMI_I2S_SEL_SDATA3(x) (((x) & 0x7) << 4) -- cgit v1.1 From 894266466aa74a226e58e23975118ff6231dd2e4 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Tue, 20 Feb 2018 08:39:51 +0100 Subject: x86/headers/UAPI: Use __u64 instead of u64 in ... since u64 has a hidden header dependency that was not there before using it (i.e. it breaks our VMM build). Also, __u64 is the right way to expose data types through UAPI. Signed-off-by: KarimAllah Ahmed Acked-by: Thomas Gleixner Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: devel@linuxdriverproject.org Fixes: 93286261 ("x86/hyperv: Reenlightenment notifications support") Link: http://lkml.kernel.org/r/1519112391-23773-1-git-send-email-karahmed@amazon.de Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/hyperv.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 197c2e6..0994143 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -241,24 +241,24 @@ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 struct hv_reenlightenment_control { - u64 vector:8; - u64 reserved1:8; - u64 enabled:1; - u64 reserved2:15; - u64 target_vp:32; + __u64 vector:8; + __u64 reserved1:8; + __u64 enabled:1; + __u64 reserved2:15; + __u64 target_vp:32; }; #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 struct hv_tsc_emulation_control { - u64 enabled:1; - u64 reserved:63; + __u64 enabled:1; + __u64 reserved:63; }; struct hv_tsc_emulation_status { - u64 inprogress:1; - u64 reserved:63; + __u64 inprogress:1; + __u64 reserved:63; }; #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 -- cgit v1.1 From f8f4aa68a8ae98ed79c8fee3488c38a2f5d2de8c Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 18 Feb 2018 11:05:15 +0200 Subject: mei: me: add cannon point device ids Add CNP LP and CNP H device ids for cannon lake and coffee lake platforms. Cc: 4.14+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 3 +++ drivers/misc/mei/pci-me.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 0ccccba..bda3bd8 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -132,6 +132,9 @@ #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ +#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 4a0ccda..f915000 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -98,6 +98,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)}, + /* required last entry */ {0, } }; -- cgit v1.1 From 2a4ac172c2f257d28c47b90c9e381bec31edcc44 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 18 Feb 2018 11:05:16 +0200 Subject: mei: me: add cannon point device ids for 4th device Add cannon point device ids for 4th (itouch) device. Cc: 4.14+ Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bda3bd8..e4b10b2 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -133,7 +133,9 @@ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ #define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ +#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ /* * MEI HW Section diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index f915000..ea4e152 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -99,7 +99,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, /* required last entry */ {0, } -- cgit v1.1 From b8ff1802815913aad52695898cccbc9f77b7e726 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Feb 2018 11:35:43 +0000 Subject: drm: Handle unexpected holes in color-eviction During eviction, the driver may free more than one hole in the drm_mm due to the side-effects in evicting the scanned nodes. However, drm_mm_scan_color_evict() expects that the scan result is the first available hole (in the mru freed hole_stack list): kernel BUG at drivers/gpu/drm/drm_mm.c:844! invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: i915 snd_hda_codec_analog snd_hda_codec_generic coretemp snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core lpc_ich snd_pcm e1000e mei_me prime_numbers mei CPU: 1 PID: 1490 Comm: gem_userptr_bli Tainted: G U 4.16.0-rc1-g740f57c54ecf-kasan_6+ #1 Hardware name: Dell Inc. OptiPlex 755 /0PU052, BIOS A08 02/19/2008 RIP: 0010:drm_mm_scan_color_evict+0x2b8/0x3d0 RSP: 0018:ffff880057a573f8 EFLAGS: 00010287 RAX: ffff8800611f5980 RBX: ffff880057a575d0 RCX: dffffc0000000000 RDX: 00000000029d5000 RSI: 1ffff1000af4aec1 RDI: ffff8800611f5a10 RBP: ffff88005ab884d0 R08: ffff880057a57600 R09: 000000000afff000 R10: 1ffff1000b5710b5 R11: 0000000000001000 R12: 1ffff1000af4ae82 R13: ffff8800611f59b0 R14: ffff8800611f5980 R15: ffff880057a57608 FS: 00007f2de0c2e8c0(0000) GS:ffff88006ac40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2ddde1e000 CR3: 00000000609b2000 CR4: 00000000000006e0 Call Trace: ? drm_mm_scan_remove_block+0x330/0x330 ? drm_mm_scan_remove_block+0x151/0x330 i915_gem_evict_something+0x711/0xbd0 [i915] ? igt_evict_contexts+0x50/0x50 [i915] ? nop_clear_range+0x10/0x10 [i915] ? igt_evict_something+0x90/0x90 [i915] ? i915_gem_gtt_reserve+0x1a1/0x320 [i915] i915_gem_gtt_insert+0x237/0x400 [i915] __i915_vma_do_pin+0xc25/0x1a20 [i915] eb_lookup_vmas+0x1c63/0x3790 [i915] ? i915_gem_check_execbuffer+0x250/0x250 [i915] ? trace_hardirqs_on_caller+0x33f/0x590 ? _raw_spin_unlock_irqrestore+0x39/0x60 ? __pm_runtime_resume+0x7d/0xf0 i915_gem_do_execbuffer+0x86a/0x2ff0 [i915] ? __kmalloc+0x132/0x340 ? i915_gem_execbuffer2_ioctl+0x10f/0x760 [i915] ? drm_ioctl_kernel+0x12e/0x1c0 ? drm_ioctl+0x662/0x980 ? eb_relocate_slow+0xa90/0xa90 [i915] ? i915_gem_execbuffer2_ioctl+0x10f/0x760 [i915] ? __might_fault+0xea/0x1a0 i915_gem_execbuffer2_ioctl+0x3cc/0x760 [i915] ? i915_gem_execbuffer_ioctl+0xba0/0xba0 [i915] ? lock_acquire+0x3c0/0x3c0 ? i915_gem_execbuffer_ioctl+0xba0/0xba0 [i915] drm_ioctl_kernel+0x12e/0x1c0 drm_ioctl+0x662/0x980 ? i915_gem_execbuffer_ioctl+0xba0/0xba0 [i915] ? drm_getstats+0x20/0x20 ? debug_check_no_obj_freed+0x2a6/0x8c0 do_vfs_ioctl+0x170/0xe70 ? ioctl_preallocate+0x170/0x170 ? task_work_run+0xbe/0x160 ? lock_acquire+0x3c0/0x3c0 ? trace_hardirqs_on_caller+0x33f/0x590 ? _raw_spin_unlock_irq+0x2f/0x50 SyS_ioctl+0x36/0x70 ? do_vfs_ioctl+0xe70/0xe70 do_syscall_64+0x18c/0x5d0 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x7f2ddf13b587 RSP: 002b:00007fff15c4f9d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f2ddf13b587 RDX: 00007fff15c4fa20 RSI: 0000000040406469 RDI: 0000000000000003 RBP: 00007fff15c4fa20 R08: 0000000000000000 R09: 00007f2ddf3fe120 R10: 0000000000000073 R11: 0000000000000246 R12: 0000000040406469 R13: 0000000000000003 R14: 00007fff15c4fa20 R15: 00000000000000c7 Code: 00 00 00 4a c7 44 22 08 00 00 00 00 42 c7 44 22 10 00 00 00 00 48 81 c4 b8 00 00 00 5b 5d 41 5c 41 5d 41 5e 41 5f c3 0f 0b 0f 0b <0f> 0b 31 c0 eb c0 4c 89 ef e8 9a 09 41 ff e9 1e fe ff ff 4c 89 RIP: drm_mm_scan_color_evict+0x2b8/0x3d0 RSP: ffff880057a573f8 We can trivially relax this assumption by searching the hole_stack for the scan result and warn instead if the driver called us without any result. Fixes: 3fa489dabea9 ("drm: Apply tight eviction scanning to color_adjust") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: # v4.11+ Reviewed-by: Joonas Lahtinen Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180219113543.8010-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/drm_mm.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index c3c79ee..edab571 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -836,9 +836,24 @@ struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan) if (!mm->color_adjust) return NULL; - hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack); - hole_start = __drm_mm_hole_node_start(hole); - hole_end = hole_start + hole->hole_size; + /* + * The hole found during scanning should ideally be the first element + * in the hole_stack list, but due to side-effects in the driver it + * may not be. + */ + list_for_each_entry(hole, &mm->hole_stack, hole_stack) { + hole_start = __drm_mm_hole_node_start(hole); + hole_end = hole_start + hole->hole_size; + + if (hole_start <= scan->hit_start && + hole_end >= scan->hit_end) + break; + } + + /* We should only be called after we found the hole previously */ + DRM_MM_BUG_ON(&hole->hole_stack == &mm->hole_stack); + if (unlikely(&hole->hole_stack == &mm->hole_stack)) + return NULL; DRM_MM_BUG_ON(hole_start > scan->hit_start); DRM_MM_BUG_ON(hole_end < scan->hit_end); -- cgit v1.1 From 842cef9113c2120f74f645111ded1e020193d84c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:48:11 -0700 Subject: x86/mm: Fix {pmd,pud}_{set,clear}_flags() Just like pte_{set,clear}_flags() their PMD and PUD counterparts should not do any address translation. This was outright wrong under Xen (causing a dead boot with no useful output on "suitable" systems), and produced needlessly more complicated code (even if just slightly) when paravirt was enabled. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/5A8AF1BB02000078001A91C3@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 8 ++++---- arch/x86/include/asm/pgtable_types.h | 10 ++++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552..b444d83 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v | set); + return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v & ~clear); + return native_make_pmd(v & ~clear); } static inline pmd_t pmd_mkold(pmd_t pmd) @@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); - return __pud(v | set); + return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); - return __pud(v & ~clear); + return native_make_pud(v & ~clear); } static inline pud_t pud_mkold(pud_t pud) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3696398..246f15b 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -323,6 +323,11 @@ static inline pudval_t native_pud_val(pud_t pud) #else #include +static inline pud_t native_make_pud(pudval_t val) +{ + return (pud_t) { .p4d.pgd = native_make_pgd(val) }; +} + static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.p4d.pgd); @@ -344,6 +349,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) #else #include +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; +} + static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.p4d.pgd); -- cgit v1.1 From 3b3a9268bba62b35a29bafe0931715b1725fdf26 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 19 Feb 2018 18:50:39 +0100 Subject: x86/mm: Remove stale comment about KMEMCHECK This comment referred to a conditional call to kmemcheck_hide() that was here until commit 4950276672fc ("kmemcheck: remove annotations"). Now that kmemcheck has been removed, it doesn't make sense anymore. Signed-off-by: Jann Horn Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180219175039.253089-1-jannh@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 800de81..c88573d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1248,10 +1248,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, tsk = current; mm = tsk->mm; - /* - * Detect and handle instructions that would cause a page fault for - * both a tracked kernel page and a userspace page. - */ prefetchw(&mm->mmap_sem); if (unlikely(kmmio_fault(regs, address))) -- cgit v1.1 From 700b7c5409c3e9da279fbea78cf28a78fbc176cd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:49:12 -0700 Subject: x86/asm: Improve how GEN_*_SUFFIXED_RMWcc() specify clobbers Commit: df3405245a ("x86/asm: Add suffix macro for GEN_*_RMWcc()") ... introduced "suffix" RMWcc operations, adding bogus clobber specifiers: For one, on x86 there's no point explicitly clobbering "cc". In fact, with GCC properly fixed, this results in an overlap being detected by the compiler between outputs and clobbers. Furthermore it seems bad practice to me to have clobber specification and use of the clobbered register(s) disconnected - it should rather be at the invocation place of that GEN_{UN,BIN}ARY_SUFFIXED_RMWcc() macros that the clobber is specified which this particular invocation needs. Drop the "cc" clobber altogether and move the "cx" one to refcount.h. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF1F802000078001A91E1@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/refcount.h | 4 ++-- arch/x86/include/asm/rmwcc.h | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4e44250..d651711 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -67,13 +67,13 @@ static __always_inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) { GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "er", i, "%0", e); + r->refs.counter, "er", i, "%0", e, "cx"); } static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) { GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "%0", e); + r->refs.counter, "%0", e, "cx"); } static __always_inline __must_check diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index f91c365..4914a3e 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,8 +2,7 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#define __CLOBBERS_MEM "memory" -#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx" +#define __CLOBBERS_MEM(clb...) "memory", ## clb #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) @@ -40,18 +39,19 @@ do { \ #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) + __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) -#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\ __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX) + __CLOBBERS_MEM(clobbers)) #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ - __CLOBBERS_MEM, vcon (val)) + __CLOBBERS_MEM(), vcon (val)) -#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \ + clobbers...) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX, vcon (val)) + __CLOBBERS_MEM(clobbers), vcon (val)) #endif /* _ASM_X86_RMWcc */ -- cgit v1.1 From 6262b6e78ce5ba62be47774ca80f5b0a6f0eb428 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:50:23 -0700 Subject: x86/IO-APIC: Avoid warning in 32-bit builds Constants wider than 32 bits should be tagged with ULL. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF23F02000078001A91E5@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e41..7c55387 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1603,7 +1603,7 @@ static void __init delay_with_tsc(void) do { rep_nop(); now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } -- cgit v1.1 From f2f18b16c779978ece4a04f304a92ff9ac8fbce5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 07:52:10 -0700 Subject: x86/LDT: Avoid warning in 32-bit builds with older gcc BUG() doesn't always imply "no return", and hence should be followed by a return statement even if that's obviously (to a human) unreachable. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF2AA02000078001A91E9@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c931b88..1de72ce 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -74,6 +74,7 @@ static inline void *ldt_slot_va(int slot) return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); #else BUG(); + return (void *)fix_to_virt(FIX_HOLE); #endif } -- cgit v1.1 From 8554004a0231dedf44d4d62147fb3d6a6db489aa Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Feb 2018 08:06:14 -0700 Subject: x86-64/realmode: Add instruction suffix Omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream GAS in the future (mine does already). Add the single missing suffix here. Signed-off-by: Jan Beulich Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5A8AF5F602000078001A9230@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/realmode/rm/trampoline_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index de53bd1..24bb759 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -102,7 +102,7 @@ ENTRY(startup_32) * don't we'll eventually crash trying to execute encrypted * instructions. */ - bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags + btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone movl $MSR_K8_SYSCFG, %ecx rdmsr -- cgit v1.1 From d1c99108af3c5992640aa2afa7d2e88c3775c06e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:56 +0000 Subject: Revert "x86/retpoline: Simplify vmexit_fill_RSB()" This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting the RSB filling out of line and calling it, we waste one RSB slot for returning from the function itself, which means one fewer actual function call we can make if we're doing the Skylake abomination of call-depth counting. It also changed the number of RSB stuffings we do on vmexit from 32, which was correct, to 16. Let's just stop with the bikeshedding; it didn't actually *fix* anything anyway. Signed-off-by: David Woodhouse Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-4-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 -- arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++---- arch/x86/lib/Makefile | 1 - arch/x86/lib/retpoline.S | 56 ---------------------------- 6 files changed, 65 insertions(+), 71 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 16c2c02..6ad064c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,8 +252,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %ebx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 77edc23..7a53879 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -364,8 +364,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %rbx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4d11161..1908214 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,7 +38,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -asmlinkage void __fill_rsb(void); -asmlinkage void __clear_rsb(void); - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 76b0585..af34b1e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -8,6 +8,50 @@ #include #include +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -78,10 +122,17 @@ #endif .endm -/* This clobbers the BX register */ -.macro FILL_RETURN_BUFFER nr:req ftr:req + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ALTERNATIVE "", "call __clear_rsb", \ftr + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: #endif .endm @@ -156,10 +207,15 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - alternative_input("", - "call __fill_rsb", - X86_FEATURE_RETPOLINE, - ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); + unsigned long loops; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 91e9700..25a972c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RETPOLINE) += retpoline.o -OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 480edc3..c909961 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,7 +7,6 @@ #include #include #include -#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -47,58 +46,3 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif - -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * Google experimented with loop-unrolling and this turned out to be - * the optimal version - two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -.macro STUFF_RSB nr:req sp:req - mov $(\nr / 2), %_ASM_BX - .align 16 -771: - call 772f -773: /* speculation trap */ - pause - lfence - jmp 773b - .align 16 -772: - call 774f -775: /* speculation trap */ - pause - lfence - jmp 775b - .align 16 -774: - dec %_ASM_BX - jnz 771b - add $((BITS_PER_LONG/8) * \nr), \sp -.endm - -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -ENTRY(__fill_rsb) - STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP - ret -END(__fill_rsb) -EXPORT_SYMBOL_GPL(__fill_rsb) - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ - -ENTRY(__clear_rsb) - STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP - ret -END(__clear_rsb) -EXPORT_SYMBOL_GPL(__clear_rsb) -- cgit v1.1 From dd84441a797150dcc49298ec95c459a8891d8bb1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:54 +0000 Subject: x86/speculation: Use IBRS if available before calling into firmware Retpoline means the kernel is safe because it has no indirect branches. But firmware isn't, so use IBRS for firmware calls if it's available. Block preemption while IBRS is set, although in practice the call sites already had to be doing that. Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware code, from an NMI handler. I don't want to touch that with a bargepole. Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apm.h | 6 ++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/efi.h | 17 ++++++++++++++-- arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++--------- arch/x86/kernel/cpu/bugs.c | 12 ++++++++++- 5 files changed, 63 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015d..c356098 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3..f41079d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb..a399c1e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +37,18 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup() kernel_fpu_begin() -#define arch_efi_call_virt_teardown() kernel_fpu_end() +#define arch_efi_call_virt_setup() \ +({ \ + kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + kernel_fpu_end(); \ +}) + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -73,6 +84,7 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ \ if (efi_scratch.use_pgd) { \ efi_scratch.prev_cr3 = __read_cr3(); \ @@ -91,6 +103,7 @@ struct efi_scratch { __flush_tlb_all(); \ } \ \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ }) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index af34b1e..ec90c32 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void) #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); +} + +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + */ +static inline void firmware_restrict_branch_speculation_start(void) +{ + preempt_disable(); + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, + X86_FEATURE_USE_IBRS_FW); +} + +static inline void firmware_restrict_branch_speculation_end(void) +{ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, + X86_FEATURE_USE_IBRS_FW); + preempt_enable(); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d71c8b5..bfca937 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -300,6 +300,15 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } } #undef pr_fmt @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", spectre_v2_module_string()); } #endif -- cgit v1.1 From e88230a3744a71a0b5ecfb45e08ddfe1c884e50d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 15 Feb 2018 11:19:36 +0100 Subject: drm/meson: fix vsync buffer update The plane buffer address/stride/height was incorrectly updated in the plane_atomic_update operation instead of the vsync irq. This patch delays this operation in the vsync irq along with the other plane delayed setup. This issue was masked using legacy framebuffer and X11 modesetting, but is clearly visible using gbm rendering when buffer is submitted late after vblank, like using software decoding and OpenGL rendering in Kodi. With this patch, tearing and other artifacts disappears completely. Cc: Michal Lazo Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Neil Armstrong Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1518689976-23292-1-git-send-email-narmstrong@baylibre.com --- drivers/gpu/drm/meson/meson_crtc.c | 6 ++++++ drivers/gpu/drm/meson/meson_drv.h | 3 +++ drivers/gpu/drm/meson/meson_plane.c | 7 +++---- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c index 5155f01..0552020 100644 --- a/drivers/gpu/drm/meson/meson_crtc.c +++ b/drivers/gpu/drm/meson/meson_crtc.c @@ -36,6 +36,7 @@ #include "meson_venc.h" #include "meson_vpp.h" #include "meson_viu.h" +#include "meson_canvas.h" #include "meson_registers.h" /* CRTC definition */ @@ -192,6 +193,11 @@ void meson_crtc_irq(struct meson_drm *priv) } else meson_vpp_disable_interlace_vscaler_osd1(priv); + meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1, + priv->viu.osd1_addr, priv->viu.osd1_stride, + priv->viu.osd1_height, MESON_CANVAS_WRAP_NONE, + MESON_CANVAS_BLKMODE_LINEAR); + /* Enable OSD1 */ writel_bits_relaxed(VPP_OSD1_POSTBLEND, VPP_OSD1_POSTBLEND, priv->io_base + _REG(VPP_MISC)); diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 5e8b392..8450d6ac 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -43,6 +43,9 @@ struct meson_drm { bool osd1_commit; uint32_t osd1_ctrl_stat; uint32_t osd1_blk0_cfg[5]; + uint32_t osd1_addr; + uint32_t osd1_stride; + uint32_t osd1_height; } viu; struct { diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c index 17e96fa..0b6011b 100644 --- a/drivers/gpu/drm/meson/meson_plane.c +++ b/drivers/gpu/drm/meson/meson_plane.c @@ -164,10 +164,9 @@ static void meson_plane_atomic_update(struct drm_plane *plane, /* Update Canvas with buffer address */ gem = drm_fb_cma_get_gem_obj(fb, 0); - meson_canvas_setup(priv, MESON_CANVAS_ID_OSD1, - gem->paddr, fb->pitches[0], - fb->height, MESON_CANVAS_WRAP_NONE, - MESON_CANVAS_BLKMODE_LINEAR); + priv->viu.osd1_addr = gem->paddr; + priv->viu.osd1_stride = fb->pitches[0]; + priv->viu.osd1_height = fb->height; spin_unlock_irqrestore(&priv->drm->event_lock, flags); } -- cgit v1.1 From 87358710c1fb4f1bf96bbe2349975ff9953fc9b2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 19 Feb 2018 10:50:57 +0000 Subject: x86/retpoline: Support retpoline builds with Clang Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Link: http://lkml.kernel.org/r/1519037457-7643-5-git-send-email-dwmw@amazon.co.uk Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 5 ++++- include/linux/compiler-clang.h | 5 +++++ include/linux/compiler-gcc.h | 4 ++++ include/linux/init.h | 8 ++++---- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index fad5516..dbc7d0e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -232,7 +232,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register + RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk + + RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) ifneq ($(RETPOLINE_CFLAGS),) KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index d02a4df..d3f264a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -27,3 +27,8 @@ #if __has_feature(address_sanitizer) #define __SANITIZE_ADDRESS__ #endif + +/* Clang doesn't have a way to turn it off per-function, yet. */ +#ifdef __noretpoline +#undef __noretpoline +#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 73bc63e..673fbf9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -93,6 +93,10 @@ #define __weak __attribute__((weak)) #define __alias(symbol) __attribute__((alias(#symbol))) +#ifdef RETPOLINE +#define __noretpoline __attribute__((indirect_branch("keep"))) +#endif + /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without diff --git a/include/linux/init.h b/include/linux/init.h index 506a981..bc27cf0 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -6,10 +6,10 @@ #include /* Built-in __init functions needn't be compiled with retpoline */ -#if defined(RETPOLINE) && !defined(MODULE) -#define __noretpoline __attribute__((indirect_branch("keep"))) +#if defined(__noretpoline) && !defined(MODULE) +#define __noinitretpoline __noretpoline #else -#define __noretpoline +#define __noinitretpoline #endif /* These macros are used to mark some functions or @@ -47,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __latent_entropy __noretpoline +#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) -- cgit v1.1 From 2b05f6ae1ee5a3c625478acd10b0966b66a3a017 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:55 +0000 Subject: ARM: ux500: remove PMU IRQ bouncer The ux500 PMU IRQ bouncer is getting in the way of some fundametnal changes to the ARM PMU driver, and it's the only special case that exists today. Let's remove it. Reviewed-by: Linus Walleij Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/mach-ux500/cpu-db8500.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 57058ac..7e5d7a0 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -112,37 +111,6 @@ static void ux500_restart(enum reboot_mode mode, const char *cmd) prcmu_system_reset(0); } -/* - * The PMU IRQ lines of two cores are wired together into a single interrupt. - * Bounce the interrupt to the other core if it's not ours. - */ -static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler) -{ - irqreturn_t ret = handler(irq, dev); - int other = !smp_processor_id(); - - if (ret == IRQ_NONE && cpu_online(other)) - irq_set_affinity(irq, cpumask_of(other)); - - /* - * We should be able to get away with the amount of IRQ_NONEs we give, - * while still having the spurious IRQ detection code kick in if the - * interrupt really starts hitting spuriously. - */ - return ret; -} - -static struct arm_pmu_platdata db8500_pmu_platdata = { - .handle_irq = db8500_pmu_handler, - .irq_flags = IRQF_NOBALANCING | IRQF_NO_THREAD, -}; - -static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = { - /* Requires call-back bindings. */ - OF_DEV_AUXDATA("arm,cortex-a9-pmu", 0, "arm-pmu", &db8500_pmu_platdata), - {}, -}; - static struct of_dev_auxdata u8540_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu", NULL), {}, @@ -165,9 +133,6 @@ static void __init u8500_init_machine(void) if (of_machine_is_compatible("st-ericsson,u8540")) of_platform_populate(NULL, u8500_local_bus_nodes, u8540_auxdata_lookup, NULL); - else - of_platform_populate(NULL, u8500_local_bus_nodes, - u8500_auxdata_lookup, NULL); } static const char * stericsson_dt_platform_compat[] = { -- cgit v1.1 From c0248c96631f38f02d58762fc018e316843acac8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:56 +0000 Subject: arm_pmu: kill arm_pmu_platdata Now that we have no platforms passing platform data to the arm_pmu code, we can get rid of the platdata and associated hooks, paving the way for rework of our IRQ handling. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 27 ++++----------------------- include/linux/perf/arm_pmu.h | 17 ----------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 7bc5eee..82b09d1 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -320,17 +319,9 @@ validate_group(struct perf_event *event) return 0; } -static struct arm_pmu_platdata *armpmu_get_platdata(struct arm_pmu *armpmu) -{ - struct platform_device *pdev = armpmu->plat_device; - - return pdev ? dev_get_platdata(&pdev->dev) : NULL; -} - static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) { struct arm_pmu *armpmu; - struct arm_pmu_platdata *plat; int ret; u64 start_clock, finish_clock; @@ -342,13 +333,8 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) */ armpmu = *(void **)dev; - plat = armpmu_get_platdata(armpmu); - start_clock = sched_clock(); - if (plat && plat->handle_irq) - ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); - else - ret = armpmu->handle_irq(irq, armpmu); + ret = armpmu->handle_irq(irq, armpmu); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); @@ -578,7 +564,6 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) goto err_out; } } else { - struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu); unsigned long irq_flags; err = irq_force_affinity(irq, cpumask_of(cpu)); @@ -589,13 +574,9 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) goto err_out; } - if (platdata && platdata->irq_flags) { - irq_flags = platdata->irq_flags; - } else { - irq_flags = IRQF_PERCPU | - IRQF_NOBALANCING | - IRQF_NO_THREAD; - } + irq_flags = IRQF_PERCPU | + IRQF_NOBALANCING | + IRQF_NO_THREAD; err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index af0f44ef..712764b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -17,23 +17,6 @@ #include #include -/* - * struct arm_pmu_platdata - ARM PMU platform data - * - * @handle_irq: an optional handler which will be called from the - * interrupt and passed the address of the low level handler, - * and can be used to implement any platform specific handling - * before or after calling it. - * - * @irq_flags: if non-zero, these flags will be passed to request_irq - * when requesting interrupts for this PMU device. - */ -struct arm_pmu_platdata { - irqreturn_t (*handle_irq)(int irq, void *dev, - irq_handler_t pmu_handler); - unsigned long irq_flags; -}; - #ifdef CONFIG_ARM_PMU /* -- cgit v1.1 From d3d5aac206b4e9e569a22fe1811c909dde17587c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:57 +0000 Subject: arm_pmu: fold platform helpers into platform code The armpmu_{request,free}_irqs() helpers are only used by arm_pmu_platform.c, so let's fold them in and make them static. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 21 --------------------- drivers/perf/arm_pmu_platform.c | 21 +++++++++++++++++++++ include/linux/perf/arm_pmu.h | 2 -- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 82b09d1..373dfd7 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -534,14 +534,6 @@ void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } -void armpmu_free_irqs(struct arm_pmu *armpmu) -{ - int cpu; - - for_each_cpu(cpu, &armpmu->supported_cpus) - armpmu_free_irq(armpmu, cpu); -} - int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) { int err = 0; @@ -593,19 +585,6 @@ err_out: return err; } -int armpmu_request_irqs(struct arm_pmu *armpmu) -{ - int cpu, err; - - for_each_cpu(cpu, &armpmu->supported_cpus) { - err = armpmu_request_irq(armpmu, cpu); - if (err) - break; - } - - return err; -} - static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) { struct pmu_hw_events __percpu *hw_events = pmu->hw_events; diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 46501cc..244558c 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -164,6 +164,27 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) return 0; } +static int armpmu_request_irqs(struct arm_pmu *armpmu) +{ + int cpu, err; + + for_each_cpu(cpu, &armpmu->supported_cpus) { + err = armpmu_request_irq(armpmu, cpu); + if (err) + break; + } + + return err; +} + +static void armpmu_free_irqs(struct arm_pmu *armpmu) +{ + int cpu; + + for_each_cpu(cpu, &armpmu->supported_cpus) + armpmu_free_irq(armpmu, cpu); +} + int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 712764b..899bc7e 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -159,8 +159,6 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } struct arm_pmu *armpmu_alloc(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); -int armpmu_request_irqs(struct arm_pmu *armpmu); -void armpmu_free_irqs(struct arm_pmu *armpmu); int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); -- cgit v1.1 From 0dc1a1851af1d593eee248b94c1277c7c7ccbbce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:58 +0000 Subject: arm_pmu: add armpmu_alloc_atomic() In ACPI systems, we don't know the makeup of CPUs until we hotplug them on, and thus have to allocate the PMU datastructures at hotplug time. Thus, we must use GFP_ATOMIC allocations. Let's add an armpmu_alloc_atomic() that we can use in this case. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 17 ++++++++++++++--- drivers/perf/arm_pmu_acpi.c | 2 +- include/linux/perf/arm_pmu.h | 1 + 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 373dfd7..4f73c5e 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -760,18 +760,18 @@ static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) &cpu_pmu->node); } -struct arm_pmu *armpmu_alloc(void) +static struct arm_pmu *__armpmu_alloc(gfp_t flags) { struct arm_pmu *pmu; int cpu; - pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + pmu = kzalloc(sizeof(*pmu), flags); if (!pmu) { pr_info("failed to allocate PMU device!\n"); goto out; } - pmu->hw_events = alloc_percpu(struct pmu_hw_events); + pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags); if (!pmu->hw_events) { pr_info("failed to allocate per-cpu PMU data.\n"); goto out_free_pmu; @@ -817,6 +817,17 @@ out: return NULL; } +struct arm_pmu *armpmu_alloc(void) +{ + return __armpmu_alloc(GFP_KERNEL); +} + +struct arm_pmu *armpmu_alloc_atomic(void) +{ + return __armpmu_alloc(GFP_ATOMIC); +} + + void armpmu_free(struct arm_pmu *pmu) { free_percpu(pmu->hw_events); diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 705f1a3..30c5f2b 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -127,7 +127,7 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void) return pmu; } - pmu = armpmu_alloc(); + pmu = armpmu_alloc_atomic(); if (!pmu) { pr_warn("Unable to allocate PMU for CPU%d\n", smp_processor_id()); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 899bc7e..1f8bb83 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -157,6 +157,7 @@ static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); +struct arm_pmu *armpmu_alloc_atomic(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); -- cgit v1.1 From 43fc9a2febbd96dd39588d67ace456b7bbc73d9f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:41:59 +0000 Subject: arm_pmu: acpi: check for mismatched PPIs The arm_pmu platform code explicitly checks for mismatched PPIs at probe time, while the ACPI code leaves this to the core code. Future refactoring will make this difficult for the core code to check, so let's have the ACPI code check this explicitly. As before, upon a failure we'll continue on without an interrupt. Ho hum. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 17 ++++------------- drivers/perf/arm_pmu_acpi.c | 42 +++++++++++++++++++++++++++++++++++++---- drivers/perf/arm_pmu_platform.c | 7 ------- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 4f73c5e..ddcabd6 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -543,19 +543,7 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) if (!irq) return 0; - if (irq_is_percpu_devid(irq) && cpumask_empty(&armpmu->active_irqs)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); - } else if (irq_is_percpu_devid(irq)) { - int other_cpu = cpumask_first(&armpmu->active_irqs); - int other_irq = per_cpu(hw_events->irq, other_cpu); - - if (irq != other_irq) { - pr_warn("mismatched PPIs detected.\n"); - err = -EINVAL; - goto err_out; - } - } else { + if (!irq_is_percpu_devid(irq)) { unsigned long irq_flags; err = irq_force_affinity(irq, cpumask_of(cpu)); @@ -572,6 +560,9 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } else if (cpumask_empty(&armpmu->active_irqs)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &hw_events->percpu_pmu); } if (err) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 30c5f2b..09a1a36 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -140,6 +142,35 @@ static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void) } /* + * Check whether the new IRQ is compatible with those already associated with + * the PMU (e.g. we don't have mismatched PPIs). + */ +static bool pmu_irq_matches(struct arm_pmu *pmu, int irq) +{ + struct pmu_hw_events __percpu *hw_events = pmu->hw_events; + int cpu; + + if (!irq) + return true; + + for_each_cpu(cpu, &pmu->supported_cpus) { + int other_irq = per_cpu(hw_events->irq, cpu); + if (!other_irq) + continue; + + if (irq == other_irq) + continue; + if (!irq_is_percpu_devid(irq) && !irq_is_percpu_devid(other_irq)) + continue; + + pr_warn("mismatched PPIs detected\n"); + return false; + } + + return true; +} + +/* * This must run before the common arm_pmu hotplug logic, so that we can * associate a CPU and its interrupt before the common code tries to manage the * affinity and so on. @@ -164,18 +195,21 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu) if (!pmu) return -ENOMEM; - cpumask_set_cpu(cpu, &pmu->supported_cpus); - per_cpu(probed_pmus, cpu) = pmu; + if (pmu_irq_matches(pmu, irq)) { + hw_events = pmu->hw_events; + per_cpu(hw_events->irq, cpu) = irq; + } + + cpumask_set_cpu(cpu, &pmu->supported_cpus); + /* * Log and request the IRQ so the core arm_pmu code can manage it. In * some situations (e.g. mismatched PPIs), we may fail to request the * IRQ. However, it may be too late for us to do anything about it. * The common ARM PMU code will log a warning in this case. */ - hw_events = pmu->hw_events; - per_cpu(hw_events->irq, cpu) = irq; armpmu_request_irq(pmu, cpu); /* diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 244558c..1dc3c1f 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -127,13 +127,6 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) pdev->dev.of_node); } - /* - * Some platforms have all PMU IRQs OR'd into a single IRQ, with a - * special platdata function that attempts to demux them. - */ - if (dev_get_platdata(&pdev->dev)) - cpumask_setall(&pmu->supported_cpus); - for (i = 0; i < num_irqs; i++) { int cpu, irq; -- cgit v1.1 From 6de3f79112cc26bf24edbb240248d21e1dd85dde Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Feb 2018 16:42:00 +0000 Subject: arm_pmu: explicitly enable/disable SPIs at hotplug To support ACPI systems, we need to request IRQs before CPUs are hotplugged, and thus we need to request IRQs before we know their associated PMU. This is problematic if a PMU IRQ is pending out of reset, as it may be taken before we know the PMU, and thus the IRQ handler won't be able to handle it, leaving it screaming. To avoid such problems, lets request all IRQs in a disabled state, and explicitly enable/disable them at hotplug time, when we're sure the PMU has been probed. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index ddcabd6..72118e6 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -558,6 +558,7 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) IRQF_NOBALANCING | IRQF_NO_THREAD; + irq_set_status_flags(irq, IRQ_NOAUTOEN); err = request_irq(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } else if (cpumask_empty(&armpmu->active_irqs)) { @@ -600,10 +601,10 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) irq = armpmu_get_cpu_irq(pmu, cpu); if (irq) { - if (irq_is_percpu_devid(irq)) { + if (irq_is_percpu_devid(irq)) enable_percpu_irq(irq, IRQ_TYPE_NONE); - return 0; - } + else + enable_irq(irq); } return 0; @@ -618,8 +619,12 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq && irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); + if (irq) { + if (irq_is_percpu_devid(irq)) + disable_percpu_irq(irq); + else + disable_irq(irq); + } return 0; } -- cgit v1.1 From 84b4be57ae17f8c0b3c1d8629e10f23910838fd7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 12 Dec 2017 16:56:06 +0000 Subject: arm_pmu: note IRQs and PMUs per-cpu To support ACPI systems, we need to request IRQs before we know the associated PMU, and thus we need some percpu variable that the IRQ handler can find the PMU from. As we're going to request IRQs without the PMU, we can't rely on the arm_pmu::active_irqs mask, and similarly need to track requested IRQs with a percpu variable. Signed-off-by: Mark Rutland [will: made armpmu_count_irq_users static] Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 69 +++++++++++++++++++++++++++++++++----------- include/linux/perf/arm_pmu.h | 1 - 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 72118e6..2b2af35 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -25,6 +25,9 @@ #include +static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); +static DEFINE_PER_CPU(int, cpu_irq); + static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -332,6 +335,8 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) * dereference. */ armpmu = *(void **)dev; + if (WARN_ON_ONCE(!armpmu)) + return IRQ_NONE; start_clock = sched_clock(); ret = armpmu->handle_irq(irq, armpmu); @@ -517,29 +522,45 @@ int perf_num_counters(void) } EXPORT_SYMBOL_GPL(perf_num_counters); -void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) +static int armpmu_count_irq_users(const int irq) { - struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int irq = per_cpu(hw_events->irq, cpu); + int cpu, count = 0; - if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs)) - return; + for_each_possible_cpu(cpu) { + if (per_cpu(cpu_irq, cpu) == irq) + count++; + } + + return count; +} - if (irq_is_percpu_devid(irq)) { - free_percpu_irq(irq, &hw_events->percpu_pmu); - cpumask_clear(&armpmu->active_irqs); +void armpmu_free_cpu_irq(int irq, int cpu) +{ + if (per_cpu(cpu_irq, cpu) == 0) return; - } + if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) + return; + + if (!irq_is_percpu_devid(irq)) + free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); + else if (armpmu_count_irq_users(irq) == 1) + free_percpu_irq(irq, &cpu_armpmu); - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + per_cpu(cpu_irq, cpu) = 0; } -int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) +void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) { - int err = 0; struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - const irq_handler_t handler = armpmu_dispatch_irq; int irq = per_cpu(hw_events->irq, cpu); + + armpmu_free_cpu_irq(irq, cpu); +} + +int armpmu_request_cpu_irq(int irq, int cpu) +{ + int err = 0; + const irq_handler_t handler = armpmu_dispatch_irq; if (!irq) return 0; @@ -560,16 +581,16 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) irq_set_status_flags(irq, IRQ_NOAUTOEN); err = request_irq(irq, handler, irq_flags, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } else if (cpumask_empty(&armpmu->active_irqs)) { + per_cpu_ptr(&cpu_armpmu, cpu)); + } else if (armpmu_count_irq_users(irq) == 0) { err = request_percpu_irq(irq, handler, "arm-pmu", - &hw_events->percpu_pmu); + &cpu_armpmu); } if (err) goto err_out; - cpumask_set_cpu(cpu, &armpmu->active_irqs); + per_cpu(cpu_irq, cpu) = irq; return 0; err_out: @@ -577,6 +598,16 @@ err_out: return err; } +int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) +{ + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; + int irq = per_cpu(hw_events->irq, cpu); + if (!irq) + return 0; + + return armpmu_request_cpu_irq(irq, cpu); +} + static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) { struct pmu_hw_events __percpu *hw_events = pmu->hw_events; @@ -599,6 +630,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) if (pmu->reset) pmu->reset(pmu); + per_cpu(cpu_armpmu, cpu) = pmu; + irq = armpmu_get_cpu_irq(pmu, cpu); if (irq) { if (irq_is_percpu_devid(irq)) @@ -626,6 +659,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) disable_irq(irq); } + per_cpu(cpu_armpmu, cpu) = NULL; + return 0; } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 1f8bb83..feec9e7 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -75,7 +75,6 @@ enum armpmu_attr_groups { struct arm_pmu { struct pmu pmu; - cpumask_t active_irqs; cpumask_t supported_cpus; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); -- cgit v1.1 From 167e61438da0664cab87c825a6c0cb83510d578e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 9 Oct 2017 17:09:05 +0100 Subject: arm_pmu: acpi: request IRQs up-front We can't request IRQs in atomic context, so for ACPI systems we'll have to request them up-front, and later associate them with CPUs. This patch reorganises the arm_pmu code to do so. As we no longer have the arm_pmu structure at probe time, a number of prototypes need to be adjusted, requiring changes to the common arm_pmu code and arm_pmu platform code. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 22 ++-------------------- drivers/perf/arm_pmu_acpi.c | 19 ++++++------------- drivers/perf/arm_pmu_platform.c | 15 ++++++++++++--- include/linux/perf/arm_pmu.h | 5 +++-- 4 files changed, 23 insertions(+), 38 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 2b2af35..0c2ed11 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -534,7 +534,7 @@ static int armpmu_count_irq_users(const int irq) return count; } -void armpmu_free_cpu_irq(int irq, int cpu) +void armpmu_free_irq(int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) return; @@ -549,15 +549,7 @@ void armpmu_free_cpu_irq(int irq, int cpu) per_cpu(cpu_irq, cpu) = 0; } -void armpmu_free_irq(struct arm_pmu *armpmu, int cpu) -{ - struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int irq = per_cpu(hw_events->irq, cpu); - - armpmu_free_cpu_irq(irq, cpu); -} - -int armpmu_request_cpu_irq(int irq, int cpu) +int armpmu_request_irq(int irq, int cpu) { int err = 0; const irq_handler_t handler = armpmu_dispatch_irq; @@ -598,16 +590,6 @@ err_out: return err; } -int armpmu_request_irq(struct arm_pmu *armpmu, int cpu) -{ - struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - int irq = per_cpu(hw_events->irq, cpu); - if (!irq) - return 0; - - return armpmu_request_cpu_irq(irq, cpu); -} - static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) { struct pmu_hw_events __percpu *hw_events = pmu->hw_events; diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 09a1a36..0f19751 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -89,7 +89,13 @@ static int arm_pmu_acpi_parse_irqs(void) pr_warn("No ACPI PMU IRQ for CPU%d\n", cpu); } + /* + * Log and request the IRQ so the core arm_pmu code can manage + * it. We'll have to sanity-check IRQs later when we associate + * them with their PMUs. + */ per_cpu(pmu_irqs, cpu) = irq; + armpmu_request_irq(irq, cpu); } return 0; @@ -205,14 +211,6 @@ static int arm_pmu_acpi_cpu_starting(unsigned int cpu) cpumask_set_cpu(cpu, &pmu->supported_cpus); /* - * Log and request the IRQ so the core arm_pmu code can manage it. In - * some situations (e.g. mismatched PPIs), we may fail to request the - * IRQ. However, it may be too late for us to do anything about it. - * The common ARM PMU code will log a warning in this case. - */ - armpmu_request_irq(pmu, cpu); - - /* * Ideally, we'd probe the PMU here when we find the first matching * CPU. We can't do that for several reasons; see the comment in * arm_pmu_acpi_init(). @@ -281,11 +279,6 @@ static int arm_pmu_acpi_init(void) if (acpi_disabled) return 0; - /* - * We can't request IRQs yet, since we don't know the cookie value - * until we know which CPUs share the same logical PMU. We'll handle - * that in arm_pmu_acpi_cpu_starting(). - */ ret = arm_pmu_acpi_parse_irqs(); if (ret) return ret; diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 1dc3c1f..7729eda 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -159,10 +159,15 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) static int armpmu_request_irqs(struct arm_pmu *armpmu) { + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; int cpu, err; for_each_cpu(cpu, &armpmu->supported_cpus) { - err = armpmu_request_irq(armpmu, cpu); + int irq = per_cpu(hw_events->irq, cpu); + if (!irq) + continue; + + err = armpmu_request_irq(irq, cpu); if (err) break; } @@ -173,9 +178,13 @@ static int armpmu_request_irqs(struct arm_pmu *armpmu) static void armpmu_free_irqs(struct arm_pmu *armpmu) { int cpu; + struct pmu_hw_events __percpu *hw_events = armpmu->hw_events; - for_each_cpu(cpu, &armpmu->supported_cpus) - armpmu_free_irq(armpmu, cpu); + for_each_cpu(cpu, &armpmu->supported_cpus) { + int irq = per_cpu(hw_events->irq, cpu); + + armpmu_free_irq(irq, cpu); + } } int arm_pmu_device_probe(struct platform_device *pdev, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index feec9e7..40036a5 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -159,8 +160,8 @@ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); -int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); -void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); +int armpmu_request_irq(int irq, int cpu); +void armpmu_free_irq(int irq, int cpu); #define ARMV8_PMU_PDEV_NAME "armv8-pmu" -- cgit v1.1 From 0331365edb1d6ccd6ae68b1038111da85d4c68d1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Feb 2018 17:21:57 +0000 Subject: arm64: perf: correct PMUVer probing The ID_AA64DFR0_EL1.PMUVer field doesn't follow the usual ID registers scheme. While value 0xf indicates a non-architected PMU is implemented, values 0x1 to 0xe indicate an increasingly featureful architected PMU, as if the field were unsigned. For more details, see ARM DDI 0487C.a, D10.1.4, "Alternative ID scheme used for the Performance Monitors Extension version". Currently, we treat the field as signed, and erroneously bail out for values 0x8 to 0xe. Let's correct that. Signed-off-by: Mark Rutland Reviewed-by: Robin Murphy Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 75b220b..85a251b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -908,9 +908,9 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_signed_field(dfr0, + pmuver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMUVER_SHIFT); - if (pmuver < 1) + if (pmuver == 0xf || pmuver == 0) return; probe->present = true; -- cgit v1.1 From 35b5f14ec6dab281346a2d0ceb34abe2dba94190 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Feb 2018 10:37:59 +0100 Subject: regulator: Fix resume from suspend to idle When resuming from idle with the new suspend mode configuration support we go through the resume callbacks with a state of PM_SUSPEND_TO_IDLE which we don't have regulator constraints for, causing an error: dpm_run_callback(): regulator_resume_early+0x0/0x64 returns -22 PM: Device regulator.0 failed to resume early: error -22 Avoid this and similar errors by treating missing constraints as a noop. See also commit 57a0dd187956ea04 ("regulator: Fix suspend to idle"), which fixed the suspend part. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index dd4708c..1fc0c08 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4310,7 +4310,7 @@ static int _regulator_resume_early(struct device *dev, void *data) rstate = regulator_get_suspend_state(rdev, *state); if (rstate == NULL) - return -EINVAL; + return 0; mutex_lock(&rdev->mutex); -- cgit v1.1 From 17539f2f4f0b7fa906b508765c8ada07a1e45f52 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 20 Feb 2018 07:30:10 -0600 Subject: usb: musb: fix enumeration after resume On dm3730 there are enumeration problems after resume. Investigation led to the cause that the MUSB_POWER_SOFTCONN bit is not set. If it was set before suspend (because it was enabled via musb_pullup()), it is set in musb_restore_context() so the pullup is enabled. But then musb_start() is called which overwrites MUSB_POWER and therefore disables MUSB_POWER_SOFTCONN, so no pullup is enabled and the device is not enumerated. So let's do a subset of what musb_start() does in the same way as musb_suspend() does it. Platform-specific stuff it still called as there might be some phy-related stuff which needs to be enabled. Also interrupts are enabled, as it was the original idea of calling musb_start() in musb_resume() according to Commit 6fc6f4b87cb3 ("usb: musb: Disable interrupts on suspend, enable them on resume") Signed-off-by: Andreas Kemnade Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 968bf1e..eef4ad5 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2708,7 +2708,8 @@ static int musb_resume(struct device *dev) if ((devctl & mask) != (musb->context.devctl & mask)) musb->port1_status = 0; - musb_start(musb); + musb_enable_interrupts(musb); + musb_platform_enable(musb); spin_lock_irqsave(&musb->lock, flags); error = musb_run_resume_work(musb); -- cgit v1.1 From 44eb5e12b845cc8a0634f21b70ef07d774eb4b25 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 20 Feb 2018 07:31:35 -0600 Subject: Revert "usb: musb: host: don't start next rx urb if current one failed" This reverts commit dbac5d07d13e330e6706813c9fde477140fb5d80. commit dbac5d07d13e ("usb: musb: host: don't start next rx urb if current one failed") along with commit b5801212229f ("usb: musb: host: clear rxcsr error bit if set") try to solve the issue described in [1], but the latter alone is sufficient, and the former causes the issue as in [2], so now revert it. [1] https://marc.info/?l=linux-usb&m=146173995117456&w=2 [2] https://marc.info/?l=linux-usb&m=151689238420622&w=2 Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 394b4ac..45ed32c 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -391,13 +391,7 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb, } } - /* - * The pipe must be broken if current urb->status is set, so don't - * start next urb. - * TODO: to minimize the risk of regression, only check urb->status - * for RX, until we have a test case to understand the behavior of TX. - */ - if ((!status || !is_in) && qh && qh->is_ready) { + if (qh != NULL && qh->is_ready) { musb_dbg(musb, "... next ep%d %cX urb %p", hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh)); musb_start_urb(musb, is_in, qh); -- cgit v1.1 From 5e558f8afaec8957932b1dbe5aeff800f9fc6957 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 20 Feb 2018 16:19:05 +0200 Subject: ASoC: hdmi-codec: Fix module unloading caused kernel crash The hcp->chmap_info must not be freed up in the hdmi_codec_remove() function as it leads to kernel crash due ALSA core's pcm_chmap_ctl_private_free() is trying to free it up again when the card destroyed via snd_card_free. Commit cd6111b26280a ("ASoC: hdmi-codec: add channel mapping control") should not have added the kfree(hcp->chmap_info); to the hdmi_codec_remove function. Signed-off-by: Peter Ujfalusi Reviewed-by: Jyri Sarha Tested-by: Jyri Sarha Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 5672e51..c1830ccd 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -798,12 +798,7 @@ static int hdmi_codec_probe(struct platform_device *pdev) static int hdmi_codec_remove(struct platform_device *pdev) { - struct device *dev = &pdev->dev; - struct hdmi_codec_priv *hcp; - - hcp = dev_get_drvdata(dev); - kfree(hcp->chmap_info); - snd_soc_unregister_codec(dev); + snd_soc_unregister_codec(&pdev->dev); return 0; } -- cgit v1.1 From 6ae1756faddefd7494353380ee546dd38c2f97eb Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 20 Feb 2018 15:44:37 +0000 Subject: MIPS: Drop spurious __unused in struct compat_flock MIPS' struct compat_flock doesn't match the 32-bit struct flock, as it has an extra short __unused before pad[4], which combined with alignment increases the size to 40 bytes compared with struct flock's 36 bytes. Since commit 8c6657cb50cb ("Switch flock copyin/copyout primitives to copy_{from,to}_user()"), put_compat_flock() writes the full compat_flock struct to userland, which results in corruption of the userland word after the struct flock when running 32-bit userlands on 64-bit kernels. This was observed to cause a bus error exception when starting Firefox on Debian 8 (Jessie). Reported-by: Peter Mamonov Signed-off-by: James Hogan Tested-by: Peter Mamonov Cc: Ralf Baechle Cc: Al Viro Cc: linux-mips@linux-mips.org Cc: # 4.13+ Patchwork: https://patchwork.linux-mips.org/patch/18646/ --- arch/mips/include/asm/compat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 946681d..9a0fa66 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -86,7 +86,6 @@ struct compat_flock { compat_off_t l_len; s32 l_sysid; compat_pid_t l_pid; - short __unused; s32 pad[4]; }; -- cgit v1.1 From 7ff662b76167fd9a68254352287c5de0dc698942 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 15 Feb 2018 21:20:08 -0800 Subject: RDMA/bnxt_re: Disable atomic capability on bnxt_re adapters More testing needs to be done before enabling this feature. Disabling the feature temporarily Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 6 ++---- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 14 +------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ae9e9ff..280354f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -174,10 +174,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, ib_attr->max_pd = dev_attr->max_pd; ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom; ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom; - if (dev_attr->is_atomic) { - ib_attr->atomic_cap = IB_ATOMIC_HCA; - ib_attr->masked_atomic_cap = IB_ATOMIC_HCA; - } + ib_attr->atomic_cap = IB_ATOMIC_NONE; + ib_attr->masked_atomic_cap = IB_ATOMIC_NONE; ib_attr->max_ee_rd_atom = 0; ib_attr->max_res_rd_atom = 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index c015c18..0305798 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -52,18 +52,6 @@ const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0, /* Device */ -static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) -{ - int rc; - u16 pcie_ctl2; - - rc = pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, - &pcie_ctl2); - if (rc) - return false; - return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ); -} - static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, char *fw_ver) { @@ -165,7 +153,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc); } - attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw); + attr->is_atomic = 0; bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); return rc; -- cgit v1.1 From 6b4521f5174c26020ae0deb3ef7f2c28557cf445 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 15 Feb 2018 21:20:10 -0800 Subject: RDMA/bnxt_re: Unpin SQ and RQ memory if QP create fails Driver leaves the QP memory pinned if QP create command fails from the FW. Avoids this scenario by adding a proper exit path if the FW command fails. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 280354f..29e6b17 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1183,7 +1183,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to create HW QP"); - goto fail; + goto free_umem; } } @@ -1211,6 +1211,13 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, return &qp->ib_qp; qp_destroy: bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); +free_umem: + if (udata) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } fail: kfree(qp); return ERR_PTR(rc); -- cgit v1.1 From 3b921e3bc4c20af58a663ed238ad57e87493dde2 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 15 Feb 2018 21:20:11 -0800 Subject: RDMA/bnxt_re: Synchronize destroy_qp with poll_cq Avoid system crash when destroy_qp is invoked while the driver is processing the poll_cq. Synchronize these functions using the cq_lock. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 39 +++++++++++++++++++++++++++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 ++ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++------------ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 4 +++- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 29e6b17..643174d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -785,20 +785,51 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) return 0; } +static unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp) + __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock) +{ + unsigned long flags; + + spin_lock_irqsave(&qp->scq->cq_lock, flags); + if (qp->rcq != qp->scq) + spin_lock(&qp->rcq->cq_lock); + else + __acquire(&qp->rcq->cq_lock); + + return flags; +} + +static void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, + unsigned long flags) + __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock) +{ + if (qp->rcq != qp->scq) + spin_unlock(&qp->rcq->cq_lock); + else + __release(&qp->rcq->cq_lock); + spin_unlock_irqrestore(&qp->scq->cq_lock, flags); +} + /* Queue Pairs */ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; int rc; + unsigned int flags; bnxt_qplib_flush_cqn_wq(&qp->qplib_qp); - bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP"); return rc; } + + flags = bnxt_re_lock_cqs(qp); + bnxt_qplib_clean_qp(&qp->qplib_qp); + bnxt_re_unlock_cqs(qp, flags); + bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); + if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah); @@ -808,7 +839,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) return rc; } - bnxt_qplib_del_flush_qp(&qp->qplib_qp); + bnxt_qplib_clean_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &rdev->qp1_sqp->qplib_qp); if (rc) { @@ -1067,6 +1098,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, goto fail; } qp->qplib_qp.scq = &cq->qplib_cq; + qp->scq = cq; } if (qp_init_attr->recv_cq) { @@ -1078,6 +1110,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd, goto fail; } qp->qplib_qp.rcq = &cq->qplib_cq; + qp->rcq = cq; } if (qp_init_attr->srq) { @@ -1608,7 +1641,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, dev_dbg(rdev_to_dev(rdev), "Move QP = %p out of flush list\n", qp); - bnxt_qplib_del_flush_qp(&qp->qplib_qp); + bnxt_qplib_clean_qp(&qp->qplib_qp); } } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 423ebe0..b88a48d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -89,6 +89,8 @@ struct bnxt_re_qp { /* QP1 */ u32 send_psn; struct ib_ud_header qp1_hdr; + struct bnxt_re_cq *scq; + struct bnxt_re_cq *rcq; }; struct bnxt_re_cq { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 1b0e946..3ea5b96 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -173,7 +173,7 @@ static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) } } -void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp) { unsigned long flags; @@ -1419,7 +1419,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_qp req; struct creq_destroy_qp_resp resp; - unsigned long flags; u16 cmd_flags = 0; int rc; @@ -1437,19 +1436,12 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, return rc; } - /* Must walk the associated CQs to nullified the QP ptr */ - spin_lock_irqsave(&qp->scq->hwq.lock, flags); - - __clean_cq(qp->scq, (u64)(unsigned long)qp); - - if (qp->rcq && qp->rcq != qp->scq) { - spin_lock(&qp->rcq->hwq.lock); - __clean_cq(qp->rcq, (u64)(unsigned long)qp); - spin_unlock(&qp->rcq->hwq.lock); - } - - spin_unlock_irqrestore(&qp->scq->hwq.lock, flags); + return 0; +} +void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp) +{ bnxt_qplib_free_qp_hdr_buf(res, qp); bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq); kfree(qp->sq.swq); @@ -1462,7 +1454,6 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, if (qp->orrq.max_elements) bnxt_qplib_free_hwq(res->pdev, &qp->orrq); - return 0; } void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 211b27a..ca0a2ff 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -478,6 +478,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); +void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp); +void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp); void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, struct bnxt_qplib_sge *sge); void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, @@ -500,7 +503,6 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type); void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq); void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp); -void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp); void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, unsigned long *flags); void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, -- cgit v1.1 From dcdaba08062b4726500b9456f8664bfda896c664 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 15 Feb 2018 21:20:12 -0800 Subject: RDMA/bnxt_re: Fix system crash during load/unload During driver unload, the driver proceeds with cleanup without waiting for the scheduled events. So the device pointers get freed up and driver crashes when the events are scheduled later. Flush the bnxt_re_task work queue before starting device removal. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 508d00a..7f9298d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1578,6 +1578,11 @@ static void __exit bnxt_re_mod_exit(void) */ list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) { dev_info(rdev_to_dev(rdev), "Unregistering Device"); + /* + * Flush out any scheduled tasks before destroying the + * resources + */ + flush_workqueue(bnxt_re_wq); bnxt_re_dev_stop(rdev); bnxt_re_ib_unreg(rdev, true); bnxt_re_remove_one(rdev); -- cgit v1.1 From 7374fbd9e167ddc4f380d056ca74518be5d45518 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 15 Feb 2018 21:20:13 -0800 Subject: RDMA/bnxt_re: Avoid system hang during device un-reg BNXT_RE_FLAG_TASK_IN_PROG doesn't handle multiple work requests posted together. Track schedule of multiple workqueue items by maintaining a per device counter and proceed with IB dereg only if this counter is zero. flush_workqueue is no longer required from NETDEV_UNREGISTER path. Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 +- drivers/infiniband/hw/bnxt_re/main.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ca32057..3eb7a83 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -120,7 +120,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 #define BNXT_RE_FLAG_QOS_WORK_REG 5 -#define BNXT_RE_FLAG_TASK_IN_PROG 6 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; unsigned int version, major, minor; @@ -158,6 +157,7 @@ struct bnxt_re_dev { atomic_t srq_count; atomic_t mr_count; atomic_t mw_count; + atomic_t sched_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 7f9298d..33a4480 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -656,7 +656,6 @@ static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) mutex_unlock(&bnxt_re_dev_lock); synchronize_rcu(); - flush_workqueue(bnxt_re_wq); ib_dealloc_device(&rdev->ibdev); /* rdev is gone */ @@ -1441,7 +1440,7 @@ static void bnxt_re_task(struct work_struct *work) break; } smp_mb__before_atomic(); - clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags); + atomic_dec(&rdev->sched_count); kfree(re_work); } @@ -1503,7 +1502,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, /* netdev notifier will call NETDEV_UNREGISTER again later since * we are still holding the reference to the netdev */ - if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags)) + if (atomic_read(&rdev->sched_count) > 0) goto exit; bnxt_re_ib_unreg(rdev, false); bnxt_re_remove_one(rdev); @@ -1523,7 +1522,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, re_work->vlan_dev = (real_dev == netdev ? NULL : netdev); INIT_WORK(&re_work->work, bnxt_re_task); - set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags); + atomic_inc(&rdev->sched_count); queue_work(bnxt_re_wq, &re_work->work); } } -- cgit v1.1 From ab0dc41b7324329af1c18580b0fc891922a717cf Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:19 +0100 Subject: riscv: Remove ARCH_WANT_OPTIONAL_GPIOLIB select The ARCH_WANT_OPTIONAL_GPIOLIB symbol was removed in commit 65053e1a7743 ("gpio: delete ARCH_[WANTS_OPTIONAL|REQUIRE]_GPIOLIB"). GPIOLIB should just be selected explicitly if needed. Remove the ARCH_WANT_OPTIONAL_GPIOLIB select from RISCV. See commit 0145071b3314 ("x86: Do away with ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB") and commit da9a1c6767 ("arm64: do away with ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB") as well. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Reviewed-by: Linus Walleij Signed-off-by: Ulf Magnusson Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index b6722c2..f9fd6ed 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -20,7 +20,6 @@ config RISCV select GENERIC_STRNLEN_USER select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A - select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_API_DEBUG -- cgit v1.1 From 2aaa2dc31bee808703c24ce626e50d1b6d8c7f9c Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Thu, 8 Feb 2018 23:54:46 +0100 Subject: riscv: kconfig: Remove RISCV_IRQ_INTC select The RISCV_IRQ_INTC configuration symbol is undefined, but RISCV selects it. Quoting Palmer Dabbelt: It looks like this slipped through, the symbol has been renamed RISCV_INTC. No RISCV_INTC configuration symbol has been merged either. Just remove the RISCV_IRQ_INTC select for now. Signed-off-by: Ulf Magnusson Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f9fd6ed..9740748 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -33,7 +33,6 @@ config RISCV select HAVE_ARCH_TRACEHOOK select MODULES_USE_ELF_RELA if MODULES select THREAD_INFO_IN_TASK - select RISCV_IRQ_INTC select RISCV_TIMER config MMU -- cgit v1.1 From 89a4b4441206962d1bbb62f128604a269b60933d Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 5 Feb 2018 02:21:18 +0100 Subject: riscv: Remove ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE symbol was removed in commit 51a021244b9d ("atomic64: no need for CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE"). Remove the ARCH_HAS_ATOMIC64_DEC_IS_POSITIVE select from RISCV. Discovered with the https://github.com/ulfalizer/Kconfiglib/blob/master/examples/list_undefined.py script. Signed-off-by: Ulf Magnusson Reviewed-by: Jonathan Neuschäfer Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 9740748..04807c7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -8,7 +8,6 @@ config RISCV select OF select OF_EARLY_FLATTREE select OF_IRQ - select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK -- cgit v1.1 From bcae803a213172c79ab5d077f169e3428e44d2ba Mon Sep 17 00:00:00 2001 From: "zongbox@gmail.com" Date: Mon, 29 Jan 2018 23:51:45 -0800 Subject: RISC-V: Enable IRQ during exception handling Interrupt is allowed during exception handling. There are warning messages if the kernel enables the configuration 'CONFIG_DEBUG_ATOMIC_SLEEP=y'. BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:23 in_atomic(): 0, irqs_disabled(): 1, pid: 43, name: ash CPU: 0 PID: 43 Comm: ash Tainted: G W 4.15.0-rc8-00089-g89ffdae-dirty #17 Call Trace: [<000000009abb1587>] walk_stackframe+0x0/0x7a [<00000000d4f3d088>] ___might_sleep+0x102/0x11a [<00000000b1fd792a>] down_read+0x18/0x28 [<000000000289ec01>] do_page_fault+0x86/0x2f6 [<00000000012441f6>] _do_fork+0x1b4/0x1e0 [<00000000f46c3e3b>] ret_from_syscall+0xa/0xe Reviewed-by: Christoph Hellwig Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 87fc045..56fa592 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -172,6 +172,9 @@ ENTRY(handle_exception) move a1, sp /* pt_regs */ tail do_IRQ 1: + /* Exceptions run with interrupts enabled */ + csrs sstatus, SR_SIE + /* Handle syscalls */ li t0, EXC_SYSCALL beq s4, t0, handle_syscall @@ -198,8 +201,6 @@ handle_syscall: */ addi s2, s2, 0x4 REG_S s2, PT_SEPC(sp) - /* System calls run with interrupts enabled */ - csrs sstatus, SR_SIE /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_TRACE -- cgit v1.1 From 8b08f50152ff85a4780e5c385d2b65889406e842 Mon Sep 17 00:00:00 2001 From: Michael Clark Date: Fri, 16 Feb 2018 09:30:29 +1300 Subject: Rename sbi_save to parse_dtb to improve code readability The sbi_ prefix would seem to indicate an SBI interface, and save is not very specific. After applying this patch, reading head.S makes more sense. Signed-off-by: Michael Clark Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 2 +- arch/riscv/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 226eeb1..6e07ed3 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -64,7 +64,7 @@ ENTRY(_start) /* Start the kernel */ mv a0, s0 mv a1, s1 - call sbi_save + call parse_dtb tail start_kernel relocate: diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 09f7064..c11f40c 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -144,7 +144,7 @@ asmlinkage void __init setup_vm(void) #endif } -void __init sbi_save(unsigned int hartid, void *dtb) +void __init parse_dtb(unsigned int hartid, void *dtb) { early_init_dt_scan(__va(dtb)); } -- cgit v1.1 From abe27a885d9e6575e663a16176dabc58ce9d7188 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 19 Feb 2018 20:12:57 -0600 Subject: ibmvnic: Check for NULL skb's in NAPI poll routine After introduction of commit d0869c0071e4, there were some instances of RX queue entries from a previous session (before the device was closed and reopened) returned to the NAPI polling routine. Since the corresponding socket buffers were freed, this resulted in a panic on reopen. Include a check for a NULL skb here to avoid this. Fixes: d0869c0071e4 ("ibmvnic: Clean RX pool buffers during device close") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 996f475..1495cb9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1901,6 +1901,11 @@ restart_poll: dev_kfree_skb_any(rx_buff->skb); remove_buff_from_pool(adapter, rx_buff); continue; + } else if (!rx_buff->skb) { + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + continue; } length = be32_to_cpu(next->rx_comp.len); -- cgit v1.1 From 5825acf5c958a6820b04e9811caeb2f5e572bcd8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 Feb 2018 14:25:11 -0500 Subject: drm/amd/powerplay/vega10: allow mclk switching with no displays If there are no displays attached, there is no reason to disable mclk switching. Fixes mclks getting set to high when there are no displays attached. Reviewed-by: Eric Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 2d55dab..5f9c3ef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3168,10 +3168,13 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, disable_mclk_switching_for_vr = PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR); force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh); - disable_mclk_switching = (info.display_count > 1) || - disable_mclk_switching_for_frame_lock || - disable_mclk_switching_for_vr || - force_mclk_high; + if (info.display_count == 0) + disable_mclk_switching = false; + else + disable_mclk_switching = (info.display_count > 1) || + disable_mclk_switching_for_frame_lock || + disable_mclk_switching_for_vr || + force_mclk_high; sclk = vega10_ps->performance_levels[0].gfx_clock; mclk = vega10_ps->performance_levels[0].mem_clock; -- cgit v1.1 From 51954e17914aaadf18d97b21c2a2cee16fa29513 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 Feb 2018 14:26:54 -0500 Subject: drm/amd/powerplay/smu7: allow mclk switching with no displays If there are no displays attached, there is no reason to disable mclk switching. Fixes mclks getting set to high when there are no displays attached. Reviewed-by: Eric Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 41e42be..45be313 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2756,10 +2756,13 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - disable_mclk_switching = ((1 < info.display_count) || - disable_mclk_switching_for_frame_lock || - smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) || - (mode_info.refresh_rate > 120)); + if (info.display_count == 0) + disable_mclk_switching = false; + else + disable_mclk_switching = ((1 < info.display_count) || + disable_mclk_switching_for_frame_lock || + smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us) || + (mode_info.refresh_rate > 120)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; -- cgit v1.1 From 53bf277b487eb5ae6695db01bede0fe406792119 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Feb 2018 08:40:30 -0500 Subject: Revert "drm/radeon/pm: autoswitch power state when in balanced mode" This reverts commit 1c331f75aa6ccbf64ebcc5a019183e617c9d818a. Breaks resume on some systems. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=100759 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_pm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 326ad06..4b65425 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -47,7 +47,6 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev); static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish); static void radeon_pm_update_profile(struct radeon_device *rdev); static void radeon_pm_set_clocks(struct radeon_device *rdev); -static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev); int radeon_pm_get_type_index(struct radeon_device *rdev, enum radeon_pm_state_type ps_type, @@ -80,8 +79,6 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev) radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power); } mutex_unlock(&rdev->pm.mutex); - /* allow new DPM state to be picked */ - radeon_pm_compute_clocks_dpm(rdev); } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) { if (rdev->pm.profile == PM_PROFILE_AUTO) { mutex_lock(&rdev->pm.mutex); @@ -885,8 +882,7 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev, dpm_state = POWER_STATE_TYPE_INTERNAL_3DPERF; /* balanced states don't exist at the moment */ if (dpm_state == POWER_STATE_TYPE_BALANCED) - dpm_state = rdev->pm.dpm.ac_power ? - POWER_STATE_TYPE_PERFORMANCE : POWER_STATE_TYPE_BATTERY; + dpm_state = POWER_STATE_TYPE_PERFORMANCE; restart_search: /* Pick the best power state based on current conditions */ -- cgit v1.1 From b1a2ce825737b0165cc08e6f98f8c0ea1affdd60 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 20 Feb 2018 01:00:07 +0000 Subject: tools/libbpf: Avoid possibly using uninitialized variable Fixes a GCC maybe-uninitialized warning introduced by 48cca7e44f9f. "text" is only initialized inside the if statement so only print debug info there. Fixes: 48cca7e44f9f ("libbpf: add support for bpf_call") Signed-off-by: Jeremy Cline Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 97073d6..5bbbf28 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1060,11 +1060,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, prog->insns = new_insn; prog->main_prog_cnt = prog->insns_cnt; prog->insns_cnt = new_cnt; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, + prog->section_name); } insn = &prog->insns[relo->insn_idx]; insn->imm += prog->main_prog_cnt - relo->insn_idx; - pr_debug("added %zd insn from %s to prog %s\n", - text->insns_cnt, text->section_name, prog->section_name); return 0; } -- cgit v1.1 From 5893f6e8a87243c951ddcb0bb95842bd5aef4d8f Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Fri, 19 Jan 2018 11:21:04 -0500 Subject: drm/amdgpu: Add a missing lock for drm_mm_takedown Inside amdgpu_gtt_mgr_fini add a missing lock to maintain locking balance Signed-off-by: Mikita Lipski Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index e14ab34..7c2be32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) { struct amdgpu_gtt_mgr *mgr = man->priv; - + spin_lock(&mgr->lock); drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); kfree(mgr); -- cgit v1.1 From 09c381e0f34abaeff68ba5ac3d949928f32757c5 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Sat, 3 Feb 2018 15:19:20 -0500 Subject: drm/amdgpu: Unify the dm resume calls into one amdgpu_dm_display_resume is now called from dm_resume to unify DAL resume call into a single function call There is no more need to separately call 2 resume functions for DM. Initially they were separated to resume display state after cursor is pinned. But because there is no longer any corruption with the cursor - the calls can be merged into one function hook. Signed-off-by: Mikita Lipski Reviewed-by: Harry Wentland Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 --------- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 00a50cc..829dc2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2284,14 +2284,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } drm_modeset_unlock_all(dev); - } else { - /* - * There is no equivalent atomic helper to turn on - * display, so we defined our own function for this, - * once suspend resume is supported by the atomic - * framework this will be reworked - */ - amdgpu_dm_display_resume(adev); } } @@ -2726,7 +2718,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (amdgpu_device_has_dc_support(adev)) { if (drm_atomic_helper_resume(adev->ddev, state)) dev_info(adev->dev, "drm resume failed:%d\n", r); - amdgpu_dm_display_resume(adev); } else { drm_helper_resume_force_mode(adev->ddev); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1ce4c98..862835d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -629,11 +629,13 @@ static int dm_resume(void *handle) { struct amdgpu_device *adev = handle; struct amdgpu_display_manager *dm = &adev->dm; + int ret = 0; /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); - return 0; + ret = amdgpu_dm_display_resume(adev); + return ret; } int amdgpu_dm_display_resume(struct amdgpu_device *adev) -- cgit v1.1 From 4909c6de7d7ada493e1c2f0d8bf0145a750d2dd6 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Tue, 30 Jan 2018 11:46:16 -0500 Subject: drm/amd/display: VGA black screen from s3 when attached to hook [Description] For MST, DC already notify MST sink for MST mode, DC stll check DP SINK DPCD register to see if MST enabled. DP RX firmware may not handle this properly. Signed-off-by: Hersen Wu Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 61e8c3e..51f5a57 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1465,7 +1465,7 @@ void decide_link_settings(struct dc_stream_state *stream, /* MST doesn't perform link training for now * TODO: add MST specific link training routine */ - if (is_mst_supported(link)) { + if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { *link_setting = link->verified_link_cap; return; } -- cgit v1.1 From 5fe01793dd953ab947fababe8abaf5ed5258c8df Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 7 Feb 2018 12:46:42 +0100 Subject: KVM: s390: take care of clock-comparator sign control Missed when enabling the Multiple-epoch facility. If the facility is installed and the control is set, a sign based comaprison has to be performed. Right now we would inject wrong interrupts and ignore interrupt conditions. Also the sleep time is calculated in a wrong way. Signed-off-by: David Hildenbrand Message-Id: <20180207114647.6220-2-david@redhat.com> Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Cc: stable@vger.kernel.org Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 3f2c49b..b04616b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -169,8 +169,15 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) static int ckc_irq_pending(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) + const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + const u64 ckc = vcpu->arch.sie_block->ckc; + + if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { + if ((s64)ckc >= (s64)now) + return 0; + } else if (ckc >= now) { return 0; + } return ckc_interrupts_enabled(vcpu); } @@ -1047,13 +1054,19 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) static u64 __calculate_sltime(struct kvm_vcpu *vcpu) { - u64 now, cputm, sltime = 0; + const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + const u64 ckc = vcpu->arch.sie_block->ckc; + u64 cputm, sltime = 0; if (ckc_interrupts_enabled(vcpu)) { - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - /* already expired or overflow? */ - if (!sltime || vcpu->arch.sie_block->ckc <= now) + if (vcpu->arch.sie_block->gcr[0] & 0x0020000000000000ul) { + if ((s64)now < (s64)ckc) + sltime = tod_to_ns((s64)ckc - (s64)now); + } else if (now < ckc) { + sltime = tod_to_ns(ckc - now); + } + /* already expired */ + if (!sltime) return 0; if (cpu_timer_interrupts_enabled(vcpu)) { cputm = kvm_s390_get_cpu_timer(vcpu); -- cgit v1.1 From d16b52cb9cdb6f06dea8ab2f0a428e7d7f0b0a81 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 7 Feb 2018 12:46:44 +0100 Subject: KVM: s390: consider epoch index on hotplugged CPUs We must copy both, the epoch and the epoch_idx. Signed-off-by: David Hildenbrand Message-Id: <20180207114647.6220-4-david@redhat.com> Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Reviewed-by: Cornelia Huck Reviewed-by: Christian Borntraeger Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ba4c709..5b7fe80 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2389,6 +2389,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->lock); preempt_disable(); vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; + vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); if (!kvm_is_ucontrol(vcpu->kvm)) { -- cgit v1.1 From 1575767ef3cf5326701d2ae3075b7732cbc855e4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 7 Feb 2018 12:46:45 +0100 Subject: KVM: s390: consider epoch index on TOD clock syncs For now, we don't take care of over/underflows. Especially underflows are critical: Assume the epoch is currently 0 and we get a sync request for delta=1, meaning the TOD is moved forward by 1 and we have to fix it up by subtracting 1 from the epoch. Right now, this will leave the epoch index untouched, resulting in epoch=-1, epoch_idx=0, which is wrong. We have to take care of over and underflows, also for the VSIE case. So let's factor out calculation into a separate function. Signed-off-by: David Hildenbrand Message-Id: <20180207114647.6220-5-david@redhat.com> Reviewed-by: Christian Borntraeger Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger [use u8 for idx] --- arch/s390/kvm/kvm-s390.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5b7fe80..b07aa16 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -179,6 +179,28 @@ int kvm_arch_hardware_enable(void) static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); +static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) +{ + u8 delta_idx = 0; + + /* + * The TOD jumps by delta, we have to compensate this by adding + * -delta to the epoch. + */ + delta = -delta; + + /* sign-extension - we're adding to signed values below */ + if ((s64)delta < 0) + delta_idx = -1; + + scb->epoch += delta; + if (scb->ecd & ECD_MEF) { + scb->epdx += delta_idx; + if (scb->epoch < delta) + scb->epdx += 1; + } +} + /* * This callback is executed during stop_machine(). All CPUs are therefore * temporarily stopped. In order not to change guest behavior, we have to @@ -194,13 +216,17 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, unsigned long long *delta = v; list_for_each_entry(kvm, &vm_list, vm_list) { - kvm->arch.epoch -= *delta; kvm_for_each_vcpu(i, vcpu, kvm) { - vcpu->arch.sie_block->epoch -= *delta; + kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); + if (i == 0) { + kvm->arch.epoch = vcpu->arch.sie_block->epoch; + kvm->arch.epdx = vcpu->arch.sie_block->epdx; + } if (vcpu->arch.cputm_enabled) vcpu->arch.cputm_start += *delta; if (vcpu->arch.vsie_block) - vcpu->arch.vsie_block->epoch -= *delta; + kvm_clock_sync_scb(vcpu->arch.vsie_block, + *delta); } } return NOTIFY_OK; -- cgit v1.1 From 0e7def5fb0dc53ddbb9f62a497d15f1e11ccdc36 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 7 Feb 2018 12:46:43 +0100 Subject: KVM: s390: provide only a single function for setting the tod (fix SCK) Right now, SET CLOCK called in the guest does not properly take care of the epoch index, as the call goes via the old kvm_s390_set_tod_clock() interface. So the epoch index is neither reset to 0, if required, nor properly set to e.g. 0xff on negative values. Fix this by providing a single kvm_s390_set_tod_clock() function. Move Multiple-epoch facility handling into it. Signed-off-by: David Hildenbrand Message-Id: <20180207114647.6220-3-david@redhat.com> Reviewed-by: Christian Borntraeger Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 46 +++++++++++++++------------------------------- arch/s390/kvm/kvm-s390.h | 5 ++--- arch/s390/kvm/priv.c | 9 +++++---- 3 files changed, 22 insertions(+), 38 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b07aa16..77d7818 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -928,12 +928,9 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) return -EFAULT; - if (test_kvm_facility(kvm, 139)) - kvm_s390_set_tod_clock_ext(kvm, >od); - else if (gtod.epoch_idx == 0) - kvm_s390_set_tod_clock(kvm, gtod.tod); - else + if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; + kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -958,13 +955,14 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) { - u64 gtod; + struct kvm_s390_vm_tod_clock gtod = { 0 }; - if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) + if (copy_from_user(>od.tod, (void __user *)attr->addr, + sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, gtod); - VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); + kvm_s390_set_tod_clock(kvm, >od); + VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -3048,8 +3046,8 @@ retry: return 0; } -void kvm_s390_set_tod_clock_ext(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +void kvm_s390_set_tod_clock(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; struct kvm_s390_tod_clock_ext htod; @@ -3061,10 +3059,12 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, get_tod_clock_ext((char *)&htod); kvm->arch.epoch = gtod->tod - htod.tod; - kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; - - if (kvm->arch.epoch > gtod->tod) - kvm->arch.epdx -= 1; + kvm->arch.epdx = 0; + if (test_kvm_facility(kvm, 139)) { + kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; + if (kvm->arch.epoch > gtod->tod) + kvm->arch.epdx -= 1; + } kvm_s390_vcpu_block_all(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -3077,22 +3077,6 @@ void kvm_s390_set_tod_clock_ext(struct kvm *kvm, mutex_unlock(&kvm->lock); } -void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) -{ - struct kvm_vcpu *vcpu; - int i; - - mutex_lock(&kvm->lock); - preempt_disable(); - kvm->arch.epoch = tod - get_tod_clock(); - kvm_s390_vcpu_block_all(kvm); - kvm_for_each_vcpu(i, vcpu, kvm) - vcpu->arch.sie_block->epoch = kvm->arch.epoch; - kvm_s390_vcpu_unblock_all(kvm); - preempt_enable(); - mutex_unlock(&kvm->lock); -} - /** * kvm_arch_fault_in_page - fault-in guest page if necessary * @vcpu: The corresponding virtual cpu diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3c0a975..f55ac0e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -281,9 +281,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock_ext(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); -void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); +void kvm_s390_set_tod_clock(struct kvm *kvm, + const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index a74578c..f0b4185 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -85,9 +85,10 @@ int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) /* Handle SCK (SET CLOCK) interception */ static int handle_set_clock(struct kvm_vcpu *vcpu) { + struct kvm_s390_vm_tod_clock gtod = { 0 }; int rc; u8 ar; - u64 op2, val; + u64 op2; vcpu->stat.instruction_sck++; @@ -97,12 +98,12 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) op2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (op2 & 7) /* Operand must be on a doubleword boundary */ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); + rc = read_guest(vcpu, op2, ar, >od.tod, sizeof(gtod.tod)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); - kvm_s390_set_tod_clock(vcpu->kvm, val); + VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); + kvm_s390_set_tod_clock(vcpu->kvm, >od); kvm_s390_set_psw_cc(vcpu, 0); return 0; -- cgit v1.1 From 8babd44d2079079f9d5a4aca7005aed80236efe0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 20 Dec 2017 08:48:24 +0200 Subject: net/mlx5e: Fix TCP checksum in LRO buffers When receiving an LRO packet, the checksum field is set by the hardware to the checksum of the first coalesced packet. Obviously, this checksum is not valid for the merged LRO packet and should be fixed. We can use the CQE checksum which covers the checksum of the entire merged packet TCP payload to help us calculate the checksum incrementally. Tested by sending IPv4/6 traffic with LRO enabled, RX checksum disabled and watching nstat checksum error counters (in addition to the obvious bandwidth drop caused by checksum errors). This bug is usually "hidden" since LRO packets would go through the CHECKSUM_UNNECESSARY flow which does not validate the packet checksum. It's important to note that previous to this patch, LRO packets provided with CHECKSUM_UNNECESSARY are indeed packets with a correct validated checksum (even though the checksum inside the TCP header is incorrect), since the hardware LRO aggregation is terminated upon receiving a packet with bad checksum. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 49 ++++++++++++++++++------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0d4bb06..e5c3ab4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -546,20 +547,33 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) return true; } +static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) +{ + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); + + tcp->check = 0; + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } +} + static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct tcphdr *tcp; int network_depth = 0; + __wsum check; __be16 proto; u16 tot_len; void *ip_p; - u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || - (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); - proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); tot_len = cqe_bcnt - network_depth; @@ -576,23 +590,30 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr, + tot_len - sizeof(struct iphdr), + IPPROTO_TCP, check); } else { + u16 payload_len = tot_len - sizeof(struct ipv6hdr); struct ipv6hdr *ipv6 = ip_p; tcp = ip_p + sizeof(struct ipv6hdr); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; ipv6->hop_limit = cqe->lro_min_ttl; - ipv6->payload_len = cpu_to_be16(tot_len - - sizeof(struct ipv6hdr)); - } - - tcp->psh = get_cqe_lro_tcppsh(cqe); - - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; + ipv6->payload_len = cpu_to_be16(payload_len); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len, + IPPROTO_TCP, check); } } -- cgit v1.1 From ef7a3518f7dd4f4cf5e5b5358c93d1eb78df28fb Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 7 Dec 2017 17:26:33 +0200 Subject: net/mlx5e: Fix loopback self test when GRO is off When GRO is off, the transport header pointer in sk_buff is initialized to network's header. To find the udp header, instead of using udp_hdr() which assumes skb_network_header was set, manually calculate the udp header offset. Fixes: 0952da791c97 ("net/mlx5e: Add support for loopback selftest") Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5a46082..7079764 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -216,7 +216,8 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, if (iph->protocol != IPPROTO_UDP) goto out; - udph = udp_hdr(skb); + /* Don't assume skb_transport_header() was set */ + udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl); if (udph->dest != htons(9)) goto out; -- cgit v1.1 From f600c6088018d1dbc5777d18daa83660f7ea4a64 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 25 Jan 2018 11:18:09 +0200 Subject: net/mlx5e: Verify inline header size do not exceed SKB linear size Driver tries to copy at least MLX5E_MIN_INLINE bytes into the control segment of the WQE. It assumes that the linear part contains at least MLX5E_MIN_INLINE bytes, which can be wrong. Cited commit verified that driver will not copy more bytes into the inline header part that the actual size of the packet. Re-factor this check to make sure we do not exceed the linear part as well. This fix is aligned with the current driver's assumption that the entire L2 will be present in the linear part of the SKB. Fixes: 6aace17e64f4 ("net/mlx5e: Fix inline header size for small packets") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 569b42a..11b4f10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -176,7 +176,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, default: hlen = mlx5e_skb_l2_header_offset(skb); } - return min_t(u16, hlen, skb->len); + return min_t(u16, hlen, skb_headlen(skb)); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, -- cgit v1.1 From 9afe9a5353778994d4396f3d5ff639221bfa5cc9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Jan 2018 13:19:51 +0000 Subject: net/mlx5e: Eliminate build warnings on no previous prototype Fix these gcc warnings on drivers/net/ethernet/mellanox/mlx5: [..]/core/lib/clock.c:454:6: warning: no previous prototype for 'mlx5_init_clock' [-Wmissing-prototypes] [..]/core/lib/clock.c:510:6: warning: no previous prototype for 'mlx5_cleanup_clock' [-Wmissing-prototypes] [..]/core/en_main.c:3141:5: warning: no previous prototype for 'mlx5e_setup_tc' [-Wmissing-prototypes] Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 47bab84..a64b922 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2994,8 +2994,8 @@ static int mlx5e_setup_tc_block(struct net_device *dev, } #endif -int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { switch (type) { #ifdef CONFIG_MLX5_ESWITCH diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index e159243..8570355 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -34,6 +34,7 @@ #include #include #include "en.h" +#include "clock.h" enum { MLX5_CYCLES_SHIFT = 23 -- cgit v1.1 From 4f5c02f949973b7c9dfa8a7c23d766b1208d208f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Jan 2018 13:29:53 +0000 Subject: net/mlx5: Address static checker warnings on non-constant initializers Address these sparse warnings on drivers/net/ethernet/mellanox/mlx5 [..]/core/diag/fs_tracepoint.c:99:53: warning: non-constant initializer for static object [..]/core/diag/fs_tracepoint.c:102:53: warning: non-constant initializer for static object etc Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0be4575..fd50916 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -96,10 +96,10 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p, "%pI4"); } else if (ethertype.v == ETH_P_IPV6) { static const struct in6_addr full_ones = { - .in6_u.u6_addr32 = {htonl(0xffffffff), - htonl(0xffffffff), - htonl(0xffffffff), - htonl(0xffffffff)}, + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, }; DECLARE_MASK_VAL(struct in6_addr, src_ipv6); DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); -- cgit v1.1 From 001a2fc0c8cc29241305e44ffbce52d1daf8782b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 30 Jan 2018 13:16:58 +0200 Subject: net/mlx5e: Return error if prio is specified when offloading eswitch vlan push This isn't supported when we emulate eswitch vlan push action which is the current state of things. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd98b0d..fa86a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2529,7 +2529,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || + tcf_vlan_push_prio(a)) return -EOPNOTSUPP; attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; -- cgit v1.1 From 2f0db87901698cd73d828cc6fb1957b8916fc911 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Jan 2018 18:00:41 +0200 Subject: net/mlx5e: Specify numa node when allocating drop rq When allocating a drop rq, no numa node is explicitly set which means allocations are done on node zero. This is not necessarily the nearest numa node to the HCA, and even worse, might even be a memoryless numa node. Choose the numa_node given to us by the pci device in order to properly allocate the coherent dma memory instead of assuming zero is valid. Fixes: 556dd1b9c313 ("net/mlx5e: Set drop RQ's necessary parameters only") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a64b922..da94c8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1768,13 +1768,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, param->wq.linear = 1; } -static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) +static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); } static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, @@ -2634,6 +2637,9 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev); + return mlx5e_alloc_cq_common(mdev, param, cq); } @@ -2645,7 +2651,7 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(&rq_param); + mlx5e_build_drop_rq_param(mdev, &rq_param); err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) -- cgit v1.1 From c67f100edae0d2f43e8b35955f7710d702efd590 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 2 Feb 2018 09:32:53 -0600 Subject: net/mlx5: Use 128B cacheline size for 128B or larger cachelines The adapter uses the cache_line_128byte setting to set the bounds for end padding. On systems where the cacheline size is greater than 128B use 128B instead of the default of 64B. This results in fewer partial cacheline writes. There's a 50% chance it will pad to the end of a 256B cache line vs only 25% when using 64B. Fixes: f32f5bd2eb7e ("net/mlx5: Configure cache line size for start and end padding") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2ef641c9..ae391e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -551,7 +551,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, cache_line_128byte, - cache_line_size() == 128 ? 1 : 0); + cache_line_size() >= 128 ? 1 : 0); if (MLX5_CAP_GEN_MAX(dev, dct)) MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); -- cgit v1.1 From 96de67a77293b4da48a05f6ec0385f60006a7ba6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Feb 2018 13:26:06 +0200 Subject: net/mlx5: Add header re-write to the checks for conflicting actions We can't allow only some of the rules sharing an FTE to ask for header re-write, add it to the conflicting action checks. Fixes: 0d235c3fabb7 ('net/mlx5: Add hash table to search FTEs in a flow-group') Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c025c98..6caa4a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1429,7 +1429,8 @@ static bool check_conflicting_actions(u32 action1, u32 action2) if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_ENCAP | - MLX5_FLOW_CONTEXT_ACTION_DECAP)) + MLX5_FLOW_CONTEXT_ACTION_DECAP | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) return true; return false; -- cgit v1.1 From 26a0f6e82997d5c8345782b55d3a7894421f777f Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 31 Jan 2018 09:36:29 +0200 Subject: net/mlx5: E-Switch, Fix drop counters use before creation First use of drop counters happens in esw_apply_vport_conf function, while they are allocated later in the flow. Fix that by moving esw_vport_create_drop_counters function to be called before the first use. Fixes: b8a0dbe3a90b ("net/mlx5e: E-switch, Add steering drop counters") Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5ecf2cd..c2b1d7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1529,6 +1529,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + /* Create steering drop counters for ingress and egress ACLs */ + if (vport_num && esw->mode == SRIOV_LEGACY) + esw_vport_create_drop_counters(vport); + /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); @@ -1545,10 +1549,6 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, if (!vport_num) vport->info.trusted = true; - /* create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) - esw_vport_create_drop_counters(vport); - esw_vport_change_handle_locked(vport); esw->enabled_vports++; -- cgit v1.1 From 9238e380e823a39983ee8d6b6ee8d1a9c4ba8a65 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 6 Feb 2018 10:52:19 +0200 Subject: net/mlx5: Fix error handling when adding flow rules If building match list or adding existing fg fails when node is locked, function returned without unlocking it. This happened if node version changed or adding existing fg returned with EAGAIN after jumping to search_again_locked label. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Vlad Buslov Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6caa4a7..31fc2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1759,8 +1759,11 @@ search_again_locked: /* Collect all fgs which has a matching match_criteria */ err = build_match_list(&match_head, ft, spec); - if (err) + if (err) { + if (take_write) + up_write_ref_node(&ft->node); return ERR_PTR(err); + } if (!take_write) up_read_ref_node(&ft->node); @@ -1769,8 +1772,11 @@ search_again_locked: dest_num, version); free_match_list(&match_head); if (!IS_ERR(rule) || - (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) + up_write_ref_node(&ft->node); return rule; + } if (!take_write) { nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); -- cgit v1.1 From 521ca5a9859a870e354d1a6b84a6ff4c07bbceb0 Mon Sep 17 00:00:00 2001 From: "Juan J. Alvarez" Date: Thu, 15 Feb 2018 12:49:51 -0600 Subject: powerpc/eeh: Fix crashes in eeh_report_resume() The notify_resume() callback in eeh_ops is NULL on powernv, leading to crashes: NIP (null) LR eeh_report_resume+0x218/0x220 Call Trace: eeh_report_resume+0x1f0/0x220 (unreliable) eeh_pe_dev_traverse+0x98/0x170 eeh_handle_normal_event+0x3f4/0x650 eeh_handle_event+0x54/0x380 eeh_event_handler+0x14c/0x210 kthread+0x168/0x1b0 ret_from_kernel_thread+0x5c/0xb4 Fix it by adding a check before calling it. Fixes: 856e1eb9bdd4 ("PCI/AER: Add uevents in AER and EEH error/resume") Signed-off-by: Juan J. Alvarez Reviewed-by: Bryant G. Ly Tested-by: Carol L. Soto Reviewed-by: Andrew Donnellan Tested-by: Mauro S. M. Rodrigues Acked-by: Michael Neuling [mpe: Rewrite change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index beea218..0c0b66f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -384,7 +384,8 @@ static void *eeh_report_resume(void *data, void *userdata) eeh_pcid_put(dev); pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); #endif return NULL; } -- cgit v1.1 From 423688abd9ab654044bddd82eb5983189eb9630d Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 16 Feb 2018 14:01:18 +0100 Subject: ocxl: Fix potential bad errno on irq allocation Fix some issues found by a static checker: When allocating an AFU interrupt, if the driver cannot copy the output parameters to userland, the errno value was not set to EFAULT Remove a (now) useless cast. Reported-by: Dan Carpenter Signed-off-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 2dd2db9..337462e 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -133,8 +133,10 @@ static long afu_ioctl(struct file *file, unsigned int cmd, if (!rc) { rc = copy_to_user((u64 __user *) args, &irq_offset, sizeof(irq_offset)); - if (rc) + if (rc) { ocxl_afu_irq_free(ctx, irq_offset); + return -EFAULT; + } } break; @@ -329,7 +331,7 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count, used += sizeof(header); - rc = (ssize_t) used; + rc = used; return rc; } -- cgit v1.1 From b3b12ea3661958bc093e258b7c0dd0a13bdcc719 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 19 Feb 2018 18:59:36 +0100 Subject: drm/edid: quirk Oculus Rift headsets as non-desktop This uses the EDID info from Oculus Rift DK1 (OVR-0001), DK2 (OVR-0003), and CV1 (OVR-0004) to mark them as non-desktop. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ddd5379..d6fa56b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -162,6 +162,11 @@ static const struct edid_quirk { /* HTC Vive VR Headset */ { "HVR", 0xaa01, EDID_QUIRK_NON_DESKTOP }, + + /* Oculus Rift DK1, DK2, and CV1 VR Headsets */ + { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP }, + { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP }, + { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP }, }; /* -- cgit v1.1 From 90eda8fc8016cfe39e2c73222e14665f0e5dabb1 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 19 Feb 2018 18:59:37 +0100 Subject: drm/edid: quirk Windows Mixed Reality headsets as non-desktop This uses the EDID info from Lenovo Explorer (LEN-b800), Acer AH100 (ACR-7fce), and Samsung Odyssey (SEC-144a) to mark them as non-desktop. The other entries are for the HP Windows Mixed Reality Headset (HPN-3515), the Fujitsu Windows Mixed Reality headset (FUJ-1970), the Dell Visor (DEL-7fce), and the ASUS HC102 (AUS-c102). They are not tested with real hardware, but listed as HMD monitors alongside the tested headsets in the Microsoft HololensSensors driver package. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d6fa56b..bfd89b4 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -167,6 +167,16 @@ static const struct edid_quirk { { "OVR", 0x0001, EDID_QUIRK_NON_DESKTOP }, { "OVR", 0x0003, EDID_QUIRK_NON_DESKTOP }, { "OVR", 0x0004, EDID_QUIRK_NON_DESKTOP }, + + /* Windows Mixed Reality Headsets */ + { "ACR", 0x7fce, EDID_QUIRK_NON_DESKTOP }, + { "HPN", 0x3515, EDID_QUIRK_NON_DESKTOP }, + { "LEN", 0x0408, EDID_QUIRK_NON_DESKTOP }, + { "LEN", 0xb800, EDID_QUIRK_NON_DESKTOP }, + { "FUJ", 0x1970, EDID_QUIRK_NON_DESKTOP }, + { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP }, + { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP }, + { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP }, }; /* -- cgit v1.1 From ccffc9ebfa66e3f2cc5e17b2579202786050b32e Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 19 Feb 2018 18:59:38 +0100 Subject: drm/edid: quirk Sony PlayStation VR headset as non-desktop This uses the EDID info from the Sony PlayStation VR headset, when connected directly, to mark it as non-desktop. Since the connection box (product id b403) defaults to HDMI pass-through to the TV, it is not marked as non-desktop. Signed-off-by: Philipp Zabel Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index bfd89b4..9796c29 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -177,6 +177,9 @@ static const struct edid_quirk { { "DEL", 0x7fce, EDID_QUIRK_NON_DESKTOP }, { "SEC", 0x144a, EDID_QUIRK_NON_DESKTOP }, { "AUS", 0xc102, EDID_QUIRK_NON_DESKTOP }, + + /* Sony PlayStation VR Headset */ + { "SNY", 0x0704, EDID_QUIRK_NON_DESKTOP }, }; /* -- cgit v1.1 From 5ae437ad5a2ed573b1ebb04e0afa70b8869f88dd Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Mon, 19 Feb 2018 21:32:51 +0100 Subject: net: sched: report if filter is too large to dump So far, if the filter was too large to fit in the allocated skb, the kernel did not return any error and stopped dumping. Modify the dumper so that it returns -EMSGSIZE when a filter fails to dump and it is the first filter in the skb. If we are not first, we will get a next chance with more room. I understand this is pretty near to being an API change, but the original design (silent truncation) can be considered a bug. Note: The error case can happen pretty easily if you create a filter with 32 actions and have 4kb pages. Also recent versions of iproute try to be clever with their buffer allocation size, which in turn leads to Signed-off-by: Roman Kapl Acked-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7dc727..247b7cc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1397,13 +1397,18 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, - index_start, &index)) + index_start, &index)) { + err = -EMSGSIZE; break; + } } cb->args[0] = index; out: + /* If we did no progress, the error (EMSGSIZE) is real */ + if (skb->len == 0 && err) + return err; return skb->len; } -- cgit v1.1 From 30a3317ddc2427d173d8bcffaa3f41a61eb66560 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Feb 2018 16:20:08 +0200 Subject: drm/tve200: fix kernel-doc documentation comment include The DOC: line acts as an identifier for the :doc: include. Fixes: ./drivers/gpu/drm/tve200/tve200_drv.c:1: warning: no structured comments found Cc: Linus Walleij Reviewed-by: Daniel Vetter Reviewed-by: Linus Walleij Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180220142008.9330-1-jani.nikula@intel.com --- Documentation/gpu/tve200.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/tve200.rst b/Documentation/gpu/tve200.rst index 69b17b3..152ea93 100644 --- a/Documentation/gpu/tve200.rst +++ b/Documentation/gpu/tve200.rst @@ -3,4 +3,4 @@ ================================== .. kernel-doc:: drivers/gpu/drm/tve200/tve200_drv.c - :doc: Faraday TV Encoder 200 + :doc: Faraday TV Encoder TVE200 DRM Driver -- cgit v1.1 From 88e77dc6a354095ddaaae715bc0d3b55702fa3db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Feb 2018 16:01:36 +0100 Subject: locking/mutex: Add comment to __mutex_owner() to deter usage Attempt to deter usage, this is not a public interface. It is entirely possible to implement a conformant mutex without having this owner field (in fact, we used to have that). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f25c134..cb3bbed 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -66,6 +66,11 @@ struct mutex { #endif }; +/* + * Internal helper function; C doesn't allow us to hide it :/ + * + * DO NOT USE (outside of mutex code). + */ static inline struct task_struct *__mutex_owner(struct mutex *lock) { return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07); -- cgit v1.1 From 9e0e3c5130e949c389caabc8033e9799b129e429 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jan 2018 22:34:34 +0100 Subject: x86/speculation, objtool: Annotate indirect calls/jumps for objtool Annotate the indirect calls/jumps in the CALL_NOSPEC/JUMP_NOSPEC alternatives. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ec90c32..1aad6c7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -68,6 +68,18 @@ .endm /* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +.macro ANNOTATE_RETPOLINE_SAFE + .Lannotate_\@: + .pushsection .discard.retpoline_safe + _ASM_PTR .Lannotate_\@ + .popsection +.endm + +/* * These are the bare retpoline primitives for indirect jmp and call. * Do not use these directly; they only exist to make the ALTERNATIVE * invocation below less ugly. @@ -103,9 +115,9 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(jmp *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *\reg #endif @@ -114,9 +126,9 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(call *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD #else call *\reg #endif @@ -144,6 +156,12 @@ ".long 999b - .\n\t" \ ".popsection\n\t" +#define ANNOTATE_RETPOLINE_SAFE \ + "999:\n\t" \ + ".pushsection .discard.retpoline_safe\n\t" \ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + #if defined(CONFIG_X86_64) && defined(RETPOLINE) /* @@ -153,6 +171,7 @@ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE) -- cgit v1.1 From 3010a0663fd949d122eca0561b06b0a9453f7866 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jan 2018 16:58:11 +0100 Subject: x86/paravirt, objtool: Annotate indirect calls Paravirt emits indirect calls which get flagged by objtool retpoline checks, annotate it away because all these indirect calls will be patched out before we start userspace. This patching happens through alternative_instructions() -> apply_paravirt() -> pv_init_ops.patch() which will eventually end up in paravirt_patch_default(). This function _will_ write direct alternatives. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 17 +++++++++++++---- arch/x86/include/asm/paravirt_types.h | 5 ++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 554841f..c83a2f4 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,6 +7,7 @@ #ifdef CONFIG_PARAVIRT #include #include +#include #include @@ -879,23 +880,27 @@ extern void default_banner(void); #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx #else /* !CONFIG_X86_32 */ @@ -917,21 +922,25 @@ extern void default_banner(void); */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ ) #define GET_CR2_INTO_RAX \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f624f1f..180bc0b 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -43,6 +43,7 @@ #include #include #include +#include struct page; struct thread_struct; @@ -392,7 +393,9 @@ int paravirt_disable_iospace(void); * offset into the paravirt_patch_template structure, and can therefore be * freely converted back into a structure offset. */ -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" +#define PARAVIRT_CALL \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%c[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt -- cgit v1.1 From bd89004f6305cbf7352238f61da093207ee518d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 10:38:09 +0100 Subject: x86/boot, objtool: Annotate indirect jump in secondary_startup_64() The objtool retpoline validation found this indirect jump. Seeing how it's on CPU bringup before we run userspace it should be safe, annotate it. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f..0f545b3 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@ #include #include "../entry/calling.h" #include +#include #ifdef CONFIG_PARAVIRT #include @@ -134,6 +135,7 @@ ENTRY(secondary_startup_64) /* Ensure I am executing from virtual addresses */ movq $1f, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: UNWIND_HINT_EMPTY -- cgit v1.1 From 531bb52a869a9c6e08c8d17ba955fcbfc18037ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jan 2018 16:18:50 +0100 Subject: x86/mm/sme, objtool: Annotate indirect call in sme_encrypt_execute() This is boot code and thus Spectre-safe: we run this _way_ before userspace comes along to have a chance to poison our branch predictor. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tom Lendacky Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt_boot.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 01f682c..40a6085 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -15,6 +15,7 @@ #include #include #include +#include .text .code64 @@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute) movq %rax, %r8 /* Workarea encryption routine */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ + ANNOTATE_RETPOLINE_SAFE call *%rax /* Call the encryption routine */ pop %r12 -- cgit v1.1 From 43a4525f80534530077683f6472d8971646b0ace Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 17:16:32 +0100 Subject: objtool: Use existing global variables for options Use the existing global variables instead of passing them around and creating duplicate global variables. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 2 +- tools/objtool/builtin-orc.c | 6 +----- tools/objtool/builtin.h | 5 +++++ tools/objtool/check.c | 5 ++--- tools/objtool/check.h | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 57254f5..8d0986d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -53,5 +53,5 @@ int cmd_check(int argc, const char **argv) objname = argv[0]; - return check(objname, no_fp, no_unreachable, false); + return check(objname, false); } diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 91e8e19..77ea2b9 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -25,7 +25,6 @@ */ #include -#include #include "builtin.h" #include "check.h" @@ -36,9 +35,6 @@ static const char *orc_usage[] = { NULL, }; -extern const struct option check_options[]; -extern bool no_fp, no_unreachable; - int cmd_orc(int argc, const char **argv) { const char *objname; @@ -54,7 +50,7 @@ int cmd_orc(int argc, const char **argv) objname = argv[0]; - return check(objname, no_fp, no_unreachable, true); + return check(objname, true); } if (!strcmp(argv[0], "dump")) { diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index dd52606..f166ea1 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -17,6 +17,11 @@ #ifndef _BUILTIN_H #define _BUILTIN_H +#include + +extern const struct option check_options[]; +extern bool no_fp, no_unreachable; + extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a8cb69a..ab6f0de 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -18,6 +18,7 @@ #include #include +#include "builtin.h" #include "check.h" #include "elf.h" #include "special.h" @@ -33,7 +34,6 @@ struct alternative { }; const char *objname; -static bool no_fp; struct cfi_state initial_func_cfi; struct instruction *find_insn(struct objtool_file *file, @@ -2022,13 +2022,12 @@ static void cleanup(struct objtool_file *file) elf_close(file->elf); } -int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc) +int check(const char *_objname, bool orc) { struct objtool_file file; int ret, warnings = 0; objname = _objname; - no_fp = _no_fp; file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); if (!file.elf) diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 23a1d06..936255b 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -63,7 +63,7 @@ struct objtool_file { bool ignore_unreachables, c_file, hints; }; -int check(const char *objname, bool no_fp, bool no_unreachable, bool orc); +int check(const char *objname, bool orc); struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); -- cgit v1.1 From b5bc2231b8ad4387c9641f235ca0ad8cd300b6df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jan 2018 10:24:06 +0100 Subject: objtool: Add retpoline validation David requested a objtool validation pass for CONFIG_RETPOLINE=y enabled builds, where it validates no unannotated indirect jumps or calls are left. Add an additional .discard.retpoline_safe section to allow annotating the few indirect sites that are required and safe. Requested-by: David Woodhouse Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Woodhouse Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- scripts/Makefile.build | 4 ++ tools/objtool/builtin-check.c | 3 +- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 86 ++++++++++++++++++++++++++++++++++++++++++- tools/objtool/check.h | 1 + 5 files changed, 93 insertions(+), 3 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 47cddf3..53d862a 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -264,6 +264,10 @@ objtool_args += --no-unreachable else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif +ifdef CONFIG_RETPOLINE + objtool_args += --retpoline +endif + ifdef CONFIG_MODVERSIONS objtool_o = $(@D)/.tmp_$(@F) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 8d0986d..dd6bcd6 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -29,7 +29,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable; +bool no_fp, no_unreachable, retpoline; static const char * const check_usage[] = { "objtool check [] file.o", @@ -39,6 +39,7 @@ static const char * const check_usage[] = { const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index f166ea1..7b6addf 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -20,7 +20,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable; +extern bool no_fp, no_unreachable, retpoline; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ab6f0de..5e5db7b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -497,6 +497,7 @@ static int add_jump_destinations(struct objtool_file *file) * disguise, so convert them accordingly. */ insn->type = INSN_JUMP_DYNAMIC; + insn->retpoline_safe = true; continue; } else { /* sibling call */ @@ -548,7 +549,8 @@ static int add_call_destinations(struct objtool_file *file) if (!insn->call_dest && !insn->ignore) { WARN_FUNC("unsupported intra-function call", insn->sec, insn->offset); - WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); + if (retpoline) + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); return -1; } @@ -1108,6 +1110,54 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } +static int read_retpoline_hints(struct objtool_file *file) +{ + struct section *sec, *relasec; + struct instruction *insn; + struct rela *rela; + int i; + + sec = find_section_by_name(file->elf, ".discard.retpoline_safe"); + if (!sec) + return 0; + + relasec = sec->rela; + if (!relasec) { + WARN("missing .rela.discard.retpoline_safe section"); + return -1; + } + + if (sec->len % sizeof(unsigned long)) { + WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long)); + return -1; + } + + for (i = 0; i < sec->len / sizeof(unsigned long); i++) { + rela = find_rela_by_dest(sec, i * sizeof(unsigned long)); + if (!rela) { + WARN("can't find rela for retpoline_safe[%d]", i); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("can't find insn for retpoline_safe[%d]", i); + return -1; + } + + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) { + WARN_FUNC("retpoline_safe hint not a indirect jump/call", + insn->sec, insn->offset); + return -1; + } + + insn->retpoline_safe = true; + } + + return 0; +} + static int decode_sections(struct objtool_file *file) { int ret; @@ -1146,6 +1196,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_retpoline_hints(file); + if (ret) + return ret; + return 0; } @@ -1891,6 +1945,29 @@ static int validate_unwind_hints(struct objtool_file *file) return warnings; } +static int validate_retpoline(struct objtool_file *file) +{ + struct instruction *insn; + int warnings = 0; + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) + continue; + + if (insn->retpoline_safe) + continue; + + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, + insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + + warnings++; + } + + return warnings; +} + static bool is_kasan_insn(struct instruction *insn) { return (insn->type == INSN_CALL && @@ -2051,6 +2128,13 @@ int check(const char *_objname, bool orc) if (list_empty(&file.insn_list)) goto out; + if (retpoline) { + ret = validate_retpoline(&file); + if (ret < 0) + return ret; + warnings += ret; + } + ret = validate_functions(&file); if (ret < 0) goto out; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 936255b..c6b68fc 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -45,6 +45,7 @@ struct instruction { unsigned char type; unsigned long immediate; bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; + bool retpoline_safe; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; -- cgit v1.1 From ca41b97ed9124fd62323a162de5852f6e28f94b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 31 Jan 2018 10:18:28 +0100 Subject: objtool: Add module specific retpoline rules David allowed retpolines in .init.text, except for modules, which will trip up objtool retpoline validation, fix that. Requested-by: David Woodhouse Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Acked-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- scripts/Makefile.build | 2 ++ tools/objtool/builtin-check.c | 3 ++- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 9 +++++++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 53d862a..ce0fc4d 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -256,6 +256,8 @@ __objtool_obj := $(objtree)/tools/objtool/objtool objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) +objtool_args += $(if $(part-of-module), --module,) + ifndef CONFIG_FRAME_POINTER objtool_args += --no-fp endif diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index dd6bcd6..694abc6 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -29,7 +29,7 @@ #include "builtin.h" #include "check.h" -bool no_fp, no_unreachable, retpoline; +bool no_fp, no_unreachable, retpoline, module; static const char * const check_usage[] = { "objtool check [] file.o", @@ -40,6 +40,7 @@ const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 7b6addf..28ff40e 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -20,7 +20,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline; +extern bool no_fp, no_unreachable, retpoline, module; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5e5db7b..472e64e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1958,6 +1958,15 @@ static int validate_retpoline(struct objtool_file *file) if (insn->retpoline_safe) continue; + /* + * .init.text code is ran before userspace and thus doesn't + * strictly need retpolines, except for modules which are + * loaded late, they very much do need retpoline in their + * .init.text + */ + if (!strcmp(insn->sec->name, ".init.text") && !module) + continue; + WARN_FUNC("indirect %s found in RETPOLINE build", insn->sec, insn->offset, insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); -- cgit v1.1 From fba4adbbf670577e605f9ad306629db6031cd48b Mon Sep 17 00:00:00 2001 From: Ben Gardner Date: Wed, 14 Feb 2018 09:29:52 -0600 Subject: i2c: designware: must wait for enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One I2C bus on my Atom E3845 board has been broken since 4.9. It has two devices, both declared by ACPI and with built-in drivers. There are two back-to-back transactions originating from the kernel, one targeting each device. The first transaction works, the second one locks up the I2C controller. The controller never recovers. These kernel logs show up whenever an I2C transaction is attempted after this failure. i2c-designware-pci 0000:00:18.3: timeout in disabling adapter i2c-designware-pci 0000:00:18.3: timeout waiting for bus ready Waiting for the I2C controller status to indicate that it is enabled before programming it fixes the issue. I have tested this patch on 4.14 and 4.15. Fixes: commit 2702ea7dbec5 ("i2c: designware: wait for disable/enable only if necessary") Cc: linux-stable #4.13+ Signed-off-by: Ben Gardner Acked-by: Jarkko Nikula Reviewed-by: José Roberto de Souza Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index ae69188..55926ef 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -209,7 +209,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) i2c_dw_disable_int(dev); /* Enable the adapter */ - __i2c_dw_enable(dev, true); + __i2c_dw_enable_and_wait(dev, true); /* Clear and enable interrupts */ dw_readl(dev, DW_IC_CLR_INTR); -- cgit v1.1 From 15407798835a94f0936c7cbabb2f611bf20f467a Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 16 Feb 2018 11:24:29 +0200 Subject: i2c: i801: Add missing documentation entries for Braswell and Kaby Lake Commits adding PCI IDs for Intel Braswell and Kaby Lake PCH-H lacked the respective Kconfig and Documentation/i2c/busses/i2c-i801 change. Add them now. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801 | 2 ++ drivers/i2c/busses/Kconfig | 2 ++ drivers/i2c/busses/i2c-i801.c | 1 + 3 files changed, 5 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index d477024..65514c2 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -28,8 +28,10 @@ Supported adapters: * Intel Wildcat Point (PCH) * Intel Wildcat Point-LP (PCH) * Intel BayTrail (SOC) + * Intel Braswell (SOC) * Intel Sunrise Point-H (PCH) * Intel Sunrise Point-LP (PCH) + * Intel Kaby Lake-H (PCH) * Intel DNV (SOC) * Intel Broxton (SOC) * Intel Lewisburg (PCH) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a9805c7..e2954fb 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -123,8 +123,10 @@ config I2C_I801 Wildcat Point (PCH) Wildcat Point-LP (PCH) BayTrail (SOC) + Braswell (SOC) Sunrise Point-H (PCH) Sunrise Point-LP (PCH) + Kaby Lake-H (PCH) DNV (SOC) Broxton (SOC) Lewisburg (PCH) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 8eac00e..692b341 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -58,6 +58,7 @@ * Wildcat Point (PCH) 0x8ca2 32 hard yes yes yes * Wildcat Point-LP (PCH) 0x9ca2 32 hard yes yes yes * BayTrail (SOC) 0x0f12 32 hard yes yes yes + * Braswell (SOC) 0x2292 32 hard yes yes yes * Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes -- cgit v1.1 From 85c615eb52222bc5fab6c7190d146bc59fac289e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 21:58:21 +0100 Subject: x86/oprofile: Fix bogus GCC-8 warning in nmi_setup() GCC-8 shows a warning for the x86 oprofile code that copies per-CPU data from CPU 0 to all other CPUs, which when building a non-SMP kernel turns into a memcpy() with identical source and destination pointers: arch/x86/oprofile/nmi_int.c: In function 'mux_clone': arch/x86/oprofile/nmi_int.c:285:2: error: 'memcpy' source argument is the same as destination [-Werror=restrict] memcpy(per_cpu(cpu_msrs, cpu).multiplex, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ per_cpu(cpu_msrs, 0).multiplex, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sizeof(struct op_msr) * model->num_virt_counters); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/oprofile/nmi_int.c: In function 'nmi_setup': arch/x86/oprofile/nmi_int.c:466:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] arch/x86/oprofile/nmi_int.c:470:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] I have analyzed a number of such warnings now: some are valid and the GCC warning is welcome. Others turned out to be false-positives, and GCC was changed to not warn about those any more. This is a corner case that is a false-positive but the GCC developers feel it's better to keep warning about it. In this case, it seems best to work around it by telling GCC a little more clearly that this code path is never hit with an IS_ENABLED() configuration check. Cc:stable as we also want old kernels to build cleanly with GCC-8. Signed-off-by: Arnd Bergmann Cc: Jessica Yu Cc: Kees Cook Cc: Linus Torvalds Cc: Martin Sebor Cc: Peter Zijlstra Cc: Robert Richter Cc: Thomas Gleixner Cc: oprofile-list@lists.sf.net Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180220205826.2008875-1-arnd@arndb.de Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84095 Signed-off-by: Ingo Molnar --- arch/x86/oprofile/nmi_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 174c597..a7a7677 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -460,7 +460,7 @@ static int nmi_setup(void) goto fail; for_each_possible_cpu(cpu) { - if (!cpu) + if (!IS_ENABLED(CONFIG_SMP) || !cpu) continue; memcpy(per_cpu(cpu_msrs, cpu).counters, -- cgit v1.1 From cb13b424e986aed68d74cbaec3449ea23c50e167 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 20 Feb 2018 19:45:56 +0100 Subject: locking/xchg/alpha: Add unconditional memory barrier to cmpxchg() Continuing along with the fight against smp_read_barrier_depends() [1] (or rather, against its improper use), add an unconditional barrier to cmpxchg. This guarantees that dependency ordering is preserved when a dependency is headed by an unsuccessful cmpxchg. As it turns out, the change could enable further simplification of LKMM as proposed in [2]. [1] https://marc.info/?l=linux-kernel&m=150884953419377&w=2 https://marc.info/?l=linux-kernel&m=150884946319353&w=2 https://marc.info/?l=linux-kernel&m=151215810824468&w=2 https://marc.info/?l=linux-kernel&m=151215816324484&w=2 [2] https://marc.info/?l=linux-kernel&m=151881978314872&w=2 Signed-off-by: Andrea Parri Acked-by: Peter Zijlstra Acked-by: Paul E. McKenney Cc: Alan Stern Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Richard Henderson Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-alpha@vger.kernel.org Link: http://lkml.kernel.org/r/1519152356-4804-1-git-send-email-parri.andrea@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/xchg.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index 68dfb3c..e266086 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -128,10 +128,9 @@ ____xchg(, volatile void *ptr, unsigned long x, int size) * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. * - * The memory barrier should be placed in SMP only when we actually - * make the change. If we don't change anything (so if the returned - * prev is equal to old) then we aren't acquiring anything new and - * we don't need any memory barrier as far I can tell. + * The memory barrier is placed in SMP unconditionally, in order to + * guarantee that dependency ordering is preserved when a dependency + * is headed by an unsuccessful operation. */ static inline unsigned long @@ -150,8 +149,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) " or %1,%2,%2\n" " stq_c %2,0(%4)\n" " beq %2,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" @@ -177,8 +176,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) " or %1,%2,%2\n" " stq_c %2,0(%4)\n" " beq %2,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" @@ -200,8 +199,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) " mov %4,%1\n" " stl_c %1,%2\n" " beq %1,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" @@ -223,8 +222,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) " mov %4,%1\n" " stq_c %1,%2\n" " beq %1,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" -- cgit v1.1 From 78de41a368af26e54ba53e63629fd7f166f17cef Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 20 Feb 2018 23:55:46 +0100 Subject: MAINTAINERS: add Freescale pin controllers Add Dong Aisheng, Fabio Estevam, Shawn Guo and myself as maintainer and the Pengutronix kernel team as reviewer. Signed-off-by: Stefan Agner Reviewed-by: Fabio Estevam Acked-by: Dong Aisheng Acked-by: Lucas Stach Signed-off-by: Linus Walleij --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260..524b564 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10925,6 +10925,17 @@ L: linux-gpio@vger.kernel.org S: Supported F: drivers/pinctrl/pinctrl-at91-pio4.* +PIN CONTROLLER - FREESCALE +M: Dong Aisheng +M: Fabio Estevam +M: Shawn Guo +M: Stefan Agner +R: Pengutronix Kernel Team +L: linux-gpio@vger.kernel.org +S: Maintained +F: drivers/pinctrl/freescale/ +F: Documentation/devicetree/bindings/pinctrl/fsl,* + PIN CONTROLLER - INTEL M: Mika Westerberg M: Heikki Krogerus -- cgit v1.1 From d72f4e29e6d84b7ec02ae93088aa459ac70e733b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Feb 2018 09:20:37 +0100 Subject: x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP firmware_restrict_branch_speculation_*() recently started using preempt_enable()/disable(), but those are relatively high level primitives and cause build failures on some 32-bit builds. Since we want to keep low level, convert them to macros to avoid header hell... Cc: David Woodhouse Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: arjan.van.de.ven@intel.com Cc: bp@alien8.de Cc: dave.hansen@intel.com Cc: jmattson@google.com Cc: karahmed@amazon.de Cc: kvm@vger.kernel.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1aad6c7..b7063cf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -257,20 +257,22 @@ static inline void indirect_branch_prediction_barrier(void) /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) */ -static inline void firmware_restrict_branch_speculation_start(void) -{ - preempt_disable(); - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, - X86_FEATURE_USE_IBRS_FW); -} +#define firmware_restrict_branch_speculation_start() \ +do { \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ +} while (0) -static inline void firmware_restrict_branch_speculation_end(void) -{ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, - X86_FEATURE_USE_IBRS_FW); - preempt_enable(); -} +#define firmware_restrict_branch_speculation_end() \ +do { \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ +} while (0) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ -- cgit v1.1 From 0e34d226342c27c4f96138b211547d423e4be8a1 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:08 +0100 Subject: x86/entry/64: Move PUSH_AND_CLEAR_REGS from interrupt macro to helper function The PUSH_AND_CLEAR_REGS macro is able to insert the GP registers "above" the original return address. This allows us to move a sizeable part of the interrupt entry macro to an interrupt entry helper function: text data bss dec hex filename 21088 0 0 21088 5260 entry_64.o-orig 18006 0 0 18006 4656 entry_64.o Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-2-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7a53879..b0ae0c3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -525,6 +525,14 @@ END(irq_entries_start) * * Entry runs with interrupts off. */ +ENTRY(interrupt_entry) + UNWIND_HINT_FUNC + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + + ret +END(interrupt_entry) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func @@ -536,8 +544,7 @@ END(irq_entries_start) call switch_to_thread_stack 1: - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER + call interrupt_entry testb $3, CS(%rsp) jz 1f -- cgit v1.1 From 2ba6474104a1132c4af9f6dc42c6bfe3ca71f8c7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:09 +0100 Subject: x86/entry/64: Move ENTER_IRQ_STACK from interrupt macro to interrupt_entry Moving the switch to IRQ stack from the interrupt macro to the helper function requires some trickery: All ENTER_IRQ_STACK really cares about is where the "original" stack -- meaning the GP registers etc. -- is stored. Therefore, we need to offset the stored RSP value by 8 whenever ENTER_IRQ_STACK is called from within a function. In such cases, and after switching to the IRQ stack, we need to push the "original" return address (i.e. the return address from the call to the interrupt entry function) to the IRQ stack. This trickery allows us to carve another .85k from the text size (it would be more except for the additional unwind hints): text data bss dec hex filename 18006 0 0 18006 4656 entry_64.o-orig 17158 0 0 17158 4306 entry_64.o Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 56 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b0ae0c3..7a6ae19 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -448,9 +448,19 @@ END(irq_entries_start) * * The invariant is that, if irq_count != -1, then the IRQ stack is in use. */ -.macro ENTER_IRQ_STACK regs=1 old_rsp +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0 DEBUG_ENTRY_ASSERT_IRQS_OFF + + .if \save_ret + /* + * If save_ret is set, the original stack contains one additional + * entry -- the return address. Therefore, move the address one + * entry below %rsp to \old_rsp. + */ + leaq 8(%rsp), \old_rsp + .else movq %rsp, \old_rsp + .endif .if \regs UNWIND_HINT_REGS base=\old_rsp @@ -496,6 +506,15 @@ END(irq_entries_start) .if \regs UNWIND_HINT_REGS indirect=1 .endif + + .if \save_ret + /* + * Push the return address to the stack. This return address can + * be found at the "real" original RSP, which was offset by 8 at + * the beginning of this macro. + */ + pushq -8(\old_rsp) + .endif .endm /* @@ -531,22 +550,7 @@ ENTRY(interrupt_entry) PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 - ret -END(interrupt_entry) - -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func - cld - - testb $3, CS-ORIG_RAX(%rsp) - jz 1f - SWAPGS - call switch_to_thread_stack -1: - - call interrupt_entry - - testb $3, CS(%rsp) + testb $3, CS+8(%rsp) jz 1f /* @@ -564,10 +568,26 @@ END(interrupt_entry) CALL_enter_from_user_mode 1: - ENTER_IRQ_STACK old_rsp=%rdi + ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF + ret +END(interrupt_entry) + +/* 0(%rsp): ~(interrupt number) */ + .macro interrupt func + cld + + testb $3, CS-ORIG_RAX(%rsp) + jz 1f + SWAPGS + call switch_to_thread_stack +1: + + call interrupt_entry + + UNWIND_HINT_REGS indirect=1 call \func /* rdi points to pt_regs */ .endm -- cgit v1.1 From 90a6acc4e7ebafa8672a7a1a5b23fbad3dd04130 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:10 +0100 Subject: x86/entry/64: Move the switch_to_thread_stack() call to interrupt_entry() We can also move the CLD, SWAPGS, and the switch_to_thread_stack() call to the interrupt_entry() helper function. As we do not want call depths of two, convert switch_to_thread_stack() to a macro. However, switch_to_thread_stack() has another user in entry_64_compat.S, which currently expects it to be a function. To keep the code changes in this patch minimal, create a wrapper function. The switch to a macro means that there is some binary code duplication if CONFIG_IA32_EMULATION=y is enabled. Therefore, the size reduction differs whether CONFIG_IA32_EMULATION is enabled or not: CONFIG_IA32_EMULATION=y (-0.13k): text data bss dec hex filename 17158 0 0 17158 4306 entry_64.o-orig 17028 0 0 17028 4284 entry_64.o CONFIG_IA32_EMULATION=n (-0.27k): text data bss dec hex filename 17158 0 0 17158 4306 entry_64.o-orig 16882 0 0 16882 41f2 entry_64.o Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 66 ++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7a6ae19..b45d766 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -538,14 +538,47 @@ END(irq_entries_start) .endm /* + * Switch to the thread stack. This is called with the IRET frame and + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and + * space has not been allocated for them.) + */ +.macro DO_SWITCH_TO_THREAD_STACK + pushq %rdi + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi +.endm + +/* * Interrupt entry/exit. * * Interrupt entry points save only callee clobbered registers in fast path. * * Entry runs with interrupts off. */ +/* 8(%rsp): ~(interrupt number) */ ENTRY(interrupt_entry) UNWIND_HINT_FUNC + cld + + testb $3, CS-ORIG_RAX+8(%rsp) + jz 1f + SWAPGS + DO_SWITCH_TO_THREAD_STACK +1: PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -577,14 +610,6 @@ END(interrupt_entry) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - cld - - testb $3, CS-ORIG_RAX(%rsp) - jz 1f - SWAPGS - call switch_to_thread_stack -1: - call interrupt_entry UNWIND_HINT_REGS indirect=1 @@ -858,33 +883,16 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -/* - * Switch to the thread stack. This is called with the IRET frame and - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) - */ +#if defined(CONFIG_IA32_EMULATION) +/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */ ENTRY(switch_to_thread_stack) UNWIND_HINT_FUNC - pushq %rdi - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI - - pushq 7*8(%rdi) /* regs->ss */ - pushq 6*8(%rdi) /* regs->rsp */ - pushq 5*8(%rdi) /* regs->eflags */ - pushq 4*8(%rdi) /* regs->cs */ - pushq 3*8(%rdi) /* regs->ip */ - pushq 2*8(%rdi) /* regs->orig_ax */ - pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC + DO_SWITCH_TO_THREAD_STACK - movq (%rdi), %rdi ret END(switch_to_thread_stack) +#endif .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) -- cgit v1.1 From 3aa99fc3e708b9cd9b4cfe2df0b7a66cf293e3cf Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:11 +0100 Subject: x86/entry/64: Remove 'interrupt' macro It is now trivial to call interrupt_entry() and then the actual worker. Therefore, remove the interrupt macro and open code it all. Suggested-by: Linus Torvalds Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b45d766..8ea03cf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -608,14 +608,6 @@ ENTRY(interrupt_entry) ret END(interrupt_entry) -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func - call interrupt_entry - - UNWIND_HINT_REGS indirect=1 - call \func /* rdi points to pt_regs */ - .endm - /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -624,7 +616,9 @@ END(interrupt_entry) common_interrupt: ASM_CLAC addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ - interrupt do_IRQ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call do_IRQ /* rdi points to pt_regs */ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) @@ -820,7 +814,9 @@ ENTRY(\sym) ASM_CLAC pushq $~(\num) .Lcommon_\sym: - interrupt \do_sym + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr END(\sym) .endm -- cgit v1.1 From b2855d8d2de0fa15c1ff30c69ed7756b00c48b22 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:12 +0100 Subject: x86/entry/64: Move ASM_CLAC to interrupt_entry() Moving ASM_CLAC to interrupt_entry means two instructions (addq / pushq and call interrupt_entry) are not covered by it. However, it offers a noticeable size reduction (-.2k): text data bss dec hex filename 16882 0 0 16882 41f2 entry_64.o-orig 16623 0 0 16623 40ef entry_64.o Suggested-by: Brian Gerst Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8ea03cf..42a4b65 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -572,6 +572,7 @@ END(irq_entries_start) /* 8(%rsp): ~(interrupt number) */ ENTRY(interrupt_entry) UNWIND_HINT_FUNC + ASM_CLAC cld testb $3, CS-ORIG_RAX+8(%rsp) @@ -614,7 +615,6 @@ END(interrupt_entry) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: - ASM_CLAC addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ call interrupt_entry UNWIND_HINT_REGS indirect=1 @@ -811,7 +811,6 @@ END(common_interrupt) .macro apicinterrupt3 num sym do_sym ENTRY(\sym) UNWIND_HINT_IRET_REGS - ASM_CLAC pushq $~(\num) .Lcommon_\sym: call interrupt_entry -- cgit v1.1 From f3d415ea46968ae1f9cbb9e201601d7207ce74c7 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 20 Feb 2018 22:01:13 +0100 Subject: x86/entry/64: Open-code switch_to_thread_stack() Open-code the two instances which called switch_to_thread_stack(). This allows us to remove the wrapper around DO_SWITCH_TO_THREAD_STACK. While at it, update the UNWIND hint to reflect where the IRET frame is, and update the commentary to reflect what we are actually doing here. Signed-off-by: Dominik Brodowski Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: David Woodhouse Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Link: http://lkml.kernel.org/r/20180220210113.6725-7-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 76 +++++++++++++++++++++------------------- arch/x86/entry/entry_64_compat.S | 17 +++++++-- 2 files changed, 55 insertions(+), 38 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 42a4b65..d5c7f18 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -538,17 +538,48 @@ END(irq_entries_start) .endm /* - * Switch to the thread stack. This is called with the IRET frame and - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) + * Interrupt entry helper function. + * + * Entry runs with interrupts off. Stack layout at entry: + * +----------------------------------------------------+ + * | regs->ss | + * | regs->rsp | + * | regs->eflags | + * | regs->cs | + * | regs->ip | + * +----------------------------------------------------+ + * | regs->orig_ax = ~(interrupt number) | + * +----------------------------------------------------+ + * | return address | + * +----------------------------------------------------+ */ -.macro DO_SWITCH_TO_THREAD_STACK +ENTRY(interrupt_entry) + UNWIND_HINT_FUNC + ASM_CLAC + cld + + testb $3, CS-ORIG_RAX+8(%rsp) + jz 1f + SWAPGS + + /* + * Switch to the thread stack. The IRET frame and orig_ax are + * on the stack, as well as the return address. RDI..R12 are + * not (yet) on the stack and space has not (yet) been + * allocated for them. + */ pushq %rdi + /* Need to switch before accessing the thread stack. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi movq %rsp, %rdi movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + /* + * We have RDI, return address, and orig_ax on the stack on + * top of the IRET frame. That means offset=24 + */ + UNWIND_HINT_IRET_REGS base=%rdi offset=24 pushq 7*8(%rdi) /* regs->ss */ pushq 6*8(%rdi) /* regs->rsp */ @@ -560,25 +591,6 @@ END(irq_entries_start) UNWIND_HINT_FUNC movq (%rdi), %rdi -.endm - -/* - * Interrupt entry/exit. - * - * Interrupt entry points save only callee clobbered registers in fast path. - * - * Entry runs with interrupts off. - */ -/* 8(%rsp): ~(interrupt number) */ -ENTRY(interrupt_entry) - UNWIND_HINT_FUNC - ASM_CLAC - cld - - testb $3, CS-ORIG_RAX+8(%rsp) - jz 1f - SWAPGS - DO_SWITCH_TO_THREAD_STACK 1: PUSH_AND_CLEAR_REGS save_ret=1 @@ -592,7 +604,7 @@ ENTRY(interrupt_entry) * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). Since TRACE_IRQS_OFF idempotent, + * (which can take locks). Since TRACE_IRQS_OFF is idempotent, * the simplest way to handle it is to just call it twice if * we enter from user mode. There's no reason to optimize this since * TRACE_IRQS_OFF is a no-op if lockdep is off. @@ -609,6 +621,9 @@ ENTRY(interrupt_entry) ret END(interrupt_entry) + +/* Interrupt entry/exit. */ + /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -878,17 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -#if defined(CONFIG_IA32_EMULATION) -/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */ -ENTRY(switch_to_thread_stack) - UNWIND_HINT_FUNC - - DO_SWITCH_TO_THREAD_STACK - - ret -END(switch_to_thread_stack) -#endif - .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 364ea4a..e811dd9 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -347,10 +347,23 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax + /* switch to thread stack expects orig_ax and rdi to be pushed */ pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + pushq 6*8(%rdi) /* regs->ss */ + pushq 5*8(%rdi) /* regs->rsp */ + pushq 4*8(%rdi) /* regs->eflags */ + pushq 3*8(%rdi) /* regs->cs */ + pushq 2*8(%rdi) /* regs->ip */ + pushq 1*8(%rdi) /* regs->orig_ax */ - /* switch to thread stack expects orig_ax to be pushed */ - call switch_to_thread_stack + movq (%rdi), %rdi /* restore %rdi */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ -- cgit v1.1 From 33352244706369ea6736781ae41fe41692eb69bb Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:51 -0600 Subject: jump_label: Explicitly disable jump labels in __init code After initmem has been freed, any jump labels in __init code are prevented from being written to by the kernel_text_address() check in __jump_label_update(). However, this check is quite broad. If kernel_text_address() were to return false for any other reason, the jump label write would fail silently with no warning. For jump labels in module init code, entry->code is set to zero to indicate that the entry is disabled. Do the same thing for core kernel init code. This makes the behavior more consistent, and will also make it more straightforward to detect non-init jump label write failures in the next patch. Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Cc: Borislav Petkov Cc: Jason Baron Cc: Linus Torvalds Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c52825c73f3a174e8398b6898284ec20d4deb126.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 3 +++ init/main.c | 2 ++ kernel/jump_label.c | 16 ++++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index b6a29c1..2168cc6 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -151,6 +151,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); +extern void jump_label_invalidate_init(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -198,6 +199,8 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } +static inline void jump_label_invalidate_init(void) {} + static __always_inline bool static_key_false(struct static_key *key) { if (unlikely(static_key_count(key) > 0)) diff --git a/init/main.c b/init/main.c index a8100b9..969eaf1 100644 --- a/init/main.c +++ b/init/main.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -1000,6 +1001,7 @@ static int __ref kernel_init(void *unused) /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); ftrace_free_init_mem(); + jump_label_invalidate_init(); free_initmem(); mark_readonly(); system_state = SYSTEM_RUNNING; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b451709..b717765 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef HAVE_JUMP_LABEL @@ -417,6 +418,20 @@ void __init jump_label_init(void) cpus_read_unlock(); } +/* Disable any jump label entries in __init code */ +void __init jump_label_invalidate_init(void) +{ + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + for (iter = iter_start; iter < iter_stop; iter++) { + if (iter->code >= (unsigned long)_sinittext && + iter->code < (unsigned long)_einittext) + iter->code = 0; + } +} + #ifdef CONFIG_MODULES static enum jump_label_type jump_label_init_type(struct jump_entry *entry) @@ -633,6 +648,7 @@ static void jump_label_del_module(struct module *mod) } } +/* Disable any jump label entries in module init code */ static void jump_label_invalidate_module_init(struct module *mod) { struct jump_entry *iter_start = mod->jump_entries; -- cgit v1.1 From dc1dd184c2f0016bec35c0d7a48c057e0ad763d3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:52 -0600 Subject: jump_label: Warn on failed jump_label patching attempt Currently when the jump label code encounters an address which isn't recognized by kernel_text_address(), it just silently fails. This can be dangerous because jump labels are used in a variety of places, and are generally expected to work. Convert the silent failure to a warning. This won't warn about attempted writes to tracepoints in __init code after initmem has been freed, as those are already guarded by the entry->code check. Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Cc: Borislav Petkov Cc: Jason Baron Cc: Linus Torvalds Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/de3a271c93807adb7ed48f4e946b4f9156617680.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- kernel/jump_label.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b717765..b2f0b47 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -367,12 +367,15 @@ static void __jump_label_update(struct static_key *key, { for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { /* - * entry->code set to 0 invalidates module init text sections - * kernel_text_address() verifies we are not in core kernel - * init code, see jump_label_invalidate_module_init(). + * An entry->code of 0 indicates an entry which has been + * disabled because it was in an init text area. */ - if (entry->code && kernel_text_address(entry->code)) - arch_jump_label_transform(entry, jump_label_type(entry)); + if (entry->code) { + if (kernel_text_address(entry->code)) + arch_jump_label_transform(entry, jump_label_type(entry)); + else + WARN_ONCE(1, "can't patch jump_label at %pS", (void *)entry->code); + } } } -- cgit v1.1 From 9fbcc57aa16424ef84cb54e0d9db3221763de88a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 11:37:53 -0600 Subject: extable: Make init_kernel_text() global Convert init_kernel_text() to a global function and use it in a few places instead of manually comparing _sinittext and _einittext. Note that kallsyms.h has a very similar function called is_kernel_inittext(), but its end check is inclusive. I'm not sure whether that's intentional behavior, so I didn't touch it. Suggested-by: Jason Baron Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra Acked-by: Steven Rostedt (VMware) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4335d02be8d45ca7d265d2f174251d0b7ee6c5fd.1519051220.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 3 +-- include/linux/kernel.h | 1 + kernel/extable.c | 2 +- kernel/jump_label.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f..feb28fe 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@ #include #include #include -#include #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) + if (init_kernel_text(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ce51455..3fd2915 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -472,6 +472,7 @@ extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); +extern int init_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); diff --git a/kernel/extable.c b/kernel/extable.c index a17fdb6..6a5b61e 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -64,7 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -static inline int init_kernel_text(unsigned long addr) +int init_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_sinittext && addr < (unsigned long)_einittext) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index b2f0b47..52a0a7a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -16,7 +16,6 @@ #include #include #include -#include #ifdef HAVE_JUMP_LABEL @@ -429,8 +428,7 @@ void __init jump_label_invalidate_init(void) struct jump_entry *iter; for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->code >= (unsigned long)_sinittext && - iter->code < (unsigned long)_einittext) + if (init_kernel_text(iter->code)) iter->code = 0; } } -- cgit v1.1 From 0ca7d5baa1787e5f2a7abd6bfca3303b1bbb48ac Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 20 Feb 2018 20:42:14 -0600 Subject: x86/entry/64: Simplify ENCODE_FRAME_POINTER On 64-bit, the stack pointer is always aligned on interrupt, so instead of setting the LSB of the pt_regs address, we can just add 1 to it. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Andrew Lutomirski Cc: Brian Gerst Cc: Dan Williams Cc: Dominik Brodowski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180221024214.lhl5jfgw33c4vz3m@treble Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 5d10b7a..be63330 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -181,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 #ifdef CONFIG_FRAME_POINTER - .if \ptregs_offset - leaq \ptregs_offset(%rsp), %rbp - .else - mov %rsp, %rbp - .endif - orq $0x1, %rbp + leaq 1+\ptregs_offset(%rsp), %rbp #endif .endm -- cgit v1.1 From d5028ba8ee5a18c9d0bb926d883c28b370f89009 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 6 Feb 2018 09:46:13 +0100 Subject: objtool, retpolines: Integrate objtool with retpoline support more closely Disable retpoline validation in objtool if your compiler sucks, and otherwise select the validation stuff for CONFIG_RETPOLINE=y (most builds would already have it set due to ORC). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- Makefile | 5 +++++ arch/x86/Kconfig | 1 + arch/x86/Makefile | 10 +++------- scripts/Makefile.build | 2 ++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 79ad2bf..3dfce4d 100644 --- a/Makefile +++ b/Makefile @@ -489,6 +489,11 @@ KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) endif +RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register +RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk +RETPOLINE_CFLAGS := $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) +export RETPOLINE_CFLAGS + ifeq ($(config-targets),1) # =========================================================================== # *config targets only - make sure prerequisites are updated, and descend diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 63bf349..c1aed6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -436,6 +436,7 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y + select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect diff --git a/arch/x86/Makefile b/arch/x86/Makefile index dbc7d0e..498c1b8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -232,13 +232,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register - RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk - - RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG))) - ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - endif +ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +endif endif archscripts: scripts_basic diff --git a/scripts/Makefile.build b/scripts/Makefile.build index ce0fc4d..4f2b25d 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -267,8 +267,10 @@ else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif +endif ifdef CONFIG_MODVERSIONS -- cgit v1.1 From f4bc0c813e03bdb93f5300c3e06d7a0f07f65a74 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Tue, 20 Feb 2018 10:10:36 +0100 Subject: raid5-ppl: fix handling flush requests Add missing bio completion. Without this any flush request would hang. Fixes: 1532d9e87e8b ("raid5-ppl: PPL support for disks with write-back cache enabled") Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- drivers/md/raid5-log.h | 3 ++- drivers/md/raid5-ppl.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h index 0c76bce..a001808 100644 --- a/drivers/md/raid5-log.h +++ b/drivers/md/raid5-log.h @@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf); extern void ppl_stripe_write_finished(struct stripe_head *sh); extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add); extern void ppl_quiesce(struct r5conf *conf, int quiesce); +extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio); static inline bool raid5_has_ppl(struct r5conf *conf) { @@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio) if (conf->log) ret = r5l_handle_flush_request(conf->log, bio); else if (raid5_has_ppl(conf)) - ret = 0; + ret = ppl_handle_flush_request(conf->log, bio); return ret; } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 2764c22..42890a08 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce) } } +int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio) +{ + if (bio->bi_iter.bi_size == 0) { + bio_endio(bio); + return 0; + } + bio->bi_opf &= ~REQ_PREFLUSH; + return -EAGAIN; +} + void ppl_stripe_write_finished(struct stripe_head *sh) { struct ppl_io_unit *io; -- cgit v1.1 From 53b8d89ddbdbb0e4625a46d2cdbb6f106c52f801 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 14:09:11 +0100 Subject: md: raid5: avoid string overflow warning gcc warns about a possible overflow of the kmem_cache string, when adding four characters to a string of the same length: drivers/md/raid5.c: In function 'setup_conf': drivers/md/raid5.c:2207:34: error: '-alt' directive writing 4 bytes into a region of size between 1 and 32 [-Werror=format-overflow=] sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); ^~~~ drivers/md/raid5.c:2207:2: note: 'sprintf' output between 5 and 36 bytes into a destination of size 32 sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If I'm counting correctly, we need 11 characters for the fixed part of the string and 18 characters for a 64-bit pointer (when no gendisk is used), so that leaves three characters for conf->level, which should always be sufficient. This makes the code use snprintf() with the correct length, to make the code more robust against changes, and to get the compiler to shut up. In commit f4be6b43f1ac ("md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk") from 2010, Neil said that the pointer could be removed "shortly" once devices without gendisk are disallowed. I have no idea if that happened, but if it did, that should probably be changed as well. Signed-off-by: Arnd Bergmann Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 36e0506..e3b0f79 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) static int grow_stripes(struct r5conf *conf, int num) { struct kmem_cache *sc; + size_t namelen = sizeof(conf->cache_name[0]); int devs = max(conf->raid_disks, conf->previous_raid_disks); if (conf->mddev->gendisk) - sprintf(conf->cache_name[0], + snprintf(conf->cache_name[0], namelen, "raid%d-%s", conf->level, mdname(conf->mddev)); else - sprintf(conf->cache_name[0], + snprintf(conf->cache_name[0], namelen, "raid%d-%p", conf->level, conf->mddev); - sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); + snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); conf->active_name = 0; sc = kmem_cache_create(conf->cache_name[conf->active_name], -- cgit v1.1 From f45765872e7aae7b81feb3044aaf9886b21885ef Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 21 Feb 2018 10:25:01 +0200 Subject: RDMA/uverbs: Fix kernel panic while using XRC_TGT QP type Attempt to modify XRC_TGT QP type from the user space (ibv_xsrq_pingpong invocation) will trigger the following kernel panic. It is caused by the fact that such QPs missed uobject initialization. [ 17.408845] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 17.412645] IP: rdma_lookup_put_uobject+0x9/0x50 [ 17.416567] PGD 0 P4D 0 [ 17.419262] Oops: 0000 [#1] SMP PTI [ 17.422915] CPU: 0 PID: 455 Comm: ibv_xsrq_pingpo Not tainted 4.16.0-rc1+ #86 [ 17.424765] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 [ 17.427399] RIP: 0010:rdma_lookup_put_uobject+0x9/0x50 [ 17.428445] RSP: 0018:ffffb8c7401e7c90 EFLAGS: 00010246 [ 17.429543] RAX: 0000000000000000 RBX: ffffb8c7401e7cf8 RCX: 0000000000000000 [ 17.432426] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000000 [ 17.437448] RBP: 0000000000000000 R08: 00000000000218f0 R09: ffffffff8ebc4cac [ 17.440223] R10: fffff6038052cd80 R11: ffff967694b36400 R12: ffff96769391f800 [ 17.442184] R13: ffffb8c7401e7cd8 R14: 0000000000000000 R15: ffff967699f60000 [ 17.443971] FS: 00007fc29207d700(0000) GS:ffff96769fc00000(0000) knlGS:0000000000000000 [ 17.446623] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 17.448059] CR2: 0000000000000048 CR3: 000000001397a000 CR4: 00000000000006b0 [ 17.449677] Call Trace: [ 17.450247] modify_qp.isra.20+0x219/0x2f0 [ 17.451151] ib_uverbs_modify_qp+0x90/0xe0 [ 17.452126] ib_uverbs_write+0x1d2/0x3c0 [ 17.453897] ? __handle_mm_fault+0x93c/0xe40 [ 17.454938] __vfs_write+0x36/0x180 [ 17.455875] vfs_write+0xad/0x1e0 [ 17.456766] SyS_write+0x52/0xc0 [ 17.457632] do_syscall_64+0x75/0x180 [ 17.458631] entry_SYSCALL_64_after_hwframe+0x21/0x86 [ 17.460004] RIP: 0033:0x7fc29198f5a0 [ 17.460982] RSP: 002b:00007ffccc71f018 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 17.463043] RAX: ffffffffffffffda RBX: 0000000000000078 RCX: 00007fc29198f5a0 [ 17.464581] RDX: 0000000000000078 RSI: 00007ffccc71f050 RDI: 0000000000000003 [ 17.466148] RBP: 0000000000000000 R08: 0000000000000078 R09: 00007ffccc71f050 [ 17.467750] R10: 000055b6cf87c248 R11: 0000000000000246 R12: 00007ffccc71f300 [ 17.469541] R13: 000055b6cf8733a0 R14: 0000000000000000 R15: 0000000000000000 [ 17.471151] Code: 00 00 0f 1f 44 00 00 48 8b 47 48 48 8b 00 48 8b 40 10 e9 0b 8b 68 00 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 53 89 f5 <48> 8b 47 48 48 89 fb 40 0f b6 f6 48 8b 00 48 8b 40 20 e8 e0 8a [ 17.475185] RIP: rdma_lookup_put_uobject+0x9/0x50 RSP: ffffb8c7401e7c90 [ 17.476841] CR2: 0000000000000048 [ 17.477764] ---[ end trace 1dbcc5354071a712 ]--- [ 17.478880] Kernel panic - not syncing: Fatal exception [ 17.480277] Kernel Offset: 0xd000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Fixes: 2f08ee363fe0 ("RDMA/restrack: don't use uaccess_kernel()") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 25a0e0e..a148de3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1553,6 +1553,9 @@ static int create_qp(struct ib_uverbs_file *file, atomic_inc(&attr.srq->usecnt); if (ind_tbl) atomic_inc(&ind_tbl->usecnt); + } else { + /* It is done in _ib_create_qp for other QP types */ + qp->uobject = &obj->uevent.uobject; } obj->uevent.uobject.object = qp; -- cgit v1.1 From 7324f5399b06cdbbd1520b8fde8024035953179d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:04 +0100 Subject: virtio_net: disable XDP_REDIRECT in receive_mergeable() case The virtio_net code have three different RX code-paths in receive_buf(). Two of these code paths can handle XDP, but one of them is broken for at least XDP_REDIRECT. Function(1): receive_big() does not support XDP. Function(2): receive_small() support XDP fully and uses build_skb(). Function(3): receive_mergeable() broken XDP_REDIRECT uses napi_alloc_skb(). The simple explanation is that receive_mergeable() is broken because it uses napi_alloc_skb(), which violates XDP given XDP assumes packet header+data in single page and enough tail room for skb_shared_info. The longer explaination is that receive_mergeable() tries to work-around and satisfy these XDP requiresments e.g. by having a function xdp_linearize_page() that allocates and memcpy RX buffers around (in case packet is scattered across multiple rx buffers). This does currently satisfy XDP_PASS, XDP_DROP and XDP_TX (but only because we have not implemented bpf_xdp_adjust_tail yet). The XDP_REDIRECT action combined with cpumap is broken, and cause hard to debug crashes. The main issue is that the RX packet does not have the needed tail-room (SKB_DATA_ALIGN(skb_shared_info)), causing skb_shared_info to overlap the next packets head-room (in which cpumap stores info). Reproducing depend on the packet payload length and if RX-buffer size happened to have tail-room for skb_shared_info or not. But to make this even harder to troubleshoot, the RX-buffer size is runtime dynamically change based on an Exponentially Weighted Moving Average (EWMA) over the packet length, when refilling RX rings. This patch only disable XDP_REDIRECT support in receive_mergeable() case, because it can cause a real crash. IMHO we should consider NOT supporting XDP in receive_mergeable() at all, because the principles behind XDP are to gain speed by (1) code simplicity, (2) sacrificing memory and (3) where possible moving runtime checks to setup time. These principles are clearly being violated in receive_mergeable(), that e.g. runtime track average buffer size to save memory consumption. In the longer run, we should consider introducing a separate receive function when attaching an XDP program, and also change the memory model to be compatible with XDP when attaching an XDP prog. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 626c273..0ca9194 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -677,7 +677,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); - int err; head_skb = NULL; @@ -754,12 +753,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_xdp; rcu_read_unlock(); goto xdp_xmit; - case XDP_REDIRECT: - err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (!err) - *xdp_xmit = true; - rcu_read_unlock(); - goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: -- cgit v1.1 From 95dbe9e7b3720efa5cf83d21f44f6d953f7cf4a2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:10 +0100 Subject: virtio_net: fix XDP code path in receive_small() When configuring virtio_net to use the code path 'receive_small()', in-order to get correct XDP_REDIRECT support, I discovered TCP packets would get silently dropped when loading an XDP program action XDP_PASS. The bug seems to be that receive_small() when XDP is loaded check that hdr->hdr.flags is zero, which seems wrong as hdr.flags contains the flags VIRTIO_NET_HDR_F_* : #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ TCP got dropped as it had the VIRTIO_NET_HDR_F_DATA_VALID flag set. The flags that are relevant here are the VIRTIO_NET_HDR_GSO_* flags stored in hdr->hdr.gso_type. Thus, the fix is just check that none of the gso_type flags have been set. Fixes: bb91accf2733 ("virtio-net: XDP support for small buffers") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0ca9194..10c8fc4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -558,7 +558,7 @@ static struct sk_buff *receive_small(struct net_device *dev, void *orig_data; u32 act; - if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags)) + if (unlikely(hdr->hdr.gso_type)) goto err_xdp; if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { -- cgit v1.1 From 11b7d897ccc1fb5a3d3f9eb1e6b4574671e5dd7d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:15 +0100 Subject: virtio_net: fix memory leak in XDP_REDIRECT XDP_REDIRECT calling xdp_do_redirect() can fail for multiple reasons (which can be inspected by tracepoints). The current semantics is that on failure the driver calling xdp_do_redirect() must handle freeing or recycling the page associated with this frame. This can be seen as an optimization, as drivers usually have an optimized XDP_DROP code path for frame recycling in place already. The virtio_net driver didn't handle when xdp_do_redirect() failed. This caused a memory leak as the page refcnt wasn't decremented on failures. The function __virtnet_xdp_xmit() did handle one type of failure, when the xmit queue virtqueue_add_outbuf() is full, which "hides" releasing a refcnt on the page. Instead the function __virtnet_xdp_xmit() must follow API of xdp_do_redirect(), which on errors leave it up to the caller to free the page, of the failed send operation. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 10c8fc4..1e0e0fc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data); err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC); - if (unlikely(err)) { - struct page *page = virt_to_head_page(xdp->data); - - put_page(page); - return false; - } + if (unlikely(err)) + return false; /* Caller handle free/refcnt */ return true; } @@ -546,8 +542,11 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page = virt_to_head_page(buf); - unsigned int delta = 0, err; + unsigned int delta = 0; struct page *xdp_page; + bool sent; + int err; + len -= vi->hdr_len; rcu_read_lock(); @@ -596,16 +595,19 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) + sent = __virtnet_xdp_xmit(vi, &xdp); + if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - else - *xdp_xmit = true; + goto err_xdp; + } + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (!err) - *xdp_xmit = true; + if (err) + goto err_xdp; + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; default: @@ -677,6 +679,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); + bool sent; head_skb = NULL; @@ -745,10 +748,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) + sent = __virtnet_xdp_xmit(vi, &xdp); + if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - else - *xdp_xmit = true; + if (unlikely(xdp_page != page)) + put_page(xdp_page); + goto err_xdp; + } + *xdp_xmit = true; if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); -- cgit v1.1 From 8dcc5b0ab0ec9a2efb3362d380272546b8b2ee26 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:20 +0100 Subject: virtio_net: fix ndo_xdp_xmit crash towards dev not ready for XDP When a driver implements the ndo_xdp_xmit() function, there is (currently) no generic way to determine whether it is safe to call. It is e.g. unsafe to call the drivers ndo_xdp_xmit, if it have not allocated the needed XDP TX queues yet. This is the case for virtio_net, which first allocates the XDP TX queues once an XDP/bpf prog is attached (in virtnet_xdp_set()). Thus, a crash will occur for virtio_net when redirecting to another virtio_net device's ndo_xdp_xmit, which have not attached a XDP prog. The sample xdp_redirect_map tries to attach a dummy XDP prog to take this into account, but it can also easily fail if the virtio_net (or actually underlying vhost driver) have not allocated enough extra queues for the device. Allocating more queue this is currently a manual config. Hint for libvirt XML add: The solution in this patch is to check that the device have loaded an XDP/bpf prog before proceeding. This is similar to the check performed in driver ixgbe. Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1e0e0fc..9bb9e56 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -452,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) { struct virtnet_info *vi = netdev_priv(dev); - bool sent = __virtnet_xdp_xmit(vi, xdp); + struct receive_queue *rq = vi->rq; + struct bpf_prog *xdp_prog; + bool sent; + + /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this + * indicate XDP resources have been successfully allocated. + */ + xdp_prog = rcu_dereference(rq->xdp_prog); + if (!xdp_prog) + return -ENXIO; + sent = __virtnet_xdp_xmit(vi, xdp); if (!sent) return -ENOSPC; return 0; -- cgit v1.1 From cfd092f2db8b4b6727e1c03ef68a7842e1023573 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 20 Feb 2018 15:22:05 -0600 Subject: amd-xgbe: Restore PCI interrupt enablement setting on resume After resuming from suspend, the PCI device support must re-enable the interrupt setting so that interrupts are actually delivered. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 3e5833c..eb23f9b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -426,6 +426,8 @@ static int xgbe_pci_resume(struct pci_dev *pdev) struct net_device *netdev = pdata->netdev; int ret = 0; + XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); -- cgit v1.1 From 6d243a235612946971ba98f24f52dc99f4ebb32a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Feb 2018 16:35:50 -0500 Subject: NFSv4: Fix broken cast in nfs4_callback_recallany() Passing a pointer to a unsigned integer to test_bit() is broken. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2435af5..a50d781 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -572,7 +572,7 @@ out: } static bool -validate_bitmap_values(unsigned long mask) +validate_bitmap_values(unsigned int mask) { return (mask & ~RCA4_TYPE_MASK_ALL) == 0; } @@ -596,17 +596,15 @@ __be32 nfs4_callback_recallany(void *argp, void *resp, goto out; status = cpu_to_be32(NFS4_OK); - if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *) - &args->craa_type_mask)) + if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_RDATA_DLG)) flags = FMODE_READ; - if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *) - &args->craa_type_mask)) + if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_WDATA_DLG)) flags |= FMODE_WRITE; - if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *) - &args->craa_type_mask)) - pnfs_recall_all_layouts(cps->clp); if (flags) nfs_expire_unused_delegation_types(cps->clp, flags); + + if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT)) + pnfs_recall_all_layouts(cps->clp); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; -- cgit v1.1 From 7ed1c1901fe52e6c5828deb155920b44b0adabb1 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Wed, 21 Feb 2018 14:45:12 -0800 Subject: tools: fix cross-compile var clobbering Currently a number of Makefiles break when used with toolchains that pass extra flags in CC and other cross-compile related variables (such as --sysroot). Thus we get this error when we use a toolchain that puts --sysroot in the CC var: ~/src/linux/tools$ make iio [snip] iio_event_monitor.c:18:10: fatal error: unistd.h: No such file or directory #include ^~~~~~~~~~ This occurs because we clobber several env vars related to cross-compiling with lines like this: CC = $(CROSS_COMPILE)gcc Although this will point to a valid cross-compiler, we lose any extra flags that might exist in the CC variable, which can break toolchains that rely on them (for example, those that use --sysroot). This easily shows up using a Yocto SDK: $ . [snip]/sdk/environment-setup-cortexa8hf-neon-poky-linux-gnueabi $ echo $CC arm-poky-linux-gnueabi-gcc -march=armv7-a -mfpu=neon -mfloat-abi=hard -mcpu=cortex-a8 --sysroot=[snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi $ echo $CROSS_COMPILE arm-poky-linux-gnueabi- $ echo ${CROSS_COMPILE}gcc krm-poky-linux-gnueabi-gcc Although arm-poky-linux-gnueabi-gcc is a cross-compiler, we've lost the --sysroot and other flags that enable us to find the right libraries to link against, so we can't find unistd.h and other libraries and headers. Normally with the --sysroot flag we would find unistd.h in the sdk directory in the sysroot: $ find [snip]/sdk/sysroots -path '*/usr/include/unistd.h' [snip]/sdk/sysroots/cortexa8hf-neon-poky-linux-gnueabi/usr/include/unistd.h The perf Makefile adds CC = $(CROSS_COMPILE)gcc if and only if CC is not already set, and it compiles correctly with the above toolchain. So, generalize the logic that perf uses in the common Makefile and remove the manual CC = $(CROSS_COMPILE)gcc lines from each Makefile. Note that this patch does not fix cross-compile for all the tools (some have other bugs), but it does fix it for all except usb and acpi, which still have other unrelated issues. I tested both with and without the patch on native and cross-build and there appear to be no regressions. Link: http://lkml.kernel.org/r/20180107214028.23771-1-martin@martingkelly.com Signed-off-by: Martin Kelly Acked-by: Mark Brown Cc: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Linus Walleij Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Jonathan Cameron Cc: Pali Rohar Cc: Richard Purdie Cc: Jacek Anaszewski Cc: Pavel Machek Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Robert Moore Cc: Lv Zheng Cc: "Rafael J. Wysocki" Cc: Greg Kroah-Hartman Cc: Valentina Manea Cc: Shuah Khan Cc: Mario Limonciello Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/cgroup/Makefile | 1 - tools/gpio/Makefile | 2 -- tools/hv/Makefile | 1 - tools/iio/Makefile | 2 -- tools/laptop/freefall/Makefile | 1 - tools/leds/Makefile | 1 - tools/perf/Makefile.perf | 6 ------ tools/power/acpi/Makefile.config | 3 --- tools/scripts/Makefile.include | 18 ++++++++++++++++++ tools/spi/Makefile | 2 -- tools/usb/Makefile | 1 - tools/vm/Makefile | 1 - tools/wmi/Makefile | 1 - 13 files changed, 18 insertions(+), 22 deletions(-) diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile index 860fa15..ffca068 100644 --- a/tools/cgroup/Makefile +++ b/tools/cgroup/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for cgroup tools -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra all: cgroup_event_listener diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 805a2c0..240eda0 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -12,8 +12,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 1139d71..5db5e62 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for Hyper-V tools -CC = $(CROSS_COMPILE)gcc WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) diff --git a/tools/iio/Makefile b/tools/iio/Makefile index a08e7a4..332ed2f 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -12,8 +12,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile index 5f758c4..b572d94 100644 --- a/tools/laptop/freefall/Makefile +++ b/tools/laptop/freefall/Makefile @@ -2,7 +2,6 @@ PREFIX ?= /usr SBINDIR ?= sbin INSTALL ?= install -CC = $(CROSS_COMPILE)gcc TARGET = freefall diff --git a/tools/leds/Makefile b/tools/leds/Makefile index c379af0..7b6bed1 100644 --- a/tools/leds/Makefile +++ b/tools/leds/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for LEDs tools -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -g -I../../include/uapi all: uledmon led_hw_brightness_mon diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9b0351d..0123280 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -146,12 +146,6 @@ define allow-override $(eval $(1) = $(2))) endef -# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) -$(call allow-override,LD,$(CROSS_COMPILE)ld) -$(call allow-override,CXX,$(CROSS_COMPILE)g++) - LD += $(EXTRA_LDFLAGS) HOSTCC ?= gcc diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index a1883bb..2cccbba 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -56,9 +56,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} # to compile vs uClibc, that can be done here as well. CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)gcc -STRIP = $(CROSS_COMPILE)strip HOSTCC = gcc # check if compiler option is supported diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index fcb3ed0..dd61446 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -42,6 +42,24 @@ EXTRA_WARNINGS += -Wformat CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) +$(call allow-override,CXX,$(CROSS_COMPILE)g++) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) + ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif diff --git a/tools/spi/Makefile b/tools/spi/Makefile index 90615e1..815d155 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -11,8 +11,6 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -CC = $(CROSS_COMPILE)gcc -LD = $(CROSS_COMPILE)ld CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include ALL_TARGETS := spidev_test spidev_fdx diff --git a/tools/usb/Makefile b/tools/usb/Makefile index 4e65060..01d758d 100644 --- a/tools/usb/Makefile +++ b/tools/usb/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for USB tools -CC = $(CROSS_COMPILE)gcc PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g -I../include diff --git a/tools/vm/Makefile b/tools/vm/Makefile index be320b9..20f6cf0 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -6,7 +6,6 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api LIBS = $(LIB_DIR)/libapi.a -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -I../lib/ LDFLAGS = $(LIBS) diff --git a/tools/wmi/Makefile b/tools/wmi/Makefile index e664f11..e0e8723 100644 --- a/tools/wmi/Makefile +++ b/tools/wmi/Makefile @@ -2,7 +2,6 @@ PREFIX ?= /usr SBINDIR ?= sbin INSTALL ?= install CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include -CC = $(CROSS_COMPILE)gcc TARGET = dell-smbios-example -- cgit v1.1 From d34bc48f8275b6ce0da44f639d68344891268ee9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Feb 2018 14:45:17 -0800 Subject: include/linux/sched/mm.h: re-inline mmdrop() As Peter points out, Doing a CALL+RET for just the decrement is a bit silly. Fixes: d70f2a14b72a4bc ("include/linux/sched/mm.h: uninline mmdrop_async(), etc") Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Michal Hocko Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 13 ++++++++++++- kernel/fork.c | 15 ++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1149533..9806184 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -36,7 +36,18 @@ static inline void mmgrab(struct mm_struct *mm) atomic_inc(&mm->mm_count); } -extern void mmdrop(struct mm_struct *mm); +extern void __mmdrop(struct mm_struct *mm); + +static inline void mmdrop(struct mm_struct *mm) +{ + /* + * The implicit full barrier implied by atomic_dec_and_test() is + * required by the membarrier system call before returning to + * user-space, after storing to rq->curr. + */ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} /** * mmget() - Pin the address space associated with a &struct mm_struct. diff --git a/kernel/fork.c b/kernel/fork.c index be8aa5b..e5d9d40 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -592,7 +592,7 @@ static void check_mm(struct mm_struct *mm) * is dropped: either by a lazy thread or by * mmput. Free the page directory and the mm. */ -static void __mmdrop(struct mm_struct *mm) +void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); mm_free_pgd(mm); @@ -603,18 +603,7 @@ static void __mmdrop(struct mm_struct *mm) put_user_ns(mm->user_ns); free_mm(mm); } - -void mmdrop(struct mm_struct *mm) -{ - /* - * The implicit full barrier implied by atomic_dec_and_test() is - * required by the membarrier system call before returning to - * user-space, after storing to rq->curr. - */ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} -EXPORT_SYMBOL_GPL(mmdrop); +EXPORT_SYMBOL_GPL(__mmdrop); static void mmdrop_async_fn(struct work_struct *work) { -- cgit v1.1 From 101110f6271ce956a049250c907bc960030577f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Feb 2018 14:45:20 -0800 Subject: Kbuild: always define endianess in kconfig.h Build testing with LTO found a couple of files that get compiled differently depending on whether asm/byteorder.h gets included early enough or not. In particular, include/asm-generic/qrwlock_types.h is affected by this, but there are probably others as well. The symptom is a series of LTO link time warnings, including these: net/netlabel/netlabel_unlabeled.h:223: error: type of 'netlbl_unlhsh_add' does not match original declaration [-Werror=lto-type-mismatch] int netlbl_unlhsh_add(struct net *net, net/netlabel/netlabel_unlabeled.c:377: note: 'netlbl_unlhsh_add' was previously declared here include/net/ipv6.h:360: error: type of 'ipv6_renew_options_kern' does not match original declaration [-Werror=lto-type-mismatch] ipv6_renew_options_kern(struct sock *sk, net/ipv6/exthdrs.c:1162: note: 'ipv6_renew_options_kern' was previously declared here net/core/dev.c:761: note: 'dev_get_by_name_rcu' was previously declared here struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) net/core/dev.c:761: note: code may be misoptimized unless -fno-strict-aliasing is used drivers/gpu/drm/i915/i915_drv.h:3377: error: type of 'i915_gem_object_set_to_wc_domain' does not match original declaration [-Werror=lto-type-mismatch] i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); drivers/gpu/drm/i915/i915_gem.c:3639: note: 'i915_gem_object_set_to_wc_domain' was previously declared here include/linux/debugfs.h:92:9: error: type of 'debugfs_attr_read' does not match original declaration [-Werror=lto-type-mismatch] ssize_t debugfs_attr_read(struct file *file, char __user *buf, fs/debugfs/file.c:318: note: 'debugfs_attr_read' was previously declared here include/linux/rwlock_api_smp.h:30: error: type of '_raw_read_unlock' does not match original declaration [-Werror=lto-type-mismatch] void __lockfunc _raw_read_unlock(rwlock_t *lock) __releases(lock); kernel/locking/spinlock.c:246:26: note: '_raw_read_unlock' was previously declared here include/linux/fs.h:3308:5: error: type of 'simple_attr_open' does not match original declaration [-Werror=lto-type-mismatch] int simple_attr_open(struct inode *inode, struct file *file, fs/libfs.c:795: note: 'simple_attr_open' was previously declared here All of the above are caused by include/asm-generic/qrwlock_types.h failing to include asm/byteorder.h after commit e0d02285f16e ("locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'") in linux-4.15. Similar bugs may or may not exist in older kernels as well, but there is no easy way to test those with link-time optimizations, and kernels before 4.14 are harder to fix because they don't have Babu's patch series We had similar issues with CONFIG_ symbols in the past and ended up always including the configuration headers though linux/kconfig.h. This works around the issue through that same file, defining either __BIG_ENDIAN or __LITTLE_ENDIAN depending on CONFIG_CPU_BIG_ENDIAN, which is now always set on all architectures since commit 4c97a0c8fee3 ("arch: define CPU_BIG_ENDIAN for all fixed big endian archs"). Link: http://lkml.kernel.org/r/20180202154104.1522809-2-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Babu Moger Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: Nicolas Pitre Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kconfig.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index fec5076..cc8fa10 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -4,6 +4,12 @@ #include +#ifdef CONFIG_CPU_BIG_ENDIAN +#define __BIG_ENDIAN 4321 +#else +#define __LITTLE_ENDIAN 1234 +#endif + #define __ARG_PLACEHOLDER_1 0, #define __take_second_arg(__ignored, val, ...) val -- cgit v1.1 From c3cc39118c3610eb6ab4711bc624af7fc48a35fe Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Feb 2018 14:45:24 -0800 Subject: mm: memcontrol: fix NR_WRITEBACK leak in memcg and system stats After commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting"), we observed slowly upward creeping NR_WRITEBACK counts over the course of several days, both the per-memcg stats as well as the system counter in e.g. /proc/meminfo. The conversion from full per-cpu stat counts to per-cpu cached atomic stat counts introduced an irq-unsafe RMW operation into the updates. Most stat updates come from process context, but one notable exception is the NR_WRITEBACK counter. While writebacks are issued from process context, they are retired from (soft)irq context. When writeback completions interrupt the RMW counter updates of new writebacks being issued, the decs from the completions are lost. Since the global updates are routed through the joint lruvec API, both the memcg counters as well as the system counters are affected. This patch makes the joint stat and event API irq safe. Link: http://lkml.kernel.org/r/20180203082353.17284-1-hannes@cmpxchg.org Fixes: a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting") Signed-off-by: Johannes Weiner Debugged-by: Tejun Heo Reviewed-by: Rik van Riel Reviewed-by: Andrew Morton Cc: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8820468..c46016b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -523,9 +523,11 @@ static inline void __mod_memcg_state(struct mem_cgroup *memcg, static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __mod_memcg_state(memcg, idx, val); - preempt_enable(); + local_irq_restore(flags); } /** @@ -606,9 +608,11 @@ static inline void __mod_lruvec_state(struct lruvec *lruvec, static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __mod_lruvec_state(lruvec, idx, val); - preempt_enable(); + local_irq_restore(flags); } static inline void __mod_lruvec_page_state(struct page *page, @@ -630,9 +634,11 @@ static inline void __mod_lruvec_page_state(struct page *page, static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __mod_lruvec_page_state(page, idx, val); - preempt_enable(); + local_irq_restore(flags); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -659,9 +665,11 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, static inline void count_memcg_events(struct mem_cgroup *memcg, int idx, unsigned long count) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); __count_memcg_events(memcg, idx, count); - preempt_enable(); + local_irq_restore(flags); } /* idx can be of type enum memcg_event_item or vm_event_item */ -- cgit v1.1 From 9c4e6b1a7027f102990c0395296015a812525f4d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 21 Feb 2018 14:45:28 -0800 Subject: mm, mlock, vmscan: no more skipping pagevecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a thread mlocks an address space backed either by file pages which are currently not present in memory or swapped out anon pages (not in swapcache), a new page is allocated and added to the local pagevec (lru_add_pvec), I/O is triggered and the thread then sleeps on the page. On I/O completion, the thread can wake on a different CPU, the mlock syscall will then sets the PageMlocked() bit of the page but will not be able to put that page in unevictable LRU as the page is on the pagevec of a different CPU. Even on drain, that page will go to evictable LRU because the PageMlocked() bit is not checked on pagevec drain. The page will eventually go to right LRU on reclaim but the LRU stats will remain skewed for a long time. This patch puts all the pages, even unevictable, to the pagevecs and on the drain, the pages will be added on their LRUs correctly by checking their evictability. This resolves the mlocked pages on pagevec of other CPUs issue because when those pagevecs will be drained, the mlocked file pages will go to unevictable LRU. Also this makes the race with munlock easier to resolve because the pagevec drains happen in LRU lock. However there is still one place which makes a page evictable and does PageLRU check on that page without LRU lock and needs special attention. TestClearPageMlocked() and isolate_lru_page() in clear_page_mlock(). #0: __pagevec_lru_add_fn #1: clear_page_mlock SetPageLRU() if (!TestClearPageMlocked()) return smp_mb() // <--required // inside does PageLRU if (!PageMlocked()) if (isolate_lru_page()) move to evictable LRU putback_lru_page() else move to unevictable LRU In '#1', TestClearPageMlocked() provides full memory barrier semantics and thus the PageLRU check (inside isolate_lru_page) can not be reordered before it. In '#0', without explicit memory barrier, the PageMlocked() check can be reordered before SetPageLRU(). If that happens, '#0' can put a page in unevictable LRU and '#1' might have just cleared the Mlocked bit of that page but fails to isolate as PageLRU fails as '#0' still hasn't set PageLRU bit of that page. That page will be stranded on the unevictable LRU. There is one (good) side effect though. Without this patch, the pages allocated for System V shared memory segment are added to evictable LRUs even after shmctl(SHM_LOCK) on that segment. This patch will correctly put such pages to unevictable LRU. Link: http://lkml.kernel.org/r/20171121211241.18877-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Vlastimil Babka Cc: Jérôme Glisse Cc: Huang Ying Cc: Tim Chen Cc: Michal Hocko Cc: Greg Thelen Cc: Johannes Weiner Cc: Balbir Singh Cc: Minchan Kim Cc: Shaohua Li Cc: Jan Kara Cc: Nicholas Piggin Cc: Dan Williams Cc: Mel Gorman Cc: Hugh Dickins Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- mm/mlock.c | 6 ++++ mm/swap.c | 82 ++++++++++++++++++++++++++++++---------------------- mm/vmscan.c | 59 +------------------------------------ 4 files changed, 54 insertions(+), 95 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7b6a59f..a1a3f4e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -337,8 +337,6 @@ extern void deactivate_file_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -extern void add_page_to_unevictable_list(struct page *page); - extern void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma); diff --git a/mm/mlock.c b/mm/mlock.c index 7939820..74e5a65 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -64,6 +64,12 @@ void clear_page_mlock(struct page *page) mod_zone_page_state(page_zone(page), NR_MLOCK, -hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGCLEARED); + /* + * The previous TestClearPageMlocked() corresponds to the smp_mb() + * in __pagevec_lru_add_fn(). + * + * See __pagevec_lru_add_fn for more explanation. + */ if (!isolate_lru_page(page)) { putback_lru_page(page); } else { diff --git a/mm/swap.c b/mm/swap.c index 567a7b9..2d33771 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -446,30 +446,6 @@ void lru_cache_add(struct page *page) } /** - * add_page_to_unevictable_list - add a page to the unevictable list - * @page: the page to be added to the unevictable list - * - * Add page directly to its zone's unevictable list. To avoid races with - * tasks that might be making the page evictable, through eg. munlock, - * munmap or exit, while it's not on the lru, we want to add the page - * while it's locked or otherwise "invisible" to other tasks. This is - * difficult to do when using the pagevec cache, so bypass that. - */ -void add_page_to_unevictable_list(struct page *page) -{ - struct pglist_data *pgdat = page_pgdat(page); - struct lruvec *lruvec; - - spin_lock_irq(&pgdat->lru_lock); - lruvec = mem_cgroup_page_lruvec(page, pgdat); - ClearPageActive(page); - SetPageUnevictable(page); - SetPageLRU(page); - add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); - spin_unlock_irq(&pgdat->lru_lock); -} - -/** * lru_cache_add_active_or_unevictable * @page: the page to be added to LRU * @vma: vma in which page is mapped for determining reclaimability @@ -484,13 +460,9 @@ void lru_cache_add_active_or_unevictable(struct page *page, { VM_BUG_ON_PAGE(PageLRU(page), page); - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { + if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) SetPageActive(page); - lru_cache_add(page); - return; - } - - if (!TestSetPageMlocked(page)) { + else if (!TestSetPageMlocked(page)) { /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte @@ -500,7 +472,7 @@ void lru_cache_add_active_or_unevictable(struct page *page, hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); } - add_page_to_unevictable_list(page); + lru_cache_add(page); } /* @@ -886,15 +858,55 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, void *arg) { - int file = page_is_file_cache(page); - int active = PageActive(page); - enum lru_list lru = page_lru(page); + enum lru_list lru; + int was_unevictable = TestClearPageUnevictable(page); VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); + /* + * Page becomes evictable in two ways: + * 1) Within LRU lock [munlock_vma_pages() and __munlock_pagevec()]. + * 2) Before acquiring LRU lock to put the page to correct LRU and then + * a) do PageLRU check with lock [check_move_unevictable_pages] + * b) do PageLRU check before lock [clear_page_mlock] + * + * (1) & (2a) are ok as LRU lock will serialize them. For (2b), we need + * following strict ordering: + * + * #0: __pagevec_lru_add_fn #1: clear_page_mlock + * + * SetPageLRU() TestClearPageMlocked() + * smp_mb() // explicit ordering // above provides strict + * // ordering + * PageMlocked() PageLRU() + * + * + * if '#1' does not observe setting of PG_lru by '#0' and fails + * isolation, the explicit barrier will make sure that page_evictable + * check will put the page in correct LRU. Without smp_mb(), SetPageLRU + * can be reordered after PageMlocked check and can make '#1' to fail + * the isolation of the page whose Mlocked bit is cleared (#0 is also + * looking at the same page) and the evictable page will be stranded + * in an unevictable LRU. + */ + smp_mb(); + + if (page_evictable(page)) { + lru = page_lru(page); + update_page_reclaim_stat(lruvec, page_is_file_cache(page), + PageActive(page)); + if (was_unevictable) + count_vm_event(UNEVICTABLE_PGRESCUED); + } else { + lru = LRU_UNEVICTABLE; + ClearPageActive(page); + SetPageUnevictable(page); + if (!was_unevictable) + count_vm_event(UNEVICTABLE_PGCULLED); + } + add_page_to_lru_list(page, lruvec, lru); - update_page_reclaim_stat(lruvec, file, active); trace_mm_lru_insertion(page, lru); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 4447496..bee5349 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -769,64 +769,7 @@ int remove_mapping(struct address_space *mapping, struct page *page) */ void putback_lru_page(struct page *page) { - bool is_unevictable; - int was_unevictable = PageUnevictable(page); - - VM_BUG_ON_PAGE(PageLRU(page), page); - -redo: - ClearPageUnevictable(page); - - if (page_evictable(page)) { - /* - * For evictable pages, we can use the cache. - * In event of a race, worst case is we end up with an - * unevictable page on [in]active list. - * We know how to handle that. - */ - is_unevictable = false; - lru_cache_add(page); - } else { - /* - * Put unevictable pages directly on zone's unevictable - * list. - */ - is_unevictable = true; - add_page_to_unevictable_list(page); - /* - * When racing with an mlock or AS_UNEVICTABLE clearing - * (page is unlocked) make sure that if the other thread - * does not observe our setting of PG_lru and fails - * isolation/check_move_unevictable_pages, - * we see PG_mlocked/AS_UNEVICTABLE cleared below and move - * the page back to the evictable list. - * - * The other side is TestClearPageMlocked() or shmem_lock(). - */ - smp_mb(); - } - - /* - * page's status can change while we move it among lru. If an evictable - * page is on unevictable list, it never be freed. To avoid that, - * check after we added it to the list, again. - */ - if (is_unevictable && page_evictable(page)) { - if (!isolate_lru_page(page)) { - put_page(page); - goto redo; - } - /* This means someone else dropped this page from LRU - * So, it will be freed or putback to LRU again. There is - * nothing to do here. - */ - } - - if (was_unevictable && !is_unevictable) - count_vm_event(UNEVICTABLE_PGRESCUED); - else if (!was_unevictable && is_unevictable) - count_vm_event(UNEVICTABLE_PGCULLED); - + lru_cache_add(page); put_page(page); /* drop ref from isolate */ } -- cgit v1.1 From 88913bd8ea2a75d7e460a4bed5f75e1c32660d7e Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 21 Feb 2018 14:45:32 -0800 Subject: kernel/relay.c: limit kmalloc size to KMALLOC_MAX_SIZE chan->n_subbufs is set by the user and relay_create_buf() does a kmalloc() of chan->n_subbufs * sizeof(size_t *). kmalloc_slab() will generate a warning when this fails if chan->subbufs * sizeof(size_t *) > KMALLOC_MAX_SIZE. Limit chan->n_subbufs to the maximum allowed kmalloc() size. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802061216100.122576@chino.kir.corp.google.com Fixes: f6302f1bcd75 ("relay: prevent integer overflow in relay_open()") Signed-off-by: David Rientjes Reviewed-by: Andrew Morton Cc: Jens Axboe Cc: Dave Jiang Cc: Al Viro Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/relay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index c302940..c955b10 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -163,7 +163,7 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan) { struct rchan_buf *buf; - if (chan->n_subbufs > UINT_MAX / sizeof(size_t *)) + if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *)) return NULL; buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); -- cgit v1.1 From 2be04df5668d81f9a98e57b81bc53f72bd5f4f92 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 21 Feb 2018 14:45:35 -0800 Subject: certs/blacklist_nohashes.c: fix const confusion in certs blacklist const must be marked __initconst, not __initdata. Link: http://lkml.kernel.org/r/20171222001335.1987-1-andi@firstfloor.org Signed-off-by: Andi Kleen Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- certs/blacklist_nohashes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/certs/blacklist_nohashes.c b/certs/blacklist_nohashes.c index 73fd990..753b703 100644 --- a/certs/blacklist_nohashes.c +++ b/certs/blacklist_nohashes.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "blacklist.h" -const char __initdata *const blacklist_hashes[] = { +const char __initconst *const blacklist_hashes[] = { NULL }; -- cgit v1.1 From 7ba716698cc53f8d5367766c93c538c7da6c68ce Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 21 Feb 2018 14:45:39 -0800 Subject: mm, swap, frontswap: fix THP swap if frontswap enabled It was reported by Sergey Senozhatsky that if THP (Transparent Huge Page) and frontswap (via zswap) are both enabled, when memory goes low so that swap is triggered, segfault and memory corruption will occur in random user space applications as follow, kernel: urxvt[338]: segfault at 20 ip 00007fc08889ae0d sp 00007ffc73a7fc40 error 6 in libc-2.26.so[7fc08881a000+1ae000] #0 0x00007fc08889ae0d _int_malloc (libc.so.6) #1 0x00007fc08889c2f3 malloc (libc.so.6) #2 0x0000560e6004bff7 _Z14rxvt_wcstoutf8PKwi (urxvt) #3 0x0000560e6005e75c n/a (urxvt) #4 0x0000560e6007d9f1 _ZN16rxvt_perl_interp6invokeEP9rxvt_term9hook_typez (urxvt) #5 0x0000560e6003d988 _ZN9rxvt_term9cmd_parseEv (urxvt) #6 0x0000560e60042804 _ZN9rxvt_term6pty_cbERN2ev2ioEi (urxvt) #7 0x0000560e6005c10f _Z17ev_invoke_pendingv (urxvt) #8 0x0000560e6005cb55 ev_run (urxvt) #9 0x0000560e6003b9b9 main (urxvt) #10 0x00007fc08883af4a __libc_start_main (libc.so.6) #11 0x0000560e6003f9da _start (urxvt) After bisection, it was found the first bad commit is bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out"). The root cause is as follows: When the pages are written to swap device during swapping out in swap_writepage(), zswap (fontswap) is tried to compress the pages to improve performance. But zswap (frontswap) will treat THP as a normal page, so only the head page is saved. After swapping in, tail pages will not be restored to their original contents, causing memory corruption in the applications. This is fixed by refusing to save page in the frontswap store functions if the page is a THP. So that the THP will be swapped out to swap device. Another choice is to split THP if frontswap is enabled. But it is found that the frontswap enabling isn't flexible. For example, if CONFIG_ZSWAP=y (cannot be module), frontswap will be enabled even if zswap itself isn't enabled. Frontswap has multiple backends, to make it easy for one backend to enable THP support, the THP checking is put in backend frontswap store functions instead of the general interfaces. Link: http://lkml.kernel.org/r/20180209084947.22749-1-ying.huang@intel.com Fixes: bd4c82c22c367e068 ("mm, THP, swap: delay splitting THP after swapped out") Signed-off-by: "Huang, Ying" Reported-by: Sergey Senozhatsky Tested-by: Sergey Senozhatsky Suggested-by: Minchan Kim [put THP checking in backend] Cc: Konrad Rzeszutek Wilk Cc: Dan Streetman Cc: Seth Jennings Cc: Tetsuo Handa Cc: Shaohua Li Cc: Michal Hocko Cc: Johannes Weiner Cc: Mel Gorman Cc: Shakeel Butt Cc: Boris Ostrovsky Cc: Juergen Gross Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/xen/tmem.c | 4 ++++ mm/zswap.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index bf13d1e..04e7b3b 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -284,6 +284,10 @@ static int tmem_frontswap_store(unsigned type, pgoff_t offset, int pool = tmem_frontswap_poolid; int ret; + /* THP isn't supported */ + if (PageTransHuge(page)) + return -1; + if (pool < 0) return -1; if (ind64 != ind) diff --git a/mm/zswap.c b/mm/zswap.c index c004aa4..61a5c41 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1007,6 +1007,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, u8 *src, *dst; struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) }; + /* THP isn't supported */ + if (PageTransHuge(page)) { + ret = -EINVAL; + goto reject; + } + if (!zswap_enabled || !tree) { ret = -ENODEV; goto reject; -- cgit v1.1 From b1a8a7a70043400d1e685899548c92b92f640d71 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 21 Feb 2018 14:45:43 -0800 Subject: ida: do zeroing in ida_pre_get() As far as I can tell, the only place the per-cpu ida_bitmap is populated is in ida_pre_get. The pre-allocated element is stolen in two places in ida_get_new_above, in both cases immediately followed by a memset(0). Since ida_get_new_above is called with locks held, do the zeroing in ida_pre_get, or rather let kmalloc() do it. Also, apparently gcc generates ~44 bytes of code to do a memset(, 0, 128): $ scripts/bloat-o-meter vmlinux.{0,1} add/remove: 0/0 grow/shrink: 2/1 up/down: 5/-88 (-83) Function old new delta ida_pre_get 115 119 +4 vermagic 27 28 +1 ida_get_new_above 715 627 -88 Link: http://lkml.kernel.org/r/20180108225634.15340-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Acked-by: Matthew Wilcox Cc: Eric Biggers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 2 -- lib/radix-tree.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index c98d77f..99ec5bc 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -431,7 +431,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; rcu_assign_pointer(*slot, bitmap); } @@ -464,7 +463,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 0a7ae32..8e00138 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2125,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) preempt_enable(); if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); + struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) -- cgit v1.1 From 14fec9eba43b05d39825128e4354a2dc50fb59ea Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Feb 2018 14:45:46 -0800 Subject: mm/zpool.c: zpool_evictable: fix mismatch in parameter name and kernel-doc [akpm@linux-foundation.org: add colon, per Randy] Link: http://lkml.kernel.org/r/1518116984-21141-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrew Morton Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zpool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zpool.c b/mm/zpool.c index f8cb83e..01a771e 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -360,7 +360,7 @@ u64 zpool_get_total_size(struct zpool *zpool) /** * zpool_evictable() - Test if zpool is potentially evictable - * @pool The zpool to test + * @zpool: The zpool to test * * Zpool is only potentially evictable when it's created with struct * zpool_ops.evict and its driver implements struct zpool_driver.shrink. -- cgit v1.1 From cb6f0f34802dd7148d930f4f8d1cce991b8c23be Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 21 Feb 2018 14:45:50 -0800 Subject: mm/swap.c: make functions and their kernel-doc agree (again) There was a conflict between the commit e02a9f048ef7 ("mm/swap.c: make functions and their kernel-doc agree") and the commit f144c390f905 ("mm: docs: fix parameter names mismatch") that both tried to fix mismatch betweeen pagevec_lookup_entries() parameter names and their description. Since nr_entries is a better name for the parameter, fix the description again. Link: http://lkml.kernel.org/r/1518116946-20947-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Randy Dunlap Reviewed-by: Andrew Morton Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index 2d33771..0f17330 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -925,7 +925,7 @@ EXPORT_SYMBOL(__pagevec_lru_add); * @pvec: Where the resulting entries are placed * @mapping: The address_space to search * @start: The starting entry index - * @nr_pages: The maximum number of pages + * @nr_entries: The maximum number of pages * @indices: The cache indices corresponding to the entries in @pvec * * pagevec_lookup_entries() will search for and return a group of up -- cgit v1.1 From 173a3efd3edb2ef6ef07471397c5f542a360e9c1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Feb 2018 14:45:54 -0800 Subject: bug.h: work around GCC PR82365 in BUG() Looking at functions with large stack frames across all architectures led me discovering that BUG() suffers from the same problem as fortify_panic(), which I've added a workaround for already. In short, variables that go out of scope by calling a noreturn function or __builtin_unreachable() keep using stack space in functions afterwards. A workaround that was identified is to insert an empty assembler statement just before calling the function that doesn't return. I'm adding a macro "barrier_before_unreachable()" to document this, and insert calls to that in all instances of BUG() that currently suffer from this problem. The files that saw the largest change from this had these frame sizes before, and much less with my patch: fs/ext4/inode.c:82:1: warning: the frame size of 1672 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/namei.c:434:1: warning: the frame size of 904 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/super.c:2279:1: warning: the frame size of 1160 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/ext4/xattr.c:146:1: warning: the frame size of 1168 bytes is larger than 800 bytes [-Wframe-larger-than=] fs/f2fs/inode.c:152:1: warning: the frame size of 1424 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_core.c:1195:1: warning: the frame size of 1068 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_core.c:395:1: warning: the frame size of 1084 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_ftp.c:298:1: warning: the frame size of 928 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_ftp.c:418:1: warning: the frame size of 908 bytes is larger than 800 bytes [-Wframe-larger-than=] net/netfilter/ipvs/ip_vs_lblcr.c:718:1: warning: the frame size of 960 bytes is larger than 800 bytes [-Wframe-larger-than=] drivers/net/xen-netback/netback.c:1500:1: warning: the frame size of 1088 bytes is larger than 800 bytes [-Wframe-larger-than=] In case of ARC and CRIS, it turns out that the BUG() implementation actually does return (or at least the compiler thinks it does), resulting in lots of warnings about uninitialized variable use and leaving noreturn functions, such as: block/cfq-iosched.c: In function 'cfq_async_queue_prio': block/cfq-iosched.c:3804:1: error: control reaches end of non-void function [-Werror=return-type] include/linux/dmaengine.h: In function 'dma_maxpq': include/linux/dmaengine.h:1123:1: error: control reaches end of non-void function [-Werror=return-type] This makes them call __builtin_trap() instead, which should normally dump the stack and kill the current process, like some of the other architectures already do. I tried adding barrier_before_unreachable() to panic() and fortify_panic() as well, but that had very little effect, so I'm not submitting that patch. Vineet said: : For ARC, it is double win. : : 1. Fixes 3 -Wreturn-type warnings : : | ../net/core/ethtool.c:311:1: warning: control reaches end of non-void function : [-Wreturn-type] : | ../kernel/sched/core.c:3246:1: warning: control reaches end of non-void function : [-Wreturn-type] : | ../include/linux/sunrpc/svc_xprt.h:180:1: warning: control reaches end of : non-void function [-Wreturn-type] : : 2. bloat-o-meter reports code size improvements as gcc elides the : generated code for stack return. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 Link: http://lkml.kernel.org/r/20171219114112.939391-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Vineet Gupta [arch/arc] Tested-by: Vineet Gupta [arch/arc] Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Tony Luck Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: "David S. Miller" Cc: Christopher Li Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Kees Cook Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Will Deacon Cc: "Steven Rostedt (VMware)" Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/bug.h | 3 ++- arch/cris/include/arch-v10/arch/bug.h | 11 +++++++++-- arch/ia64/include/asm/bug.h | 6 +++++- arch/m68k/include/asm/bug.h | 3 +++ arch/sparc/include/asm/bug.h | 6 +++++- include/asm-generic/bug.h | 1 + include/linux/compiler-gcc.h | 15 ++++++++++++++- include/linux/compiler.h | 5 +++++ 8 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index ea022d4..21ec824 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #define BUG() do { \ pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ - dump_stack(); \ + barrier_before_unreachable(); \ + __builtin_trap(); \ } while (0) #define HAVE_ARCH_BUG diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h index 905afea..06da9d4 100644 --- a/arch/cris/include/arch-v10/arch/bug.h +++ b/arch/cris/include/arch-v10/arch/bug.h @@ -44,18 +44,25 @@ struct bug_frame { * not be used like this with newer versions of gcc. */ #define BUG() \ +do { \ __asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\ "movu.w " __stringify(__LINE__) ",$r0\n\t"\ "jump 0f\n\t" \ ".section .rodata\n" \ "0:\t.string \"" __FILE__ "\"\n\t" \ - ".previous") + ".previous"); \ + unreachable(); \ +} while (0) #endif #else /* This just causes an oops. */ -#define BUG() (*(int *)0 = 0) +#define BUG() \ +do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h index bd3eeb8..66b37a5 100644 --- a/arch/ia64/include/asm/bug.h +++ b/arch/ia64/include/asm/bug.h @@ -4,7 +4,11 @@ #ifdef CONFIG_BUG #define ia64_abort() __builtin_trap() -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ + ia64_abort(); \ +} while (0) /* should this BUG be made generic? */ #define HAVE_ARCH_BUG diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h index b7e2bf1..275dca1 100644 --- a/arch/m68k/include/asm/bug.h +++ b/arch/m68k/include/asm/bug.h @@ -8,16 +8,19 @@ #ifndef CONFIG_SUN3 #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif #else #define BUG() do { \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #endif diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h index 6f17528..ea53e41 100644 --- a/arch/sparc/include/asm/bug.h +++ b/arch/sparc/include/asm/bug.h @@ -9,10 +9,14 @@ void do_BUG(const char *file, int line); #define BUG() do { \ do_BUG(__FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else -#define BUG() __builtin_trap() +#define BUG() do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif #define HAVE_ARCH_BUG diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 963b755..a7613e1 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -52,6 +52,7 @@ struct bug_entry { #ifndef HAVE_ARCH_BUG #define BUG() do { \ printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 73bc63e..901c1cc 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -208,6 +208,15 @@ #endif /* + * calling noreturn functions, __builtin_unreachable() and __builtin_trap() + * confuse the stack allocation in gcc, leading to overly large stack + * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * Adding an empty inline assembly before it works around the problem + */ +#define barrier_before_unreachable() asm volatile("") + +/* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer * control elsewhere. @@ -217,7 +226,11 @@ * unreleased. Really, we need to have autoconf for the kernel. */ #define unreachable() \ - do { annotate_unreachable(); __builtin_unreachable(); } while (0) + do { \ + annotate_unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ + } while (0) /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e835fc0..ab4711c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define barrier_data(ptr) barrier() #endif +/* workaround for GCC PR82365 if needed */ +#ifndef barrier_before_unreachable +# define barrier_before_unreachable() do { } while (0) +#endif + /* Unreachable code */ #ifdef CONFIG_STACK_VALIDATION /* -- cgit v1.1 From bdefe01a6b14bde268741435ac854fda4ef7e847 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 14:45:58 -0800 Subject: selftests/memfd: add run_fuse_test.sh to TEST_FILES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While testing memfd tests, there is a missing script, as reported by kselftest: ./run_tests.sh: line 7: ./run_fuse_test.sh: No such file or directory Link: http://lkml.kernel.org/r/1517955779-11386-1-git-send-email-daniel.diaz@linaro.org Signed-off-by: Anders Roxell Signed-off-by: Daniel Díaz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/memfd/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index a5276a9..0862e6f 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -5,6 +5,7 @@ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_tests.sh +TEST_FILES := run_fuse_test.sh TEST_GEN_FILES := memfd_test fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) -- cgit v1.1 From 698d0831ba87b92ae10b15e8203cfd59f5a59a35 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 21 Feb 2018 14:46:01 -0800 Subject: vmalloc: fix __GFP_HIGHMEM usage for vmalloc_32 on 32b systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kai Heng Feng has noticed that BUG_ON(PageHighMem(pg)) triggers in drivers/media/common/saa7146/saa7146_core.c since 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly"). saa7146_vmalloc_build_pgtable uses vmalloc_32 and it is reasonable to expect that the resulting page is not in highmem. The above commit aimed to add __GFP_HIGHMEM only for those requests which do not specify any zone modifier gfp flag. vmalloc_32 relies on GFP_VMALLOC32 which should do the right thing. Except it has been missed that GFP_VMALLOC32 is an alias for GFP_KERNEL on 32b architectures. Thanks to Matthew to notice this. Fix the problem by unconditionally setting GFP_DMA32 in GFP_VMALLOC32 for !64b arches (as a bailout). This should do the right thing and use ZONE_NORMAL which should be always below 4G on 32b systems. Debugged by Matthew Wilcox. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20180212095019.GX21609@dhcp22.suse.cz Fixes: 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly”) Signed-off-by: Michal Hocko Reported-by: Kai Heng Feng Cc: Matthew Wilcox Cc: Laura Abbott Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6739420..ebff729 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1943,11 +1943,15 @@ void *vmalloc_exec(unsigned long size) } #if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32) -#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL +#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL) #elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA) -#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL +#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL) #else -#define GFP_VMALLOC32 GFP_KERNEL +/* + * 64b systems should always have either DMA or DMA32 zones. For others + * GFP_DMA32 should do the right thing and use the normal zone. + */ +#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL #endif /** -- cgit v1.1 From 908009e832b4e58796ed95d4544e3210bc0ff2c4 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 14:46:05 -0800 Subject: lib/Kconfig.debug: enable RUNTIME_TESTING_MENU Commit d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig to ease disabling it all") causes a regression when using runtime tests due to it defaults RUNTIME_TESTING_MENU to not set. Link: http://lkml.kernel.org/r/20180214133015.10090-1-anders.roxell@linaro.org Fixes: d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig to easedisabling it all") Signed-off-by: Anders Roxell Cc: Vincent Legoll Cc: Ingo Molnar Cc: Byungchul Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6088408..64155e3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1642,6 +1642,7 @@ config DMA_API_DEBUG menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" + def_bool y if RUNTIME_TESTING_MENU -- cgit v1.1 From 895f7b8e90200cf1a5dc313329369adf30e51f9a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 21 Feb 2018 14:46:09 -0800 Subject: mm: don't defer struct page initialization for Xen pv guests Commit f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") broke Xen pv domains in some configurations, as the "Pinned" information in struct page of early page tables could get lost. This will lead to the kernel trying to write directly into the page tables instead of asking the hypervisor to do so. The result is a crash like the following: BUG: unable to handle kernel paging request at ffff8801ead19008 IP: xen_set_pud+0x4e/0xd0 PGD 1c0a067 P4D 1c0a067 PUD 23a0067 PMD 1e9de0067 PTE 80100001ead19065 Oops: 0003 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-default+ #271 Hardware name: Dell Inc. Latitude E6440/0159N7, BIOS A07 06/26/2014 task: ffffffff81c10480 task.stack: ffffffff81c00000 RIP: e030:xen_set_pud+0x4e/0xd0 Call Trace: __pmd_alloc+0x128/0x140 ioremap_page_range+0x3f4/0x410 __ioremap_caller+0x1c3/0x2e0 acpi_os_map_iomem+0x175/0x1b0 acpi_tb_acquire_table+0x39/0x66 acpi_tb_validate_table+0x44/0x7c acpi_tb_verify_temp_table+0x45/0x304 acpi_reallocate_root_table+0x12d/0x141 acpi_early_init+0x4d/0x10a start_kernel+0x3eb/0x4a1 xen_start_kernel+0x528/0x532 Code: 48 01 e8 48 0f 42 15 a2 fd be 00 48 01 d0 48 ba 00 00 00 00 00 ea ff ff 48 c1 e8 0c 48 c1 e0 06 48 01 d0 48 8b 00 f6 c4 02 75 5d <4c> 89 65 00 5b 5d 41 5c c3 65 8b 05 52 9f fe 7e 89 c0 48 0f a3 RIP: xen_set_pud+0x4e/0xd0 RSP: ffffffff81c03cd8 CR2: ffff8801ead19008 ---[ end trace 38eca2e56f1b642e ]--- Avoid this problem by not deferring struct page initialization when running as Xen pv guest. Pavel said: : This is unique for Xen, so this particular issue won't effect other : configurations. I am going to investigate if there is a way to : re-enable deferred page initialization on xen guests. [akpm@linux-foundation.org: explicitly include xen.h] Link: http://lkml.kernel.org/r/20180216154101.22865-1-jgross@suse.com Fixes: f7f99100d8d95d ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Juergen Gross Reviewed-by: Pavel Tatashin Cc: Steven Sistare Cc: Daniel Jordan Cc: Bob Picco Cc: [4.15.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 81e18ce..cb41672 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,9 @@ static inline bool update_defer_init(pg_data_t *pgdat, /* Always populate low zones for address-constrained allocations */ if (zone_end < pgdat_end_pfn(pgdat)) return true; + /* Xen PV domains need page structures early */ + if (xen_pv_domain()) + return true; (*nr_initialised)++; if ((*nr_initialised > pgdat->static_init_pgcnt) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { -- cgit v1.1 From a7dcdf6ea1b264ee7655a8cafe844f06eed3906a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 23:07:33 +0100 Subject: bpf: clean up unused-variable warning The only user of this variable is inside of an #ifdef, causing a warning without CONFIG_INET: net/core/filter.c: In function '____bpf_sock_ops_cb_flags_set': net/core/filter.c:3382:6: error: unused variable 'val' [-Werror=unused-variable] int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; This replaces the #ifdef with a nicer IS_ENABLED() check that makes the code more readable and avoids the warning. Fixes: b13d88072172 ("bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann --- net/core/filter.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 08ab4c6..0c121ad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3381,17 +3381,13 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, struct sock *sk = bpf_sock->sk; int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; - if (!sk_fullsock(sk)) + if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) return -EINVAL; -#ifdef CONFIG_INET if (val) tcp_sk(sk)->bpf_sock_ops_cb_flags = val; return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); -#else - return -EINVAL; -#endif } static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { -- cgit v1.1 From b52db43a3d2e34b4ef2bb563d95227bb755027df Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 17:51:16 +0100 Subject: selftests/bpf: tcpbpf_kern: use in6_* macros from glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both glibc and the kernel have in6_* macros definitions. Build fails because it picks up wrong in6_* macro from the kernel header and not the header from glibc. Fixes build error below: clang -I. -I./include/uapi -I../../../include/uapi -Wno-compare-distinct-pointer-types \ -O2 -target bpf -emit-llvm -c test_tcpbpf_kern.c -o - | \ llc -march=bpf -mcpu=generic -filetype=obj -o .../tools/testing/selftests/bpf/test_tcpbpf_kern.o In file included from test_tcpbpf_kern.c:12: .../netinet/in.h:101:5: error: expected identifier IPPROTO_HOPOPTS = 0, /* IPv6 Hop-by-Hop options. */ ^ .../linux/in6.h:131:26: note: expanded from macro 'IPPROTO_HOPOPTS' ^ In file included from test_tcpbpf_kern.c:12: /usr/include/netinet/in.h:103:5: error: expected identifier IPPROTO_ROUTING = 43, /* IPv6 routing header. */ ^ .../linux/in6.h:132:26: note: expanded from macro 'IPPROTO_ROUTING' ^ In file included from test_tcpbpf_kern.c:12: .../netinet/in.h:105:5: error: expected identifier IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header. */ ^ Since both glibc and the kernel have in6_* macros definitions, use the one from glibc. Kernel headers will check for previous libc definitions by including include/linux/libc-compat.h. Reported-by: Daniel Díaz Signed-off-by: Anders Roxell Tested-by: Daniel Díaz Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tcpbpf_kern.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 57119ad..3e645ee 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include -- cgit v1.1 From 31a8260d3e34aaddf821388b8e0d589f44401f75 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 22:30:01 +0100 Subject: selftests/bpf: update gitignore with test_libbpf_open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bpf builds a test program for loading BPF ELF files. Add the executable to the .gitignore list. Signed-off-by: Anders Roxell Tested-by: Daniel Díaz Acked-by: David S. Miller Acked-by: Shuah Khan Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index cc15af2..9cf83f8 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -11,3 +11,4 @@ test_progs test_tcpbpf_user test_verifier_log feature +test_libbpf_open -- cgit v1.1 From 2a040f9f39d3b020c79e08dec26d12a7ce131c10 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 20 Feb 2018 19:47:45 -0700 Subject: seccomp, ptrace: switch get_metadata types to arch independent Commit 26500475ac1b ("ptrace, seccomp: add support for retrieving seccomp metadata") introduced `struct seccomp_metadata`, which contained unsigned longs that should be arch independent. The type of the flags member was chosen to match the corresponding argument to seccomp(), and so we need something at least as big as unsigned long. My understanding is that __u64 should fit the bill, so let's switch both types to that. While this is userspace facing, it was only introduced in 4.16-rc2, and so should be safe assuming it goes in before then. Reported-by: "Dmitry V. Levin" Signed-off-by: Tycho Andersen CC: Kees Cook CC: Oleg Nesterov Reviewed-by: "Dmitry V. Levin" Signed-off-by: Kees Cook --- include/uapi/linux/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index e46d82b..d5a1b8a 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -69,8 +69,8 @@ struct ptrace_peeksiginfo_args { #define PTRACE_SECCOMP_GET_METADATA 0x420d struct seccomp_metadata { - unsigned long filter_off; /* Input: which filter */ - unsigned int flags; /* Output: filter's flags */ + __u64 filter_off; /* Input: which filter */ + __u64 flags; /* Output: filter's flags */ }; /* Read signals from a shared (process wide) queue */ -- cgit v1.1 From 63bb0045b98ae821e56e27c2250e14bb0ae663e5 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 20 Feb 2018 19:47:46 -0700 Subject: ptrace, seccomp: tweak get_metadata behavior slightly Previously if users passed a small size for the input structure size, they would get get odd behavior. It doesn't make sense to pass a structure smaller than at least filter_off size, so let's just give -EINVAL in this case. This changes userspace visible behavior, but was only introduced in commit 26500475ac1b ("ptrace, seccomp: add support for retrieving seccomp metadata") in 4.16-rc2, so should be safe to change if merged before then. Reported-by: Eugene Syromiatnikov Signed-off-by: Tycho Andersen CC: Kees Cook CC: Oleg Nesterov Signed-off-by: Kees Cook --- kernel/seccomp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 61bd9dc..1245b23 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1076,14 +1076,16 @@ long seccomp_get_metadata(struct task_struct *task, size = min_t(unsigned long, size, sizeof(kmd)); - if (copy_from_user(&kmd, data, size)) + if (size < sizeof(kmd.filter_off)) + return -EINVAL; + + if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off))) return -EFAULT; filter = get_nth_filter(task, kmd.filter_off); if (IS_ERR(filter)) return PTR_ERR(filter); - memset(&kmd, 0, sizeof(kmd)); if (filter->log) kmd.flags |= SECCOMP_FILTER_FLAG_LOG; -- cgit v1.1 From d057dc4e35e16050befa3dda943876dab39cbf80 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 20 Feb 2018 19:47:47 -0700 Subject: seccomp: add a selftest for get_metadata Let's test that we get the flags correctly, and that we preserve the filter index across the ptrace(PTRACE_SECCOMP_GET_METADATA) correctly. Signed-off-by: Tycho Andersen CC: Kees Cook Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 61 +++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 24dbf63..92db488 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -141,6 +141,15 @@ struct seccomp_data { #define SECCOMP_FILTER_FLAG_LOG 2 #endif +#ifndef PTRACE_SECCOMP_GET_METADATA +#define PTRACE_SECCOMP_GET_METADATA 0x420d + +struct seccomp_metadata { + __u64 filter_off; /* Input: which filter */ + __u64 flags; /* Output: filter's flags */ +}; +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -2845,6 +2854,58 @@ TEST(get_action_avail) EXPECT_EQ(errno, EOPNOTSUPP); } +TEST(get_metadata) +{ + pid_t pid; + int pipefd[2]; + char buf; + struct seccomp_metadata md; + + ASSERT_EQ(0, pipe(pipefd)); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + /* one with log, one without */ + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_LOG, &prog)); + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); + + ASSERT_EQ(0, close(pipefd[0])); + ASSERT_EQ(1, write(pipefd[1], "1", 1)); + ASSERT_EQ(0, close(pipefd[1])); + + while (1) + sleep(100); + } + + ASSERT_EQ(0, close(pipefd[1])); + ASSERT_EQ(1, read(pipefd[0], &buf, 1)); + + ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); + ASSERT_EQ(pid, waitpid(pid, NULL, 0)); + + md.filter_off = 0; + ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); + EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); + EXPECT_EQ(md.filter_off, 0); + + md.filter_off = 1; + ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); + EXPECT_EQ(md.flags, 0); + EXPECT_EQ(md.filter_off, 1); + + ASSERT_EQ(0, kill(pid, SIGKILL)); +} + /* * TODO: * - add microbenchmarks -- cgit v1.1 From 7801c545e706674aeed40256eb806ad37b18ad71 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 7 Jan 2018 14:49:05 +0100 Subject: soc: imx: gpc: de-register power domains only if initialized If power domain information are missing in the device tree, no power domains get initialized. However, imx_gpc_remove tries to remove power domains always in the old DT binding case. Only remove power domains when imx_gpc_probe initialized them in first place. Fixes: 721cabf6c660 ("soc: imx: move PGC handling to a new GPC driver") Signed-off-by: Stefan Agner Reviewed-by: Lucas Stach Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- drivers/soc/imx/gpc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 53f7275..62bb724 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -470,13 +470,21 @@ static int imx_gpc_probe(struct platform_device *pdev) static int imx_gpc_remove(struct platform_device *pdev) { + struct device_node *pgc_node; int ret; + pgc_node = of_get_child_by_name(pdev->dev.of_node, "pgc"); + + /* bail out if DT too old and doesn't provide the necessary info */ + if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells") && + !pgc_node) + return 0; + /* * If the old DT binding is used the toplevel driver needs to * de-register the power domains */ - if (!of_get_child_by_name(pdev->dev.of_node, "pgc")) { + if (!pgc_node) { of_genpd_del_provider(pdev->dev.of_node); ret = pm_genpd_remove(&imx_gpc_domains[GPC_PGC_DOMAIN_PU].base); -- cgit v1.1 From 9cfad4a5f4f795715c8657fb7dc22574a6046327 Mon Sep 17 00:00:00 2001 From: "Michael Kelley (EOSG)" Date: Wed, 24 Jan 2018 22:14:08 +0000 Subject: scsi: storvsc: Spread interrupts when picking a channel for I/O requests Update the algorithm in storvsc_do_io to look for a channel starting with the current CPU + 1 and wrap around (within the current NUMA node). This spreads VMbus interrupts more evenly across CPUs. Previous code always started with first CPU in the current NUMA node, skewing the interrupt load to that CPU. Signed-off-by: Michael Kelley Reviewed-by: Long Li Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 8eadb30..6205107 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1310,7 +1310,8 @@ static int storvsc_do_io(struct hv_device *device, */ cpumask_and(&alloced_mask, &stor_device->alloced_cpus, cpumask_of_node(cpu_to_node(q_num))); - for_each_cpu(tgt_cpu, &alloced_mask) { + for_each_cpu_wrap(tgt_cpu, &alloced_mask, + outgoing_channel->target_cpu + 1) { if (tgt_cpu != outgoing_channel->target_cpu) { outgoing_channel = stor_device->stor_chns[tgt_cpu]; -- cgit v1.1 From 5539d31a04b3b9ac5f55edb766f1d21de683fad1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 21 Feb 2018 22:54:37 +1100 Subject: powerpc/pseries: Fix duplicate firmware feature for DRC_INFO We had a mid-air collision between two new firmware features, DRMEM_V2 and DRC_INFO, and they ended up with the same value. No one's actually reported any problems, presumably because the new firmware that supports both properties is not widely available, and the two properties tend to be enabled together. Still if we ever had one enabled but not the other, the bugs that could result are many and varied. So fix it. Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info firmware feature") Signed-off-by: Michael Ellerman Reviewed-by: Tyrel Datwyler --- arch/powerpc/include/asm/firmware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 511acfd..535add3 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,7 +52,7 @@ #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) -#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) #ifndef __ASSEMBLY__ -- cgit v1.1 From c7a3275e0f9e461bb8942132aa6914aae59e7103 Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Tue, 13 Feb 2018 14:02:53 -0600 Subject: powerpc/pseries: Revert support for ibm,drc-info devtree property This reverts commit 02ef6dd8109b581343ebeb1c4c973513682535d6. The earlier patch tried to enable support for a new property "ibm,drc-info" on powerpc systems. Unfortunately, some errors in the associated patch set break things in some of the DLPAR operations. In particular when attempting to hot-add a new CPU or set of CPUs, the original patch failed to properly calculate the available resources, and aborted the operation. In addition, the original set missed several opportunities to compress and reuse common code. As the associated patch set was meant to provide an optimization of storage and performance of a set of device-tree properties for future systems with large amounts of resources, reverting just restores the previous behavior for existing systems. It seems unnecessary to enable this feature and introduce the consequent problems in the field that it will cause at this time, so please revert it for now until testing of the corrections are finished properly. Fixes: 02ef6dd8109b ("powerpc: Enable support for ibm,drc-info devtree property") Signed-off-by: Michael W. Bringmann Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index adf044d..d22c41c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,7 +874,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .mmu = 0, .hash_ext = 0, .radix_ext = 0, - .byte22 = OV5_FEAT(OV5_DRC_INFO), + .byte22 = 0, }, /* option vector 6: IBM PAPR hints */ -- cgit v1.1 From 083b20907185b076f21c265b30fe5b5f24c03d8c Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Tue, 20 Feb 2018 14:49:20 -0500 Subject: powerpc/bpf/jit: Fix 32-bit JIT for seccomp_data access I am using SECCOMP to filter syscalls on a ppc32 platform, and noticed that the JIT compiler was failing on the BPF even though the interpreter was working fine. The issue was that the compiler was missing one of the instructions used by SECCOMP, so here is a patch to enable JIT for that instruction. Fixes: eb84bab0fb38 ("ppc: Kconfig: Enable BPF JIT on ppc32") Signed-off-by: Mark Lord Acked-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/net/bpf_jit_comp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 872d1f6..a9636d8 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -327,6 +327,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; + case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ + PPC_LWZ_OFFS(r_A, r_skb, K); + break; case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; -- cgit v1.1 From 9ff549ffb4fb4cc9a4b24d1de9dc3e68287797c4 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Fri, 16 Feb 2018 20:39:57 -0200 Subject: scsi: mpt3sas: fix oops in error handlers after shutdown/unload This patch adds checks for 'ioc->remove_host' in the SCSI error handlers, so not to access pointers/resources potentially freed in the PCI shutdown/module unload path. The error handlers may be invoked after shutdown/unload, depending on other components. This problem was observed with kexec on a system with a mpt3sas based adapter and an infiniband adapter which takes long enough to shutdown: The mpt3sas driver finished shutting down / disabled interrupt handling, thus some commands have not finished and timed out. Since the system was still running (waiting for the infiniband adapter to shutdown), the scsi error handler for task abort of mpt3sas was invoked, and hit an oops -- either in scsih_abort() because 'ioc->scsi_lookup' was NULL without commit dbec4c9040ed ("scsi: mpt3sas: lockless command submission"), or later up in scsih_host_reset() (with or without that commit), because it eventually called mpt3sas_base_get_iocstate(). After the above commit, the oops in scsih_abort() does not occur anymore (_scsih_scsi_lookup_find_by_scmd() is no longer called), but that commit is too big and out of the scope of linux-stable, where this patch might help, so still go for the changes. Also, this might help to prevent similar errors in the future, in case code changes and possibly tries to access freed stuff. Note the fix in scsih_host_reset() is still important anyway. Signed-off-by: Mauricio Faria de Oliveira Acked-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 74fca18..5ab3caf 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2835,7 +2835,8 @@ scsih_abort(struct scsi_cmnd *scmd) _scsih_tm_display_info(ioc, scmd); sas_device_priv_data = scmd->device->hostdata; - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || + ioc->remove_host) { sdev_printk(KERN_INFO, scmd->device, "device been deleted! scmd(%p)\n", scmd); scmd->result = DID_NO_CONNECT << 16; @@ -2898,7 +2899,8 @@ scsih_dev_reset(struct scsi_cmnd *scmd) _scsih_tm_display_info(ioc, scmd); sas_device_priv_data = scmd->device->hostdata; - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || + ioc->remove_host) { sdev_printk(KERN_INFO, scmd->device, "device been deleted! scmd(%p)\n", scmd); scmd->result = DID_NO_CONNECT << 16; @@ -2961,7 +2963,8 @@ scsih_target_reset(struct scsi_cmnd *scmd) _scsih_tm_display_info(ioc, scmd); sas_device_priv_data = scmd->device->hostdata; - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || + ioc->remove_host) { starget_printk(KERN_INFO, starget, "target been deleted! scmd(%p)\n", scmd); scmd->result = DID_NO_CONNECT << 16; @@ -3019,7 +3022,7 @@ scsih_host_reset(struct scsi_cmnd *scmd) ioc->name, scmd); scsi_print_command(scmd); - if (ioc->is_driver_loading) { + if (ioc->is_driver_loading || ioc->remove_host) { pr_info(MPT3SAS_FMT "Blocking the host reset\n", ioc->name); r = FAILED; -- cgit v1.1 From c666d3be99c000bb889a33353e9be0fa5808d3de Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 16 Feb 2018 20:39:58 -0200 Subject: scsi: mpt3sas: wait for and flush running commands on shutdown/unload This patch finishes all outstanding SCSI IO commands (but not other commands, e.g., task management) in the shutdown and unload paths. It first waits for the commands to complete (this is done after setting 'ioc->remove_host = 1 ', which prevents new commands to be queued) then it flushes commands that might still be running. This avoids triggering error handling (e.g., abort command) for all commands possibly completed by the adapter after interrupts disabled. [mauricfo: introduced something in commit message.] Signed-off-by: Sreekanth Reddy Tested-by: Mauricio Faria de Oliveira Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 8 ++++---- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 +++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 10 +++++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 59a87ca..0aafbfd 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -6297,14 +6297,14 @@ _base_reset_handler(struct MPT3SAS_ADAPTER *ioc, int reset_phase) } /** - * _wait_for_commands_to_complete - reset controller + * mpt3sas_wait_for_commands_to_complete - reset controller * @ioc: Pointer to MPT_ADAPTER structure * * This function is waiting 10s for all pending commands to complete * prior to putting controller in reset. */ -static void -_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc) { u32 ioc_state; @@ -6377,7 +6377,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, is_fault = 1; } _base_reset_handler(ioc, MPT3_IOC_PRE_RESET); - _wait_for_commands_to_complete(ioc); + mpt3sas_wait_for_commands_to_complete(ioc); _base_mask_interrupts(ioc); r = _base_make_ioc_ready(ioc, type); if (r) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 789bc42..99ccf83 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1433,6 +1433,9 @@ void mpt3sas_base_update_missing_delay(struct MPT3SAS_ADAPTER *ioc, int mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc); +void +mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc); + /* scsih shared API */ struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 5ab3caf..c2ea13c7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4456,7 +4456,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) st = scsi_cmd_priv(scmd); mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); - if (ioc->pci_error_recovery) + if (ioc->pci_error_recovery || ioc->remove_host) scmd->result = DID_NO_CONNECT << 16; else scmd->result = DID_RESET << 16; @@ -9742,6 +9742,10 @@ static void scsih_remove(struct pci_dev *pdev) unsigned long flags; ioc->remove_host = 1; + + mpt3sas_wait_for_commands_to_complete(ioc); + _scsih_flush_running_cmds(ioc); + _scsih_fw_event_cleanup_queue(ioc); spin_lock_irqsave(&ioc->fw_event_lock, flags); @@ -9818,6 +9822,10 @@ scsih_shutdown(struct pci_dev *pdev) unsigned long flags; ioc->remove_host = 1; + + mpt3sas_wait_for_commands_to_complete(ioc); + _scsih_flush_running_cmds(ioc); + _scsih_fw_event_cleanup_queue(ioc); spin_lock_irqsave(&ioc->fw_event_lock, flags); -- cgit v1.1 From 5a1e59533380a3fd04593e4ab2d4633ebf7745c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2018 07:24:08 -0800 Subject: nvme-fabrics: don't check for non-NULL module in nvmf_register_transport THIS_MODULE evaluates to NULL when used from code built into the kernel, thus breaking built-in transport modules. Remove the bogus check. Fixes: 0de5cd36 ("nvme-fabrics: protect against module unload during create_ctrl") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5dd4cee..a1c58e3 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect); */ int nvmf_register_transport(struct nvmf_transport_ops *ops) { - if (!ops->create_ctrl || !ops->module) + if (!ops->create_ctrl) return -EINVAL; down_write(&nvmf_transports_rwsem); -- cgit v1.1 From 0d30992395b1ed0e006960de1651b44cd51be791 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2018 07:24:09 -0800 Subject: nvme-rdma: use blk_rq_payload_bytes instead of blk_rq_bytes blk_rq_bytes does the wrong thing for special payloads like discards and might cause the driver to not set up a SGL. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2bc059f..acc9eb2 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1051,7 +1051,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - if (!blk_rq_bytes(rq)) + if (!blk_rq_payload_bytes(rq)) return; if (req->mr) { @@ -1166,7 +1166,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, c->common.flags |= NVME_CMD_SGL_METABUF; - if (!blk_rq_bytes(rq)) + if (!blk_rq_payload_bytes(rq)) return nvme_rdma_set_sg_null(c); req->sg_table.sgl = req->first_sgl; -- cgit v1.1 From 796b0b8d8dea191d9f64e0be8ab58d8f3586bcde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Feb 2018 07:24:10 -0800 Subject: nvmet-loop: use blk_rq_payload_bytes for sgl selection blk_rq_bytes does the wrong thing for special payloads like discards and might cause the driver to not set up a SGL. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Keith Busch --- drivers/nvme/target/loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 7991ec3..861d150 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -184,7 +184,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - if (blk_rq_bytes(req)) { + if (blk_rq_payload_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), @@ -193,7 +193,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); - iod->req.transfer_len = blk_rq_bytes(req); + iod->req.transfer_len = blk_rq_payload_bytes(req); } blk_mq_start_request(req); -- cgit v1.1 From dc24b7b49a53c7ee5502c877b133558acec0b3f8 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 19 Feb 2018 11:27:09 +0100 Subject: s390/clean-up: use CFI_* macros in entry.S Commit f19fbd5ed642 ("s390: introduce execute-trampolines for branches") introduces .cfi_* assembler directives. Instead of using the directives directly, use the macros from asm/dwarf.h. This also ensures that the dwarf debug information are created in the .debug_frame section. Fixes: f19fbd5ed642 ("s390: introduce execute-trampolines for branches") Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 13a133a..9ec728f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) .hidden \name .type \name,@function \name: - .cfi_startproc + CFI_STARTPROC #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES exrl 0,0f #else @@ -239,7 +240,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) #endif j . 0: br \reg - .cfi_endproc + CFI_ENDPROC .endm GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 -- cgit v1.1 From f97a6b6c47d2f329a24f92cc0ca3c6df5727ba73 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 6 Feb 2018 14:59:43 +0100 Subject: s390/cio: fix ccw_device_start_timeout API There are cases a device driver can't start IO because the device is currently in use by cio. In this case the device driver is notified when the device is usable again. Using ccw_device_start_timeout we would set the timeout (and change an existing timeout) before we test for internal usage. Worst case this could lead to an unexpected timer deletion. Fix this by setting the timeout after we test for internal usage. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_ops.c | 72 +++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 1caf6a3..75ce12a 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -159,7 +159,7 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) } /** - * ccw_device_start_key() - start a s390 channel program with key + * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key * @cdev: target ccw device * @cpa: logical start address of channel program * @intparm: user specific interruption parameter; will be presented back to @@ -170,10 +170,15 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) * @key: storage key to be used for the I/O * @flags: additional flags; defines the action to be performed for I/O * processing. + * @expires: timeout value in jiffies * * Start a S/390 channel program. When the interrupt arrives, the * IRQ handler is called, either immediately, delayed (dev-end missing, * or sense required) or never (no IRQ handler registered). + * This function notifies the device driver if the channel program has not + * completed during the time specified by @expires. If a timeout occurs, the + * channel program is terminated via xsch, hsch or csch, and the device's + * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT). * Returns: * %0, if the operation was successful; * -%EBUSY, if the device is busy, or status pending; @@ -182,9 +187,9 @@ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) * Context: * Interrupts disabled, ccw device lock held */ -int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, __u8 key, - unsigned long flags) +int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, __u8 key, + unsigned long flags, int expires) { struct subchannel *sch; int ret; @@ -224,6 +229,8 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, switch (ret) { case 0: cdev->private->intparm = intparm; + if (expires) + ccw_device_set_timeout(cdev, expires); break; case -EACCES: case -ENODEV: @@ -234,7 +241,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, } /** - * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key + * ccw_device_start_key() - start a s390 channel program with key * @cdev: target ccw device * @cpa: logical start address of channel program * @intparm: user specific interruption parameter; will be presented back to @@ -245,15 +252,10 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, * @key: storage key to be used for the I/O * @flags: additional flags; defines the action to be performed for I/O * processing. - * @expires: timeout value in jiffies * * Start a S/390 channel program. When the interrupt arrives, the * IRQ handler is called, either immediately, delayed (dev-end missing, * or sense required) or never (no IRQ handler registered). - * This function notifies the device driver if the channel program has not - * completed during the time specified by @expires. If a timeout occurs, the - * channel program is terminated via xsch, hsch or csch, and the device's - * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT). * Returns: * %0, if the operation was successful; * -%EBUSY, if the device is busy, or status pending; @@ -262,19 +264,12 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, * Context: * Interrupts disabled, ccw device lock held */ -int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, __u8 key, - unsigned long flags, int expires) +int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, __u8 key, + unsigned long flags) { - int ret; - - if (!cdev) - return -ENODEV; - ccw_device_set_timeout(cdev, expires); - ret = ccw_device_start_key(cdev, cpa, intparm, lpm, key, flags); - if (ret != 0) - ccw_device_set_timeout(cdev, 0); - return ret; + return ccw_device_start_timeout_key(cdev, cpa, intparm, lpm, key, + flags, 0); } /** @@ -489,18 +484,20 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id) EXPORT_SYMBOL(ccw_device_get_id); /** - * ccw_device_tm_start_key() - perform start function + * ccw_device_tm_start_timeout_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler * @lpm: mask of paths to use * @key: storage key to use for storage access + * @expires: time span in jiffies after which to abort request * * Start the tcw on the given ccw device. Return zero on success, non-zero * otherwise. */ -int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, - unsigned long intparm, u8 lpm, u8 key) +int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, + unsigned long intparm, u8 lpm, u8 key, + int expires) { struct subchannel *sch; int rc; @@ -527,37 +524,32 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, return -EACCES; } rc = cio_tm_start_key(sch, tcw, lpm, key); - if (rc == 0) + if (rc == 0) { cdev->private->intparm = intparm; + if (expires) + ccw_device_set_timeout(cdev, expires); + } return rc; } -EXPORT_SYMBOL(ccw_device_tm_start_key); +EXPORT_SYMBOL(ccw_device_tm_start_timeout_key); /** - * ccw_device_tm_start_timeout_key() - perform start function + * ccw_device_tm_start_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler * @lpm: mask of paths to use * @key: storage key to use for storage access - * @expires: time span in jiffies after which to abort request * * Start the tcw on the given ccw device. Return zero on success, non-zero * otherwise. */ -int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, - unsigned long intparm, u8 lpm, u8 key, - int expires) +int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, + unsigned long intparm, u8 lpm, u8 key) { - int ret; - - ccw_device_set_timeout(cdev, expires); - ret = ccw_device_tm_start_key(cdev, tcw, intparm, lpm, key); - if (ret != 0) - ccw_device_set_timeout(cdev, 0); - return ret; + return ccw_device_tm_start_timeout_key(cdev, tcw, intparm, lpm, key, 0); } -EXPORT_SYMBOL(ccw_device_tm_start_timeout_key); +EXPORT_SYMBOL(ccw_device_tm_start_key); /** * ccw_device_tm_start() - perform start function -- cgit v1.1 From 770b55c995d171f026a9efb85e71e3b1ea47b93d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 7 Feb 2018 13:18:19 +0100 Subject: s390/cio: fix return code after missing interrupt When a timeout occurs for users of ccw_device_start_timeout we will stop the IO and call the drivers int handler with the irb pointer set to ERR_PTR(-ETIMEDOUT). Sometimes however we'd set the irb pointer to ERR_PTR(-EIO) which is not intended. Just set the correct value in all codepaths. Reported-by: Julian Wiedmann Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_fsm.c | 6 ++++-- drivers/s390/cio/io_sch.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 1319122..384f085 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -795,6 +795,7 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_set_timeout(cdev, 0); cdev->private->iretry = 255; + cdev->private->async_kill_io_rc = -ETIMEDOUT; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); @@ -871,7 +872,7 @@ ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) /* OK, i/o is dead now. Call interrupt handler. */ if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(cdev->private->async_kill_io_rc)); } static void @@ -888,7 +889,7 @@ ccw_device_killing_timeout(struct ccw_device *cdev, enum dev_event dev_event) ccw_device_online_verify(cdev, 0); if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, - ERR_PTR(-EIO)); + ERR_PTR(cdev->private->async_kill_io_rc)); } void ccw_device_kill_io(struct ccw_device *cdev) @@ -896,6 +897,7 @@ void ccw_device_kill_io(struct ccw_device *cdev) int ret; cdev->private->iretry = 255; + cdev->private->async_kill_io_rc = -EIO; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index af571d8..90e4e3a 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -157,6 +157,7 @@ struct ccw_device_private { unsigned long intparm; /* user interruption parameter */ struct qdio_irq *qdio_data; struct irb irb; /* device status */ + int async_kill_io_rc; struct senseid senseid; /* SenseID info */ struct pgid pgid[8]; /* path group IDs per chpid*/ struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */ -- cgit v1.1 From 410d5e13e7638bc146321671e223d56495fbf3c7 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 12 Feb 2018 12:01:03 +0100 Subject: s390/cio: clear timer when terminating driver I/O When we terminate driver I/O (because we need to stop using a certain channel path) we also need to ensure that a timer (which may have been set up using ccw_device_start_timeout) is cleared. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device_fsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 384f085..9169af7 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -896,6 +896,7 @@ void ccw_device_kill_io(struct ccw_device *cdev) { int ret; + ccw_device_set_timeout(cdev, 0); cdev->private->iretry = 255; cdev->private->async_kill_io_rc = -EIO; ret = ccw_device_cancel_halt_clear(cdev); -- cgit v1.1 From ed7158bae41044ff696e9aafd5ada46d391a5a2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 22 Feb 2018 10:54:55 +0100 Subject: treewide/trivial: Remove ';;$' typo noise On lkml suggestions were made to split up such trivial typo fixes into per subsystem patches: --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u32 *handles = (u32 *)uga_handle;; + u32 *handles = (u32 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; This patch is the result of the following script: $ sed -i 's/;;$/;/g' $(git grep -E ';;$' | grep "\.[ch]:" | grep -vwE 'for|ia64' | cut -d: -f1 | sort | uniq) ... followed by manual review to make sure it's all good. Splitting this up is just crazy talk, let's get over with this and just do it. Reported-by: Pavel Machek Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/unwind.c | 2 +- arch/arm/kernel/time.c | 2 +- arch/arm64/kernel/ptrace.c | 2 +- arch/powerpc/kvm/book3s_xive.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- arch/x86/boot/compressed/eboot.c | 4 ++-- block/sed-opal.c | 2 +- drivers/clocksource/mips-gic-timer.c | 4 ++-- drivers/clocksource/timer-sun5i.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 2 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 2 +- drivers/gpu/drm/scheduler/gpu_scheduler.c | 2 +- drivers/iommu/intel-svm.c | 2 +- drivers/md/raid1.c | 2 +- drivers/soc/imx/gpc.c | 2 +- 17 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 9d27331..ec12fe1 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -373,7 +373,7 @@ static void arc_chk_core_config(void) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; int saved = 0, present = 0; - char *opt_nm = NULL;; + char *opt_nm = NULL; if (!cpu->extn.timer0) panic("Timer0 is not present!\n"); diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 333daab..183391d 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -366,7 +366,7 @@ static void init_unwind_hdr(struct unwind_table *table, return; ret_err: - panic("Attention !!! Dwarf FDE parsing errors\n");; + panic("Attention !!! Dwarf FDE parsing errors\n"); } #ifdef CONFIG_MODULES diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 629f8e9..cf2701c 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -83,7 +83,7 @@ static void dummy_clock_access(struct timespec64 *ts) } static clock_access_fn __read_persistent_clock = dummy_clock_access; -static clock_access_fn __read_boot_clock = dummy_clock_access;; +static clock_access_fn __read_boot_clock = dummy_clock_access; void read_persistent_clock64(struct timespec64 *ts) { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6618036..9ae31f7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1419,7 +1419,7 @@ static int compat_ptrace_hbp_get(unsigned int note_type, u64 addr = 0; u32 ctrl = 0; - int err, idx = compat_ptrace_hbp_num_to_idx(num);; + int err, idx = compat_ptrace_hbp_num_to_idx(num); if (num & 1) { err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f0f5cd4..f9818d7 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -188,7 +188,7 @@ static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio) if (!qpage) { pr_err("Failed to allocate queue %d for VCPU %d\n", prio, xc->server_num); - return -ENOMEM;; + return -ENOMEM; } memset(qpage, 0, 1 << xive->q_order); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 496e476..a6c92c7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1854,7 +1854,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) s64 rc; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENODEV;; + return -ENODEV; pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) { diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 353e20c..886a911 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u32 *handles = (u32 *)uga_handle;; + u32 *handles = (u32 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; @@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u64 *handles = (u64 *)uga_handle;; + u64 *handles = (u64 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; diff --git a/block/sed-opal.c b/block/sed-opal.c index 9ed51d0c..e4929ee 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -490,7 +490,7 @@ static int opal_discovery0_end(struct opal_dev *dev) if (!found_com_id) { pr_debug("Could not find OPAL comid for device. Returning early\n"); - return -EOPNOTSUPP;; + return -EOPNOTSUPP; } dev->comid = comid; diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index a04808a..65e18c8 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -205,12 +205,12 @@ static int __init gic_clocksource_of_init(struct device_node *node) } else if (of_property_read_u32(node, "clock-frequency", &gic_frequency)) { pr_err("GIC frequency not specified.\n"); - return -EINVAL;; + return -EINVAL; } gic_timer_irq = irq_of_parse_and_map(node, 0); if (!gic_timer_irq) { pr_err("GIC timer IRQ not specified.\n"); - return -EINVAL;; + return -EINVAL; } ret = __gic_clocksource_init(); diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 2a3fe83..3b56ea3 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -334,7 +334,7 @@ static int __init sun5i_timer_init(struct device_node *node) timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); if (IS_ERR(timer_base)) { pr_err("Can't map registers\n"); - return PTR_ERR(timer_base);; + return PTR_ERR(timer_base); } irq = irq_of_parse_and_map(node, 0); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 61e8c3e..33d91e4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -718,7 +718,7 @@ static enum link_training_result perform_channel_equalization_sequence( uint32_t retries_ch_eq; enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; union lane_align_status_updated dpcd_lane_status_updated = {{0}}; - union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}};; + union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {{{0}}}; hw_tr_pattern = get_supported_tp(link); diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 4c3223a..adb6e7b 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -162,7 +162,7 @@ static int pp_hw_init(void *handle) if(hwmgr->smumgr_funcs->start_smu(pp_handle->hwmgr)) { pr_err("smc start failed\n"); hwmgr->smumgr_funcs->smu_fini(pp_handle->hwmgr); - return -EINVAL;; + return -EINVAL; } if (ret == PP_DPM_DISABLED) goto exit; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 3e9bba4..6d8e3a9 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -680,7 +680,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) } else { dev_info(&pdev->dev, "no iommu, fallback to phys contig buffers for scanout\n"); - aspace = NULL;; + aspace = NULL; } pm_runtime_put_sync(&pdev->dev); diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 2c18996..0d95888 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -461,7 +461,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo { struct drm_sched_job *s_job; struct drm_sched_entity *entity, *tmp; - int i;; + int i; spin_lock(&sched->job_list_lock); list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 35a408d..99bc9bd 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -205,7 +205,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d * for example, an "address" value of 0x12345f000 will * flush from 0x123440000 to 0x12347ffff (256KiB). */ unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); - unsigned long mask = __rounddown_pow_of_two(address ^ last);; + unsigned long mask = __rounddown_pow_of_two(address ^ last); desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE; } else { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b2eae33..f978edd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1108,7 +1108,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, bio_copy_data(behind_bio, bio); skip_copy: - r1_bio->behind_master_bio = behind_bio;; + r1_bio->behind_master_bio = behind_bio; set_bit(R1BIO_BehindIO, &r1_bio->state); return; diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 53f7275..cfb42f5 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -348,7 +348,7 @@ static int imx_gpc_old_dt_init(struct device *dev, struct regmap *regmap, if (i == 1) { domain->supply = devm_regulator_get(dev, "pu"); if (IS_ERR(domain->supply)) - return PTR_ERR(domain->supply);; + return PTR_ERR(domain->supply); ret = imx_pgc_get_clocks(dev, domain); if (ret) -- cgit v1.1 From 7229b12f5da33d5c376ee264f063703844b8092d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Feb 2018 17:33:45 +0100 Subject: ALSA: x86: hdmi: Add single_port option for compatible behavior The recent support for the multiple PCM devices allowed user to use multiple HDMI/DP outputs, but at the same time, the PCM stream assignment has been changed, too. Due to that, the former PCM#0 (there was only one stream in the past) is likely assigned to a different one (e.g. PCM#2), and it ends up with the regression when user sticks with the fixed configuration using the device#0. Although the multiple monitor support shouldn't matter when user deploys the backend like PulseAudio that checks the jack detection state, the behavior change isn't always acceptable for some users. As a mitigation, this patch introduces an option to switch the behavior back to the old-good-days: when the new option, single_port=1, is passed, the driver creates only a single PCM device, and it's assigned to the first connected one, like the earlier versions did. The option is turned off as default still to support the multiple monitors. Fixes: 8a2d6ae1f737 ("ALSA: x86: Register multiple PCM devices for the LPE audio card") Reported-and-tested-by: Hubert Mantel Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index a095150..96115c4 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -50,6 +50,7 @@ /*standard module options for ALSA. This module supports only one card*/ static int hdmi_card_index = SNDRV_DEFAULT_IDX1; static char *hdmi_card_id = SNDRV_DEFAULT_STR1; +static bool single_port; module_param_named(index, hdmi_card_index, int, 0444); MODULE_PARM_DESC(index, @@ -57,6 +58,9 @@ MODULE_PARM_DESC(index, module_param_named(id, hdmi_card_id, charp, 0444); MODULE_PARM_DESC(id, "ID string for INTEL Intel HDMI Audio controller."); +module_param(single_port, bool, 0444); +MODULE_PARM_DESC(single_port, + "Single-port mode (for compatibility)"); /* * ELD SA bits in the CEA Speaker Allocation data block @@ -1579,7 +1583,11 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) static void notify_audio_lpe(struct platform_device *pdev, int port) { struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); - struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + struct snd_intelhad *ctx; + + ctx = &card_ctx->pcm_ctx[single_port ? 0 : port]; + if (single_port) + ctx->port = port; schedule_work(&ctx->hdmi_audio_wq); } @@ -1816,7 +1824,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) init_channel_allocations(); card_ctx->num_pipes = pdata->num_pipes; - card_ctx->num_ports = pdata->num_ports; + card_ctx->num_ports = single_port ? 1 : pdata->num_ports; for_each_port(card_ctx, port) { struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; @@ -1824,7 +1832,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) ctx->card_ctx = card_ctx; ctx->dev = card_ctx->dev; - ctx->port = port; + ctx->port = single_port ? -1 : port; ctx->pipe = -1; INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); -- cgit v1.1 From fe32a815f05c8568669a062587435e15f9345764 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 8 Feb 2018 14:54:05 +0100 Subject: i2c: bcm2835: Set up the rising/falling edge delays We were leaving them in the power on state (or the state the firmware had set up for some client, if we were taking over from them). The boot state was 30 core clocks, when we actually want to sample some time after (to make sure that the new input bit has actually arrived). Signed-off-by: Eric Anholt Signed-off-by: Boris Brezillon Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-bcm2835.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index cd07a69..44deae7 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -50,6 +50,9 @@ #define BCM2835_I2C_S_CLKT BIT(9) #define BCM2835_I2C_S_LEN BIT(10) /* Fake bit for SW error reporting */ +#define BCM2835_I2C_FEDL_SHIFT 16 +#define BCM2835_I2C_REDL_SHIFT 0 + #define BCM2835_I2C_CDIV_MIN 0x0002 #define BCM2835_I2C_CDIV_MAX 0xFFFE @@ -81,7 +84,7 @@ static inline u32 bcm2835_i2c_readl(struct bcm2835_i2c_dev *i2c_dev, u32 reg) static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev) { - u32 divider; + u32 divider, redl, fedl; divider = DIV_ROUND_UP(clk_get_rate(i2c_dev->clk), i2c_dev->bus_clk_rate); @@ -100,6 +103,22 @@ static int bcm2835_i2c_set_divider(struct bcm2835_i2c_dev *i2c_dev) bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DIV, divider); + /* + * Number of core clocks to wait after falling edge before + * outputting the next data bit. Note that both FEDL and REDL + * can't be greater than CDIV/2. + */ + fedl = max(divider / 16, 1u); + + /* + * Number of core clocks to wait after rising edge before + * sampling the next incoming data bit. + */ + redl = max(divider / 4, 1u); + + bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_DEL, + (fedl << BCM2835_I2C_FEDL_SHIFT) | + (redl << BCM2835_I2C_REDL_SHIFT)); return 0; } -- cgit v1.1 From c396b9a03e3bb5e95e036bdb0c7d614e0e1a4e3d Mon Sep 17 00:00:00 2001 From: Patryk Kocielnik Date: Fri, 26 Jan 2018 21:19:26 +0100 Subject: i2c: busses: i2c-sirf: Fix spelling: "formular" -> "formula". Fix spelling. Signed-off-by: Patryk Kocielnik [wsa: fixed "Initialization", too] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sirf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c index 2fd8b6d..87197ec 100644 --- a/drivers/i2c/busses/i2c-sirf.c +++ b/drivers/i2c/busses/i2c-sirf.c @@ -341,7 +341,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, adap); init_completion(&siic->done); - /* Controller Initalisation */ + /* Controller initialisation */ writel(SIRFSOC_I2C_RESET, siic->base + SIRFSOC_I2C_CTRL); while (readl(siic->base + SIRFSOC_I2C_CTRL) & SIRFSOC_I2C_RESET) @@ -369,7 +369,7 @@ static int i2c_sirfsoc_probe(struct platform_device *pdev) * but they start to affect the speed when clock is set to faster * frequencies. * Through the actual tests, use the different user_div value(which - * in the divider formular 'Fio / (Fi2c * user_div)') to adapt + * in the divider formula 'Fio / (Fi2c * user_div)') to adapt * the different ranges of i2c bus clock frequency, to make the SCL * more accurate. */ -- cgit v1.1 From d1fa74520dcdbeae891b30035e6c51aafa35306d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 17 Feb 2018 22:58:43 +0200 Subject: i2c: designware: Consider SCL GPIO optional GPIO library can return -ENOSYS for the failed request. Instead of failing ->probe() in this case override error code to 0. Fixes: ca382f5b38f3 ("i2c: designware: add i2c gpio recovery option") Reported-by: Dominik Brodowski Signed-off-by: Andy Shevchenko Tested-by: Dominik Brodowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 55926ef..0573253 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -644,7 +644,7 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev) gpio = devm_gpiod_get(dev->dev, "scl", GPIOD_OUT_HIGH); if (IS_ERR(gpio)) { r = PTR_ERR(gpio); - if (r == -ENOENT) + if (r == -ENOENT || r == -ENOSYS) return 0; return r; } -- cgit v1.1 From 15122ee2c515a253b0c66a3e618bc7ebe35105eb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 21 Feb 2018 12:59:27 +0000 Subject: arm64: Enforce BBM for huge IO/VMAP mappings ioremap_page_range doesn't honour break-before-make and attempts to put down huge mappings (using p*d_set_huge) over the top of pre-existing table entries. This leads to us leaking page table memory and also gives rise to TLB conflicts and spurious aborts, which have been seen in practice on Cortex-A75. Until this has been resolved, refuse to put block mappings when the existing entry is found to be present. Fixes: 324420bf91f60 ("arm64: add support for ioremap() block mappings") Reported-by: Hanjun Guo Reported-by: Lei Li Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3161b85..84a019f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,6 +933,11 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pud_present(READ_ONCE(*pudp))) + return 0; + BUG_ON(phys & ~PUD_MASK); set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; @@ -942,6 +947,11 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + + /* ioremap_page_range doesn't honour BBM */ + if (pmd_present(READ_ONCE(*pmdp))) + return 0; + BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; -- cgit v1.1 From d5feec04fe578c8dbd9e2e1439afc2f0af761ed4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 22 Feb 2018 13:42:29 +0100 Subject: s390: do not bypass BPENTER for interrupt system calls The system call path can be interrupted before the switch back to the standard branch prediction with BPENTER has been done. The critical section cleanup code skips forward to .Lsysc_do_svc and bypasses the BPENTER. In this case the kernel and all subsequent code will run with the limited branch prediction. Fixes: eacf67eb9b32 ("s390: run user space and KVM guests with modified branch prediction") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 9ec728f..7349246 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1440,6 +1440,7 @@ cleanup_critical: stg %r15,__LC_SYSTEM_TIMER 0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP # set up saved register r11 lg %r15,__LC_KERNEL_STACK la %r9,STACK_FRAME_OVERHEAD(%r15) -- cgit v1.1 From 80475c48c6a8a65171e035e0915dc7996b5a0a65 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 22 Feb 2018 10:34:02 +0800 Subject: selftests/bpf/test_maps: exit child process without error in ENOMEM case test_maps contains a series of stress tests, and previously it will break the rest tests when it failed to alloc memory. ----------------------- Failed to create hashmap key=8 value=262144 'Cannot allocate memory' Failed to create hashmap key=16 value=262144 'Cannot allocate memory' Failed to create hashmap key=8 value=262144 'Cannot allocate memory' Failed to create hashmap key=8 value=262144 'Cannot allocate memory' test_maps: test_maps.c:955: run_parallel: Assertion `status == 0' failed. Aborted not ok 1..3 selftests: test_maps [FAIL] ----------------------- after this patch, the rest tests will be continue when it occurs an ENOMEM failure CC: Alexei Starovoitov CC: Philip Li Suggested-by: Daniel Borkmann Signed-off-by: Li Zhijian Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 436c4c7..9e03a4c 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -126,6 +126,8 @@ static void test_hashmap_sizes(int task, void *data) fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, 2, map_flags); if (fd < 0) { + if (errno == ENOMEM) + return; printf("Failed to create hashmap key=%d value=%d '%s'\n", i, j, strerror(errno)); exit(1); -- cgit v1.1 From 971b42c038dc83e3327872d294fe7131bab152fc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: PKCS#7: fix certificate chain verification When pkcs7_verify_sig_chain() is building the certificate chain for a SignerInfo using the certificates in the PKCS#7 message, it is passing the wrong arguments to public_key_verify_signature(). Consequently, when the next certificate is supposed to be used to verify the previous certificate, the next certificate is actually used to verify itself. An attacker can use this bug to create a bogus certificate chain that has no cryptographic relationship between the beginning and end. Fortunately I couldn't quite find a way to use this to bypass the overall signature verification, though it comes very close. Here's the reasoning: due to the bug, every certificate in the chain beyond the first actually has to be self-signed (where "self-signed" here refers to the actual key and signature; an attacker might still manipulate the certificate fields such that the self_signed flag doesn't actually get set, and thus the chain doesn't end immediately). But to pass trust validation (pkcs7_validate_trust()), either the SignerInfo or one of the certificates has to actually be signed by a trusted key. Since only self-signed certificates can be added to the chain, the only way for an attacker to introduce a trusted signature is to include a self-signed trusted certificate. But, when pkcs7_validate_trust_one() reaches that certificate, instead of trying to verify the signature on that certificate, it will actually look up the corresponding trusted key, which will succeed, and then try to verify the *previous* certificate, which will fail. Thus, disaster is narrowly averted (as far as I could tell). Fixes: 6c2dc5ae4ab7 ("X.509: Extract signature digest and make self-signed cert checks earlier") Cc: # v4.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/pkcs7_verify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 39e6de0..2f6a768 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -270,7 +270,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7, sinfo->index); return 0; } - ret = public_key_verify_signature(p->pub, p->sig); + ret = public_key_verify_signature(p->pub, x509->sig); if (ret < 0) return ret; x509->signer = p; -- cgit v1.1 From 29f4a67c17e19314b7d74b8569be935e6c7edf50 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: PKCS#7: fix certificate blacklisting If there is a blacklisted certificate in a SignerInfo's certificate chain, then pkcs7_verify_sig_chain() sets sinfo->blacklisted and returns 0. But, pkcs7_verify() fails to handle this case appropriately, as it actually continues on to the line 'actual_ret = 0;', indicating that the SignerInfo has passed verification. Consequently, PKCS#7 signature verification ignores the certificate blacklist. Fix this by not considering blacklisted SignerInfos to have passed verification. Also fix the function comment with regards to when 0 is returned. Fixes: 03bb79315ddc ("PKCS#7: Handle blacklisted certificates") Cc: # v4.12+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/pkcs7_verify.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 2f6a768..97c77f6 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -366,8 +366,7 @@ static int pkcs7_verify_one(struct pkcs7_message *pkcs7, * * (*) -EBADMSG if some part of the message was invalid, or: * - * (*) 0 if no signature chains were found to be blacklisted or to contain - * unsupported crypto, or: + * (*) 0 if a signature chain passed verification, or: * * (*) -EKEYREJECTED if a blacklisted key was encountered, or: * @@ -423,8 +422,11 @@ int pkcs7_verify(struct pkcs7_message *pkcs7, for (sinfo = pkcs7->signed_infos; sinfo; sinfo = sinfo->next) { ret = pkcs7_verify_one(pkcs7, sinfo); - if (sinfo->blacklisted && actual_ret == -ENOPKG) - actual_ret = -EKEYREJECTED; + if (sinfo->blacklisted) { + if (actual_ret == -ENOPKG) + actual_ret = -EKEYREJECTED; + continue; + } if (ret < 0) { if (ret == -ENOPKG) { sinfo->unsupported_crypto = true; -- cgit v1.1 From 6459ae386699a5fe0dc52cf30255f75274fa43a4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: PKCS#7: fix direct verification of SignerInfo signature If none of the certificates in a SignerInfo's certificate chain match a trusted key, nor is the last certificate signed by a trusted key, then pkcs7_validate_trust_one() tries to check whether the SignerInfo's signature was made directly by a trusted key. But, it actually fails to set the 'sig' variable correctly, so it actually verifies the last signature seen. That will only be the SignerInfo's signature if the certificate chain is empty; otherwise it will actually be the last certificate's signature. This is not by itself a security problem, since verifying any of the certificates in the chain should be sufficient to verify the SignerInfo. Still, it's not working as intended so it should be fixed. Fix it by setting 'sig' correctly for the direct verification case. Fixes: 757932e6da6d ("PKCS#7: Handle PKCS#7 messages that contain no X.509 certs") Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/pkcs7_trust.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c index 1f4e25f..598906b 100644 --- a/crypto/asymmetric_keys/pkcs7_trust.c +++ b/crypto/asymmetric_keys/pkcs7_trust.c @@ -106,6 +106,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7, pr_devel("sinfo %u: Direct signer is key %x\n", sinfo->index, key_serial(key)); x509 = NULL; + sig = sinfo->sig; goto matched; } if (PTR_ERR(key) != -ENOKEY) -- cgit v1.1 From 437499eea4291ae9621e8763a41df027c110a1ef Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:33 +0000 Subject: X.509: fix BUG_ON() when hash algorithm is unsupported The X.509 parser mishandles the case where the certificate's signature's hash algorithm is not available in the crypto API. In this case, x509_get_sig_params() doesn't allocate the cert->sig->digest buffer; this part seems to be intentional. However, public_key_verify_signature() is still called via x509_check_for_self_signed(), which triggers the 'BUG_ON(!sig->digest)'. Fix this by making public_key_verify_signature() return -ENOPKG if the hash buffer has not been allocated. Reproducer when all the CONFIG_CRYPTO_SHA512* options are disabled: openssl req -new -sha512 -x509 -batch -nodes -outform der \ | keyctl padd asymmetric desc @s Fixes: 6c2dc5ae4ab7 ("X.509: Extract signature digest and make self-signed cert checks earlier") Reported-by: Paolo Valente Cc: Paolo Valente Cc: # v4.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/public_key.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index de99658..e929fe1 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -79,9 +79,11 @@ int public_key_verify_signature(const struct public_key *pkey, BUG_ON(!pkey); BUG_ON(!sig); - BUG_ON(!sig->digest); BUG_ON(!sig->s); + if (!sig->digest) + return -ENOPKG; + alg_name = sig->pkey_algo; if (strcmp(sig->pkey_algo, "rsa") == 0) { /* The data wangled by the RSA algorithm is typically padded -- cgit v1.1 From 4b34968e77ad09628cfb3c4a7daf2adc2cefc6e8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Feb 2018 14:38:34 +0000 Subject: X.509: fix NULL dereference when restricting key with unsupported_sig The asymmetric key type allows an X.509 certificate to be added even if its signature's hash algorithm is not available in the crypto API. In that case 'payload.data[asym_auth]' will be NULL. But the key restriction code failed to check for this case before trying to use the signature, resulting in a NULL pointer dereference in key_or_keyring_common() or in restrict_link_by_signature(). Fix this by returning -ENOPKG when the signature is unsupported. Reproducer when all the CONFIG_CRYPTO_SHA512* options are disabled and keyctl has support for the 'restrict_keyring' command: keyctl new_session keyctl restrict_keyring @s asymmetric builtin_trusted openssl req -new -sha512 -x509 -batch -nodes -outform der \ | keyctl padd asymmetric desc @s Fixes: a511e1af8b12 ("KEYS: Move the point of trust determination to __key_link()") Cc: # v4.7+ Signed-off-by: Eric Biggers Signed-off-by: David Howells --- crypto/asymmetric_keys/restrict.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/crypto/asymmetric_keys/restrict.c b/crypto/asymmetric_keys/restrict.c index 86fb685..7c93c77 100644 --- a/crypto/asymmetric_keys/restrict.c +++ b/crypto/asymmetric_keys/restrict.c @@ -67,8 +67,9 @@ __setup("ca_keys=", ca_keys_setup); * * Returns 0 if the new certificate was accepted, -ENOKEY if we couldn't find a * matching parent certificate in the trusted list, -EKEYREJECTED if the - * signature check fails or the key is blacklisted and some other error if - * there is a matching certificate but the signature check cannot be performed. + * signature check fails or the key is blacklisted, -ENOPKG if the signature + * uses unsupported crypto, or some other error if there is a matching + * certificate but the signature check cannot be performed. */ int restrict_link_by_signature(struct key *dest_keyring, const struct key_type *type, @@ -88,6 +89,8 @@ int restrict_link_by_signature(struct key *dest_keyring, return -EOPNOTSUPP; sig = payload->data[asym_auth]; + if (!sig) + return -ENOPKG; if (!sig->auth_ids[0] && !sig->auth_ids[1]) return -ENOKEY; @@ -139,6 +142,8 @@ static int key_or_keyring_common(struct key *dest_keyring, return -EOPNOTSUPP; sig = payload->data[asym_auth]; + if (!sig) + return -ENOPKG; if (!sig->auth_ids[0] && !sig->auth_ids[1]) return -ENOKEY; @@ -222,9 +227,9 @@ static int key_or_keyring_common(struct key *dest_keyring, * * Returns 0 if the new certificate was accepted, -ENOKEY if we * couldn't find a matching parent certificate in the trusted list, - * -EKEYREJECTED if the signature check fails, and some other error if - * there is a matching certificate but the signature check cannot be - * performed. + * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses + * unsupported crypto, or some other error if there is a matching certificate + * but the signature check cannot be performed. */ int restrict_link_by_key_or_keyring(struct key *dest_keyring, const struct key_type *type, @@ -249,9 +254,9 @@ int restrict_link_by_key_or_keyring(struct key *dest_keyring, * * Returns 0 if the new certificate was accepted, -ENOKEY if we * couldn't find a matching parent certificate in the trusted list, - * -EKEYREJECTED if the signature check fails, and some other error if - * there is a matching certificate but the signature check cannot be - * performed. + * -EKEYREJECTED if the signature check fails, -ENOPKG if the signature uses + * unsupported crypto, or some other error if there is a matching certificate + * but the signature check cannot be performed. */ int restrict_link_by_key_or_keyring_chain(struct key *dest_keyring, const struct key_type *type, -- cgit v1.1 From d9f4bb1a0f4db493efe6d7c58ffe696a57de7eb3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Feb 2018 14:38:34 +0000 Subject: KEYS: Use individual pages in big_key for crypto buffers kmalloc() can't always allocate large enough buffers for big_key to use for crypto (1MB + some metadata) so we cannot use that to allocate the buffer. Further, vmalloc'd pages can't be passed to sg_init_one() and the aead crypto accessors cannot be called progressively and must be passed all the data in one go (which means we can't pass the data in one block at a time). Fix this by allocating the buffer pages individually and passing them through a multientry scatterlist to the crypto layer. This has the bonus advantage that we don't have to allocate a contiguous series of pages. We then vmap() the page list and pass that through to the VFS read/write routines. This can trigger a warning: WARNING: CPU: 0 PID: 60912 at mm/page_alloc.c:3883 __alloc_pages_nodemask+0xb7c/0x15f8 ([<00000000002acbb6>] __alloc_pages_nodemask+0x1ee/0x15f8) [<00000000002dd356>] kmalloc_order+0x46/0x90 [<00000000002dd3e0>] kmalloc_order_trace+0x40/0x1f8 [<0000000000326a10>] __kmalloc+0x430/0x4c0 [<00000000004343e4>] big_key_preparse+0x7c/0x210 [<000000000042c040>] key_create_or_update+0x128/0x420 [<000000000042e52c>] SyS_add_key+0x124/0x220 [<00000000007bba2c>] system_call+0xc4/0x2b0 from the keyctl/padd/useradd test of the keyutils testsuite on s390x. Note that it might be better to shovel data through in page-sized lumps instead as there's no particular need to use a monolithic buffer unless the kernel itself wants to access the data. Fixes: 13100a72f40f ("Security: Keys: Big keys stored encrypted") Reported-by: Paul Bunyan Signed-off-by: David Howells cc: Kirill Marinushkin --- security/keys/big_key.c | 110 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 23 deletions(-) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 929e149..fa728f6 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -22,6 +22,13 @@ #include #include +struct big_key_buf { + unsigned int nr_pages; + void *virt; + struct scatterlist *sg; + struct page *pages[]; +}; + /* * Layout of key payload words. */ @@ -91,10 +98,9 @@ static DEFINE_MUTEX(big_key_aead_lock); /* * Encrypt/decrypt big_key data */ -static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) +static int big_key_crypt(enum big_key_op op, struct big_key_buf *buf, size_t datalen, u8 *key) { int ret; - struct scatterlist sgio; struct aead_request *aead_req; /* We always use a zero nonce. The reason we can get away with this is * because we're using a different randomly generated key for every @@ -109,8 +115,7 @@ static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) return -ENOMEM; memset(zero_nonce, 0, sizeof(zero_nonce)); - sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0)); - aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce); + aead_request_set_crypt(aead_req, buf->sg, buf->sg, datalen, zero_nonce); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); aead_request_set_ad(aead_req, 0); @@ -130,21 +135,81 @@ error: } /* + * Free up the buffer. + */ +static void big_key_free_buffer(struct big_key_buf *buf) +{ + unsigned int i; + + if (buf->virt) { + memset(buf->virt, 0, buf->nr_pages * PAGE_SIZE); + vunmap(buf->virt); + } + + for (i = 0; i < buf->nr_pages; i++) + if (buf->pages[i]) + __free_page(buf->pages[i]); + + kfree(buf); +} + +/* + * Allocate a buffer consisting of a set of pages with a virtual mapping + * applied over them. + */ +static void *big_key_alloc_buffer(size_t len) +{ + struct big_key_buf *buf; + unsigned int npg = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int i, l; + + buf = kzalloc(sizeof(struct big_key_buf) + + sizeof(struct page) * npg + + sizeof(struct scatterlist) * npg, + GFP_KERNEL); + if (!buf) + return NULL; + + buf->nr_pages = npg; + buf->sg = (void *)(buf->pages + npg); + sg_init_table(buf->sg, npg); + + for (i = 0; i < buf->nr_pages; i++) { + buf->pages[i] = alloc_page(GFP_KERNEL); + if (!buf->pages[i]) + goto nomem; + + l = min_t(size_t, len, PAGE_SIZE); + sg_set_page(&buf->sg[i], buf->pages[i], l, 0); + len -= l; + } + + buf->virt = vmap(buf->pages, buf->nr_pages, VM_MAP, PAGE_KERNEL); + if (!buf->virt) + goto nomem; + + return buf; + +nomem: + big_key_free_buffer(buf); + return NULL; +} + +/* * Preparse a big key */ int big_key_preparse(struct key_preparsed_payload *prep) { + struct big_key_buf *buf; struct path *path = (struct path *)&prep->payload.data[big_key_path]; struct file *file; u8 *enckey; - u8 *data = NULL; ssize_t written; - size_t datalen = prep->datalen; + size_t datalen = prep->datalen, enclen = datalen + ENC_AUTHTAG_SIZE; int ret; - ret = -EINVAL; if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) - goto error; + return -EINVAL; /* Set an arbitrary quota */ prep->quotalen = 16; @@ -157,13 +222,12 @@ int big_key_preparse(struct key_preparsed_payload *prep) * * File content is stored encrypted with randomly generated key. */ - size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; - memcpy(data, prep->data, datalen); + memcpy(buf->virt, prep->data, datalen); /* generate random key */ enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); @@ -176,7 +240,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; /* encrypt aligned data */ - ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey); + ret = big_key_crypt(BIG_KEY_ENC, buf, datalen, enckey); if (ret) goto err_enckey; @@ -187,7 +251,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) goto err_enckey; } - written = kernel_write(file, data, enclen, &pos); + written = kernel_write(file, buf->virt, enclen, &pos); if (written != enclen) { ret = written; if (written >= 0) @@ -202,7 +266,7 @@ int big_key_preparse(struct key_preparsed_payload *prep) *path = file->f_path; path_get(path); fput(file); - kzfree(data); + big_key_free_buffer(buf); } else { /* Just store the data in a buffer */ void *data = kmalloc(datalen, GFP_KERNEL); @@ -220,7 +284,7 @@ err_fput: err_enckey: kzfree(enckey); error: - kzfree(data); + big_key_free_buffer(buf); return ret; } @@ -298,15 +362,15 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) return datalen; if (datalen > BIG_KEY_FILE_THRESHOLD) { + struct big_key_buf *buf; struct path *path = (struct path *)&key->payload.data[big_key_path]; struct file *file; - u8 *data; u8 *enckey = (u8 *)key->payload.data[big_key_data]; size_t enclen = datalen + ENC_AUTHTAG_SIZE; loff_t pos = 0; - data = kmalloc(enclen, GFP_KERNEL); - if (!data) + buf = big_key_alloc_buffer(enclen); + if (!buf) return -ENOMEM; file = dentry_open(path, O_RDONLY, current_cred()); @@ -316,26 +380,26 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen) } /* read file to kernel and decrypt */ - ret = kernel_read(file, data, enclen, &pos); + ret = kernel_read(file, buf->virt, enclen, &pos); if (ret >= 0 && ret != enclen) { ret = -EIO; goto err_fput; } - ret = big_key_crypt(BIG_KEY_DEC, data, enclen, enckey); + ret = big_key_crypt(BIG_KEY_DEC, buf, enclen, enckey); if (ret) goto err_fput; ret = datalen; /* copy decrypted data to user */ - if (copy_to_user(buffer, data, datalen) != 0) + if (copy_to_user(buffer, buf->virt, datalen) != 0) ret = -EFAULT; err_fput: fput(file); error: - kzfree(data); + big_key_free_buffer(buf); } else { ret = datalen; if (copy_to_user(buffer, key->payload.data[big_key_data], -- cgit v1.1 From c8d5dcf122b194e897d2a6311903eae0c1023325 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 16 Feb 2018 11:03:01 +0100 Subject: MAINTAINERS: ARM: at91: update my email address Free Electrons is now Bootlin. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3bdc260..99038a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1238,7 +1238,7 @@ F: drivers/clk/at91 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT M: Nicolas Ferre -M: Alexandre Belloni +M: Alexandre Belloni L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.linux4sam.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/nferre/linux-at91.git -- cgit v1.1 From e2c8d283c4e2f468bed1bcfedb80b670b1bc8ab1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 1 Feb 2018 15:32:40 -0600 Subject: arm64: dts: cavium: fix PCI bus dtc warnings dtc recently added PCI bus checks. Fix these warnings: arch/arm64/boot/dts/cavium/thunder2-99xx.dtb: Warning (pci_bridge): Node /pci missing bus-range for PCI bridge arch/arm64/boot/dts/cavium/thunder2-99xx.dtb: Warning (unit_address_vs_reg): Node /pci has a reg or ranges property, but no unit name Signed-off-by: Rob Herring Cc: Jayachandran C Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi index 4220fbd..ff5c4c4 100644 --- a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi @@ -98,7 +98,7 @@ clock-output-names = "clk125mhz"; }; - pci { + pcie@30000000 { compatible = "pci-host-ecam-generic"; device_type = "pci"; #interrupt-cells = <1>; @@ -118,6 +118,7 @@ ranges = <0x02000000 0 0x40000000 0 0x40000000 0 0x20000000 0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>; + bus-range = <0 0xff>; interrupt-map-mask = <0 0 0 7>; interrupt-map = /* addr pin ic icaddr icintr */ -- cgit v1.1 From 9977a8c3497a8f7f7f951994f298a8e4d961234f Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 14 Dec 2017 17:53:52 +0100 Subject: arm64: dts: Remove leading 0x and 0s from bindings notation Improve the DTS files by removing all the leading "0x" and zeros to fix the following dtc warnings: Warning (unit_address_format): Node /XXX unit name should not have leading "0x" and Warning (unit_address_format): Node /XXX unit name should not have leading 0s Converted using the following command: find . -type f \( -iname *.dts -o -iname *.dtsi \) -exec sed -E -i -e "s/@0x([0-9a-fA-F\.]+)\s?\{/@\L\1 \{/g" -e "s/@0+([0-9a-fA-F\.]+)\s?\{/@\L\1 \{/g" {} + For simplicity, two sed expressions were used to solve each warnings separately. To make the regex expression more robust a few other issues were resolved, namely setting unit-address to lower case, and adding a whitespace before the the opening curly brace: https://elinux.org/Device_Tree_Linux#Linux_conventions This is a follow up to commit 4c9847b7375a ("dt-bindings: Remove leading 0x from bindings notation") Reported-by: David Daney Suggested-by: Rob Herring Signed-off-by: Mathieu Malaterre Acked-by: Matthias Brugger Acked-by: Andy Gross Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 2 +- arch/arm64/boot/dts/mediatek/mt8173.dtsi | 2 +- arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 6 +++--- arch/arm64/boot/dts/qcom/msm8996.dtsi | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index e94fa1a..047641f 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -51,7 +51,7 @@ #size-cells = <2>; ranges; - ramoops@0x21f00000 { + ramoops@21f00000 { compatible = "ramoops"; reg = <0x0 0x21f00000 0x0 0x00100000>; record-size = <0x00020000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 9fbe470..94597e3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -341,7 +341,7 @@ reg = <0 0x10005000 0 0x1000>; }; - pio: pinctrl@0x10005000 { + pio: pinctrl@10005000 { compatible = "mediatek,mt8173-pinctrl"; reg = <0 0x1000b000 0 0x1000>; mediatek,pctl-regmap = <&syscfg_pctl_a>; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 492a011..1c8f1b8 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -140,16 +140,16 @@ }; agnoc@0 { - qcom,pcie@00600000 { + qcom,pcie@600000 { perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>; }; - qcom,pcie@00608000 { + qcom,pcie@608000 { status = "okay"; perst-gpio = <&msmgpio 130 GPIO_ACTIVE_LOW>; }; - qcom,pcie@00610000 { + qcom,pcie@610000 { status = "okay"; perst-gpio = <&msmgpio 114 GPIO_ACTIVE_LOW>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 4b2afcc..0a6f795 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -840,7 +840,7 @@ #size-cells = <1>; ranges; - pcie0: qcom,pcie@00600000 { + pcie0: qcom,pcie@600000 { compatible = "qcom,pcie-msm8996", "snps,dw-pcie"; status = "disabled"; power-domains = <&gcc PCIE0_GDSC>; @@ -893,7 +893,7 @@ }; - pcie1: qcom,pcie@00608000 { + pcie1: qcom,pcie@608000 { compatible = "qcom,pcie-msm8996", "snps,dw-pcie"; power-domains = <&gcc PCIE1_GDSC>; bus-range = <0x00 0xff>; @@ -946,7 +946,7 @@ "bus_slave"; }; - pcie2: qcom,pcie@00610000 { + pcie2: qcom,pcie@610000 { compatible = "qcom,pcie-msm8996", "snps,dw-pcie"; power-domains = <&gcc PCIE2_GDSC>; bus-range = <0x00 0xff>; -- cgit v1.1 From e519eedb6848198cb6fb7f50abb2e416309c0ce5 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 15 Dec 2017 13:46:48 +0100 Subject: arm: zx: dts: Remove leading 0x and 0s from bindings notation Improve the DTS files by removing all the leading "0x" and zeros to fix the following dtc warnings: Warning (unit_address_format): Node /XXX unit name should not have leading "0x" and Warning (unit_address_format): Node /XXX unit name should not have leading 0s Converted using the following command: find . -type f \( -iname *.dts -o -iname *.dtsi \) -exec sed -i -e "s/@\([0-9a-fA-FxX\.;:#]+\)\s*{/@\L\1 {/g" -e "s/@0x\(.*\) {/@\1 {/g" -e "s/@0+\(.*\) {/@\1 {/g" {} +^C For simplicity, two sed expressions were used to solve each warnings separately. To make the regex expression more robust a few other issues were resolved, namely setting unit-address to lower case, and adding a whitespace before the the opening curly brace: https://elinux.org/Device_Tree_Linux#Linux_conventions This will solve as a side effect warning: Warning (simple_bus_reg): Node /XXX@ simple-bus unit address format error, expected "" This is a follow up to commit 4c9847b7375a ("dt-bindings: Remove leading 0x from bindings notation") Reported-by: David Daney Suggested-by: Rob Herring Signed-off-by: Mathieu Malaterre Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/zx296702.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/zx296702.dtsi b/arch/arm/boot/dts/zx296702.dtsi index 8a74efd..240e7a2 100644 --- a/arch/arm/boot/dts/zx296702.dtsi +++ b/arch/arm/boot/dts/zx296702.dtsi @@ -56,7 +56,7 @@ clocks = <&topclk ZX296702_A9_PERIPHCLK>; }; - l2cc: l2-cache-controller@0x00c00000 { + l2cc: l2-cache-controller@c00000 { compatible = "arm,pl310-cache"; reg = <0x00c00000 0x1000>; cache-unified; @@ -67,30 +67,30 @@ arm,double-linefill-incr = <0>; }; - pcu: pcu@0xa0008000 { + pcu: pcu@a0008000 { compatible = "zte,zx296702-pcu"; reg = <0xa0008000 0x1000>; }; - topclk: topclk@0x09800000 { + topclk: topclk@9800000 { compatible = "zte,zx296702-topcrm-clk"; reg = <0x09800000 0x1000>; #clock-cells = <1>; }; - lsp1clk: lsp1clk@0x09400000 { + lsp1clk: lsp1clk@9400000 { compatible = "zte,zx296702-lsp1crpm-clk"; reg = <0x09400000 0x1000>; #clock-cells = <1>; }; - lsp0clk: lsp0clk@0x0b000000 { + lsp0clk: lsp0clk@b000000 { compatible = "zte,zx296702-lsp0crpm-clk"; reg = <0x0b000000 0x1000>; #clock-cells = <1>; }; - uart0: serial@0x09405000 { + uart0: serial@9405000 { compatible = "zte,zx296702-uart"; reg = <0x09405000 0x1000>; interrupts = ; @@ -98,7 +98,7 @@ status = "disabled"; }; - uart1: serial@0x09406000 { + uart1: serial@9406000 { compatible = "zte,zx296702-uart"; reg = <0x09406000 0x1000>; interrupts = ; @@ -106,7 +106,7 @@ status = "disabled"; }; - mmc0: mmc@0x09408000 { + mmc0: mmc@9408000 { compatible = "snps,dw-mshc"; #address-cells = <1>; #size-cells = <0>; @@ -119,7 +119,7 @@ status = "disabled"; }; - mmc1: mmc@0x0b003000 { + mmc1: mmc@b003000 { compatible = "snps,dw-mshc"; #address-cells = <1>; #size-cells = <0>; @@ -132,7 +132,7 @@ status = "disabled"; }; - sysctrl: sysctrl@0xa0007000 { + sysctrl: sysctrl@a0007000 { compatible = "zte,sysctrl", "syscon"; reg = <0xa0007000 0x1000>; }; -- cgit v1.1 From 01a6e1267e741a91c6cdb4604cd2f898166e03f0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 17:24:50 +0100 Subject: ARM: clps711x: mark clps711x_compat as const The array of string pointers is put in __initconst, and the strings themselves are marke 'const' but the the pointers are not, which caused a warning when built with LTO: arch/arm/mach-clps711x/board-dt.c:72:20: error: 'clps711x_compat' causes a section type conflict with 'feroceon_ids' static const char *clps711x_compat[] __initconst = { This marks the array itself const as well, which was certainly the intention originally. Signed-off-by: Arnd Bergmann --- arch/arm/mach-clps711x/board-dt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-clps711x/board-dt.c b/arch/arm/mach-clps711x/board-dt.c index ee1f83b..4c89a8e 100644 --- a/arch/arm/mach-clps711x/board-dt.c +++ b/arch/arm/mach-clps711x/board-dt.c @@ -69,7 +69,7 @@ static void clps711x_restart(enum reboot_mode mode, const char *cmd) soft_restart(0); } -static const char *clps711x_compat[] __initconst = { +static const char *const clps711x_compat[] __initconst = { "cirrus,ep7209", NULL }; -- cgit v1.1 From eec51afc9d2612c65523627cfd81456c6995a79a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 17:24:52 +0100 Subject: ARM: davinci: mark spi_board_info arrays as const Building with LTO revealed that three spi_board_info arrays are marked __initconst, but not const: arch/arm/mach-davinci/board-dm365-evm.c: In function 'dm365_evm_init': arch/arm/mach-davinci/board-dm365-evm.c:729:30: error: 'dm365_evm_spi_info' causes a section type conflict with 'dm646x_edma_device' static struct spi_board_info dm365_evm_spi_info[] __initconst = { ^ arch/arm/mach-davinci/dm646x.c:603:42: note: 'dm646x_edma_device' was declared here static const struct platform_device_info dm646x_edma_device __initconst = { This marks them const as well, as was originally intended. Signed-off-by: Arnd Bergmann --- arch/arm/mach-davinci/board-dm355-evm.c | 2 +- arch/arm/mach-davinci/board-dm355-leopard.c | 2 +- arch/arm/mach-davinci/board-dm365-evm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index e457f29..d6b1190 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -368,7 +368,7 @@ static struct spi_eeprom at25640a = { .flags = EE_ADDR2, }; -static struct spi_board_info dm355_evm_spi_info[] __initconst = { +static const struct spi_board_info dm355_evm_spi_info[] __initconst = { { .modalias = "at25", .platform_data = &at25640a, diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c index be99724..fad9a56 100644 --- a/arch/arm/mach-davinci/board-dm355-leopard.c +++ b/arch/arm/mach-davinci/board-dm355-leopard.c @@ -217,7 +217,7 @@ static struct spi_eeprom at25640a = { .flags = EE_ADDR2, }; -static struct spi_board_info dm355_leopard_spi_info[] __initconst = { +static const struct spi_board_info dm355_leopard_spi_info[] __initconst = { { .modalias = "at25", .platform_data = &at25640a, diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index e75741f..e378098 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -726,7 +726,7 @@ static struct spi_eeprom at25640 = { .flags = EE_ADDR2, }; -static struct spi_board_info dm365_evm_spi_info[] __initconst = { +static const struct spi_board_info dm365_evm_spi_info[] __initconst = { { .modalias = "at25", .platform_data = &at25640, -- cgit v1.1 From 8337d083507b9827dfb36d545538b7789df834fd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Feb 2018 13:18:49 +0100 Subject: ARM: orion: fix orion_ge00_switch_board_info initialization A section type mismatch warning shows up when building with LTO, since orion_ge00_mvmdio_bus_name was put in __initconst but not marked const itself: include/linux/of.h: In function 'spear_setup_of_timer': arch/arm/mach-spear/time.c:207:34: error: 'timer_of_match' causes a section type conflict with 'orion_ge00_mvmdio_bus_name' static const struct of_device_id timer_of_match[] __initconst = { ^ arch/arm/plat-orion/common.c:475:32: note: 'orion_ge00_mvmdio_bus_name' was declared here static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii"; ^ As pointed out by Andrew Lunn, it should in fact be 'const' but not '__initconst' because the string is never copied but may be accessed after the init sections are freed. To fix that, I get rid of the extra symbol and rewrite the initialization in a simpler way that assigns both the bus_id and modalias statically. I spotted another theoretical bug in the same place, where d->netdev[i] may be an out of bounds access, this can be fixed by moving the device assignment into the loop. Cc: stable@vger.kernel.org Reviewed-by: Andrew Lunn Signed-off-by: Arnd Bergmann --- arch/arm/plat-orion/common.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index aff6994..a2399fd 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -472,28 +472,27 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, /***************************************************************************** * Ethernet switch ****************************************************************************/ -static __initconst const char *orion_ge00_mvmdio_bus_name = "orion-mii"; -static __initdata struct mdio_board_info - orion_ge00_switch_board_info; +static __initdata struct mdio_board_info orion_ge00_switch_board_info = { + .bus_id = "orion-mii", + .modalias = "mv88e6085", +}; void __init orion_ge00_switch_init(struct dsa_chip_data *d) { - struct mdio_board_info *bd; unsigned int i; if (!IS_BUILTIN(CONFIG_PHYLIB)) return; - for (i = 0; i < ARRAY_SIZE(d->port_names); i++) - if (!strcmp(d->port_names[i], "cpu")) + for (i = 0; i < ARRAY_SIZE(d->port_names); i++) { + if (!strcmp(d->port_names[i], "cpu")) { + d->netdev[i] = &orion_ge00.dev; break; + } + } - bd = &orion_ge00_switch_board_info; - bd->bus_id = orion_ge00_mvmdio_bus_name; - bd->mdio_addr = d->sw_addr; - d->netdev[i] = &orion_ge00.dev; - strcpy(bd->modalias, "mv88e6085"); - bd->platform_data = d; + orion_ge00_switch_board_info.mdio_addr = d->sw_addr; + orion_ge00_switch_board_info.platform_data = d; mdiobus_register_board_info(&orion_ge00_switch_board_info, 1); } -- cgit v1.1 From b21ebf2fb4cde1618915a97cc773e287ff49173e Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 7 Feb 2018 14:20:09 -0800 Subject: x86: Treat R_X86_64_PLT32 as R_X86_64_PC32 On i386, there are 2 types of PLTs, PIC and non-PIC. PIE and shared objects must use PIC PLT. To use PIC PLT, you need to load _GLOBAL_OFFSET_TABLE_ into EBX first. There is no need for that on x86-64 since x86-64 uses PC-relative PLT. On x86-64, for 32-bit PC-relative branches, we can generate PLT32 relocation, instead of PC32 relocation, which can also be used as a marker for 32-bit PC-relative branches. Linker can always reduce PLT32 relocation to PC32 if function is defined locally. Local functions should use PC32 relocation. As far as Linux kernel is concerned, R_X86_64_PLT32 can be treated the same as R_X86_64_PC32 since Linux kernel doesn't use PLT. R_X86_64_PLT32 for 32-bit PC-relative branches has been enabled in binutils master branch which will become binutils 2.31. [ hjl is working on having better documentation on this all, but a few more notes from him: "PLT32 relocation is used as marker for PC-relative branches. Because of EBX, it looks odd to generate PLT32 relocation on i386 when EBX doesn't have GOT. As for symbol resolution, PLT32 and PC32 relocations are almost interchangeable. But when linker sees PLT32 relocation against a protected symbol, it can resolved locally at link-time since it is used on a branch instruction. Linker can't do that for PC32 relocation" but for the kernel use, the two are basically the same, and this commit gets things building and working with the current binutils master - Linus ] Signed-off-by: H.J. Lu Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_64.c | 1 + arch/x86/kernel/module.c | 1 + arch/x86/tools/relocs.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf..3b7427a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index da0c160..f58336a 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: if (*(u32 *)loc != 0) goto invalid_relocation; val -= (u64)loc; diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5d73c44..220e978 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; case R_X86_64_PC32: + case R_X86_64_PLT32: /* * PC relative relocations don't need to be adjusted unless * referencing a percpu symbol. + * + * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ if (is_percpu_sym(sym, symname)) add_reloc(&relocs32neg, offset); -- cgit v1.1 From ad86f605c59500da82d196ac312cfbac3daba31d Mon Sep 17 00:00:00 2001 From: "Bill.Baker@oracle.com" Date: Wed, 21 Feb 2018 12:46:43 -0600 Subject: nfs: system crashes after NFS4ERR_MOVED recovery nfs4_update_server unconditionally releases the nfs_client for the source server. If migration fails, this can cause the source server's nfs_client struct to be left with a low reference count, resulting in use-after-free. Also, adjust reference count handling for ELOOP. NFS: state manager: migration failed on NFSv4 server nfsvmu10 with error 6 WARNING: CPU: 16 PID: 17960 at fs/nfs/client.c:281 nfs_put_client+0xfa/0x110 [nfs]() nfs_put_client+0xfa/0x110 [nfs] nfs4_run_state_manager+0x30/0x40 [nfsv4] kthread+0xd8/0xf0 BUG: unable to handle kernel NULL pointer dereference at 00000000000002a8 nfs4_xdr_enc_write+0x6b/0x160 [nfsv4] rpcauth_wrap_req+0xac/0xf0 [sunrpc] call_transmit+0x18c/0x2c0 [sunrpc] __rpc_execute+0xa6/0x490 [sunrpc] rpc_async_schedule+0x15/0x20 [sunrpc] process_one_work+0x160/0x470 worker_thread+0x112/0x540 ? rescuer_thread+0x3f0/0x3f0 kthread+0xd8/0xf0 This bug was introduced by 32e62b7c ("NFS: Add nfs4_update_server"), but the fix applies cleanly to 52442f9b ("NFS4: Avoid migration loops") Reported-by: Helen Chao Fixes: 52442f9b11b7 ("NFS4: Avoid migration loops") Signed-off-by: Bill Baker Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 04612c2..9796314 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -868,8 +868,10 @@ static int nfs4_set_client(struct nfs_server *server, if (IS_ERR(clp)) return PTR_ERR(clp); - if (server->nfs_client == clp) + if (server->nfs_client == clp) { + nfs_put_client(clp); return -ELOOP; + } /* * Query for the lease time on clientid setup or renewal @@ -1244,11 +1246,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, net); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); - nfs_put_client(clp); if (error != 0) { nfs_server_insert_lists(server); return error; } + nfs_put_client(clp); if (server->nfs_client->cl_hostname == NULL) server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); -- cgit v1.1 From 1b7204064582792b77c6be796e78bd821c9f71b1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 7 Feb 2018 11:27:54 +0000 Subject: NFS: make struct nlmclnt_fl_close_lock_ops static The structure nlmclnt_fl_close_lock_ops s local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: fs/nfs/nfs3proc.c:876:33: warning: symbol 'nlmclnt_fl_close_lock_ops' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 49f848f..7327930 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -873,7 +873,7 @@ static void nfs3_nlm_release_call(void *data) } } -const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = { +static const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = { .nlmclnt_alloc_call = nfs3_nlm_alloc_call, .nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare, .nlmclnt_release_call = nfs3_nlm_release_call, -- cgit v1.1 From 6275ecbcd3ae3aaf47c3bc1e46343a50f16b2577 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 14 Feb 2018 10:15:12 +0100 Subject: samples/seccomp: do not compile when cross compiled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit samples/seccomp relies on the host setting which is not suitable for crosscompilation and it actually fails when crosscompiling s390 and powerpc all{yes,mod}config on x86_64 with samples/seccomp/bpf-helper.h:135:2: error: #error __BITS_PER_LONG value unusable. #error __BITS_PER_LONG value unusable. ^ In file included from samples/seccomp/bpf-fancy.c:13:0: samples/seccomp/bpf-fancy.c: In function ‘main’: samples/seccomp/bpf-fancy.c:38:11: error: ‘__NR_exit’ undeclared (first use in this function) SYSCALL(__NR_exit, ALLOW), and many others. I am doing these for compile testing and it's been quite useful to catch issues. Crosscompiling sample code on the other hand doesn't seem all that important so it seems like the easiest way to simply disable samples/seccomp when crosscompiling. Fixing this properly is not that easy as Kees explains: : IIRC, one of the problems is with build ordering problems: the kernel : headers used by the samples aren't available when cross compiling. Signed-off-by: Michal Hocko Signed-off-by: Kees Cook --- samples/seccomp/Makefile | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile index 0e349b8..ba942e3 100644 --- a/samples/seccomp/Makefile +++ b/samples/seccomp/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +ifndef CROSS_COMPILE hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include @@ -16,7 +17,6 @@ HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include bpf-direct-objs := bpf-direct.o # Try to match the kernel target. -ifndef CROSS_COMPILE ifndef CONFIG_64BIT # s390 has -m31 flag to build 31 bit binaries @@ -35,12 +35,4 @@ HOSTLOADLIBES_bpf-fancy += $(MFLAG) HOSTLOADLIBES_dropper += $(MFLAG) endif always := $(hostprogs-m) -else -# MIPS system calls are defined based on the -mabi that is passed -# to the toolchain which may or may not be a valid option -# for the host toolchain. So disable tests if target architecture -# is MIPS but the host isn't. -ifndef CONFIG_MIPS -always := $(hostprogs-m) -endif endif -- cgit v1.1 From 28128c61e08eaeced9cc8ec0e6b5d677b5b94690 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2018 09:41:40 -0800 Subject: kconfig.h: Include compiler types to avoid missed struct attributes The header files for some structures could get included in such a way that struct attributes (specifically __randomize_layout from path.h) would be parsed as variable names instead of attributes. This could lead to some instances of a structure being unrandomized, causing nasty GPFs, etc. This patch makes sure the compiler_types.h header is included in kconfig.h so that we've always got types and struct attributes defined, since kconfig.h is included from the compiler command line. Reported-by: Patrick McLean Root-caused-by: Maciej S. Szmigiero Suggested-by: Linus Torvalds Tested-by: Maciej S. Szmigiero Fixes: 3859a271a003 ("randstruct: Mark various structs for randomization") Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- include/linux/kconfig.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index fec5076..c5fd4ee 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -64,4 +64,7 @@ */ #define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option)) +/* Make sure we always have all types and struct attributes defined. */ +#include + #endif /* __LINUX_KCONFIG_H */ -- cgit v1.1 From bef3efbeb897b56867e271cdbc5f8adaacaeb9cd Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Thu, 22 Feb 2018 09:15:06 -0800 Subject: efivarfs: Limit the rate for non-root to read files Each read from a file in efivarfs results in two calls to EFI (one to get the file size, another to get the actual data). On X86 these EFI calls result in broadcast system management interrupts (SMI) which affect performance of the whole system. A malicious user can loop performing reads from efivarfs bringing the system to its knees. Linus suggested per-user rate limit to solve this. So we add a ratelimit structure to "user_struct" and initialize it for the root user for no limit. When allocating user_struct for other users we set the limit to 100 per second. This could be used for other places that want to limit the rate of some detrimental user action. In efivarfs if the limit is exceeded when reading, we take an interruptible nap for 50ms and check the rate limit again. Signed-off-by: Tony Luck Acked-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- fs/efivarfs/file.c | 6 ++++++ include/linux/sched/user.h | 4 ++++ kernel/user.c | 3 +++ 3 files changed, 13 insertions(+) diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 5f22e74..8e56842 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -74,6 +75,11 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf, ssize_t size = 0; int err; + while (!__ratelimit(&file->f_cred->user->ratelimit)) { + if (!msleep_interruptible(50)) + return -EINTR; + } + err = efivar_entry_size(var, &datasize); /* diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 0dcf4e4..96fe289 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -4,6 +4,7 @@ #include #include +#include struct key; @@ -41,6 +42,9 @@ struct user_struct { defined(CONFIG_NET) atomic_long_t locked_vm; #endif + + /* Miscellaneous per-user rate limit */ + struct ratelimit_state ratelimit; }; extern int uids_sysfs_init(void); diff --git a/kernel/user.c b/kernel/user.c index 9a20acc..36288d8 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -101,6 +101,7 @@ struct user_struct root_user = { .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, + .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0), }; /* @@ -191,6 +192,8 @@ struct user_struct *alloc_uid(kuid_t uid) new->uid = uid; atomic_set(&new->__count, 1); + ratelimit_state_init(&new->ratelimit, HZ, 100); + ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE); /* * Before adding this, check whether we raced -- cgit v1.1 From b87b6194be631c94785fe93398651e804ed43e28 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 21 Feb 2018 04:41:59 +0100 Subject: netlink: put module reference if dump start fails Before, if cb->start() failed, the module reference would never be put, because cb->cb_running is intentionally false at this point. Users are generally annoyed by this because they can no longer unload modules that leak references. Also, it may be possible to tediously wrap a reference counter back to zero, especially since module.c still uses atomic_inc instead of refcount_inc. This patch expands the error path to simply call module_put if cb->start() fails. Fixes: 41c87425a1ac ("netlink: do not set cb_running if dump's start() errs") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2ad445c..07e8478 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2308,7 +2308,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb->start) { ret = cb->start(cb); if (ret) - goto error_unlock; + goto error_put; } nlk->cb_running = true; @@ -2328,6 +2328,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, */ return -EINTR; +error_put: + module_put(control->module); error_unlock: sock_put(sk); mutex_unlock(nlk->cb_mutex); -- cgit v1.1 From 88e80c62671ceecdbb77c902731ec95a4bfa62f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Feb 2018 21:42:26 -0800 Subject: smsc75xx: fix smsc75xx_set_features() If an attempt is made to disable RX checksums, USB adapter is changed but netdev->features is not, because smsc75xx_set_features() returns a non zero value. This throws errors from netdev_rx_csum_fault() : : hw csum failure Signed-off-by: Eric Dumazet Cc: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index d0a1137..7a6a1fe 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -954,10 +954,11 @@ static int smsc75xx_set_features(struct net_device *netdev, /* it's racing here! */ ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); - if (ret < 0) + if (ret < 0) { netdev_warn(dev->net, "Error writing RFE_CTL\n"); - - return ret; + return ret; + } + return 0; } static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) -- cgit v1.1 From 350c9f484bde93ef229682eedd98cd5f74350f7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Feb 2018 06:43:03 -0800 Subject: tcp_bbr: better deal with suboptimal GSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BBR uses tcp_tso_autosize() in an attempt to probe what would be the burst sizes and to adjust cwnd in bbr_target_cwnd() with following gold formula : /* Allow enough full-sized skbs in flight to utilize end systems. */ cwnd += 3 * bbr->tso_segs_goal; But GSO can be lacking or be constrained to very small units (ip link set dev ... gso_max_segs 2) What we really want is to have enough packets in flight so that both GSO and GRO are efficient. So in the case GSO is off or downgraded, we still want to have the same number of packets in flight as if GSO/TSO was fully operational, so that GRO can hopefully be working efficiently. To fix this issue, we make tcp_tso_autosize() unaware of sk->sk_gso_max_segs Only tcp_tso_segs() has to enforce the gso_max_segs limit. Tested: ethtool -K eth0 tso off gso off tc qd replace dev eth0 root pfifo_fast Before patch: for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done     691  (ss -temoi shows cwnd is stuck around 6 )     667     651     631     517 After patch : # for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done    1733 (ss -temoi shows cwnd is around 386 )    1778    1746    1781    1718 Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Eric Dumazet Reported-by: Oleksandr Natalenko Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b2bca37..6818042 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1730,7 +1730,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, */ segs = max_t(u32, bytes / mss_now, min_tso_segs); - return min_t(u32, segs, sk->sk_gso_max_segs); + return segs; } EXPORT_SYMBOL(tcp_tso_autosize); @@ -1742,9 +1742,10 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : - tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + if (!tso_segs) + tso_segs = tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); } /* Returns the portion of skb which can be sent right away */ -- cgit v1.1 From 1fe4b1184c2ae2bfbf9e8b14c9c0c1945c98f205 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 21 Feb 2018 11:00:54 -0800 Subject: net: ipv4: Set addr_type in hash_keys for forwarded case The result of the skb flow dissect is copied from keys to hash_keys to ensure only the intended data is hashed. The original L4 hash patch overlooked setting the addr_type for this case; add it. Fixes: bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice") Reported-by: Ido Schimmel Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1..a4f44d8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1826,6 +1826,8 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, flag); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; hash_keys.ports.src = keys.ports.src; -- cgit v1.1 From 83090e7d35caaabc8daa65fd698275951455bbec Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 22 Feb 2018 09:24:59 +1100 Subject: net/smc9194: Remove bogus CONFIG_MAC reference AFAIK the only version of smc9194.c with Mac support is the one in the linux-mac68k CVS repo, which never made it to the mainline. Despite that, from v2.3.45, arch/m68k/config.in listed CONFIG_SMC9194 under CONFIG_MAC. This mistake got carried over into Kconfig in v2.5.55. (See pre-git era "[PATCH] add m68k dependencies to net driver config".) Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index 63aca9f..4c2f612e 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_SMSC config SMC9194 tristate "SMC 9194 support" - depends on (ISA || MAC && BROKEN) + depends on ISA select CRC32 ---help--- This is support for the SMC9xxx based Ethernet cards. Choose this -- cgit v1.1 From a2c0f039bbd0f9ebf375176d05b056e3f3b5c4f7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 21 Feb 2018 18:18:30 -0600 Subject: ibmvnic: Fix early release of login buffer The login buffer is released before the driver can perform sanity checks between resources the driver requested and what firmware will provide. Don't release the login buffer until the sanity check is performed. Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1495cb9..1b3cc8b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3760,7 +3760,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_BIDIRECTIONAL); - release_login_buffer(adapter); dma_unmap_single(dev, adapter->login_rsp_buf_token, adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL); @@ -3791,6 +3790,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, ibmvnic_remove(adapter->vdev); return -EIO; } + release_login_buffer(adapter); complete(&adapter->init_done); return 0; -- cgit v1.1 From 657308f73e674e86b60509a430a46e569bf02846 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Feb 2018 20:55:28 +0100 Subject: regulatory: add NUL to request alpha2 Similar to the ancient commit a5fe8e7695dc ("regulatory: add NUL to alpha2"), add another byte to alpha2 in the request struct so that when we use nla_put_string(), we don't overrun anything. Fixes: 73d54c9e74c4 ("cfg80211: add regulatory netlink multicast group") Reported-by: Kees Cook Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebc5a2e..f83cacc 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -78,7 +78,7 @@ struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; enum nl80211_user_reg_hint_type user_reg_hint_type; - char alpha2[2]; + char alpha2[3]; enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; -- cgit v1.1 From 32fff239de37ef226d5b66329dd133f64d63b22d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2018 08:33:24 -0800 Subject: bpf: add schedule points in percpu arrays management syszbot managed to trigger RCU detected stalls in bpf_array_free_percpu() It takes time to allocate a huge percpu map, but even more time to free it. Since we run in process context, use cond_resched() to yield cpu if needed. Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Daniel Borkmann --- kernel/bpf/arraymap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index a364c40..14750e7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array) { int i; - for (i = 0; i < array->map.max_entries; i++) + for (i = 0; i < array->map.max_entries; i++) { free_percpu(array->pptrs[i]); + cond_resched(); + } } static int bpf_array_alloc_percpu(struct bpf_array *array) @@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) return -ENOMEM; } array->pptrs[i] = ptr; + cond_resched(); } return 0; -- cgit v1.1 From 6c5f61023c5b0edb0c8a64c902fe97c6453b1852 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 22 Feb 2018 10:10:35 -0800 Subject: bpf: fix rcu lockdep warning for lpm_trie map_free callback Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback function") fixed a memory leak and removed unnecessary locks in map_free callback function. Unfortrunately, it introduced a lockdep warning. When lockdep checking is turned on, running tools/testing/selftests/bpf/test_lpm_map will have: [ 98.294321] ============================= [ 98.294807] WARNING: suspicious RCU usage [ 98.295359] 4.16.0-rc2+ #193 Not tainted [ 98.295907] ----------------------------- [ 98.296486] /home/yhs/work/bpf/kernel/bpf/lpm_trie.c:572 suspicious rcu_dereference_check() usage! [ 98.297657] [ 98.297657] other info that might help us debug this: [ 98.297657] [ 98.298663] [ 98.298663] rcu_scheduler_active = 2, debug_locks = 1 [ 98.299536] 2 locks held by kworker/2:1/54: [ 98.300152] #0: ((wq_completion)"events"){+.+.}, at: [<00000000196bc1f0>] process_one_work+0x157/0x5c0 [ 98.301381] #1: ((work_completion)(&map->work)){+.+.}, at: [<00000000196bc1f0>] process_one_work+0x157/0x5c0 Since actual trie tree removal happens only after no other accesses to the tree are possible, replacing rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)) with rcu_dereference_protected(*slot, 1) fixed the issue. Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback function") Reported-by: Eric Dumazet Suggested-by: Eric Dumazet Signed-off-by: Yonghong Song Reviewed-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- kernel/bpf/lpm_trie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index a75e02c..b4b5b81 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -569,8 +569,7 @@ static void trie_free(struct bpf_map *map) slot = &trie->root; for (;;) { - node = rcu_dereference_protected(*slot, - lockdep_is_held(&trie->lock)); + node = rcu_dereference_protected(*slot, 1); if (!node) goto out; -- cgit v1.1 From 370c10522e96bf1b2e7fd9e906dbe8fb5be895d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Feb 2018 12:11:55 +0300 Subject: net: aquantia: Fix error handling in aq_pci_probe() We should check "self->aq_hw" for allocation failure, and also we should free it on the error paths. Fixes: 23ee07ad3c2f ("net: aquantia: Cleanup pci functions module") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 22889fc..87c4308 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -226,6 +226,10 @@ static int aq_pci_probe(struct pci_dev *pdev, goto err_ioremap; self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL); + if (!self->aq_hw) { + err = -ENOMEM; + goto err_ioremap; + } self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self); for (bar = 0; bar < 4; ++bar) { @@ -235,19 +239,19 @@ static int aq_pci_probe(struct pci_dev *pdev, mmio_pa = pci_resource_start(pdev, bar); if (mmio_pa == 0U) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } reg_sz = pci_resource_len(pdev, bar); if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz); if (!self->aq_hw->mmio) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } break; } @@ -255,7 +259,7 @@ static int aq_pci_probe(struct pci_dev *pdev, if (bar == 4) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } numvecs = min((u8)AQ_CFG_VECS_DEF, @@ -290,6 +294,8 @@ err_register: aq_pci_free_irq_vectors(self); err_hwinit: iounmap(self->aq_hw->mmio); +err_free_aq_hw: + kfree(self->aq_hw); err_ioremap: free_netdev(ndev); err_pci_func: -- cgit v1.1 From 93c62c45ed5fad1b87e3a45835b251cd68de9c46 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Feb 2018 14:38:14 +0000 Subject: rxrpc: Fix send in rxrpc_send_data_packet() All the kernel_sendmsg() calls in rxrpc_send_data_packet() need to send both parts of the iov[] buffer, but one of them does not. Fix it so that it does. Without this, short IPv6 rxrpc DATA packets may be seen that have the rxrpc header included, but no payload. Fixes: 5a924b8951f8 ("rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs") Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 42410e9..cf73dc0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -445,7 +445,7 @@ send_fragmentable: (char *)&opt, sizeof(opt)); if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 1, iov[0].iov_len); + iov, 2, len); opt = IPV6_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, -- cgit v1.1 From 9026e820cbd2ea39a06a129ecdddf2739bd3602b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Feb 2018 13:18:38 -0800 Subject: fs/signalfd: fix build error for BUS_MCEERR_AR Fix build error in fs/signalfd.c by using same method that is used in kernel/signal.c: separate blocks for different signal si_code values. ./fs/signalfd.c: error: 'BUS_MCEERR_AR' undeclared (first use in this function) Reported-by: Geert Uytterhoeven Signed-off-by: Randy Dunlap Cc: Alexander Viro Signed-off-by: Eric W. Biederman --- fs/signalfd.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 9990957..76bf9cc 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -118,13 +118,22 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); #endif #ifdef BUS_MCEERR_AO - /* + /* + * Other callers might not initialize the si_lsb field, + * so check explicitly for the right codes here. + */ + if (kinfo->si_signo == SIGBUS && + kinfo->si_code == BUS_MCEERR_AO) + err |= __put_user((short) kinfo->si_addr_lsb, + &uinfo->ssi_addr_lsb); +#endif +#ifdef BUS_MCEERR_AR + /* * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ if (kinfo->si_signo == SIGBUS && - (kinfo->si_code == BUS_MCEERR_AR || - kinfo->si_code == BUS_MCEERR_AO)) + kinfo->si_code == BUS_MCEERR_AR) err |= __put_user((short) kinfo->si_addr_lsb, &uinfo->ssi_addr_lsb); #endif -- cgit v1.1 From 6d516d6798cdfc073aa2fa11dd5a5d72f3906ae5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 22 Feb 2018 15:00:43 -0600 Subject: PCI: Update location of pci.ids file Update the URL for the pci.ids file and add locations for its mirrors. Signed-off-by: Randy Dunlap Signed-off-by: Bjorn Helgaas Cc: Martin Mares Cc: Michal Vaner --- Documentation/PCI/pci.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt index 611a75e4..badb26a 100644 --- a/Documentation/PCI/pci.txt +++ b/Documentation/PCI/pci.txt @@ -570,7 +570,9 @@ your driver if they're helpful, or just use plain hex constants. The device IDs are arbitrary hex numbers (vendor controlled) and normally used only in a single location, the pci_device_id table. -Please DO submit new vendor/device IDs to http://pciids.sourceforge.net/. +Please DO submit new vendor/device IDs to http://pci-ids.ucw.cz/. +There are mirrors of the pci.ids file at http://pciids.sourceforge.net/ +and https://github.com/pciutils/pciids. -- cgit v1.1 From 651ca2c00405a2ae3870cc0b4f15a182eb6fbe26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Feb 2018 12:08:05 +0100 Subject: genirq/matrix: Handle CPU offlining proper At CPU hotunplug the corresponding per cpu matrix allocator is shut down and the allocated interrupt bits are discarded under the assumption that all allocated bits have been either migrated away or shut down through the managed interrupts mechanism. This is not true because interrupts which are not started up might have a vector allocated on the outgoing CPU. When the interrupt is started up later or completely shutdown and freed then the allocated vector is handed back, triggering warnings or causing accounting issues which result in suspend failures and other issues. Change the CPU hotplug mechanism of the matrix allocator so that the remaining allocations at unplug time are preserved and global accounting at hotplug is correctly readjusted to take the dormant vectors into account. Fixes: 2f75d9e1c905 ("genirq: Implement bitmap matrix allocator") Reported-by: Yuriy Vostrikov Signed-off-by: Thomas Gleixner Tested-by: Yuriy Vostrikov Cc: Peter Zijlstra Cc: Randy Dunlap Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180222112316.849980972@linutronix.de --- kernel/irq/matrix.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 5187dfe..4c57704 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -16,6 +16,7 @@ struct cpumap { unsigned int available; unsigned int allocated; unsigned int managed; + bool initialized; bool online; unsigned long alloc_map[IRQ_MATRIX_SIZE]; unsigned long managed_map[IRQ_MATRIX_SIZE]; @@ -81,9 +82,11 @@ void irq_matrix_online(struct irq_matrix *m) BUG_ON(cm->online); - bitmap_zero(cm->alloc_map, m->matrix_bits); - cm->available = m->alloc_size - (cm->managed + m->systembits_inalloc); - cm->allocated = 0; + if (!cm->initialized) { + cm->available = m->alloc_size; + cm->available -= cm->managed + m->systembits_inalloc; + cm->initialized = true; + } m->global_available += cm->available; cm->online = true; m->online_maps++; @@ -370,14 +373,16 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return; - if (cm->online) { - clear_bit(bit, cm->alloc_map); - cm->allocated--; + clear_bit(bit, cm->alloc_map); + cm->allocated--; + + if (cm->online) m->total_allocated--; - if (!managed) { - cm->available++; + + if (!managed) { + cm->available++; + if (cm->online) m->global_available++; - } } trace_irq_matrix_free(bit, cpu, m, cm); } -- cgit v1.1 From 77ee2e1bac3218723a1b252d678827de1fa651ce Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 15 Dec 2017 13:46:28 +0100 Subject: ARM: BCM: dts: Remove leading 0x and 0s from bindings notation Improve the DTS files by removing all the leading "0x" and zeros to fix the following dtc warnings: Warning (unit_address_format): Node /XXX unit name should not have leading "0x" and Warning (unit_address_format): Node /XXX unit name should not have leading 0s Converted using the following command: find . -type f \( -iname *.dts -o -iname *.dtsi \) -exec sed -i -e "s/@\([0-9a-fA-FxX\.;:#]+\)\s*{/@\L\1 {/g" -e "s/@0x\(.*\) {/@\1 {/g" -e "s/@0+\(.*\) {/@\1 {/g" {} +^C For simplicity, two sed expressions were used to solve each warnings separately. To make the regex expression more robust a few other issues were resolved, namely setting unit-address to lower case, and adding a whitespace before the the opening curly brace: https://elinux.org/Device_Tree_Linux#Linux_conventions This will solve as a side effect warning: Warning (simple_bus_reg): Node /XXX@ simple-bus unit address format error, expected "" This is a follow up to commit 4c9847b7375a ("dt-bindings: Remove leading 0x from bindings notation") Reported-by: David Daney Suggested-by: Rob Herring Signed-off-by: Mathieu Malaterre Acked-by: Stefan Wahren Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm11351.dtsi | 2 +- arch/arm/boot/dts/bcm21664.dtsi | 2 +- arch/arm/boot/dts/bcm283x.dtsi | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index 18045c3..db7cded 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -55,7 +55,7 @@ <0x3ff00100 0x100>; }; - smc@0x3404c000 { + smc@3404c000 { compatible = "brcm,bcm11351-smc", "brcm,kona-smc"; reg = <0x3404c000 0x400>; /* 1 KiB in SRAM */ }; diff --git a/arch/arm/boot/dts/bcm21664.dtsi b/arch/arm/boot/dts/bcm21664.dtsi index 6dde95f..266f261 100644 --- a/arch/arm/boot/dts/bcm21664.dtsi +++ b/arch/arm/boot/dts/bcm21664.dtsi @@ -55,7 +55,7 @@ <0x3ff00100 0x100>; }; - smc@0x3404e000 { + smc@3404e000 { compatible = "brcm,bcm21664-smc", "brcm,kona-smc"; reg = <0x3404e000 0x400>; /* 1 KiB in SRAM */ }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 18db25a..9d293de 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -465,7 +465,7 @@ status = "disabled"; }; - aux: aux@0x7e215000 { + aux: aux@7e215000 { compatible = "brcm,bcm2835-aux"; #clock-cells = <1>; reg = <0x7e215000 0x8>; -- cgit v1.1 From 86516eff3b09a5fd17e81d50925bbccc6a36beed Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 22 Feb 2018 14:41:25 -0800 Subject: xfs: use memset to initialize xfs_scrub_agfl_info Apparently different gcc versions have competing and incompatible notions of how to initialize at declaration, so just give up and fall back to the time-tested memset(). Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/scrub/agheader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index fd97552..05c66e0 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -767,7 +767,7 @@ int xfs_scrub_agfl( struct xfs_scrub_context *sc) { - struct xfs_scrub_agfl_info sai = { 0 }; + struct xfs_scrub_agfl_info sai; struct xfs_agf *agf; xfs_agnumber_t agno; unsigned int agflcount; @@ -795,6 +795,7 @@ xfs_scrub_agfl( xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp); goto out; } + memset(&sai, 0, sizeof(sai)); sai.sz_entries = agflcount; sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS); if (!sai.entries) { -- cgit v1.1 From b31c2bdcd83e3374fec5a8e27a2fb4d26e771c52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 22 Feb 2018 14:41:25 -0800 Subject: xfs: reserve blocks for refcount / rmap log item recovery During log recovery, the per-AG reservations aren't yet set up, so log recovery has to reserve enough blocks to handle all possible btree splits. Reported-by: Dave Chinner Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_refcount_item.c | 9 ++++++--- fs/xfs/xfs_rmap_item.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 3a55d6f..7a39f40 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -23,6 +23,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_shared.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" @@ -456,10 +457,12 @@ xfs_cui_recover( * transaction. Normally, any work that needs to be deferred * gets attached to the same defer_ops that scheduled the * refcount update. However, we're in log recovery here, so we - * we create our own defer_ops and use that to finish up any - * work that doesn't fit. + * we use the passed in defer_ops and to finish up any work that + * doesn't fit. We need to reserve enough blocks to handle a + * full btree split on either end of the refcount range. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; cudp = xfs_trans_get_cud(tp, cuip); diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index f3b139c..49d3124 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -23,6 +23,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_shared.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" @@ -470,7 +471,8 @@ xfs_rui_recover( } } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; rudp = xfs_trans_get_rud(tp, ruip); -- cgit v1.1 From af1da686843750809738c01e153320106e890804 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 22 Feb 2018 19:22:20 +0800 Subject: dma-debug: fix memory leak in debug_dma_alloc_coherent Marty reported a memory leakage introduced by commit 3aaabbf1c39e ("lib/dma-debug.c: fix incorrect pfn calculation"). Fix it by checking the virtual address before allocating the entry. This patch also use virt_addr_valid() instead of virt_to_page() to check if a virtual address is linear. Fixes: 3aaabbf1 ("lib/dma-debug.c: fix incorrect pfn calculation") Reported-by: Marty Faltesek Signed-off-by: Miles Chen Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- lib/dma-debug.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 1b34d21..7f5cdc1 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, if (unlikely(virt == NULL)) return; - entry = dma_entry_alloc(); - if (!entry) + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + entry = dma_entry_alloc(); + if (!entry) return; entry->type = dma_debug_coherent; @@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, }; /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; if (is_vmalloc_addr(virt)) -- cgit v1.1 From a493a87f38cfa48caaa95c9347be2d914c6fdf29 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Feb 2018 15:12:53 +0100 Subject: bpf, x64: implement retpoline for tail call Implement a retpoline [0] for the BPF tail call JIT'ing that converts the indirect jump via jmp %rax that is used to make the long jump into another JITed BPF image. Since this is subject to speculative execution, we need to control the transient instruction sequence here as well when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop. The latter aligns also with what gcc / clang emits (e.g. [1]). JIT dump after patch: # bpftool p d x i 1 0: (18) r2 = map[id:1] 2: (b7) r3 = 0 3: (85) call bpf_tail_call#12 4: (b7) r0 = 2 5: (95) exit With CONFIG_RETPOLINE: # bpftool p d j i 1 [...] 33: cmp %edx,0x24(%rsi) 36: jbe 0x0000000000000072 |* 38: mov 0x24(%rbp),%eax 3e: cmp $0x20,%eax 41: ja 0x0000000000000072 | 43: add $0x1,%eax 46: mov %eax,0x24(%rbp) 4c: mov 0x90(%rsi,%rdx,8),%rax 54: test %rax,%rax 57: je 0x0000000000000072 | 59: mov 0x28(%rax),%rax 5d: add $0x25,%rax 61: callq 0x000000000000006d |+ 66: pause | 68: lfence | 6b: jmp 0x0000000000000066 | 6d: mov %rax,(%rsp) | 71: retq | 72: mov $0x2,%eax [...] * relative fall-through jumps in error case + retpoline for indirect jump Without CONFIG_RETPOLINE: # bpftool p d j i 1 [...] 33: cmp %edx,0x24(%rsi) 36: jbe 0x0000000000000063 |* 38: mov 0x24(%rbp),%eax 3e: cmp $0x20,%eax 41: ja 0x0000000000000063 | 43: add $0x1,%eax 46: mov %eax,0x24(%rbp) 4c: mov 0x90(%rsi,%rdx,8),%rax 54: test %rax,%rax 57: je 0x0000000000000063 | 59: mov 0x28(%rax),%rax 5d: add $0x25,%rax 61: jmpq *%rax |- 63: mov $0x2,%eax [...] * relative fall-through jumps in error case - plain indirect jump as before [0] https://support.google.com/faqs/answer/7625886 [1] https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/nospec-branch.h | 37 ++++++++++++++++++++++++++++++++++++ arch/x86/net/bpf_jit_comp.c | 9 +++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 76b0585..81a1be3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ + +/* + * Below is used in the eBPF JIT compiler and emits the byte sequence + * for the following assembly: + * + * With retpolines configured: + * + * callq do_rop + * spec_trap: + * pause + * lfence + * jmp spec_trap + * do_rop: + * mov %rax,(%rsp) + * retq + * + * Without retpolines configured: + * + * jmp *%rax + */ +#ifdef CONFIG_RETPOLINE +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ + /* spec_trap: */ \ + EMIT2(0xF3, 0x90); /* pause */ \ + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ + /* do_rop: */ \ + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ + EMIT1(0xC3); /* retq */ +#else +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT2(0xFF, 0xE0); /* jmp *%rax */ +#endif + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4923d92..45e4eb5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include /* @@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 43 /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 32 +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ @@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog) * goto out; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ -#define OFFSET3 10 +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; @@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog) * rdi == ctx (1st arg) * rax == prog->bpf_func + prologue_size */ - EMIT2(0xFF, 0xE0); /* jmp rax */ + RETPOLINE_RAX_BPF_JIT(); /* out: */ BUILD_BUG_ON(cnt - label1 != OFFSET1); -- cgit v1.1 From 16338a9b3ac30740d49f5dfed81bac0ffa53b9c7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Feb 2018 01:03:43 +0100 Subject: bpf, arm64: fix out of bounds access in tail call I recently noticed a crash on arm64 when feeding a bogus index into BPF tail call helper. The crash would not occur when the interpreter is used, but only in case of JIT. Output looks as follows: [ 347.007486] Unable to handle kernel paging request at virtual address fffb850e96492510 [...] [ 347.043065] [fffb850e96492510] address between user and kernel address ranges [ 347.050205] Internal error: Oops: 96000004 [#1] SMP [...] [ 347.190829] x13: 0000000000000000 x12: 0000000000000000 [ 347.196128] x11: fffc047ebe782800 x10: ffff808fd7d0fd10 [ 347.201427] x9 : 0000000000000000 x8 : 0000000000000000 [ 347.206726] x7 : 0000000000000000 x6 : 001c991738000000 [ 347.212025] x5 : 0000000000000018 x4 : 000000000000ba5a [ 347.217325] x3 : 00000000000329c4 x2 : ffff808fd7cf0500 [ 347.222625] x1 : ffff808fd7d0fc00 x0 : ffff808fd7cf0500 [ 347.227926] Process test_verifier (pid: 4548, stack limit = 0x000000007467fa61) [ 347.235221] Call trace: [ 347.237656] 0xffff000002f3a4fc [ 347.240784] bpf_test_run+0x78/0xf8 [ 347.244260] bpf_prog_test_run_skb+0x148/0x230 [ 347.248694] SyS_bpf+0x77c/0x1110 [ 347.251999] el0_svc_naked+0x30/0x34 [ 347.255564] Code: 9100075a d280220a 8b0a002a d37df04b (f86b694b) [...] In this case the index used in BPF r3 is the same as in r1 at the time of the call, meaning we fed a pointer as index; here, it had the value 0xffff808fd7cf0500 which sits in x2. While I found tail calls to be working in general (also for hitting the error cases), I noticed the following in the code emission: # bpftool p d j i 988 [...] 38: ldr w10, [x1,x10] 3c: cmp w2, w10 40: b.ge 0x000000000000007c <-- signed cmp 44: mov x10, #0x20 // #32 48: cmp x26, x10 4c: b.gt 0x000000000000007c 50: add x26, x26, #0x1 54: mov x10, #0x110 // #272 58: add x10, x1, x10 5c: lsl x11, x2, #3 60: ldr x11, [x10,x11] <-- faulting insn (f86b694b) 64: cbz x11, 0x000000000000007c [...] Meaning, the tests passed because commit ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") was using signed compares instead of unsigned which as a result had the test wrongly passing. Change this but also the tail call count test both into unsigned and cap the index as u32. Latter we did as well in 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT") and is needed in addition here, too. Tested on HiSilicon Hi1616. Result after patch: # bpftool p d j i 268 [...] 38: ldr w10, [x1,x10] 3c: add w2, w2, #0x0 40: cmp w2, w10 44: b.cs 0x0000000000000080 48: mov x10, #0x20 // #32 4c: cmp x26, x10 50: b.hi 0x0000000000000080 54: add x26, x26, #0x1 58: mov x10, #0x110 // #272 5c: add x10, x1, x10 60: lsl x11, x2, #3 64: ldr x11, [x10,x11] 68: cbz x11, 0x0000000000000080 [...] Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 5 +++-- tools/testing/selftests/bpf/test_verifier.c | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d4f1da..a933504 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) off = offsetof(struct bpf_array, map.max_entries); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_MOV(0, r3, r3), ctx); emit(A64_CMP(0, r3, tmp), ctx); - emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; @@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); emit(A64_CMP(1, tcc, tmp), ctx); - emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_B_(A64_COND_HI, jmp_offset), ctx); emit(A64_ADD_I(1, tcc, tcc, 1), ctx); /* prog = array->ptrs[index]; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c0f16e9..c73592f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2587,6 +2587,32 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "runtime/jit: pass negative index to tail_call", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + }, + { + "runtime/jit: pass > 32bit index to tail_call", + .insns = { + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 2 }, + .result = ACCEPT, + }, + { "stack pointer arithmetic", .insns = { BPF_MOV64_IMM(BPF_REG_1, 4), -- cgit v1.1 From 0f9da844d87796ac31b04e81ee95e155e9043132 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2018 16:59:26 -0800 Subject: MIPS: boot: Define __ASSEMBLY__ for its.S build The MIPS %.its.S compiler command did not define __ASSEMBLY__, which meant when compiler_types.h was added to kconfig.h, unexpected things appeared (e.g. struct declarations) which should not have been present. As done in the general %.S compiler command, __ASSEMBLY__ is now included here too. The failure was: Error: arch/mips/boot/vmlinux.gz.its:201.1-2 syntax error FATAL ERROR: Unable to parse input tree /usr/bin/mkimage: Can't read arch/mips/boot/vmlinux.gz.itb.tmp: Invalid argument /usr/bin/mkimage Can't add hashes to FIT blob Reported-by: kbuild test robot Fixes: 28128c61e08e ("kconfig.h: Include compiler types to avoid missed struct attributes") Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- arch/mips/boot/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 1bd5c4f..c22da16 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -126,6 +126,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS quiet_cmd_cpp_its_S = ITS $@ cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \ + -D__ASSEMBLY__ \ -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \ -DVMLINUX_BINARY="\"$(3)\"" \ -DVMLINUX_COMPRESSION="\"$(2)\"" \ -- cgit v1.1 From 120f3b11ef88fc38ce1d0ff9c9a4b37860ad3140 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 12 Feb 2018 17:26:20 -0800 Subject: integrity/security: fix digsig.c build error with header file security/integrity/digsig.c has build errors on some $ARCH due to a missing header file, so add it. security/integrity/digsig.c:146:2: error: implicit declaration of function 'vfree' [-Werror=implicit-function-declaration] Reported-by: Michael Ellerman Signed-off-by: Randy Dunlap Cc: Mimi Zohar Cc: linux-integrity@vger.kernel.org Link: http://kisskb.ellerman.id.au/kisskb/head/13396/ Signed-off-by: James Morris --- security/integrity/digsig.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 6f9e4ce..9bb0a7f 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include -- cgit v1.1 From 2c83029cda55a5e7665c7c6326909427d6a01350 Mon Sep 17 00:00:00 2001 From: Ben Crocker Date: Thu, 22 Feb 2018 17:52:19 -0500 Subject: drm/radeon: insist on 32-bit DMA for Cedar on PPC64/PPC64LE In radeon_device_init, set the need_dma32 flag for Cedar chips (e.g. FirePro 2270). This fixes, or at least works around, a bug on PowerPC exposed by last year's commits 8e3f1b1d8255105f31556aacf8aeb6071b00d469 (Russell Currey) and 253fd51e2f533552ae35a0c661705da6c4842c1b (Alistair Popple) which enabled the 64-bit DMA iommu bypass. This caused the device to freeze, in some cases unrecoverably, and is the subject of several bug reports internal to Red Hat. Signed-off-by: Ben Crocker Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 8d3e3d2..7828a5e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1365,6 +1365,10 @@ int radeon_device_init(struct radeon_device *rdev, if ((rdev->flags & RADEON_IS_PCI) && (rdev->family <= CHIP_RS740)) rdev->need_dma32 = true; +#ifdef CONFIG_PPC64 + if (rdev->family == CHIP_CEDAR) + rdev->need_dma32 = true; +#endif dma_bits = rdev->need_dma32 ? 32 : 40; r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits)); -- cgit v1.1 From 2f7d03e0511991f124455682cc94094eaa0981ea Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Wed, 21 Feb 2018 16:06:26 +0530 Subject: powerpc/mm/drmem: Fix unexpected flag value in ibm,dynamic-memory-v2 Memory addtion and removal by count and indexed-count methods temporarily mark the LMBs that are being added/removed by a special flag value DRMEM_LMB_RESERVED. Accessing flags value directly at a few places without proper accessor method is causing two unexpected side-effects: - DRMEM_LMB_RESERVED bit is becoming part of the flags word of drconf_cell_v2 entries in ibm,dynamic-memory-v2 DT property. - This results in extra drconf_cell entries in ibm,dynamic-memory-v2. For example if 1G memory is added, it leads to one entry for 3 LMBs and 1 separate entry for the last LMB. All the 4 LMBs should be defined by one entry here. Fix this by always accessing the flags by its accessor method drmem_lmb_flags(). Fixes: 2b31e3aec1db ("powerpc/drmem: Add support for ibm, dynamic-memory-v2 property") Signed-off-by: Bharata B Rao Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/drmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 916844f..3f18036 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -98,7 +98,7 @@ static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, dr_cell->base_addr = cpu_to_be64(lmb->base_addr); dr_cell->drc_index = cpu_to_be32(lmb->drc_index); dr_cell->aa_index = cpu_to_be32(lmb->aa_index); - dr_cell->flags = cpu_to_be32(lmb->flags); + dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb)); } static int drmem_update_dt_v2(struct device_node *memory, @@ -121,7 +121,7 @@ static int drmem_update_dt_v2(struct device_node *memory, } if (prev_lmb->aa_index != lmb->aa_index || - prev_lmb->flags != lmb->flags) + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) lmb_sets++; prev_lmb = lmb; @@ -150,7 +150,7 @@ static int drmem_update_dt_v2(struct device_node *memory, } if (prev_lmb->aa_index != lmb->aa_index || - prev_lmb->flags != lmb->flags) { + drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) { /* end of one set, start of another */ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); dr_cell++; -- cgit v1.1 From 582605a429e20ae68fd0b041b2e840af296edd08 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 22 Feb 2018 23:58:49 +1100 Subject: powerpc/pseries: Support firmware disable of RFI flush Some versions of firmware will have a setting that can be configured to disable the RFI flush, add support for it. Fixes: 8989d56878a7 ("powerpc/pseries: Query hypervisor for RFI flush settings") Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 372d7ad..1a52762 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -482,7 +482,8 @@ static void pseries_setup_rfi_flush(void) if (types == L1D_FLUSH_NONE) types = L1D_FLUSH_FALLBACK; - if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) || + (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))) enable = false; } else { /* Default to fallback if case hcall is not available */ -- cgit v1.1 From eb0a2d2620ae431c543963c8c7f08f597366fc60 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 Feb 2018 00:00:11 +1100 Subject: powerpc/powernv: Support firmware disable of RFI flush Some versions of firmware will have a setting that can be configured to disable the RFI flush, add support for it. Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 4fb21e1..092715b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -80,6 +80,10 @@ static void pnv_setup_rfi_flush(void) if (np && of_property_read_bool(np, "disabled")) enable--; + np = of_get_child_by_name(fw_features, "speculation-policy-favor-security"); + if (np && of_property_read_bool(np, "disabled")) + enable = 0; + of_node_put(np); of_node_put(fw_features); } -- cgit v1.1 From e84cf6aa501c58bf4bf451f1e425192ec090aed2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Feb 2018 12:08:06 +0100 Subject: x86/apic/vector: Handle vector release on CPU unplug correctly When a irq vector is replaced, then the previous vector is normally released when the first interrupt happens on the new vector. If the target CPU of the previous vector is already offline when the new vector is installed, then the previous vector is silently discarded, which leads to accounting issues causing suspend failures and other problems. Adjust the logic so that the previous vector is freed in the underlying matrix allocator to ensure that the accounting stays correct. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Yuriy Vostrikov Signed-off-by: Thomas Gleixner Tested-by: Yuriy Vostrikov Cc: Peter Zijlstra Cc: Randy Dunlap Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180222112316.930791749@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/vector.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3cc471b..bb6f7a2 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, { struct apic_chip_data *apicd = apic_chip_data(irqd); struct irq_desc *desc = irq_data_to_desc(irqd); + bool managed = irqd_affinity_is_managed(irqd); lockdep_assert_held(&vector_lock); trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, apicd->cpu); - /* Setup the vector move, if required */ - if (apicd->vector && cpu_online(apicd->cpu)) { + /* + * If there is no vector associated or if the associated vector is + * the shutdown vector, which is associated to make PCI/MSI + * shutdown mode work, then there is nothing to release. Clear out + * prev_vector for this and the offlined target case. + */ + apicd->prev_vector = 0; + if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) + goto setnew; + /* + * If the target CPU of the previous vector is online, then mark + * the vector as move in progress and store it for cleanup when the + * first interrupt on the new vector arrives. If the target CPU is + * offline then the regular release mechanism via the cleanup + * vector is not possible and the vector can be immediately freed + * in the underlying matrix allocator. + */ + if (cpu_online(apicd->cpu)) { apicd->move_in_progress = true; apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; } else { - apicd->prev_vector = 0; + irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, + managed); } +setnew: apicd->vector = newvec; apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); -- cgit v1.1 From 36e74d355297dde6e69a39c838d24710e442babe Mon Sep 17 00:00:00 2001 From: Wang Hui Date: Thu, 22 Feb 2018 19:26:03 -0800 Subject: x86/intel_rdt: Fix incorrect returned value when creating rdgroup sub-directory in resctrl file system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If no monitoring feature is detected because all monitoring features are disabled during boot time or there is no monitoring feature in hardware, creating rdtgroup sub-directory by "mkdir" command reports error: mkdir: cannot create directory ‘/sys/fs/resctrl/p1’: No such file or directory But the sub-directory actually is generated and content is correct: cpus cpus_list schemata tasks The error is because rdtgroup_mkdir_ctrl_mon() returns non zero value after the sub-directory is created and the returned value is reported as an error to user. Clear the returned value to report to user that the sub-directory is actually created successfully. Signed-off-by: Wang Hui Signed-off-by: Zhang Yanfei Signed-off-by: Fenghua Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Cc: Tony Luck Cc: Vikas Cc: Xiaochen Shen Link: http://lkml.kernel.org/r/1519356363-133085-1-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index bdab7d2..fca759d 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, goto out_common_fail; } closid = ret; + ret = 0; rdtgrp->closid = closid; list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); -- cgit v1.1 From ecb586bd29c99fb4de599dec388658e74388daad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 22 Feb 2018 16:43:17 +0100 Subject: KVM/x86: Remove indirect MSR op calls from SPEC_CTRL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl() on the vmentry path too. Signed-off-by: Paolo Bonzini Reviewed-by: Jim Mattson Cc: David Woodhouse Cc: KarimAllah Ahmed Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kvm/svm.c | 7 ++++--- arch/x86/kvm/vmx.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a..1598bee 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -5355,7 +5356,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5465,10 +5466,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * save it. */ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dec126..0927be3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include "trace.h" @@ -9452,7 +9453,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; asm( @@ -9588,10 +9589,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * save it. */ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); -- cgit v1.1 From 946fbbc13dce68902f64515b610eeb2a6c3d7a64 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 22 Feb 2018 16:43:18 +0100 Subject: KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving branch hints to the compiler can actually make a substantial cycle difference by keeping the fast path contiguous in memory. With this optimization, the retpoline-guest/retpoline-host case is about 50 cycles faster. Signed-off-by: Paolo Bonzini Reviewed-by: Jim Mattson Cc: David Woodhouse Cc: KarimAllah Ahmed Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1598bee..24c9521 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5465,7 +5465,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0927be3..7f8401d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9588,7 +9588,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) -- cgit v1.1 From 79d442461df7478cdd0c50d9b8a76f431f150fa3 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 22 Feb 2018 10:24:29 +0100 Subject: locking/xchg/alpha: Clean up barrier usage by using smp_mb() in place of __ASM__MB Replace each occurrence of __ASM__MB with a (trailing) smp_mb() in xchg(), cmpxchg(), and remove the now unused __ASM__MB definitions; this improves readability, with no additional synchronization cost. Suggested-by: Will Deacon Signed-off-by: Andrea Parri Acked-by: Paul E. McKenney Cc: Alan Stern Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Link: http://lkml.kernel.org/r/1519291469-5702-1-git-send-email-parri.andrea@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/cmpxchg.h | 6 ------ arch/alpha/include/asm/xchg.h | 16 ++++++++-------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 46ebf14a..8a2b331 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -6,7 +6,6 @@ * Atomic exchange routines. */ -#define __ASM__MB #define ____xchg(type, args...) __xchg ## type ## _local(args) #define ____cmpxchg(type, args...) __cmpxchg ## type ## _local(args) #include @@ -33,10 +32,6 @@ cmpxchg_local((ptr), (o), (n)); \ }) -#ifdef CONFIG_SMP -#undef __ASM__MB -#define __ASM__MB "\tmb\n" -#endif #undef ____xchg #undef ____cmpxchg #define ____xchg(type, args...) __xchg ##type(args) @@ -64,7 +59,6 @@ cmpxchg((ptr), (o), (n)); \ }) -#undef __ASM__MB #undef ____cmpxchg #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index e266086..e1facf6 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -28,12 +28,12 @@ ____xchg(_u8, volatile char *m, unsigned long val) " or %1,%2,%2\n" " stq_c %2,0(%3)\n" " beq %2,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) : "r" ((long)m), "1" (val) : "memory"); + smp_mb(); return ret; } @@ -52,12 +52,12 @@ ____xchg(_u16, volatile short *m, unsigned long val) " or %1,%2,%2\n" " stq_c %2,0(%3)\n" " beq %2,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64) : "r" ((long)m), "1" (val) : "memory"); + smp_mb(); return ret; } @@ -72,12 +72,12 @@ ____xchg(_u32, volatile int *m, unsigned long val) " bis $31,%3,%1\n" " stl_c %1,%2\n" " beq %1,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) : "rI" (val), "m" (*m) : "memory"); + smp_mb(); return val; } @@ -92,12 +92,12 @@ ____xchg(_u64, volatile long *m, unsigned long val) " bis $31,%3,%1\n" " stq_c %1,%2\n" " beq %1,2f\n" - __ASM__MB ".subsection 2\n" "2: br 1b\n" ".previous" : "=&r" (val), "=&r" (dummy), "=m" (*m) : "rI" (val), "m" (*m) : "memory"); + smp_mb(); return val; } @@ -150,12 +150,12 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) " stq_c %2,0(%4)\n" " beq %2,3f\n" "2:\n" - __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + smp_mb(); return prev; } @@ -177,12 +177,12 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) " stq_c %2,0(%4)\n" " beq %2,3f\n" "2:\n" - __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64) : "r" ((long)m), "Ir" (old), "1" (new) : "memory"); + smp_mb(); return prev; } @@ -200,12 +200,12 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) " stl_c %1,%2\n" " beq %1,3f\n" "2:\n" - __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r"(prev), "=&r"(cmp), "=m"(*m) : "r"((long) old), "r"(new), "m"(*m) : "memory"); + smp_mb(); return prev; } @@ -223,12 +223,12 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) " stq_c %1,%2\n" " beq %1,3f\n" "2:\n" - __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" : "=&r"(prev), "=&r"(cmp), "=m"(*m) : "r"((long) old), "r"(new), "m"(*m) : "memory"); + smp_mb(); return prev; } -- cgit v1.1 From 472e8c55cf6622d1c112dc2bc777f68bbd4189db Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Thu, 22 Feb 2018 10:24:48 +0100 Subject: locking/xchg/alpha: Fix xchg() and cmpxchg() memory ordering bugs Successful RMW operations are supposed to be fully ordered, but Alpha's xchg() and cmpxchg() do not meet this requirement. Will Deacon noticed the bug: > So MP using xchg: > > WRITE_ONCE(x, 1) > xchg(y, 1) > > smp_load_acquire(y) == 1 > READ_ONCE(x) == 0 > > would be allowed. ... which thus violates the above requirement. Fix it by adding a leading smp_mb() to the xchg() and cmpxchg() implementations. Reported-by: Will Deacon Signed-off-by: Andrea Parri Acked-by: Paul E. McKenney Cc: Alan Stern Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Link: http://lkml.kernel.org/r/1519291488-5752-1-git-send-email-parri.andrea@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/xchg.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index e1facf6..e2b59fa 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -12,6 +12,10 @@ * Atomic exchange. * Since it can be used to implement critical sections * it must clobber "memory" (also for interrupts in UP). + * + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * */ static inline unsigned long @@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " insbl %1,%4,%1\n" @@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " inswl %1,%4,%1\n" @@ -67,6 +73,7 @@ ____xchg(_u32, volatile int *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%4\n" " bis $31,%3,%1\n" @@ -87,6 +94,7 @@ ____xchg(_u64, volatile long *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%4\n" " bis $31,%3,%1\n" @@ -128,9 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size) * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. * - * The memory barrier is placed in SMP unconditionally, in order to - * guarantee that dependency ordering is preserved when a dependency - * is headed by an unsuccessful operation. + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * + * The trailing memory barrier is placed in SMP unconditionally, in + * order to guarantee that dependency ordering is preserved when a + * dependency is headed by an unsuccessful operation. */ static inline unsigned long @@ -138,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) { unsigned long prev, tmp, cmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %5,7,%4\n" " insbl %1,%5,%1\n" @@ -165,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) { unsigned long prev, tmp, cmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %5,7,%4\n" " inswl %1,%5,%1\n" @@ -192,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) { unsigned long prev, cmp; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%5\n" " cmpeq %0,%3,%1\n" @@ -215,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) { unsigned long prev, cmp; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%5\n" " cmpeq %0,%3,%1\n" -- cgit v1.1 From 0c52f7c5499dc708a64742da0cb7eb4f6d94588b Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 22 Feb 2018 16:48:12 +0800 Subject: x86/topology: Fix function name in documentation topology_sibling_cpumask() is the correct thread-related topology function in the kernel: s/topology_sibling_mask/topology_sibling_cpumask Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: corbet@lwn.net Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20180222084812.14497-1-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- Documentation/x86/topology.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt index f3e9d7e..2953e3e 100644 --- a/Documentation/x86/topology.txt +++ b/Documentation/x86/topology.txt @@ -108,7 +108,7 @@ The topology of a system is described in the units of: The number of online threads is also printed in /proc/cpuinfo "siblings." - - topology_sibling_mask(): + - topology_sibling_cpumask(): The cpumask contains all online threads in the core to which a thread belongs. -- cgit v1.1 From 4596749339e06dc7a424fc08a15eded850ed78b7 Mon Sep 17 00:00:00 2001 From: Samuel Neves Date: Wed, 21 Feb 2018 20:50:36 +0000 Subject: x86/topology: Update the 'cpu cores' field in /proc/cpuinfo correctly across CPU hotplug operations Without this fix, /proc/cpuinfo will display an incorrect amount of CPU cores, after bringing them offline and online again, as exemplified below: $ cat /proc/cpuinfo | grep cores cpu cores : 4 cpu cores : 8 cpu cores : 8 cpu cores : 20 cpu cores : 4 cpu cores : 3 cpu cores : 2 cpu cores : 2 This patch fixes this by always zeroing the booted_cores variable upon turning off a logical CPU. Tested-by: Dou Liyang Signed-off-by: Samuel Neves Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: jgross@suse.com Cc: luto@kernel.org Cc: prarit@redhat.com Cc: vkuznets@redhat.com Link: http://lkml.kernel.org/r/20180221205036.5244-1-sneves@dei.uc.pt Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9eee25d..ff99e2b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1437,6 +1437,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); c->cpu_core_id = 0; + c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } -- cgit v1.1 From 1b22b4b28fd5fbc51855219e3238b3ab81da8466 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Feb 2018 17:50:12 +0000 Subject: MIPS: ath25: Check for kzalloc allocation failure Currently there is no null check on a failed allocation of board_data, and hence a null pointer dereference will occurr. Fix this by checking for the out of memory null pointer. Fixes: a7473717483e ("MIPS: ath25: add board configuration detection") Signed-off-by: Colin Ian King Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 3.19+ Patchwork: https://patchwork.linux-mips.org/patch/18657/ Signed-off-by: James Hogan --- arch/mips/ath25/board.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c index 9ab48ff..6d11ae5 100644 --- a/arch/mips/ath25/board.c +++ b/arch/mips/ath25/board.c @@ -135,6 +135,8 @@ int __init ath25_find_config(phys_addr_t base, unsigned long size) } board_data = kzalloc(BOARD_CONFIG_BUFSZ, GFP_KERNEL); + if (!board_data) + goto error; ath25_board.config = (struct ath25_boarddata *)board_data; memcpy_fromio(board_data, bcfg, 0x100); if (broken_boarddata) { -- cgit v1.1 From 902f4d067a50ccf645a58dd5fb1d113b6e0f9b5b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Feb 2018 18:08:53 +0000 Subject: MIPS: OCTEON: irq: Check for null return on kzalloc allocation The allocation of host_data is not null checked, leading to a null pointer dereference if the allocation fails. Fix this by adding a null check and return with -ENOMEM. Fixes: 64b139f97c01 ("MIPS: OCTEON: irq: add CIB and other fixes") Signed-off-by: Colin Ian King Acked-by: David Daney Cc: Ralf Baechle Cc: "Steven J. Hill" Cc: linux-mips@linux-mips.org Cc: # 4.0+ Patchwork: https://patchwork.linux-mips.org/patch/18658/ Signed-off-by: James Hogan --- arch/mips/cavium-octeon/octeon-irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 5b3a3f6..d99f524 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2277,6 +2277,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, } host_data = kzalloc(sizeof(*host_data), GFP_KERNEL); + if (!host_data) + return -ENOMEM; raw_spin_lock_init(&host_data->lock); addr = of_get_address(ciu_node, 0, NULL, NULL); -- cgit v1.1 From 80dfd71c5ac28542911bd009f3565895e2c94380 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 23 Feb 2018 10:29:46 +0200 Subject: media: videobuf2: Add VIDEOBUF2_V4L2 Kconfig option for VB2 V4L2 part Videobuf2 is now separate from V4L2 and can be now built without it, at least in principle --- enabling videobuf2 in kernel configuration attempts to compile videobuf2-v4l2.c but that will fail if CONFIG_VIDEO_V4L2 isn't enabled. Solve this by adding a separate Kconfig option for videobuf2-v4l2 and make it a separate module as well. This means that drivers now need to choose both the appropriate videobuf2 memory type (VIDEOBUF2_{VMALLOC,DMA_CONTIG,DMA_SG}) and VIDEOBUF2_V4L2 if they need both. Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/Kconfig | 3 +++ drivers/media/common/videobuf2/Makefile | 3 ++- drivers/media/v4l2-core/Kconfig | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/common/videobuf2/Kconfig b/drivers/media/common/videobuf2/Kconfig index 5df0525..17c32ea 100644 --- a/drivers/media/common/videobuf2/Kconfig +++ b/drivers/media/common/videobuf2/Kconfig @@ -3,6 +3,9 @@ config VIDEOBUF2_CORE select DMA_SHARED_BUFFER tristate +config VIDEOBUF2_V4L2 + tristate + config VIDEOBUF2_MEMOPS tristate select FRAME_VECTOR diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile index 19de5cc..7e27bdd 100644 --- a/drivers/media/common/videobuf2/Makefile +++ b/drivers/media/common/videobuf2/Makefile @@ -1,5 +1,6 @@ -obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-core.o videobuf2-v4l2.o +obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-core.o +obj-$(CONFIG_VIDEOBUF2_V4L2) += videobuf2-v4l2.o obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index bf52fbd..8e37e7c 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -7,6 +7,7 @@ config VIDEO_V4L2 tristate depends on (I2C || I2C=n) && VIDEO_DEV select RATIONAL + select VIDEOBUF2_V4L2 if VIDEOBUF2_CORE default (I2C || I2C=n) && VIDEO_DEV config VIDEO_ADV_DEBUG -- cgit v1.1 From e77c31ed60d1bbf4879b19309f848e0d8f6df504 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Feb 2018 04:49:30 -0500 Subject: media: videobuf2: fix build issues with vb2-trace There was a trouble with vb2-trace: instead of being part of VB2 core, it was stored at V4L2 videodev. That was wrong, as it doesn't actually belong to V4L2 core. Now that vb2 is not part of v4l2-core, its trace functions should be moved altogether. So, move it to its rightful place: at videobuf2-core. That fixes those errors: drivers/media/common/videobuf2/videobuf2-core.o: In function `__read_once_size': ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_buf_queue' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_buf_queue' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_buf_done' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_buf_done' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_qbuf' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_qbuf' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_dqbuf' ./include/linux/compiler.h:183: undefined reference to `__tracepoint_vb2_dqbuf' drivers/media/common/videobuf2/videobuf2-core.o:(__jump_table+0x10): undefined reference to `__tracepoint_vb2_buf_queue' drivers/media/common/videobuf2/videobuf2-core.o:(__jump_table+0x28): undefined reference to `__tracepoint_vb2_buf_done' drivers/media/common/videobuf2/videobuf2-core.o:(__jump_table+0x40): undefined reference to `__tracepoint_vb2_qbuf' drivers/media/common/videobuf2/videobuf2-core.o:(__jump_table+0x58): undefined reference to `__tracepoint_vb2_dqbuf' Reported-by: kbuild test robot Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/Makefile | 3 +++ drivers/media/common/videobuf2/vb2-trace.c | 10 ++++++++++ drivers/media/v4l2-core/Makefile | 3 +-- drivers/media/v4l2-core/vb2-trace.c | 10 ---------- 4 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 drivers/media/common/videobuf2/vb2-trace.c delete mode 100644 drivers/media/v4l2-core/vb2-trace.c diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile index 7e27bdd..067badb 100644 --- a/drivers/media/common/videobuf2/Makefile +++ b/drivers/media/common/videobuf2/Makefile @@ -6,3 +6,6 @@ obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o obj-$(CONFIG_VIDEOBUF2_DVB) += videobuf2-dvb.o +ifeq ($(CONFIG_TRACEPOINTS),y) + obj-$(CONFIG_VIDEOBUF2_CORE) += vb2-trace.o +endif diff --git a/drivers/media/common/videobuf2/vb2-trace.c b/drivers/media/common/videobuf2/vb2-trace.c new file mode 100644 index 0000000..4c0f39d --- /dev/null +++ b/drivers/media/common/videobuf2/vb2-trace.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_buf_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_buf_queue); +EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_dqbuf); +EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_qbuf); diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 80de2cb..7df5458 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -13,7 +13,7 @@ ifeq ($(CONFIG_COMPAT),y) endif obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o ifeq ($(CONFIG_TRACEPOINTS),y) - videodev-objs += vb2-trace.o v4l2-trace.o + videodev-objs += v4l2-trace.o endif videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o @@ -35,4 +35,3 @@ obj-$(CONFIG_VIDEOBUF_DVB) += videobuf-dvb.o ccflags-y += -I$(srctree)/drivers/media/dvb-frontends ccflags-y += -I$(srctree)/drivers/media/tuners - diff --git a/drivers/media/v4l2-core/vb2-trace.c b/drivers/media/v4l2-core/vb2-trace.c deleted file mode 100644 index 4c0f39d..0000000 --- a/drivers/media/v4l2-core/vb2-trace.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -#define CREATE_TRACE_POINTS -#include - -EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_buf_done); -EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_buf_queue); -EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_dqbuf); -EXPORT_TRACEPOINT_SYMBOL_GPL(vb2_qbuf); -- cgit v1.1 From ec5b100462543aee1f3e139e168699fd3b05cdc6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Jan 2018 05:31:30 -0500 Subject: media: dvb: fix DVB_MMAP symbol name CONFIG_DVB_MMAP was misspelled either as CONFIG_DVB_MMSP or DVB_MMAP, so it had no effect at all. This fixes that, to make it possible to build it again. Fixes: 4021053ed52d ("media: dvb-core: make DVB mmap API optional") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/Makefile | 2 +- drivers/media/dvb-core/dmxdev.c | 30 +++++++++++++++--------------- include/media/dvb_vb2.h | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/media/dvb-core/Makefile b/drivers/media/dvb-core/Makefile index 3a105d8..62b028d 100644 --- a/drivers/media/dvb-core/Makefile +++ b/drivers/media/dvb-core/Makefile @@ -4,7 +4,7 @@ # dvb-net-$(CONFIG_DVB_NET) := dvb_net.o -dvb-vb2-$(CONFIG_DVB_MMSP) := dvb_vb2.o +dvb-vb2-$(CONFIG_DVB_MMAP) := dvb_vb2.o dvb-core-objs := dvbdev.o dmxdev.o dvb_demux.o \ dvb_ca_en50221.o dvb_frontend.o \ diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index 6d53af0..c305410 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -128,7 +128,7 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; struct dmx_frontend *front; -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP bool need_ringbuffer = false; #else const bool need_ringbuffer = true; @@ -144,7 +144,7 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) return -ENODEV; } -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP if ((file->f_flags & O_ACCMODE) == O_RDONLY) need_ringbuffer = true; #else @@ -200,7 +200,7 @@ static int dvb_dvr_release(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP bool need_ringbuffer = false; #else const bool need_ringbuffer = true; @@ -213,7 +213,7 @@ static int dvb_dvr_release(struct inode *inode, struct file *file) dmxdev->demux->connect_frontend(dmxdev->demux, dmxdev->dvr_orig_fe); } -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP if ((file->f_flags & O_ACCMODE) == O_RDONLY) need_ringbuffer = true; #endif @@ -426,7 +426,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, { struct dmxdev_filter *dmxdevfilter = feed->priv; struct dvb_ringbuffer *buffer; -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP struct dvb_vb2_ctx *ctx; #endif int ret; @@ -440,12 +440,12 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, if (dmxdevfilter->params.pes.output == DMX_OUT_TAP || dmxdevfilter->params.pes.output == DMX_OUT_TSDEMUX_TAP) { buffer = &dmxdevfilter->buffer; -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP ctx = &dmxdevfilter->vb2_ctx; #endif } else { buffer = &dmxdevfilter->dev->dvr_buffer; -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP ctx = &dmxdevfilter->dev->dvr_vb2_ctx; #endif } @@ -1111,7 +1111,7 @@ static int dvb_demux_do_ioctl(struct file *file, mutex_unlock(&dmxdevfilter->mutex); break; -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP case DMX_REQBUFS: if (mutex_lock_interruptible(&dmxdevfilter->mutex)) { mutex_unlock(&dmxdev->mutex); @@ -1199,7 +1199,7 @@ static __poll_t dvb_demux_poll(struct file *file, poll_table *wait) return mask; } -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP static int dvb_demux_mmap(struct file *file, struct vm_area_struct *vma) { struct dmxdev_filter *dmxdevfilter = file->private_data; @@ -1249,7 +1249,7 @@ static const struct file_operations dvb_demux_fops = { .release = dvb_demux_release, .poll = dvb_demux_poll, .llseek = default_llseek, -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP .mmap = dvb_demux_mmap, #endif }; @@ -1280,7 +1280,7 @@ static int dvb_dvr_do_ioctl(struct file *file, ret = dvb_dvr_set_buffer_size(dmxdev, arg); break; -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP case DMX_REQBUFS: ret = dvb_vb2_reqbufs(&dmxdev->dvr_vb2_ctx, parg); break; @@ -1322,7 +1322,7 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait) struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; __poll_t mask = 0; -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP bool need_ringbuffer = false; #else const bool need_ringbuffer = true; @@ -1337,7 +1337,7 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait) poll_wait(file, &dmxdev->dvr_buffer.queue, wait); -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP if ((file->f_flags & O_ACCMODE) == O_RDONLY) need_ringbuffer = true; #endif @@ -1353,7 +1353,7 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait) return mask; } -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP static int dvb_dvr_mmap(struct file *file, struct vm_area_struct *vma) { struct dvb_device *dvbdev = file->private_data; @@ -1381,7 +1381,7 @@ static const struct file_operations dvb_dvr_fops = { .release = dvb_dvr_release, .poll = dvb_dvr_poll, .llseek = default_llseek, -#ifdef DVB_MMAP +#ifdef CONFIG_DVB_MMAP .mmap = dvb_dvr_mmap, #endif }; diff --git a/include/media/dvb_vb2.h b/include/media/dvb_vb2.h index 01d1202..056adc86 100644 --- a/include/media/dvb_vb2.h +++ b/include/media/dvb_vb2.h @@ -103,7 +103,7 @@ struct dvb_vb2_ctx { char name[DVB_VB2_NAME_MAX + 1]; }; -#ifndef DVB_MMAP +#ifndef CONFIG_DVB_MMAP static inline int dvb_vb2_init(struct dvb_vb2_ctx *ctx, const char *name, int non_blocking) { -- cgit v1.1 From 85e60bd746b71fd8aca4f7e6803f9a5207f53a30 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Jan 2018 05:31:31 -0500 Subject: media: dvb: fix DVB_MMAP dependency Enabling CONFIG_DVB_MMAP without CONFIG_VIDEOBUF2_VMALLOC results in a link error: drivers/media/dvb-core/dvb_vb2.o: In function `_stop_streaming': dvb_vb2.c:(.text+0x894): undefined reference to `vb2_buffer_done' drivers/media/dvb-core/dvb_vb2.o: In function `dvb_vb2_init': dvb_vb2.c:(.text+0xbec): undefined reference to `vb2_vmalloc_memops' dvb_vb2.c:(.text+0xc4c): undefined reference to `vb2_core_queue_init' drivers/media/dvb-core/dvb_vb2.o: In function `dvb_vb2_release': dvb_vb2.c:(.text+0xe14): undefined reference to `vb2_core_queue_release' drivers/media/dvb-core/dvb_vb2.o: In function `dvb_vb2_stream_on': dvb_vb2.c:(.text+0xeb8): undefined reference to `vb2_core_streamon' drivers/media/dvb-core/dvb_vb2.o: In function `dvb_vb2_stream_off': dvb_vb2.c:(.text+0xfe8): undefined reference to `vb2_core_streamoff' drivers/media/dvb-core/dvb_vb2.o: In function `dvb_vb2_fill_buffer': dvb_vb2.c:(.text+0x13ec): undefined reference to `vb2_plane_vaddr' dvb_vb2.c:(.text+0x149c): undefined reference to `vb2_buffer_done' This adds a 'select' statement for it, plus a dependency that ensures that videobuf2 in turn works, as it in turn depends on VIDEO_V4L2 to link, and that must not be a module if videobuf2 is built-in. Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig index 145e12b..372c074 100644 --- a/drivers/media/Kconfig +++ b/drivers/media/Kconfig @@ -147,6 +147,8 @@ config DVB_CORE config DVB_MMAP bool "Enable DVB memory-mapped API (EXPERIMENTAL)" depends on DVB_CORE + depends on VIDEO_V4L2=y || VIDEO_V4L2=DVB_CORE + select VIDEOBUF2_VMALLOC default n help This option enables DVB experimental memory-mapped API, with -- cgit v1.1 From 0066c764e7cc18784e5edcdeb9831cdefdf4c344 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Jan 2018 05:31:32 -0500 Subject: media: au0828: add VIDEO_V4L2 dependency After the move of videobuf2 into the common directory, selecting the au0828 driver with CONFIG_V4L2 disabled started causing a link failure, as we now attempt to build videobuf2 but it still requires v4l2: ERROR: "v4l2_event_pending" [drivers/media/common/videobuf/videobuf2-v4l2.ko] undefined! ERROR: "v4l2_fh_release" [drivers/media/common/videobuf/videobuf2-v4l2.ko] undefined! ERROR: "video_devdata" [drivers/media/common/videobuf/videobuf2-v4l2.ko] undefined! ERROR: "__tracepoint_vb2_buf_done" [drivers/media/common/videobuf/videobuf2-core.ko] undefined! ERROR: "__tracepoint_vb2_dqbuf" [drivers/media/common/videobuf/videobuf2-core.ko] undefined! ERROR: "v4l_vb2q_enable_media_source" [drivers/media/common/videobuf/videobuf2-core.ko] undefined! This adds the same dependency in au0828 that the other users of videobuf2 have. Fixes: 03fbdb2fc2b8 ("media: move videobuf2 to drivers/media/common") Fixes: 05439b1a3693 ("[media] media: au0828 - convert to use videobuf2") Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/au0828/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/au0828/Kconfig b/drivers/media/usb/au0828/Kconfig index 70521e0..bfaa806 100644 --- a/drivers/media/usb/au0828/Kconfig +++ b/drivers/media/usb/au0828/Kconfig @@ -1,7 +1,7 @@ config VIDEO_AU0828 tristate "Auvitek AU0828 support" - depends on I2C && INPUT && DVB_CORE && USB + depends on I2C && INPUT && DVB_CORE && USB && VIDEO_V4L2 select I2C_ALGOBIT select VIDEO_TVEEPROM select VIDEOBUF2_VMALLOC -- cgit v1.1 From b9c97c67fd19262c002d94ced2bfb513083e161e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 10 Feb 2018 06:14:10 -0500 Subject: media: m88ds3103: don't call a non-initalized function If m88d3103 chip ID is not recognized, the device is not initialized. However, it returns from probe without any error, causing this OOPS: [ 7.689289] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 7.689297] pgd = 7b0bd7a7 [ 7.689302] [00000000] *pgd=00000000 [ 7.689318] Internal error: Oops: 80000005 [#1] SMP ARM [ 7.689322] Modules linked in: dvb_usb_dvbsky(+) m88ds3103 dvb_usb_v2 dvb_core videobuf2_vmalloc videobuf2_memops videobuf2_core crc32_arm_ce videodev media [ 7.689358] CPU: 3 PID: 197 Comm: systemd-udevd Not tainted 4.15.0-mcc+ #23 [ 7.689361] Hardware name: BCM2835 [ 7.689367] PC is at 0x0 [ 7.689382] LR is at m88ds3103_attach+0x194/0x1d0 [m88ds3103] [ 7.689386] pc : [<00000000>] lr : [] psr: 60000013 [ 7.689391] sp : ed8e5c20 ip : ed8c1e00 fp : ed8945c0 [ 7.689395] r10: ed894000 r9 : ed894378 r8 : eda736c0 [ 7.689400] r7 : ed894070 r6 : ed8e5c44 r5 : bf0bb040 r4 : eda77600 [ 7.689405] r3 : 00000000 r2 : 00000000 r1 : 00000000 r0 : eda77600 [ 7.689412] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 7.689417] Control: 10c5383d Table: 2d8e806a DAC: 00000051 [ 7.689423] Process systemd-udevd (pid: 197, stack limit = 0xe9dbfb63) [ 7.689428] Stack: (0xed8e5c20 to 0xed8e6000) [ 7.689439] 5c20: ed853a80 eda73640 ed894000 ed8942c0 ed853a80 bf0b9e98 ed894070 bf0b9f10 [ 7.689449] 5c40: 00000000 00000000 bf08c17c c08dfc50 00000000 00000000 00000000 00000000 [ 7.689459] 5c60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 7.689468] 5c80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 7.689479] 5ca0: 00000000 00000000 ed8945c0 ed8942c0 ed894000 ed894830 bf0b9e98 00000000 [ 7.689490] 5cc0: ed894378 bf0a3cb4 bf0bc3b0 0000533b ed920540 00000000 00000034 bf0a6434 [ 7.689500] 5ce0: ee952070 ed826600 bf0a7038 bf0a2dd8 00000001 bf0a6768 bf0a2f90 ed8943c0 [ 7.689511] 5d00: 00000000 c08eca68 ed826620 ed826620 00000000 ee952070 bf0bc034 ee952000 [ 7.689521] 5d20: ed826600 bf0bb080 ffffffed c0aa9e9c c0aa9dac ed826620 c16edf6c c168c2c8 [ 7.689531] 5d40: c16edf70 00000000 bf0bc034 0000000d 00000000 c08e268c bf0bb080 ed826600 [ 7.689541] 5d60: bf0bc034 ed826654 ed826620 bf0bc034 c164c8bc 00000000 00000001 00000000 [ 7.689553] 5d80: 00000028 c08e2948 00000000 bf0bc034 c08e2848 c08e0778 ee9f0a58 ed88bab4 [ 7.689563] 5da0: bf0bc034 ed90ba80 c168c1f0 c08e1934 bf0bb3bc c17045ac bf0bc034 c164c8bc [ 7.689574] 5dc0: bf0bc034 bf0bb3bc ed91f564 c08e34ec bf0bc000 c164c8bc bf0bc034 c0aa8dc4 [ 7.689584] 5de0: ffffe000 00000000 bf0bf000 ed91f600 ed91f564 c03021e4 00000001 00000000 [ 7.689595] 5e00: c166e040 8040003f ed853a80 bf0bc448 00000000 c1678174 ed853a80 f0f22000 [ 7.689605] 5e20: f0f21fff 8040003f 014000c0 ed91e700 ed91e700 c16d8e68 00000001 ed91e6c0 [ 7.689615] 5e40: bf0bc400 00000001 bf0bc400 ed91f564 00000001 00000000 00000028 c03c9a24 [ 7.689625] 5e60: 00000001 c03c8c94 ed8e5f50 ed8e5f50 00000001 bf0bc400 ed91f540 c03c8cb0 [ 7.689637] 5e80: bf0bc40c 00007fff bf0bc400 c03c60b0 00000000 bf0bc448 00000028 c0e09684 [ 7.689647] 5ea0: 00000002 bf0bc530 c1234bf8 bf0bc5dc bf0bc514 c10ebbe8 ffffe000 bf000000 [ 7.689657] 5ec0: 00011538 00000000 ed8e5f48 00000000 00000000 00000000 00000000 00000000 [ 7.689666] 5ee0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 7.689676] 5f00: 00000000 00000000 7fffffff 00000000 00000013 b6e55a18 0000017b c0309104 [ 7.689686] 5f20: ed8e4000 00000000 00510af0 c03c9430 7fffffff 00000000 00000003 00000000 [ 7.689697] 5f40: 00000000 f0f0f000 00011538 00000000 f0f107b0 f0f0f000 00011538 f0f1fdb8 [ 7.689707] 5f60: f0f1fbe8 f0f1b974 00004000 000041e0 bf0bc3d0 00000001 00000000 000024c4 [ 7.689717] 5f80: 0000002d 0000002e 00000019 00000000 00000010 00000000 16894000 00000000 [ 7.689727] 5fa0: 00000000 c0308f20 16894000 00000000 00000013 b6e55a18 00000000 b6e5652c [ 7.689737] 5fc0: 16894000 00000000 00000000 0000017b 00020000 00508110 00000000 00510af0 [ 7.689748] 5fe0: bef68948 bef68938 b6e4d3d0 b6d32590 60000010 00000013 00000000 00000000 [ 7.689790] [] (m88ds3103_attach [m88ds3103]) from [] (dvbsky_s960c_attach+0x78/0x280 [dvb_usb_dvbsky]) [ 7.689821] [] (dvbsky_s960c_attach [dvb_usb_dvbsky]) from [] (dvb_usbv2_probe+0xa3c/0x1024 [dvb_usb_v2]) [ 7.689849] [] (dvb_usbv2_probe [dvb_usb_v2]) from [] (usb_probe_interface+0xf0/0x2a8) [ 7.689869] [] (usb_probe_interface) from [] (driver_probe_device+0x2f8/0x4b4) [ 7.689881] [] (driver_probe_device) from [] (__driver_attach+0x100/0x11c) [ 7.689895] [] (__driver_attach) from [] (bus_for_each_dev+0x4c/0x9c) [ 7.689909] [] (bus_for_each_dev) from [] (bus_add_driver+0x1c0/0x264) [ 7.689919] [] (bus_add_driver) from [] (driver_register+0x78/0xf4) [ 7.689931] [] (driver_register) from [] (usb_register_driver+0x70/0x134) [ 7.689946] [] (usb_register_driver) from [] (do_one_initcall+0x44/0x168) [ 7.689963] [] (do_one_initcall) from [] (do_init_module+0x64/0x1f4) [ 7.689979] [] (do_init_module) from [] (load_module+0x20a0/0x25c8) [ 7.689993] [] (load_module) from [] (SyS_finit_module+0xb4/0xec) [ 7.690007] [] (SyS_finit_module) from [] (ret_fast_syscall+0x0/0x54) [ 7.690018] Code: bad PC value This may happen on normal circumstances, if, for some reason, the demod hangs and start returning an invalid chip ID: [ 10.394395] m88ds3103 3-0068: Unknown device. Chip_id=00 So, change the logic to cause probe to fail with -ENODEV, preventing the OOPS. Detected while testing DVB MMAP patches on Raspberry Pi 3 with DVBSky S960CI. Cc: stable@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/m88ds3103.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c index 50bce68..65d157f 100644 --- a/drivers/media/dvb-frontends/m88ds3103.c +++ b/drivers/media/dvb-frontends/m88ds3103.c @@ -1262,11 +1262,12 @@ static int m88ds3103_select(struct i2c_mux_core *muxc, u32 chan) * New users must use I2C client binding directly! */ struct dvb_frontend *m88ds3103_attach(const struct m88ds3103_config *cfg, - struct i2c_adapter *i2c, struct i2c_adapter **tuner_i2c_adapter) + struct i2c_adapter *i2c, + struct i2c_adapter **tuner_i2c_adapter) { struct i2c_client *client; struct i2c_board_info board_info; - struct m88ds3103_platform_data pdata; + struct m88ds3103_platform_data pdata = {}; pdata.clk = cfg->clock; pdata.i2c_wr_max = cfg->i2c_wr_max; @@ -1409,6 +1410,8 @@ static int m88ds3103_probe(struct i2c_client *client, case M88DS3103_CHIP_ID: break; default: + ret = -ENODEV; + dev_err(&client->dev, "Unknown device. Chip_id=%02x\n", dev->chip_id); goto err_kfree; } -- cgit v1.1 From a145f64c6107d3aa5a7cec9f8977d04ac2a896c9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 11 Feb 2018 05:44:21 -0500 Subject: media: dmxdev: fix error code for invalid ioctls Returning -EINVAL when an ioctl is not implemented is a very bad idea, as it is hard to distinguish from other error contitions that an ioctl could lead. Replace it by its right error code: -ENOTTY. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dmxdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index c305410..d87b69b 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -1160,7 +1160,7 @@ static int dvb_demux_do_ioctl(struct file *file, break; #endif default: - ret = -EINVAL; + ret = -ENOTTY; break; } mutex_unlock(&dmxdev->mutex); -- cgit v1.1 From 0b23498aacc658e4d0f6b240f0b905908695a132 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 9 Feb 2018 10:44:49 -0500 Subject: media: dmxdev: Fix the logic that enables DMA mmap support Some conditions required for DVB mmap support to work are reversed. Also, the logic is not too clear. So, improve the logic, making it easier to be handled. PS.: I'm pretty sure that I fixed it while testing, but, somehow, the change got lost. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dmxdev.c | 75 +++++++++++++++++++++++------------------ include/media/dmxdev.h | 2 ++ 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index d87b69b..09c2626 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -128,11 +128,7 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; struct dmx_frontend *front; -#ifndef CONFIG_DVB_MMAP bool need_ringbuffer = false; -#else - const bool need_ringbuffer = true; -#endif dprintk("%s\n", __func__); @@ -144,17 +140,31 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) return -ENODEV; } -#ifndef CONFIG_DVB_MMAP + dmxdev->may_do_mmap = 0; + + /* + * The logic here is a little tricky due to the ifdef. + * + * The ringbuffer is used for both read and mmap. + * + * It is not needed, however, on two situations: + * - Write devices (access with O_WRONLY); + * - For duplex device nodes, opened with O_RDWR. + */ + if ((file->f_flags & O_ACCMODE) == O_RDONLY) need_ringbuffer = true; -#else - if ((file->f_flags & O_ACCMODE) == O_RDWR) { + else if ((file->f_flags & O_ACCMODE) == O_RDWR) { if (!(dmxdev->capabilities & DMXDEV_CAP_DUPLEX)) { +#ifdef CONFIG_DVB_MMAP + dmxdev->may_do_mmap = 1; + need_ringbuffer = true; +#else mutex_unlock(&dmxdev->mutex); return -EOPNOTSUPP; +#endif } } -#endif if (need_ringbuffer) { void *mem; @@ -169,8 +179,9 @@ static int dvb_dvr_open(struct inode *inode, struct file *file) return -ENOMEM; } dvb_ringbuffer_init(&dmxdev->dvr_buffer, mem, DVR_BUFFER_SIZE); - dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr", - file->f_flags & O_NONBLOCK); + if (dmxdev->may_do_mmap) + dvb_vb2_init(&dmxdev->dvr_vb2_ctx, "dvr", + file->f_flags & O_NONBLOCK); dvbdev->readers--; } @@ -200,11 +211,6 @@ static int dvb_dvr_release(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; -#ifndef CONFIG_DVB_MMAP - bool need_ringbuffer = false; -#else - const bool need_ringbuffer = true; -#endif mutex_lock(&dmxdev->mutex); @@ -213,15 +219,14 @@ static int dvb_dvr_release(struct inode *inode, struct file *file) dmxdev->demux->connect_frontend(dmxdev->demux, dmxdev->dvr_orig_fe); } -#ifndef CONFIG_DVB_MMAP - if ((file->f_flags & O_ACCMODE) == O_RDONLY) - need_ringbuffer = true; -#endif - if (need_ringbuffer) { - if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx)) - dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx); - dvb_vb2_release(&dmxdev->dvr_vb2_ctx); + if (((file->f_flags & O_ACCMODE) == O_RDONLY) || + dmxdev->may_do_mmap) { + if (dmxdev->may_do_mmap) { + if (dvb_vb2_is_streaming(&dmxdev->dvr_vb2_ctx)) + dvb_vb2_stream_off(&dmxdev->dvr_vb2_ctx); + dvb_vb2_release(&dmxdev->dvr_vb2_ctx); + } dvbdev->readers++; if (dmxdev->dvr_buffer.data) { void *mem = dmxdev->dvr_buffer.data; @@ -802,6 +807,12 @@ static int dvb_demux_open(struct inode *inode, struct file *file) mutex_init(&dmxdevfilter->mutex); file->private_data = dmxdevfilter; +#ifdef CONFIG_DVB_MMAP + dmxdev->may_do_mmap = 1; +#else + dmxdev->may_do_mmap = 0; +#endif + dvb_ringbuffer_init(&dmxdevfilter->buffer, NULL, 8192); dvb_vb2_init(&dmxdevfilter->vb2_ctx, "demux_filter", file->f_flags & O_NONBLOCK); @@ -1206,6 +1217,9 @@ static int dvb_demux_mmap(struct file *file, struct vm_area_struct *vma) struct dmxdev *dmxdev = dmxdevfilter->dev; int ret; + if (!dmxdev->may_do_mmap) + return -EOPNOTSUPP; + if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; @@ -1322,11 +1336,6 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait) struct dvb_device *dvbdev = file->private_data; struct dmxdev *dmxdev = dvbdev->priv; __poll_t mask = 0; -#ifndef CONFIG_DVB_MMAP - bool need_ringbuffer = false; -#else - const bool need_ringbuffer = true; -#endif dprintk("%s\n", __func__); @@ -1337,11 +1346,8 @@ static __poll_t dvb_dvr_poll(struct file *file, poll_table *wait) poll_wait(file, &dmxdev->dvr_buffer.queue, wait); -#ifndef CONFIG_DVB_MMAP - if ((file->f_flags & O_ACCMODE) == O_RDONLY) - need_ringbuffer = true; -#endif - if (need_ringbuffer) { + if (((file->f_flags & O_ACCMODE) == O_RDONLY) || + dmxdev->may_do_mmap) { if (dmxdev->dvr_buffer.error) mask |= (EPOLLIN | EPOLLRDNORM | EPOLLPRI | EPOLLERR); @@ -1360,6 +1366,9 @@ static int dvb_dvr_mmap(struct file *file, struct vm_area_struct *vma) struct dmxdev *dmxdev = dvbdev->priv; int ret; + if (!dmxdev->may_do_mmap) + return -EOPNOTSUPP; + if (dmxdev->exit) return -ENODEV; diff --git a/include/media/dmxdev.h b/include/media/dmxdev.h index 2f5cb2c..baafa3b8 100644 --- a/include/media/dmxdev.h +++ b/include/media/dmxdev.h @@ -163,6 +163,7 @@ struct dmxdev_filter { * @demux: pointer to &struct dmx_demux. * @filternum: number of filters. * @capabilities: demux capabilities as defined by &enum dmx_demux_caps. + * @may_do_mmap: flag used to indicate if the device may do mmap. * @exit: flag to indicate that the demux is being released. * @dvr_orig_fe: pointer to &struct dmx_frontend. * @dvr_buffer: embedded &struct dvb_ringbuffer for DVB output. @@ -180,6 +181,7 @@ struct dmxdev { int filternum; int capabilities; + unsigned int may_do_mmap:1; unsigned int exit:1; #define DMXDEV_CAP_DUPLEX 1 struct dmx_frontend *dvr_orig_fe; -- cgit v1.1 From 9c171cdf22d1486da1608abd7612fabe2a8262ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 9 Feb 2018 05:51:19 -0500 Subject: media: dvb: add continuity error indicators for memory mapped buffers While userspace can detect discontinuity errors, it is useful to also let Kernelspace reporting discontinuity, as it can help to identify if the data loss happened either at Kernel or userspace side. Update documentation accordingly. Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/dmx.h.rst.exceptions | 14 +++++++++---- Documentation/media/uapi/dvb/dmx-qbuf.rst | 7 ++++--- include/uapi/linux/dvb/dmx.h | 35 +++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/Documentation/media/dmx.h.rst.exceptions b/Documentation/media/dmx.h.rst.exceptions index 63f55a9..a8c4239 100644 --- a/Documentation/media/dmx.h.rst.exceptions +++ b/Documentation/media/dmx.h.rst.exceptions @@ -50,9 +50,15 @@ replace typedef dmx_filter_t :c:type:`dmx_filter` replace typedef dmx_pes_type_t :c:type:`dmx_pes_type` replace typedef dmx_input_t :c:type:`dmx_input` -ignore symbol DMX_OUT_DECODER -ignore symbol DMX_OUT_TAP -ignore symbol DMX_OUT_TS_TAP -ignore symbol DMX_OUT_TSDEMUX_TAP +replace symbol DMX_BUFFER_FLAG_HAD_CRC32_DISCARD :c:type:`dmx_buffer_flags` +replace symbol DMX_BUFFER_FLAG_TEI :c:type:`dmx_buffer_flags` +replace symbol DMX_BUFFER_PKT_COUNTER_MISMATCH :c:type:`dmx_buffer_flags` +replace symbol DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED :c:type:`dmx_buffer_flags` +replace symbol DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR :c:type:`dmx_buffer_flags` + +replace symbol DMX_OUT_DECODER :c:type:`dmx_output` +replace symbol DMX_OUT_TAP :c:type:`dmx_output` +replace symbol DMX_OUT_TS_TAP :c:type:`dmx_output` +replace symbol DMX_OUT_TSDEMUX_TAP :c:type:`dmx_output` replace ioctl DMX_DQBUF dmx_qbuf diff --git a/Documentation/media/uapi/dvb/dmx-qbuf.rst b/Documentation/media/uapi/dvb/dmx-qbuf.rst index b48c493..be5a4c6 100644 --- a/Documentation/media/uapi/dvb/dmx-qbuf.rst +++ b/Documentation/media/uapi/dvb/dmx-qbuf.rst @@ -51,9 +51,10 @@ out to disk. Buffers remain locked until dequeued, until the the device is closed. Applications call the ``DMX_DQBUF`` ioctl to dequeue a filled -(capturing) buffer from the driver's outgoing queue. They just set the ``reserved`` field array to zero. When ``DMX_DQBUF`` is called with a -pointer to this structure, the driver fills the remaining fields or -returns an error code. +(capturing) buffer from the driver's outgoing queue. +They just set the ``index`` field withe the buffer ID to be queued. +When ``DMX_DQBUF`` is called with a pointer to struct :c:type:`dmx_buffer`, +the driver fills the remaining fields or returns an error code. By default ``DMX_DQBUF`` blocks when no buffer is in the outgoing queue. When the ``O_NONBLOCK`` flag was given to the diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index 5f3c5a9..b4112f0 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -212,6 +212,32 @@ struct dmx_stc { }; /** + * enum dmx_buffer_flags - DMX memory-mapped buffer flags + * + * @DMX_BUFFER_FLAG_HAD_CRC32_DISCARD: + * Indicates that the Kernel discarded one or more frames due to wrong + * CRC32 checksum. + * @DMX_BUFFER_FLAG_TEI: + * Indicates that the Kernel has detected a Transport Error indicator + * (TEI) on a filtered pid. + * @DMX_BUFFER_PKT_COUNTER_MISMATCH: + * Indicates that the Kernel has detected a packet counter mismatch + * on a filtered pid. + * @DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED: + * Indicates that the Kernel has detected one or more frame discontinuity. + * @DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR: + * Received at least one packet with a frame discontinuity indicator. + */ + +enum dmx_buffer_flags { + DMX_BUFFER_FLAG_HAD_CRC32_DISCARD = 1 << 0, + DMX_BUFFER_FLAG_TEI = 1 << 1, + DMX_BUFFER_PKT_COUNTER_MISMATCH = 1 << 2, + DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED = 1 << 3, + DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR = 1 << 4, +}; + +/** * struct dmx_buffer - dmx buffer info * * @index: id number of the buffer @@ -220,15 +246,24 @@ struct dmx_stc { * offset from the start of the device memory for this plane, * (or a "cookie" that should be passed to mmap() as offset) * @length: size in bytes of the buffer + * @flags: bit array of buffer flags as defined by &enum dmx_buffer_flags. + * Filled only at &DMX_DQBUF. + * @count: monotonic counter for filled buffers. Helps to identify + * data stream loses. Filled only at &DMX_DQBUF. * * Contains data exchanged by application and driver using one of the streaming * I/O methods. + * + * Please notice that, for &DMX_QBUF, only @index should be filled. + * On &DMX_DQBUF calls, all fields will be filled by the Kernel. */ struct dmx_buffer { __u32 index; __u32 bytesused; __u32 offset; __u32 length; + __u32 flags; + __u32 count; }; /** -- cgit v1.1 From bf8486709ac7fad99e4040dea73fe466c57a4ae1 Mon Sep 17 00:00:00 2001 From: Anna Karbownik Date: Thu, 22 Feb 2018 16:18:13 +0100 Subject: EDAC, sb_edac: Fix out of bound writes during DIMM configuration on KNL Commit 3286d3eb906c ("EDAC, sb_edac: Drop NUM_CHANNELS from 8 back to 4") decreased NUM_CHANNELS from 8 to 4, but this is not enough for Knights Landing which supports up to 6 channels. This caused out-of-bounds writes to pvt->mirror_mode and pvt->tolm variables which don't pay critical role on KNL code path, so the memory corruption wasn't causing any visible driver failures. The easiest way of fixing it is to change NUM_CHANNELS to 6. Do that. An alternative solution would be to restructure the KNL part of the driver to 2MC/3channel representation. Reported-by: Dan Carpenter Signed-off-by: Anna Karbownik Cc: Mauro Carvalho Chehab Cc: Tony Luck Cc: jim.m.snow@intel.com Cc: krzysztof.paliswiat@intel.com Cc: lukasz.odzioba@intel.com Cc: qiuxu.zhuo@intel.com Cc: linux-edac Cc: Fixes: 3286d3eb906c ("EDAC, sb_edac: Drop NUM_CHANNELS from 8 back to 4") Link: http://lkml.kernel.org/r/1519312693-4789-1-git-send-email-anna.karbownik@intel.com [ Massage commit message. ] Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index f34430f..8721002 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = { * sbridge structs */ -#define NUM_CHANNELS 4 /* Max channels per MC */ +#define NUM_CHANNELS 6 /* Max channels per MC */ #define MAX_DIMMS 3 /* Max DIMMS per channel */ #define KNL_MAX_CHAS 38 /* KNL max num. of Cache Home Agents */ #define KNL_MAX_CHANNELS 6 /* KNL max num. of PCI channels */ -- cgit v1.1 From 36148c2bbfbe50c50206b6f61d072203c80161e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 2 Feb 2018 16:11:05 +0100 Subject: mac80211: Adjust TSQ pacing shift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we now have the convenient helper to do so, actually adjust the TSQ pacing shift for packets going out over a WiFi interface. This significantly improves throughput for locally-originated TCP connections. The default pacing shift of 10 corresponds to ~1ms of queued packet data. Adjusting this to a shift of 8 (i.e. ~4ms) improves 1-hop throughput for ath9k by a factor of 3, whereas increasing it more has diminishing returns. Achieved throughput for different values of sk_pacing_shift (average of 5 iterations of 10-sec netperf runs to a host on the other side of the WiFi hop): sk_pacing_shift 10: 43.21 Mbps (pre-patch) sk_pacing_shift 9: 78.17 Mbps sk_pacing_shift 8: 123.94 Mbps sk_pacing_shift 7: 128.31 Mbps Latency for competing flows increases from ~3 ms to ~10 ms with this change. This is about the same magnitude of queueing latency induced by flows that are not originated on the WiFi device itself (and so are not limited by TSQ). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 25904af..6972250 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3574,6 +3574,14 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, if (!IS_ERR_OR_NULL(sta)) { struct ieee80211_fast_tx *fast_tx; + /* We need a bit of data queued to build aggregates properly, so + * instruct the TCP stack to allow more than a single ms of data + * to be queued in the stack. The value is a bit-shift of 1 + * second, so 8 is ~4ms of queued data. Only affects local TCP + * sockets. + */ + sk_pacing_shift_update(skb->sk, 8); + fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && -- cgit v1.1 From b323ac19b7734a1c464b2785a082ee50bccd3b91 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 23 Feb 2018 10:06:03 +0100 Subject: mac80211: drop frames with unexpected DS bits from fast-rx to slow path Fixes rx for 4-addr packets in AP mode. These may be used for setting up a 4-addr link for stations that are allowed to do so. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fd58061..56fe16b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3921,7 +3921,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, if ((hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) != fast_rx->expected_ds_bits) - goto drop; + return false; /* assign the key to drop unencrypted frames (later) * and strip the IV/MIC if necessary -- cgit v1.1 From c20bb155c2c5acb775f68be5d84fe679687c3c1e Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 3 Feb 2018 14:11:23 -0500 Subject: drm/nouveau: prefer XBGR2101010 for addfb ioctl Nouveau only exposes support for XBGR2101010. Prior to the atomic conversion, drm would pass in the wrong format in the framebuffer, but it was always ignored -- both userspace (xf86-video-nouveau) and the kernel driver agreed on the layout, so the fact that the format was wrong didn't matter. With the atomic conversion, nouveau all of a sudden started caring about the exact format, and so the previously-working code in xf86-video-nouveau no longer functioned since the (internally-assigned) format from the addfb ioctl was wrong. This change adds infrastructure to allow a drm driver to specify that it prefers the XBGR format variant for the addfb ioctl, and makes nouveau's nv50 display driver set it. (Prior gens had no support for 30bpp at all.) Signed-off-by: Ilia Mirkin Cc: stable@vger.kernel.org # v4.10+ Acked-by: Ben Skeggs Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180203191123.31507-1-imirkin@alum.mit.edu --- drivers/gpu/drm/drm_framebuffer.c | 4 ++++ drivers/gpu/drm/nouveau/nv50_display.c | 1 + include/drm/drm_drv.h | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 5a13ff2..c0530a1 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -121,6 +121,10 @@ int drm_mode_addfb(struct drm_device *dev, r.pixel_format = drm_mode_legacy_fb_format(or->bpp, or->depth); r.handles[0] = or->handle; + if (r.pixel_format == DRM_FORMAT_XRGB2101010 && + dev->driver->driver_features & DRIVER_PREFER_XBGR_30BPP) + r.pixel_format = DRM_FORMAT_XBGR2101010; + ret = drm_mode_addfb2(dev, &r, file_priv); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index dd8d435..caddce8 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -4477,6 +4477,7 @@ nv50_display_create(struct drm_device *dev) nouveau_display(dev)->fini = nv50_display_fini; disp->disp = &nouveau_display(dev)->disp; dev->mode_config.funcs = &nv50_disp_func; + dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP; if (nouveau_atomic) dev->driver->driver_features |= DRIVER_ATOMIC; diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index d32b688..d23dcdd 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -56,6 +56,7 @@ struct drm_printer; #define DRIVER_ATOMIC 0x10000 #define DRIVER_KMS_LEGACY_CONTEXT 0x20000 #define DRIVER_SYNCOBJ 0x40000 +#define DRIVER_PREFER_XBGR_30BPP 0x80000 /** * struct drm_driver - DRM driver structure -- cgit v1.1 From 9f416319f40cd857d2bb517630e5855a905ef3fb Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 5 Feb 2018 14:28:01 +0100 Subject: arm64: fix unwind_frame() for filtered out fn for function graph tracing do_task_stat() calls get_wchan(), which further does unwind_frame(). unwind_frame() restores frame->pc to original value in case function graph tracer has modified a return address (LR) in a stack frame to hook a function return. However, if function graph tracer has hit a filtered function, then we can't unwind it as ftrace_push_return_trace() has biased the index(frame->graph) with a 'huge negative' offset(-FTRACE_NOTRACE_DEPTH). Moreover, arm64 stack walker defines index(frame->graph) as unsigned int, which can not compare a -ve number. Similar problem we can have with calling of walk_stackframe() from save_stack_trace_tsk() or dump_backtrace(). This patch fixes unwind_frame() to test the index for -ve value and restore index accordingly before we can restore frame->pc. Reproducer: cd /sys/kernel/debug/tracing/ echo schedule > set_graph_notrace echo 1 > options/display-graph echo wakeup > current_tracer ps -ef | grep -i agent Above commands result in: Unable to handle kernel paging request at virtual address ffff801bd3d1e000 pgd = ffff8003cbe97c00 [ffff801bd3d1e000] *pgd=0000000000000000, *pud=0000000000000000 Internal error: Oops: 96000006 [#1] SMP [...] CPU: 5 PID: 11696 Comm: ps Not tainted 4.11.0+ #33 [...] task: ffff8003c21ba000 task.stack: ffff8003cc6c0000 PC is at unwind_frame+0x12c/0x180 LR is at get_wchan+0xd4/0x134 pc : [] lr : [] pstate: 60000145 sp : ffff8003cc6c3ab0 x29: ffff8003cc6c3ab0 x28: 0000000000000001 x27: 0000000000000026 x26: 0000000000000026 x25: 00000000000012d8 x24: 0000000000000000 x23: ffff8003c1c04000 x22: ffff000008c83000 x21: ffff8003c1c00000 x20: 000000000000000f x19: ffff8003c1bc0000 x18: 0000fffffc593690 x17: 0000000000000000 x16: 0000000000000001 x15: 0000b855670e2b60 x14: 0003e97f22cf1d0f x13: 0000000000000001 x12: 0000000000000000 x11: 00000000e8f4883e x10: 0000000154f47ec8 x9 : 0000000070f367c0 x8 : 0000000000000000 x7 : 00008003f7290000 x6 : 0000000000000018 x5 : 0000000000000000 x4 : ffff8003c1c03cb0 x3 : ffff8003c1c03ca0 x2 : 00000017ffe80000 x1 : ffff8003cc6c3af8 x0 : ffff8003d3e9e000 Process ps (pid: 11696, stack limit = 0xffff8003cc6c0000) Stack: (0xffff8003cc6c3ab0 to 0xffff8003cc6c4000) [...] [] unwind_frame+0x12c/0x180 [] do_task_stat+0x864/0x870 [] proc_tgid_stat+0x3c/0x48 [] proc_single_show+0x5c/0xb8 [] seq_read+0x160/0x414 [] __vfs_read+0x58/0x164 [] vfs_read+0x88/0x144 [] SyS_read+0x60/0xc0 [] __sys_trace_return+0x0/0x4 Fixes: 20380bb390a4 (arm64: ftrace: fix a stack tracer's output under function graph tracer) Signed-off-by: Pratyush Anand Signed-off-by: Jerome Marchand [catalin.marinas@arm.com: replace WARN_ON with WARN_ON_ONCE] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/stacktrace.h | 2 +- arch/arm64/kernel/stacktrace.c | 5 +++++ arch/arm64/kernel/time.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 472ef94..902f9ed 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -28,7 +28,7 @@ struct stackframe { unsigned long fp; unsigned long pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - unsigned int graph; + int graph; #endif }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 76809cc..d5718a0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { + if (WARN_ON_ONCE(frame->graph == -1)) + return -EINVAL; + if (frame->graph < -1) + frame->graph += FTRACE_NOTRACE_DEPTH; + /* * This is a case where function graph tracer has * modified a return address (LR) in a stack frame diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a439128..f258636 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = -1; /* no task info */ + frame.graph = current->curr_ret_stack; #endif do { int ret = unwind_frame(NULL, &frame); -- cgit v1.1 From 4e14bf4236490306004782813b8b4494b18f5e60 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 22 Feb 2018 18:20:30 +0300 Subject: macvlan: fix use-after-free in macvlan_common_newlink() The following use-after-free was reported by KASan when running LTP macvtap01 test on 4.16-rc2: [10642.528443] BUG: KASAN: use-after-free in macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10642.626607] Read of size 8 at addr ffff880ba49f2100 by task ip/18450 ... [10642.963873] Call Trace: [10642.994352] dump_stack+0x5c/0x7c [10643.035325] print_address_description+0x75/0x290 [10643.092938] kasan_report+0x28d/0x390 [10643.137971] ? macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10643.207963] macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10643.275978] macvtap_newlink+0x171/0x260 [macvtap] [10643.334532] rtnl_newlink+0xd4f/0x1300 ... [10646.256176] Allocated by task 18450: [10646.299964] kasan_kmalloc+0xa6/0xd0 [10646.343746] kmem_cache_alloc_trace+0xf1/0x210 [10646.397826] macvlan_common_newlink+0x6de/0x14a0 [macvlan] [10646.464386] macvtap_newlink+0x171/0x260 [macvtap] [10646.522728] rtnl_newlink+0xd4f/0x1300 ... [10647.022028] Freed by task 18450: [10647.061549] __kasan_slab_free+0x138/0x180 [10647.111468] kfree+0x9e/0x1c0 [10647.147869] macvlan_port_destroy+0x3db/0x650 [macvlan] [10647.211411] rollback_registered_many+0x5b9/0xb10 [10647.268715] rollback_registered+0xd9/0x190 [10647.319675] register_netdevice+0x8eb/0xc70 [10647.370635] macvlan_common_newlink+0xe58/0x14a0 [macvlan] [10647.437195] macvtap_newlink+0x171/0x260 [macvtap] Commit d02fd6e7d293 ("macvlan: Fix one possible double free") handles the case when register_netdevice() invokes ndo_uninit() on error and as a result free the port. But 'macvlan_port_get_rtnl(dev))' check (returns dev->rx_handler_data), which was added by this commit in order to prevent double free, is not quite correct: * for macvlan it always returns NULL because 'lowerdev' is the one that was used to register rx handler (port) in macvlan_port_create() as well as to unregister it in macvlan_port_destroy(). * for macvtap it always returns a valid pointer because macvtap registers its own rx handler before macvlan_common_newlink(). Fixes: d02fd6e7d293 ("macvlan: Fix one possible double free") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a0f2be8..8fc02d9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1451,7 +1451,7 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(dev)) + if (create && macvlan_port_get_rtnl(lowerdev)) macvlan_port_destroy(port->dev); return err; } -- cgit v1.1 From ca79bec237f5809a7c3c59bd41cd0880aa889966 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Feb 2018 16:55:34 +0100 Subject: ipv6 sit: work around bogus gcc-8 -Wrestrict warning gcc-8 has a new warning that detects overlapping input and output arguments in memcpy(). It triggers for sit_init_net() calling ipip6_tunnel_clone_6rd(), which is actually correct: net/ipv6/sit.c: In function 'sit_init_net': net/ipv6/sit.c:192:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] The problem here is that the logic detecting the memcpy() arguments finds them to be the same, but the conditional that tests for the input and output of ipip6_tunnel_clone_6rd() to be identical is not a compile-time constant. We know that netdev_priv(t->dev) is the same as t for a tunnel device, and comparing "dev" directly here lets the compiler figure out as well that 'dev == sitn->fb_tunnel_dev' when called from sit_init_net(), so it no longer warns. This code is old, so Cc stable to make sure that we don't get the warning for older kernels built with new gcc. Cc: Martin Sebor Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83456 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3873d38..3a1775a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); - if (t->dev == sitn->fb_tunnel_dev) { + if (dev == sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; -- cgit v1.1 From fdbeb96258141d911ca8ba98931b9024038b84e0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 9 Feb 2018 07:30:46 -0500 Subject: media: dvb: update buffer mmaped flags and frame counter Now that we have support for a buffer counter and for error flags, update them at DMX_DQBUF. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dmxdev.c | 24 ++++--- drivers/media/dvb-core/dvb_demux.c | 112 +++++++++++++++++++++----------- drivers/media/dvb-core/dvb_net.c | 5 +- drivers/media/dvb-core/dvb_vb2.c | 31 ++++++--- drivers/media/pci/ttpci/av7110.c | 5 +- drivers/media/pci/ttpci/av7110_av.c | 6 +- drivers/media/usb/ttusb-dec/ttusb_dec.c | 10 +-- include/media/demux.h | 21 ++++-- include/media/dvb_demux.h | 4 ++ include/media/dvb_vb2.h | 18 ++++- 10 files changed, 160 insertions(+), 76 deletions(-) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index 09c2626..61a750f 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -385,7 +385,8 @@ static void dvb_dmxdev_filter_timer(struct dmxdev_filter *dmxdevfilter) static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, - struct dmx_section_filter *filter) + struct dmx_section_filter *filter, + u32 *buffer_flags) { struct dmxdev_filter *dmxdevfilter = filter->priv; int ret; @@ -404,10 +405,12 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, dprintk("section callback %*ph\n", 6, buffer1); if (dvb_vb2_is_streaming(&dmxdevfilter->vb2_ctx)) { ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx, - buffer1, buffer1_len); + buffer1, buffer1_len, + buffer_flags); if (ret == buffer1_len) ret = dvb_vb2_fill_buffer(&dmxdevfilter->vb2_ctx, - buffer2, buffer2_len); + buffer2, buffer2_len, + buffer_flags); } else { ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer, buffer1, buffer1_len); @@ -427,7 +430,8 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, - struct dmx_ts_feed *feed) + struct dmx_ts_feed *feed, + u32 *buffer_flags) { struct dmxdev_filter *dmxdevfilter = feed->priv; struct dvb_ringbuffer *buffer; @@ -456,9 +460,11 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, } if (dvb_vb2_is_streaming(ctx)) { - ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len); + ret = dvb_vb2_fill_buffer(ctx, buffer1, buffer1_len, + buffer_flags); if (ret == buffer1_len) - ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len); + ret = dvb_vb2_fill_buffer(ctx, buffer2, buffer2_len, + buffer_flags); } else { if (buffer->error) { spin_unlock(&dmxdevfilter->dev->lock); @@ -1218,7 +1224,7 @@ static int dvb_demux_mmap(struct file *file, struct vm_area_struct *vma) int ret; if (!dmxdev->may_do_mmap) - return -EOPNOTSUPP; + return -ENOTTY; if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; @@ -1318,7 +1324,7 @@ static int dvb_dvr_do_ioctl(struct file *file, break; #endif default: - ret = -EINVAL; + ret = -ENOTTY; break; } mutex_unlock(&dmxdev->mutex); @@ -1367,7 +1373,7 @@ static int dvb_dvr_mmap(struct file *file, struct vm_area_struct *vma) int ret; if (!dmxdev->may_do_mmap) - return -EOPNOTSUPP; + return -ENOTTY; if (dmxdev->exit) return -ENODEV; diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c index 210eed0..f450912 100644 --- a/drivers/media/dvb-core/dvb_demux.c +++ b/drivers/media/dvb-core/dvb_demux.c @@ -55,6 +55,17 @@ MODULE_PARM_DESC(dvb_demux_feed_err_pkts, dprintk(x); \ } while (0) +#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG +# define dprintk_sect_loss(x...) dprintk(x) +#else +# define dprintk_sect_loss(x...) +#endif + +#define set_buf_flags(__feed, __flag) \ + do { \ + (__feed)->buffer_flags |= (__flag); \ + } while (0) + /****************************************************************************** * static inlined helper functions ******************************************************************************/ @@ -104,31 +115,30 @@ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed, { int count = payload(buf); int p; -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG int ccok; u8 cc; -#endif if (count == 0) return -1; p = 188 - count; -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG cc = buf[3] & 0x0f; ccok = ((feed->cc + 1) & 0x0f) == cc; feed->cc = cc; - if (!ccok) - dprintk("missed packet: %d instead of %d!\n", - cc, (feed->cc + 1) & 0x0f); -#endif + if (!ccok) { + set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); + dprintk_sect_loss("missed packet: %d instead of %d!\n", + cc, (feed->cc + 1) & 0x0f); + } if (buf[1] & 0x40) // PUSI ? feed->peslen = 0xfffa; feed->peslen += count; - return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts); + return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts, + &feed->buffer_flags); } static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed, @@ -150,7 +160,7 @@ static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed, return 0; return feed->cb.sec(feed->feed.sec.secbuf, feed->feed.sec.seclen, - NULL, 0, &f->filter); + NULL, 0, &f->filter, &feed->buffer_flags); } static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed) @@ -169,8 +179,10 @@ static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed) if (sec->check_crc) { section_syntax_indicator = ((sec->secbuf[1] & 0x80) != 0); if (section_syntax_indicator && - demux->check_crc32(feed, sec->secbuf, sec->seclen)) + demux->check_crc32(feed, sec->secbuf, sec->seclen)) { + set_buf_flags(feed, DMX_BUFFER_FLAG_HAD_CRC32_DISCARD); return -1; + } } do { @@ -187,7 +199,6 @@ static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed) { struct dmx_section_feed *sec = &feed->feed.sec; -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG if (sec->secbufp < sec->tsfeedp) { int n = sec->tsfeedp - sec->secbufp; @@ -197,12 +208,13 @@ static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed) * but just first and last. */ if (sec->secbuf[0] != 0xff || sec->secbuf[n - 1] != 0xff) { - dprintk("section ts padding loss: %d/%d\n", - n, sec->tsfeedp); - dprintk("pad data: %*ph\n", n, sec->secbuf); + set_buf_flags(feed, + DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); + dprintk_sect_loss("section ts padding loss: %d/%d\n", + n, sec->tsfeedp); + dprintk_sect_loss("pad data: %*ph\n", n, sec->secbuf); } } -#endif sec->tsfeedp = sec->secbufp = sec->seclen = 0; sec->secbuf = sec->secbuf_base; @@ -237,11 +249,10 @@ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed, return 0; if (sec->tsfeedp + len > DMX_MAX_SECFEED_SIZE) { -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG - dprintk("section buffer full loss: %d/%d\n", - sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE, - DMX_MAX_SECFEED_SIZE); -#endif + set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); + dprintk_sect_loss("section buffer full loss: %d/%d\n", + sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE, + DMX_MAX_SECFEED_SIZE); len = DMX_MAX_SECFEED_SIZE - sec->tsfeedp; } @@ -269,12 +280,13 @@ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed, sec->seclen = seclen; sec->crc_val = ~0; /* dump [secbuf .. secbuf+seclen) */ - if (feed->pusi_seen) + if (feed->pusi_seen) { dvb_dmx_swfilter_section_feed(feed); -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG - else - dprintk("pusi not seen, discarding section data\n"); -#endif + } else { + set_buf_flags(feed, + DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); + dprintk_sect_loss("pusi not seen, discarding section data\n"); + } sec->secbufp += seclen; /* secbufp and secbuf moving together is */ sec->secbuf += seclen; /* redundant but saves pointer arithmetic */ } @@ -307,18 +319,22 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed, } if (!ccok || dc_i) { -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG - if (dc_i) - dprintk("%d frame with disconnect indicator\n", + if (dc_i) { + set_buf_flags(feed, + DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR); + dprintk_sect_loss("%d frame with disconnect indicator\n", cc); - else - dprintk("discontinuity: %d instead of %d. %d bytes lost\n", + } else { + set_buf_flags(feed, + DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); + dprintk_sect_loss("discontinuity: %d instead of %d. %d bytes lost\n", cc, (feed->cc + 1) & 0x0f, count + 4); + } /* - * those bytes under sume circumstances will again be reported + * those bytes under some circumstances will again be reported * in the following dvb_dmx_swfilter_section_new */ -#endif + /* * Discontinuity detected. Reset pusi_seen to * stop feeding of suspicious data until next PUSI=1 arrives @@ -326,6 +342,7 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed, * FIXME: does it make sense if the MPEG-TS is the one * reporting discontinuity? */ + feed->pusi_seen = false; dvb_dmx_swfilter_section_new(feed); } @@ -345,11 +362,11 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed, dvb_dmx_swfilter_section_new(feed); dvb_dmx_swfilter_section_copy_dump(feed, after, after_len); + } else if (count > 0) { + set_buf_flags(feed, + DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); + dprintk_sect_loss("PUSI=1 but %d bytes lost\n", count); } -#ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG - else if (count > 0) - dprintk("PUSI=1 but %d bytes lost\n", count); -#endif } else { /* PUSI=0 (is not set), no section boundary */ dvb_dmx_swfilter_section_copy_dump(feed, &buf[p], count); @@ -369,7 +386,8 @@ static inline void dvb_dmx_swfilter_packet_type(struct dvb_demux_feed *feed, if (feed->ts_type & TS_PAYLOAD_ONLY) dvb_dmx_swfilter_payload(feed, buf); else - feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts); + feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts, + &feed->buffer_flags); } /* Used only on full-featured devices */ if (feed->ts_type & TS_DECODER) @@ -430,6 +448,11 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) } if (buf[1] & 0x80) { + list_for_each_entry(feed, &demux->feed_list, list_head) { + if ((feed->pid != pid) && (feed->pid != 0x2000)) + continue; + set_buf_flags(feed, DMX_BUFFER_FLAG_TEI); + } dprintk_tscheck("TEI detected. PID=0x%x data1=0x%x\n", pid, buf[1]); /* data in this packet can't be trusted - drop it unless @@ -445,6 +468,13 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) (demux->cnt_storage[pid] + 1) & 0xf; if ((buf[3] & 0xf) != demux->cnt_storage[pid]) { + list_for_each_entry(feed, &demux->feed_list, list_head) { + if ((feed->pid != pid) && (feed->pid != 0x2000)) + continue; + set_buf_flags(feed, + DMX_BUFFER_PKT_COUNTER_MISMATCH); + } + dprintk_tscheck("TS packet counter mismatch. PID=0x%x expected 0x%x got 0x%x\n", pid, demux->cnt_storage[pid], buf[3] & 0xf); @@ -466,7 +496,8 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) if (feed->pid == pid) dvb_dmx_swfilter_packet_type(feed, buf); else if (feed->pid == 0x2000) - feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts); + feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts, + &feed->buffer_flags); } } @@ -585,7 +616,8 @@ void dvb_dmx_swfilter_raw(struct dvb_demux *demux, const u8 *buf, size_t count) spin_lock_irqsave(&demux->lock, flags); - demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts); + demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts, + &demux->feed->buffer_flags); spin_unlock_irqrestore(&demux->lock, flags); } @@ -785,6 +817,7 @@ static int dvbdmx_allocate_ts_feed(struct dmx_demux *dmx, feed->demux = demux; feed->pid = 0xffff; feed->peslen = 0xfffa; + feed->buffer_flags = 0; (*ts_feed) = &feed->feed.ts; (*ts_feed)->parent = dmx; @@ -1042,6 +1075,7 @@ static int dvbdmx_allocate_section_feed(struct dmx_demux *demux, dvbdmxfeed->cb.sec = callback; dvbdmxfeed->demux = dvbdmx; dvbdmxfeed->pid = 0xffff; + dvbdmxfeed->buffer_flags = 0; dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base; dvbdmxfeed->feed.sec.secbufp = dvbdmxfeed->feed.sec.seclen = 0; dvbdmxfeed->feed.sec.tsfeedp = 0; diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index b6c7eec..ba39f99 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -883,7 +883,8 @@ static void dvb_net_ule(struct net_device *dev, const u8 *buf, size_t buf_len) static int dvb_net_ts_callback(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, - struct dmx_ts_feed *feed) + struct dmx_ts_feed *feed, + u32 *buffer_flags) { struct net_device *dev = feed->priv; @@ -992,7 +993,7 @@ static void dvb_net_sec(struct net_device *dev, static int dvb_net_sec_callback(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, - struct dmx_section_filter *filter) + struct dmx_section_filter *filter, u32 *buffer_flags) { struct net_device *dev = filter->priv; diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c index 763145d..b811adf 100644 --- a/drivers/media/dvb-core/dvb_vb2.c +++ b/drivers/media/dvb-core/dvb_vb2.c @@ -256,7 +256,8 @@ int dvb_vb2_is_streaming(struct dvb_vb2_ctx *ctx) } int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx, - const unsigned char *src, int len) + const unsigned char *src, int len, + enum dmx_buffer_flags *buffer_flags) { unsigned long flags = 0; void *vbuf = NULL; @@ -264,15 +265,17 @@ int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx, unsigned char *psrc = (unsigned char *)src; int ll = 0; - dprintk(3, "[%s] %d bytes are rcvd\n", ctx->name, len); - if (!src) { - dprintk(3, "[%s]:NULL pointer src\n", ctx->name); - /**normal case: This func is called twice from demux driver - * once with valid src pointer, second time with NULL pointer - */ + /* + * normal case: This func is called twice from demux driver + * one with valid src pointer, second time with NULL pointer + */ + if (!src || !len) return 0; - } spin_lock_irqsave(&ctx->slock, flags); + if (buffer_flags && *buffer_flags) { + ctx->flags |= *buffer_flags; + *buffer_flags = 0; + } while (todo) { if (!ctx->buf) { if (list_empty(&ctx->dvb_q)) { @@ -395,6 +398,7 @@ int dvb_vb2_qbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b) int dvb_vb2_dqbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b) { + unsigned long flags; int ret; ret = vb2_core_dqbuf(&ctx->vb_q, &b->index, b, ctx->nonblocking); @@ -402,7 +406,16 @@ int dvb_vb2_dqbuf(struct dvb_vb2_ctx *ctx, struct dmx_buffer *b) dprintk(1, "[%s] errno=%d\n", ctx->name, ret); return ret; } - dprintk(5, "[%s] index=%d\n", ctx->name, b->index); + + spin_lock_irqsave(&ctx->slock, flags); + b->count = ctx->count++; + b->flags = ctx->flags; + ctx->flags = 0; + spin_unlock_irqrestore(&ctx->slock, flags); + + dprintk(5, "[%s] index=%d, count=%d, flags=%d\n", + ctx->name, b->index, ctx->count, b->flags); + return 0; } diff --git a/drivers/media/pci/ttpci/av7110.c b/drivers/media/pci/ttpci/av7110.c index dc8e577..d6816ef 100644 --- a/drivers/media/pci/ttpci/av7110.c +++ b/drivers/media/pci/ttpci/av7110.c @@ -324,14 +324,15 @@ static int DvbDmxFilterCallback(u8 *buffer1, size_t buffer1_len, } return dvbdmxfilter->feed->cb.sec(buffer1, buffer1_len, buffer2, buffer2_len, - &dvbdmxfilter->filter); + &dvbdmxfilter->filter, NULL); case DMX_TYPE_TS: if (!(dvbdmxfilter->feed->ts_type & TS_PACKET)) return 0; if (dvbdmxfilter->feed->ts_type & TS_PAYLOAD_ONLY) return dvbdmxfilter->feed->cb.ts(buffer1, buffer1_len, buffer2, buffer2_len, - &dvbdmxfilter->feed->feed.ts); + &dvbdmxfilter->feed->feed.ts, + NULL); else av7110_p2t_write(buffer1, buffer1_len, dvbdmxfilter->feed->pid, diff --git a/drivers/media/pci/ttpci/av7110_av.c b/drivers/media/pci/ttpci/av7110_av.c index 4daba76..ef1bc17 100644 --- a/drivers/media/pci/ttpci/av7110_av.c +++ b/drivers/media/pci/ttpci/av7110_av.c @@ -99,7 +99,7 @@ int av7110_record_cb(struct dvb_filter_pes2ts *p2t, u8 *buf, size_t len) buf[4] = buf[5] = 0; if (dvbdmxfeed->ts_type & TS_PAYLOAD_ONLY) return dvbdmxfeed->cb.ts(buf, len, NULL, 0, - &dvbdmxfeed->feed.ts); + &dvbdmxfeed->feed.ts, NULL); else return dvb_filter_pes2ts(p2t, buf, len, 1); } @@ -109,7 +109,7 @@ static int dvb_filter_pes2ts_cb(void *priv, unsigned char *data) struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *) priv; dvbdmxfeed->cb.ts(data, 188, NULL, 0, - &dvbdmxfeed->feed.ts); + &dvbdmxfeed->feed.ts, NULL); return 0; } @@ -814,7 +814,7 @@ static void p_to_t(u8 const *buf, long int length, u16 pid, u8 *counter, memcpy(obuf + l, buf + c, TS_SIZE - l); c = length; } - feed->cb.ts(obuf, 188, NULL, 0, &feed->feed.ts); + feed->cb.ts(obuf, 188, NULL, 0, &feed->feed.ts, NULL); pes_start = 0; } } diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c index a8900f5..44ca66c 100644 --- a/drivers/media/usb/ttusb-dec/ttusb_dec.c +++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c @@ -428,7 +428,7 @@ static int ttusb_dec_audio_pes2ts_cb(void *priv, unsigned char *data) struct ttusb_dec *dec = priv; dec->audio_filter->feed->cb.ts(data, 188, NULL, 0, - &dec->audio_filter->feed->feed.ts); + &dec->audio_filter->feed->feed.ts, NULL); return 0; } @@ -438,7 +438,7 @@ static int ttusb_dec_video_pes2ts_cb(void *priv, unsigned char *data) struct ttusb_dec *dec = priv; dec->video_filter->feed->cb.ts(data, 188, NULL, 0, - &dec->video_filter->feed->feed.ts); + &dec->video_filter->feed->feed.ts, NULL); return 0; } @@ -490,7 +490,7 @@ static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length) if (output_pva) { dec->video_filter->feed->cb.ts(pva, length, NULL, 0, - &dec->video_filter->feed->feed.ts); + &dec->video_filter->feed->feed.ts, NULL); return; } @@ -551,7 +551,7 @@ static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length) case 0x02: /* MainAudioStream */ if (output_pva) { dec->audio_filter->feed->cb.ts(pva, length, NULL, 0, - &dec->audio_filter->feed->feed.ts); + &dec->audio_filter->feed->feed.ts, NULL); return; } @@ -589,7 +589,7 @@ static void ttusb_dec_process_filter(struct ttusb_dec *dec, u8 *packet, if (filter) filter->feed->cb.sec(&packet[2], length - 2, NULL, 0, - &filter->filter); + &filter->filter, NULL); } static void ttusb_dec_process_packet(struct ttusb_dec *dec) diff --git a/include/media/demux.h b/include/media/demux.h index c4df6ce..bf00a5a 100644 --- a/include/media/demux.h +++ b/include/media/demux.h @@ -117,7 +117,7 @@ struct dmx_ts_feed { * specified by @filter_value that will be used on the filter * match logic. * @filter_mode: Contains a 16 bytes (128 bits) filter mode. - * @parent: Pointer to struct dmx_section_feed. + * @parent: Back-pointer to struct dmx_section_feed. * @priv: Pointer to private data of the API client. * * @@ -130,8 +130,9 @@ struct dmx_section_filter { u8 filter_value[DMX_MAX_FILTER_SIZE]; u8 filter_mask[DMX_MAX_FILTER_SIZE]; u8 filter_mode[DMX_MAX_FILTER_SIZE]; - struct dmx_section_feed *parent; /* Back-pointer */ - void *priv; /* Pointer to private data of the API client */ + struct dmx_section_feed *parent; + + void *priv; }; /** @@ -193,6 +194,10 @@ struct dmx_section_feed { * @buffer2: Pointer to the tail of the filtered TS packets, or NULL. * @buffer2_length: Length of the TS data in buffer2. * @source: Indicates which TS feed is the source of the callback. + * @buffer_flags: Address where buffer flags are stored. Those are + * used to report discontinuity users via DVB + * memory mapped API, as defined by + * &enum dmx_buffer_flags. * * This function callback prototype, provided by the client of the demux API, * is called from the demux code. The function is only called when filtering @@ -245,7 +250,8 @@ typedef int (*dmx_ts_cb)(const u8 *buffer1, size_t buffer1_length, const u8 *buffer2, size_t buffer2_length, - struct dmx_ts_feed *source); + struct dmx_ts_feed *source, + u32 *buffer_flags); /** * typedef dmx_section_cb - DVB demux TS filter callback function prototype @@ -261,6 +267,10 @@ typedef int (*dmx_ts_cb)(const u8 *buffer1, * including headers and CRC. * @source: Indicates which section feed is the source of the * callback. + * @buffer_flags: Address where buffer flags are stored. Those are + * used to report discontinuity users via DVB + * memory mapped API, as defined by + * &enum dmx_buffer_flags. * * This function callback prototype, provided by the client of the demux API, * is called from the demux code. The function is only called when @@ -286,7 +296,8 @@ typedef int (*dmx_section_cb)(const u8 *buffer1, size_t buffer1_len, const u8 *buffer2, size_t buffer2_len, - struct dmx_section_filter *source); + struct dmx_section_filter *source, + u32 *buffer_flags); /* * DVB Front-End diff --git a/include/media/dvb_demux.h b/include/media/dvb_demux.h index b070920..3b6aeca 100644 --- a/include/media/dvb_demux.h +++ b/include/media/dvb_demux.h @@ -115,6 +115,8 @@ struct dvb_demux_filter { * @pid: PID to be filtered. * @timeout: feed timeout. * @filter: pointer to &struct dvb_demux_filter. + * @buffer_flags: Buffer flags used to report discontinuity users via DVB + * memory mapped API, as defined by &enum dmx_buffer_flags. * @ts_type: type of TS, as defined by &enum ts_filter_type. * @pes_type: type of PES, as defined by &enum dmx_ts_pes. * @cc: MPEG-TS packet continuity counter @@ -145,6 +147,8 @@ struct dvb_demux_feed { ktime_t timeout; struct dvb_demux_filter *filter; + u32 buffer_flags; + enum ts_filter_type ts_type; enum dmx_ts_pes pes_type; diff --git a/include/media/dvb_vb2.h b/include/media/dvb_vb2.h index 056adc86..8cb8845 100644 --- a/include/media/dvb_vb2.h +++ b/include/media/dvb_vb2.h @@ -85,6 +85,12 @@ struct dvb_buffer { * @nonblocking: * If different than zero, device is operating on non-blocking * mode. + * @flags: buffer flags as defined by &enum dmx_buffer_flags. + * Filled only at &DMX_DQBUF. &DMX_QBUF should zero this field. + * @count: monotonic counter for filled buffers. Helps to identify + * data stream loses. Filled only at &DMX_DQBUF. &DMX_QBUF should + * zero this field. + * * @name: name of the device type. Currently, it can either be * "dvr" or "demux_filter". */ @@ -100,6 +106,10 @@ struct dvb_vb2_ctx { int buf_siz; int buf_cnt; int nonblocking; + + enum dmx_buffer_flags flags; + u32 count; + char name[DVB_VB2_NAME_MAX + 1]; }; @@ -114,7 +124,7 @@ static inline int dvb_vb2_release(struct dvb_vb2_ctx *ctx) return 0; }; #define dvb_vb2_is_streaming(ctx) (0) -#define dvb_vb2_fill_buffer(ctx, file, wait) (0) +#define dvb_vb2_fill_buffer(ctx, file, wait, flags) (0) static inline __poll_t dvb_vb2_poll(struct dvb_vb2_ctx *ctx, struct file *file, @@ -153,9 +163,13 @@ int dvb_vb2_is_streaming(struct dvb_vb2_ctx *ctx); * @ctx: control struct for VB2 handler * @src: place where the data is stored * @len: number of bytes to be copied from @src + * @buffer_flags: + * pointer to buffer flags as defined by &enum dmx_buffer_flags. + * can be NULL. */ int dvb_vb2_fill_buffer(struct dvb_vb2_ctx *ctx, - const unsigned char *src, int len); + const unsigned char *src, int len, + enum dmx_buffer_flags *buffer_flags); /** * dvb_vb2_poll - Wrapper to vb2_core_streamon() for Digital TV -- cgit v1.1 From 3dd6b560dc5d59e7cb6dbda6e85dc9af7925fcf8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 19 Feb 2018 13:23:39 -0500 Subject: media: Don't let tvp5150_get_vbi() go out of vbi_ram_default array As pointed by Dan, possible values for bits[3:0] of te Line Mode Registers can range from 0x0 to 0xf, but the check logic allow values ranging from 0x0 to 0xe. As static arrays are initialized with zero, using a value without an explicit initializer at the array won't cause any harm. Reported-by: Dan Carpenter Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/tvp5150.c | 88 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 3c18519..2476d81 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -505,80 +505,77 @@ static struct i2c_vbi_ram_value vbi_ram_default[] = /* FIXME: Current api doesn't handle all VBI types, those not yet supported are placed under #if 0 */ #if 0 - {0x010, /* Teletext, SECAM, WST System A */ + [0] = {0x010, /* Teletext, SECAM, WST System A */ {V4L2_SLICED_TELETEXT_SECAM,6,23,1}, { 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x26, 0xe6, 0xb4, 0x0e, 0x00, 0x00, 0x00, 0x10, 0x00 } }, #endif - {0x030, /* Teletext, PAL, WST System B */ + [1] = {0x030, /* Teletext, PAL, WST System B */ {V4L2_SLICED_TELETEXT_B,6,22,1}, { 0xaa, 0xaa, 0xff, 0xff, 0x27, 0x2e, 0x20, 0x2b, 0xa6, 0x72, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00 } }, #if 0 - {0x050, /* Teletext, PAL, WST System C */ + [2] = {0x050, /* Teletext, PAL, WST System C */ {V4L2_SLICED_TELETEXT_PAL_C,6,22,1}, { 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x22, 0xa6, 0x98, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 } }, - {0x070, /* Teletext, NTSC, WST System B */ + [3] = {0x070, /* Teletext, NTSC, WST System B */ {V4L2_SLICED_TELETEXT_NTSC_B,10,21,1}, { 0xaa, 0xaa, 0xff, 0xff, 0x27, 0x2e, 0x20, 0x23, 0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 } }, - {0x090, /* Tetetext, NTSC NABTS System C */ + [4] = {0x090, /* Tetetext, NTSC NABTS System C */ {V4L2_SLICED_TELETEXT_NTSC_C,10,21,1}, { 0xaa, 0xaa, 0xff, 0xff, 0xe7, 0x2e, 0x20, 0x22, 0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x15, 0x00 } }, - {0x0b0, /* Teletext, NTSC-J, NABTS System D */ + [5] = {0x0b0, /* Teletext, NTSC-J, NABTS System D */ {V4L2_SLICED_TELETEXT_NTSC_D,10,21,1}, { 0xaa, 0xaa, 0xff, 0xff, 0xa7, 0x2e, 0x20, 0x23, 0x69, 0x93, 0x0d, 0x00, 0x00, 0x00, 0x10, 0x00 } }, - {0x0d0, /* Closed Caption, PAL/SECAM */ + [6] = {0x0d0, /* Closed Caption, PAL/SECAM */ {V4L2_SLICED_CAPTION_625,22,22,1}, { 0xaa, 0x2a, 0xff, 0x3f, 0x04, 0x51, 0x6e, 0x02, 0xa6, 0x7b, 0x09, 0x00, 0x00, 0x00, 0x27, 0x00 } }, #endif - {0x0f0, /* Closed Caption, NTSC */ + [7] = {0x0f0, /* Closed Caption, NTSC */ {V4L2_SLICED_CAPTION_525,21,21,1}, { 0xaa, 0x2a, 0xff, 0x3f, 0x04, 0x51, 0x6e, 0x02, 0x69, 0x8c, 0x09, 0x00, 0x00, 0x00, 0x27, 0x00 } }, - {0x110, /* Wide Screen Signal, PAL/SECAM */ + [8] = {0x110, /* Wide Screen Signal, PAL/SECAM */ {V4L2_SLICED_WSS_625,23,23,1}, { 0x5b, 0x55, 0xc5, 0xff, 0x00, 0x71, 0x6e, 0x42, 0xa6, 0xcd, 0x0f, 0x00, 0x00, 0x00, 0x3a, 0x00 } }, #if 0 - {0x130, /* Wide Screen Signal, NTSC C */ + [9] = {0x130, /* Wide Screen Signal, NTSC C */ {V4L2_SLICED_WSS_525,20,20,1}, { 0x38, 0x00, 0x3f, 0x00, 0x00, 0x71, 0x6e, 0x43, 0x69, 0x7c, 0x08, 0x00, 0x00, 0x00, 0x39, 0x00 } }, - {0x150, /* Vertical Interval Timecode (VITC), PAL/SECAM */ + [10] = {0x150, /* Vertical Interval Timecode (VITC), PAL/SECAM */ {V4l2_SLICED_VITC_625,6,22,0}, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x8f, 0x6d, 0x49, 0xa6, 0x85, 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00 } }, - {0x170, /* Vertical Interval Timecode (VITC), NTSC */ + [11] = {0x170, /* Vertical Interval Timecode (VITC), NTSC */ {V4l2_SLICED_VITC_525,10,20,0}, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x8f, 0x6d, 0x49, 0x69, 0x94, 0x08, 0x00, 0x00, 0x00, 0x4c, 0x00 } }, #endif - {0x190, /* Video Program System (VPS), PAL */ + [12] = {0x190, /* Video Program System (VPS), PAL */ {V4L2_SLICED_VPS,16,16,0}, { 0xaa, 0xaa, 0xff, 0xff, 0xba, 0xce, 0x2b, 0x0d, 0xa6, 0xda, 0x0b, 0x00, 0x00, 0x00, 0x60, 0x00 } }, /* 0x1d0 User programmable */ - - /* End of struct */ - { (u16)-1 } }; static int tvp5150_write_inittab(struct v4l2_subdev *sd, @@ -591,10 +588,10 @@ static int tvp5150_write_inittab(struct v4l2_subdev *sd, return 0; } -static int tvp5150_vdp_init(struct v4l2_subdev *sd, - const struct i2c_vbi_ram_value *regs) +static int tvp5150_vdp_init(struct v4l2_subdev *sd) { unsigned int i; + int j; /* Disable Full Field */ tvp5150_write(sd, TVP5150_FULL_FIELD_ENA, 0); @@ -604,14 +601,17 @@ static int tvp5150_vdp_init(struct v4l2_subdev *sd, tvp5150_write(sd, i, 0xff); /* Load Ram Table */ - while (regs->reg != (u16)-1) { + for (j = 0; j < ARRAY_SIZE(vbi_ram_default); j++) { + const struct i2c_vbi_ram_value *regs = &vbi_ram_default[j]; + + if (!regs->type.vbi_type) + continue; + tvp5150_write(sd, TVP5150_CONF_RAM_ADDR_HIGH, regs->reg >> 8); tvp5150_write(sd, TVP5150_CONF_RAM_ADDR_LOW, regs->reg); for (i = 0; i < 16; i++) tvp5150_write(sd, TVP5150_VDP_CONF_RAM_DATA, regs->values[i]); - - regs++; } return 0; } @@ -620,19 +620,23 @@ static int tvp5150_vdp_init(struct v4l2_subdev *sd, static int tvp5150_g_sliced_vbi_cap(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_cap *cap) { - const struct i2c_vbi_ram_value *regs = vbi_ram_default; - int line; + int line, i; dev_dbg_lvl(sd->dev, 1, debug, "g_sliced_vbi_cap\n"); memset(cap, 0, sizeof *cap); - while (regs->reg != (u16)-1 ) { - for (line=regs->type.ini_line;line<=regs->type.end_line;line++) { + for (i = 0; i < ARRAY_SIZE(vbi_ram_default); i++) { + const struct i2c_vbi_ram_value *regs = &vbi_ram_default[i]; + + if (!regs->type.vbi_type) + continue; + + for (line = regs->type.ini_line; + line <= regs->type.end_line; + line++) { cap->service_lines[0][line] |= regs->type.vbi_type; } cap->service_set |= regs->type.vbi_type; - - regs++; } return 0; } @@ -651,14 +655,13 @@ static int tvp5150_g_sliced_vbi_cap(struct v4l2_subdev *sd, * MSB = field2 */ static int tvp5150_set_vbi(struct v4l2_subdev *sd, - const struct i2c_vbi_ram_value *regs, unsigned int type,u8 flags, int line, const int fields) { struct tvp5150 *decoder = to_tvp5150(sd); v4l2_std_id std = decoder->norm; u8 reg; - int pos = 0; + int i, pos = 0; if (std == V4L2_STD_ALL) { dev_err(sd->dev, "VBI can't be configured without knowing number of lines\n"); @@ -671,19 +674,19 @@ static int tvp5150_set_vbi(struct v4l2_subdev *sd, if (line < 6 || line > 27) return 0; - while (regs->reg != (u16)-1) { + for (i = 0; i < ARRAY_SIZE(vbi_ram_default); i++) { + const struct i2c_vbi_ram_value *regs = &vbi_ram_default[i]; + + if (!regs->type.vbi_type) + continue; + if ((type & regs->type.vbi_type) && (line >= regs->type.ini_line) && (line <= regs->type.end_line)) break; - - regs++; pos++; } - if (regs->reg == (u16)-1) - return 0; - type = pos | (flags & 0xf0); reg = ((line - 6) << 1) + TVP5150_LINE_MODE_INI; @@ -696,8 +699,7 @@ static int tvp5150_set_vbi(struct v4l2_subdev *sd, return type; } -static int tvp5150_get_vbi(struct v4l2_subdev *sd, - const struct i2c_vbi_ram_value *regs, int line) +static int tvp5150_get_vbi(struct v4l2_subdev *sd, int line) { struct tvp5150 *decoder = to_tvp5150(sd); v4l2_std_id std = decoder->norm; @@ -726,8 +728,8 @@ static int tvp5150_get_vbi(struct v4l2_subdev *sd, return 0; } pos = ret & 0x0f; - if (pos < 0x0f) - type |= regs[pos].type.vbi_type; + if (pos < ARRAY_SIZE(vbi_ram_default)) + type |= vbi_ram_default[pos].type.vbi_type; } return type; @@ -788,7 +790,7 @@ static int tvp5150_reset(struct v4l2_subdev *sd, u32 val) tvp5150_write_inittab(sd, tvp5150_init_default); /* Initializes VDP registers */ - tvp5150_vdp_init(sd, vbi_ram_default); + tvp5150_vdp_init(sd); /* Selects decoder input */ tvp5150_selmux(sd); @@ -1121,8 +1123,8 @@ static int tvp5150_s_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f for (i = 0; i <= 23; i++) { svbi->service_lines[1][i] = 0; svbi->service_lines[0][i] = - tvp5150_set_vbi(sd, vbi_ram_default, - svbi->service_lines[0][i], 0xf0, i, 3); + tvp5150_set_vbi(sd, svbi->service_lines[0][i], + 0xf0, i, 3); } /* Enables FIFO */ tvp5150_write(sd, TVP5150_FIFO_OUT_CTRL, 1); @@ -1148,7 +1150,7 @@ static int tvp5150_g_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f for (i = 0; i <= 23; i++) { svbi->service_lines[0][i] = - tvp5150_get_vbi(sd, vbi_ram_default, i); + tvp5150_get_vbi(sd, i); mask |= svbi->service_lines[0][i]; } svbi->service_set = mask; -- cgit v1.1 From d903ec77118c09f93a610b384d83a6df33a64fe6 Mon Sep 17 00:00:00 2001 From: Andy Spencer Date: Thu, 22 Feb 2018 11:05:33 -0800 Subject: gianfar: simplify FCS handling and fix memory leak Previously, buffer descriptors containing only the frame check sequence (FCS) were skipped and not added to the skb. However, the page reference count was still incremented, leading to a memory leak. Fixing this inside gfar_add_rx_frag() is difficult due to reserved memory handling and page reuse. Instead, move the FCS handling to gfar_process_frame() and trim off the FCS before passing the skb up the networking stack. Signed-off-by: Andy Spencer Signed-off-by: Jim Gruen Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3bdeb29..f5c87bd 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2934,29 +2934,17 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, { int size = lstatus & BD_LENGTH_MASK; struct page *page = rxb->page; - bool last = !!(lstatus & BD_LFLAG(RXBD_LAST)); - - /* Remove the FCS from the packet length */ - if (last) - size -= ETH_FCS_LEN; if (likely(first)) { skb_put(skb, size); } else { /* the last fragments' length contains the full frame length */ - if (last) + if (lstatus & BD_LFLAG(RXBD_LAST)) size -= skb->len; - /* Add the last fragment if it contains something other than - * the FCS, otherwise drop it and trim off any part of the FCS - * that was already received. - */ - if (size > 0) - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rxb->page_offset + RXBUF_ALIGNMENT, - size, GFAR_RXB_TRUESIZE); - else if (size < 0) - pskb_trim(skb, skb->len + size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rxb->page_offset + RXBUF_ALIGNMENT, + size, GFAR_RXB_TRUESIZE); } /* try reuse page */ @@ -3069,6 +3057,9 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb) if (priv->padding) skb_pull(skb, priv->padding); + /* Trim off the FCS */ + pskb_trim(skb, skb->len - ETH_FCS_LEN); + if (ndev->features & NETIF_F_RXCSUM) gfar_rx_checksum(skb, fcb); -- cgit v1.1 From a5f7add332b4ea6d4b9480971b3b0f5e66466ae9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2018 19:45:27 -0800 Subject: net_sched: gen_estimator: fix broken estimators based on percpu stats pfifo_fast got percpu stats lately, uncovering a bug I introduced last year in linux-4.10. I missed the fact that we have to clear our temporary storage before calling __gnet_stats_copy_basic() in the case of percpu stats. Without this fix, rate estimators (tc qd replace dev xxx root est 1sec 4sec pfifo_fast) are utterly broken. Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 0a3f88f..98fd127 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,6 +66,7 @@ struct net_rate_estimator { static void est_fetch_counters(struct net_rate_estimator *e, struct gnet_stats_basic_packed *b) { + memset(b, 0, sizeof(*b)); if (e->stats_lock) spin_lock(e->stats_lock); -- cgit v1.1 From 69d7d95452b8964ccb6bf8a7295016f6c669aa53 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Tue, 13 Feb 2018 12:40:38 -0800 Subject: memory: brcmstb: dpfe: properly mask vendor error bits We were printing the entire 32 bit register rather than just the lower 8 bits. Anything above bit 7 is reserved and may be any random value. Fixes: 2f330caff577 ("memory: brcmstb: Add driver for DPFE") Signed-off-by: Markus Mayer Signed-off-by: Florian Fainelli --- drivers/memory/brcmstb_dpfe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/memory/brcmstb_dpfe.c b/drivers/memory/brcmstb_dpfe.c index 0a7bdbe..0881537 100644 --- a/drivers/memory/brcmstb_dpfe.c +++ b/drivers/memory/brcmstb_dpfe.c @@ -603,7 +603,8 @@ static ssize_t show_vendor(struct device *dev, struct device_attribute *devattr, readl_relaxed(info + DRAM_VENDOR_MR6) & DRAM_VENDOR_MASK, readl_relaxed(info + DRAM_VENDOR_MR7) & DRAM_VENDOR_MASK, readl_relaxed(info + DRAM_VENDOR_MR8) & DRAM_VENDOR_MASK, - readl_relaxed(info + DRAM_VENDOR_ERROR)); + readl_relaxed(info + DRAM_VENDOR_ERROR) & + DRAM_VENDOR_MASK); } static int brcmstb_dpfe_resume(struct platform_device *pdev) -- cgit v1.1 From 9f2c4d95e088a44b2b68fedbd4593070b53754a7 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Tue, 13 Feb 2018 12:40:39 -0800 Subject: memory: brcmstb: dpfe: fix type declaration of variable "ret" In some functions, variable "ret" should be ssize_t, so we fix it. Fixes: 2f330caff577 ("memory: brcmstb: Add driver for DPFE") Signed-off-by: Markus Mayer Signed-off-by: Florian Fainelli --- drivers/memory/brcmstb_dpfe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/memory/brcmstb_dpfe.c b/drivers/memory/brcmstb_dpfe.c index 0881537..2013a91 100644 --- a/drivers/memory/brcmstb_dpfe.c +++ b/drivers/memory/brcmstb_dpfe.c @@ -507,7 +507,7 @@ static ssize_t show_info(struct device *dev, struct device_attribute *devattr, { u32 response[MSG_FIELD_MAX]; unsigned int info; - int ret; + ssize_t ret; ret = generic_show(DPFE_CMD_GET_INFO, response, dev, buf); if (ret) @@ -531,7 +531,7 @@ static ssize_t show_refresh(struct device *dev, unsigned int offset; u8 refresh, sr_abort, ppre, thermal_offs, tuf; u32 mr4; - int ret; + ssize_t ret; ret = generic_show(DPFE_CMD_GET_REFRESH, response, dev, buf); if (ret) @@ -588,7 +588,7 @@ static ssize_t show_vendor(struct device *dev, struct device_attribute *devattr, struct private_data *priv; void __iomem *info; unsigned int offset; - int ret; + ssize_t ret; ret = generic_show(DPFE_CMD_GET_VENDOR, response, dev, buf); if (ret) -- cgit v1.1 From fee5f1ef6cf76f87d9799596d06979c9e6589f2b Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Tue, 13 Feb 2018 12:40:40 -0800 Subject: memory: brcmstb: dpfe: support new way of passing data from the DCPU The DCPU can now send message data in two ways: - via the data RAM, as before (this is now message type 0) - via the message RAM (this is message type 1) In order to support both methods, we check the message type of the response (bits 31:28) and then treat the offset (bits 27:0) accordingly. Signed-off-by: Markus Mayer Signed-off-by: Florian Fainelli --- drivers/memory/brcmstb_dpfe.c | 65 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/drivers/memory/brcmstb_dpfe.c b/drivers/memory/brcmstb_dpfe.c index 2013a91..e9c1485 100644 --- a/drivers/memory/brcmstb_dpfe.c +++ b/drivers/memory/brcmstb_dpfe.c @@ -45,8 +45,16 @@ #define REG_TO_DCPU_MBOX 0x10 #define REG_TO_HOST_MBOX 0x14 +/* Macros to process offsets returned by the DCPU */ +#define DRAM_MSG_ADDR_OFFSET 0x0 +#define DRAM_MSG_TYPE_OFFSET 0x1c +#define DRAM_MSG_ADDR_MASK ((1UL << DRAM_MSG_TYPE_OFFSET) - 1) +#define DRAM_MSG_TYPE_MASK ((1UL << \ + (BITS_PER_LONG - DRAM_MSG_TYPE_OFFSET)) - 1) + /* Message RAM */ -#define DCPU_MSG_RAM(x) (0x100 + (x) * sizeof(u32)) +#define DCPU_MSG_RAM_START 0x100 +#define DCPU_MSG_RAM(x) (DCPU_MSG_RAM_START + (x) * sizeof(u32)) /* DRAM Info Offsets & Masks */ #define DRAM_INFO_INTERVAL 0x0 @@ -255,6 +263,40 @@ static unsigned int get_msg_chksum(const u32 msg[]) return sum; } +static void __iomem *get_msg_ptr(struct private_data *priv, u32 response, + char *buf, ssize_t *size) +{ + unsigned int msg_type; + unsigned int offset; + void __iomem *ptr = NULL; + + msg_type = (response >> DRAM_MSG_TYPE_OFFSET) & DRAM_MSG_TYPE_MASK; + offset = (response >> DRAM_MSG_ADDR_OFFSET) & DRAM_MSG_ADDR_MASK; + + /* + * msg_type == 1: the offset is relative to the message RAM + * msg_type == 0: the offset is relative to the data RAM (this is the + * previous way of passing data) + * msg_type is anything else: there's critical hardware problem + */ + switch (msg_type) { + case 1: + ptr = priv->regs + DCPU_MSG_RAM_START + offset; + break; + case 0: + ptr = priv->dmem + offset; + break; + default: + dev_emerg(priv->dev, "invalid message reply from DCPU: %#x\n", + response); + if (buf && size) + *size = sprintf(buf, + "FATAL: communication error with DCPU\n"); + } + + return ptr; +} + static int __send_command(struct private_data *priv, unsigned int cmd, u32 result[]) { @@ -528,7 +570,6 @@ static ssize_t show_refresh(struct device *dev, u32 response[MSG_FIELD_MAX]; void __iomem *info; struct private_data *priv; - unsigned int offset; u8 refresh, sr_abort, ppre, thermal_offs, tuf; u32 mr4; ssize_t ret; @@ -538,8 +579,10 @@ static ssize_t show_refresh(struct device *dev, return ret; priv = dev_get_drvdata(dev); - offset = response[MSG_ARG0]; - info = priv->dmem + offset; + + info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret); + if (!info) + return ret; mr4 = readl_relaxed(info + DRAM_INFO_MR4) & DRAM_INFO_MR4_MASK; @@ -561,7 +604,6 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr, u32 response[MSG_FIELD_MAX]; struct private_data *priv; void __iomem *info; - unsigned int offset; unsigned long val; int ret; @@ -574,8 +616,10 @@ static ssize_t store_refresh(struct device *dev, struct device_attribute *attr, if (ret) return ret; - offset = response[MSG_ARG0]; - info = priv->dmem + offset; + info = get_msg_ptr(priv, response[MSG_ARG0], NULL, NULL); + if (!info) + return -EIO; + writel_relaxed(val, info + DRAM_INFO_INTERVAL); return count; @@ -587,16 +631,17 @@ static ssize_t show_vendor(struct device *dev, struct device_attribute *devattr, u32 response[MSG_FIELD_MAX]; struct private_data *priv; void __iomem *info; - unsigned int offset; ssize_t ret; ret = generic_show(DPFE_CMD_GET_VENDOR, response, dev, buf); if (ret) return ret; - offset = response[MSG_ARG0]; priv = dev_get_drvdata(dev); - info = priv->dmem + offset; + + info = get_msg_ptr(priv, response[MSG_ARG0], buf, &ret); + if (!info) + return ret; return sprintf(buf, "%#x %#x %#x %#x %#x\n", readl_relaxed(info + DRAM_VENDOR_MR5) & DRAM_VENDOR_MASK, -- cgit v1.1 From 0bd1ed4860d0f5f836aa8371797689a3779d1bf5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 10 Feb 2018 08:46:17 +0800 Subject: block: pass inclusive 'lend' parameter to truncate_inode_pages_range The 'lend' parameter of truncate_inode_pages_range is required to be inclusive, so follow the rule. This patch fixes one memory corruption triggered by discard. Cc: Cc: Dmitry Monakhov Fixes: 351499a172c0 ("block: Invalidate cache on discard v2") Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/ioctl.c b/block/ioctl.c index 1668506..3884d81 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -225,7 +225,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, if (start + len > i_size_read(bdev->bd_inode)) return -EINVAL; - truncate_inode_pages_range(mapping, start, start + len); + truncate_inode_pages_range(mapping, start, start + len - 1); return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, flags); } -- cgit v1.1 From ca36960211eb228bcbc7aaebfa0d027368a94c60 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Feb 2018 22:29:05 +0100 Subject: bpf: allow xadd only on aligned memory The requirements around atomic_add() / atomic64_add() resp. their JIT implementations differ across architectures. E.g. while x86_64 seems just fine with BPF's xadd on unaligned memory, on arm64 it triggers via interpreter but also JIT the following crash: [ 830.864985] Unable to handle kernel paging request at virtual address ffff8097d7ed6703 [...] [ 830.916161] Internal error: Oops: 96000021 [#1] SMP [ 830.984755] CPU: 37 PID: 2788 Comm: test_verifier Not tainted 4.16.0-rc2+ #8 [ 830.991790] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.29 07/17/2017 [ 830.998998] pstate: 80400005 (Nzcv daif +PAN -UAO) [ 831.003793] pc : __ll_sc_atomic_add+0x4/0x18 [ 831.008055] lr : ___bpf_prog_run+0x1198/0x1588 [ 831.012485] sp : ffff00001ccabc20 [ 831.015786] x29: ffff00001ccabc20 x28: ffff8017d56a0f00 [ 831.021087] x27: 0000000000000001 x26: 0000000000000000 [ 831.026387] x25: 000000c168d9db98 x24: 0000000000000000 [ 831.031686] x23: ffff000008203878 x22: ffff000009488000 [ 831.036986] x21: ffff000008b14e28 x20: ffff00001ccabcb0 [ 831.042286] x19: ffff0000097b5080 x18: 0000000000000a03 [ 831.047585] x17: 0000000000000000 x16: 0000000000000000 [ 831.052885] x15: 0000ffffaeca8000 x14: 0000000000000000 [ 831.058184] x13: 0000000000000000 x12: 0000000000000000 [ 831.063484] x11: 0000000000000001 x10: 0000000000000000 [ 831.068783] x9 : 0000000000000000 x8 : 0000000000000000 [ 831.074083] x7 : 0000000000000000 x6 : 000580d428000000 [ 831.079383] x5 : 0000000000000018 x4 : 0000000000000000 [ 831.084682] x3 : ffff00001ccabcb0 x2 : 0000000000000001 [ 831.089982] x1 : ffff8097d7ed6703 x0 : 0000000000000001 [ 831.095282] Process test_verifier (pid: 2788, stack limit = 0x0000000018370044) [ 831.102577] Call trace: [ 831.105012] __ll_sc_atomic_add+0x4/0x18 [ 831.108923] __bpf_prog_run32+0x4c/0x70 [ 831.112748] bpf_test_run+0x78/0xf8 [ 831.116224] bpf_prog_test_run_xdp+0xb4/0x120 [ 831.120567] SyS_bpf+0x77c/0x1110 [ 831.123873] el0_svc_naked+0x30/0x34 [ 831.127437] Code: 97fffe97 17ffffec 00000000 f9800031 (885f7c31) Reason for this is because memory is required to be aligned. In case of BPF, we always enforce alignment in terms of stack access, but not when accessing map values or packet data when the underlying arch (e.g. arm64) has CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS set. xadd on packet data that is local to us anyway is just wrong, so forbid this case entirely. The only place where xadd makes sense in fact are map values; xadd on stack is wrong as well, but it's been around for much longer. Specifically enforce strict alignment in case of xadd, so that we handle this case generically and avoid such crashes in the first place. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 42 +++++++++++++-------- tools/testing/selftests/bpf/test_verifier.c | 58 +++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5fb69a8..c6eff10 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1356,6 +1356,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_CTX; } +static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = cur_regs(env) + regno; + + return type_is_pkt_pointer(reg->type); +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -1416,10 +1423,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env, } static int check_ptr_alignment(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, - int off, int size) + const struct bpf_reg_state *reg, int off, + int size, bool strict_alignment_once) { - bool strict = env->strict_alignment; + bool strict = env->strict_alignment || strict_alignment_once; const char *pointer_desc = ""; switch (reg->type) { @@ -1576,9 +1583,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, - int bpf_size, enum bpf_access_type t, - int value_regno) +static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, + int off, int bpf_size, enum bpf_access_type t, + int value_regno, bool strict_alignment_once) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; @@ -1590,7 +1597,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return size; /* alignment checks will add in reg->off themselves */ - err = check_ptr_alignment(env, reg, off, size); + err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); if (err) return err; @@ -1735,21 +1742,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins return -EACCES; } - if (is_ctx_reg(env, insn->dst_reg)) { - verbose(env, "BPF_XADD stores into R%d context is not allowed\n", - insn->dst_reg); + if (is_ctx_reg(env, insn->dst_reg) || + is_pkt_reg(env, insn->dst_reg)) { + verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", + insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ? + "context" : "packet"); return -EACCES; } /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, -1); + BPF_SIZE(insn->code), BPF_READ, -1, true); if (err) return err; /* check whether atomic_add can write into the same memory */ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, -1); + BPF_SIZE(insn->code), BPF_WRITE, -1, true); } /* when register 'regno' is passed into function that will read 'access_size' @@ -2388,7 +2397,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn * is inferred from register state. */ for (i = 0; i < meta.access_size; i++) { - err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1); + err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, + BPF_WRITE, -1, false); if (err) return err; } @@ -4632,7 +4642,7 @@ static int do_check(struct bpf_verifier_env *env) */ err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, - insn->dst_reg); + insn->dst_reg, false); if (err) return err; @@ -4684,7 +4694,7 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, - insn->src_reg); + insn->src_reg, false); if (err) return err; @@ -4719,7 +4729,7 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, - -1); + -1, false); if (err) return err; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c73592f..437c0b1 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11163,6 +11163,64 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, + { + "xadd/w check unaligned stack", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned stack access off", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "xadd/w check unaligned map", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = REJECT, + .errstr = "misaligned value access off", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "xadd/w check unaligned pkt", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 99), + BPF_JMP_IMM(BPF_JA, 0, 0, 6), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_XADD stores into R2 packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.1 From 0b2e9904c15963e715d33e5f3f1387f17d19333a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 23 Feb 2018 23:29:32 +0100 Subject: KVM: x86: move LAPIC initialization after VMCS creation The initial reset of the local APIC is performed before the VMCS has been created, but it tries to do a vmwrite: vmwrite error: reg 810 value 4a00 (err 18944) CPU: 54 PID: 38652 Comm: qemu-kvm Tainted: G W I 4.16.0-0.rc2.git0.1.fc28.x86_64 #1 Hardware name: Intel Corporation S2600CW/S2600CW, BIOS SE5C610.86B.01.01.0003.090520141303 09/05/2014 Call Trace: vmx_set_rvi [kvm_intel] vmx_hwapic_irr_update [kvm_intel] kvm_lapic_reset [kvm] kvm_create_lapic [kvm] kvm_arch_vcpu_init [kvm] kvm_vcpu_init [kvm] vmx_create_vcpu [kvm_intel] kvm_vm_ioctl [kvm] Move it later, after the VMCS has been created. Fixes: 4191db26b714 ("KVM: x86: Update APICv on APIC reset") Cc: stable@vger.kernel.org Cc: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 - arch/x86/kvm/x86.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 924ac8c..cc5fe7a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2165,7 +2165,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ - kvm_lapic_reset(vcpu, false); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8a0b54..ca90d95 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7975,6 +7975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_vcpu_mtrr_init(vcpu); vcpu_load(vcpu); kvm_vcpu_reset(vcpu, false); + kvm_lapic_reset(vcpu, false); kvm_mmu_setup(vcpu); vcpu_put(vcpu); return 0; -- cgit v1.1 From 99158246208b82c0700d09a40d719bb56b32c607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 31 Jan 2018 18:12:50 +0100 Subject: KVM: nVMX: preserve SECONDARY_EXEC_DESC without UMIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L1 might want to use SECONDARY_EXEC_DESC, so we must not clear the VMCS bit if UMIP is not being emulated. We must still set the bit when emulating UMIP as the feature can be passed to L2 where L0 will do the emulation and because L2 can change CR4 without a VM exit, we should clear the bit if UMIP is disabled. Fixes: 0367f205a3b7 ("KVM: vmx: add support for emulating UMIP") Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f427723..2d2cf8c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4485,7 +4485,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); hw_cr4 &= ~X86_CR4_UMIP; - } else + } else if (!is_guest_mode(vcpu) || + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); -- cgit v1.1 From 103c763c72dd2df3e8c91f2d7ec88f98ed391111 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 31 Jan 2018 17:30:21 -0800 Subject: KVM/x86: remove WARN_ON() for when vm_munmap() fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On x86, special KVM memslots such as the TSS region have anonymous memory mappings created on behalf of userspace, and these mappings are removed when the VM is destroyed. It is however possible for removing these mappings via vm_munmap() to fail. This can most easily happen if the thread receives SIGKILL while it's waiting to acquire ->mmap_sem. This triggers the 'WARN_ON(r < 0)' in __x86_set_memory_region(). syzkaller was able to hit this, using 'exit()' to send the SIGKILL. Note that while the vm_munmap() failure results in the mapping not being removed immediately, it is not leaked forever but rather will be freed when the process exits. It's not really possible to handle this failure properly, so almost every other caller of vm_munmap() doesn't check the return value. It's a limitation of having the kernel manage these mappings rather than userspace. So just remove the WARN_ON() so that users can't spam the kernel log with this warning. Fixes: f0d648bdf0a5 ("KVM: x86: map/unmap private slots in __x86_set_memory_region") Reported-by: syzbot Signed-off-by: Eric Biggers Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ca90d95..96edda8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8461,10 +8461,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) return r; } - if (!size) { - r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); - WARN_ON(r < 0); - } + if (!size) + vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); return 0; } -- cgit v1.1 From b28676bb8ae4569cced423dc2a88f7cb319d5379 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 13 Feb 2018 15:36:00 +0100 Subject: KVM: mmu: Fix overlap between public and private memslots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: pte_list_remove: ffff9714eb1f8078 0->BUG ------------[ cut here ]------------ kernel BUG at arch/x86/kvm/mmu.c:1157! invalid opcode: 0000 [#1] SMP RIP: 0010:pte_list_remove+0x11b/0x120 [kvm] Call Trace: drop_spte+0x83/0xb0 [kvm] mmu_page_zap_pte+0xcc/0xe0 [kvm] kvm_mmu_prepare_zap_page+0x81/0x4a0 [kvm] kvm_mmu_invalidate_zap_all_pages+0x159/0x220 [kvm] kvm_arch_flush_shadow_all+0xe/0x10 [kvm] kvm_mmu_notifier_release+0x6c/0xa0 [kvm] ? kvm_mmu_notifier_release+0x5/0xa0 [kvm] __mmu_notifier_release+0x79/0x110 ? __mmu_notifier_release+0x5/0x110 exit_mmap+0x15a/0x170 ? do_exit+0x281/0xcb0 mmput+0x66/0x160 do_exit+0x2c9/0xcb0 ? __context_tracking_exit.part.5+0x4a/0x150 do_group_exit+0x50/0xd0 SyS_exit_group+0x14/0x20 do_syscall_64+0x73/0x1f0 entry_SYSCALL64_slow_path+0x25/0x25 The reason is that when creates new memslot, there is no guarantee for new memslot not overlap with private memslots. This can be triggered by the following program: #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include long r[16]; int main() { void *p = valloc(0x4000); r[2] = open("/dev/kvm", 0); r[3] = ioctl(r[2], KVM_CREATE_VM, 0x0ul); uint64_t addr = 0xf000; ioctl(r[3], KVM_SET_IDENTITY_MAP_ADDR, &addr); r[6] = ioctl(r[3], KVM_CREATE_VCPU, 0x0ul); ioctl(r[3], KVM_SET_TSS_ADDR, 0x0ul); ioctl(r[6], KVM_RUN, 0); ioctl(r[6], KVM_RUN, 0); struct kvm_userspace_memory_region mr = { .slot = 0, .flags = KVM_MEM_LOG_DIRTY_PAGES, .guest_phys_addr = 0xf000, .memory_size = 0x4000, .userspace_addr = (uintptr_t) p }; ioctl(r[3], KVM_SET_USER_MEMORY_REGION, &mr); return 0; } This patch fixes the bug by not adding a new memslot even if it overlaps with private memslots. Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: Eric Biggers Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4501e65..65dea3f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -969,8 +969,7 @@ int __kvm_set_memory_region(struct kvm *kvm, /* Check for overlaps */ r = -EEXIST; kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { - if ((slot->id >= KVM_USER_MEM_SLOTS) || - (slot->id == id)) + if (slot->id == id) continue; if (!((base_gfn + npages <= slot->base_gfn) || (base_gfn >= slot->base_gfn + slot->npages))) -- cgit v1.1 From 135a06c3a515bbd17729eb04f4f26316d48363d7 Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Sun, 11 Feb 2018 10:06:30 +0800 Subject: KVM: nVMX: Don't halt vcpu when L1 is injecting events to L2 Although L2 is in halt state, it will be in the active state after VM entry if the VM entry is vectoring according to SDM 26.6.2 Activity State. Halting the vcpu here means the event won't be injected to L2 and this decision isn't reported to L1. Thus L0 drops an event that should be injected to L2. Cc: Liran Alon Reviewed-by: Liran Alon Signed-off-by: Chao Gao Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d2cf8c..67b028d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11197,7 +11197,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (ret) return ret; - if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken + * by event injection, halt vcpu. + */ + if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && + !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; -- cgit v1.1 From 95e057e25892eaa48cad1e2d637b80d0f1a4fac5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 8 Feb 2018 15:32:45 +0800 Subject: KVM: X86: Fix SMRAM accessing even if VM is shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: WARNING: CPU: 6 PID: 2434 at arch/x86/kvm/vmx.c:6660 handle_ept_misconfig+0x54/0x1e0 [kvm_intel] CPU: 6 PID: 2434 Comm: repro_test Not tainted 4.15.0+ #4 RIP: 0010:handle_ept_misconfig+0x54/0x1e0 [kvm_intel] Call Trace: vmx_handle_exit+0xbd/0xe20 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xdaf/0x1d50 [kvm] kvm_vcpu_ioctl+0x3e9/0x720 [kvm] do_vfs_ioctl+0xa4/0x6a0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x25/0x9c The testcase creates a first thread to issue KVM_SMI ioctl, and then creates a second thread to mmap and operate on the same vCPU. This triggers a race condition when running the testcase with multiple threads. Sometimes one thread exits with a triple fault while another thread mmaps and operates on the same vCPU. Because CS=0x3000/IP=0x8000 is not mapped, accessing the SMI handler results in an EPT misconfig. This patch fixes it by returning RET_PF_EMULATE in kvm_handle_bad_page(), which will go on to cause an emulation failure and an exit with KVM_EXIT_INTERNAL_ERROR. Reported-by: syzbot+c1d9517cab094dae65e446c0c5b4de6c40f4dc58@syzkaller.appspotmail.com Cc: Paolo Bonzini Cc: Radim Krčmář Cc: stable@vger.kernel.org Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8eca1d0..6c5a82c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3029,7 +3029,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) return RET_PF_RETRY; } - return -EFAULT; + return RET_PF_EMULATE; } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, -- cgit v1.1 From faa312a543283c717342cd332b5b9247bd305dce Mon Sep 17 00:00:00 2001 From: Marc Hartmayer Date: Tue, 9 Jan 2018 13:27:01 +0100 Subject: tools/kvm_stat: simplify the sortkey function The 'sortkey' function references a value in its enclosing scope (closure). This is not common practice for a sort key function so let's replace it. Additionally, the function 'sorted' has already a parameter for reversing the result therefore the inversion of the values is unneeded. The check for stats[x][1] is also superfluous as it's ensured that this value is initialized with 0. Signed-off-by: Marc Hartmayer Tested-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index a5684d0..d630f5f 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1080,30 +1080,23 @@ class Tui(object): self.screen.move(row, 0) self.screen.clrtobot() stats = self.stats.get(self._display_guests) - - def sortCurAvg(x): - # sort by current events if available - if stats[x][1]: - return (-stats[x][1], -stats[x][0]) - else: - return (0, -stats[x][0]) - - def sortTotal(x): - # sort by totals - return (0, -stats[x][0]) total = 0. for key in stats.keys(): if key.find('(') is -1: total += stats[key][0] if self._sorting == SORT_DEFAULT: - sortkey = sortCurAvg + def sortkey((_k, v)): + # sort by (delta value, overall value) + return (v[1], v[0]) else: - sortkey = sortTotal + def sortkey((_k, v)): + # sort by overall value + return v[0] + tavg = 0 - for key in sorted(stats.keys(), key=sortkey): + for key, values in sorted(stats.items(), key=sortkey, reverse=True): if row >= self.screen.getmaxyx()[0] - 1: break - values = stats[key] if not values[0] and not values[1]: break if values[0] is not None: -- cgit v1.1 From 006f1548ac13d67d21865416a0f4e8062df1a85f Mon Sep 17 00:00:00 2001 From: Marc Hartmayer Date: Tue, 9 Jan 2018 13:27:02 +0100 Subject: tools/kvm_stat: use a namedtuple for storing the values Use a namedtuple for storing the values as it allows to access the fields of a tuple via names. This makes the overall code much easier to read and to understand. Access by index is still possible as before. Signed-off-by: Marc Hartmayer Tested-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index d630f5f..2b7e83a 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -33,7 +33,7 @@ import resource import struct import re import subprocess -from collections import defaultdict +from collections import defaultdict, namedtuple VMX_EXIT_REASONS = { 'EXCEPTION_NMI': 0, @@ -800,6 +800,9 @@ class DebugfsProvider(Provider): self.read(2) +EventStat = namedtuple('EventStat', ['value', 'delta']) + + class Stats(object): """Manages the data providers and the data they provide. @@ -867,10 +870,10 @@ class Stats(object): for provider in self.providers: new = provider.read(by_guest=by_guest) for key in new if by_guest else provider.fields: - oldval = self.values.get(key, (0, 0))[0] + oldval = self.values.get(key, EventStat(0, 0)).value newval = new.get(key, 0) newdelta = newval - oldval - self.values[key] = (newval, newdelta) + self.values[key] = EventStat(newval, newdelta) return self.values def toggle_display_guests(self, to_pid): @@ -1083,28 +1086,28 @@ class Tui(object): total = 0. for key in stats.keys(): if key.find('(') is -1: - total += stats[key][0] + total += stats[key].value if self._sorting == SORT_DEFAULT: def sortkey((_k, v)): # sort by (delta value, overall value) - return (v[1], v[0]) + return (v.delta, v.value) else: def sortkey((_k, v)): # sort by overall value - return v[0] + return v.value tavg = 0 for key, values in sorted(stats.items(), key=sortkey, reverse=True): if row >= self.screen.getmaxyx()[0] - 1: break - if not values[0] and not values[1]: + if not values.value and not values.delta: break - if values[0] is not None: - cur = int(round(values[1] / sleeptime)) if values[1] else '' + if values.value is not None: + cur = int(round(values.delta / sleeptime)) if values.delta else '' if self._display_guests: key = self.get_gname_from_pid(key) self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % - (key, values[0], values[0] * 100 / total, + (key, values.value, values.value * 100 / total, cur)) if cur is not '' and key.find('(') is -1: tavg += cur @@ -1375,7 +1378,7 @@ def batch(stats): s = stats.get() for key in sorted(s.keys()): values = s[key] - print('%-42s%10d%10d' % (key, values[0], values[1])) + print('%-42s%10d%10d' % (key, values.value, values.delta)) except KeyboardInterrupt: pass @@ -1392,7 +1395,7 @@ def log(stats): def statline(): s = stats.get() for k in keys: - print(' %9d' % s[k][1], end=' ') + print(' %9d' % s[k].delta, end=' ') print() line = 0 banner_repeat = 20 -- cgit v1.1 From 0eb578009a1d530a11846d7c4733a5db04730884 Mon Sep 17 00:00:00 2001 From: Marc Hartmayer Date: Tue, 9 Jan 2018 13:27:03 +0100 Subject: tools/kvm_stat: use a more pythonic way to iterate over dictionaries If it's clear that the values of a dictionary will be used then use the '.items()' method. Signed-off-by: Marc Hartmayer Tested-by: Stefan Raspl [Include fix for logging mode by Stefan Raspl] Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 2b7e83a..f0da954 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1084,9 +1084,10 @@ class Tui(object): self.screen.clrtobot() stats = self.stats.get(self._display_guests) total = 0. - for key in stats.keys(): + for key, values in stats.items(): if key.find('(') is -1: - total += stats[key].value + total += values.value + if self._sorting == SORT_DEFAULT: def sortkey((_k, v)): # sort by (delta value, overall value) @@ -1376,8 +1377,7 @@ def batch(stats): s = stats.get() time.sleep(1) s = stats.get() - for key in sorted(s.keys()): - values = s[key] + for key, values in sorted(s.items()): print('%-42s%10d%10d' % (key, values.value, values.delta)) except KeyboardInterrupt: pass @@ -1388,14 +1388,14 @@ def log(stats): keys = sorted(stats.get().keys()) def banner(): - for k in keys: - print(k, end=' ') + for key in keys: + print(key, end=' ') print() def statline(): s = stats.get() - for k in keys: - print(' %9d' % s[k].delta, end=' ') + for key in keys: + print(' %9d' % s[key].delta, end=' ') print() line = 0 banner_repeat = 20 -- cgit v1.1 From 369d5a85bb782ecf63c5bae9686c7e6104eea991 Mon Sep 17 00:00:00 2001 From: Marc Hartmayer Date: Tue, 9 Jan 2018 13:27:04 +0100 Subject: tools/kvm_stat: avoid 'is' for equality checks Use '==' for equality checks and 'is' when comparing identities. An example where '==' and 'is' behave differently: >>> a = 4242 >>> a == 4242 True >>> a is 4242 False Signed-off-by: Marc Hartmayer Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index f0da954..e3f0bec 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1085,7 +1085,7 @@ class Tui(object): stats = self.stats.get(self._display_guests) total = 0. for key, values in stats.items(): - if key.find('(') is -1: + if key.find('(') == -1: total += values.value if self._sorting == SORT_DEFAULT: @@ -1110,7 +1110,7 @@ class Tui(object): self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key, values.value, values.value * 100 / total, cur)) - if cur is not '' and key.find('(') is -1: + if cur != '' and key.find('(') == -1: tavg += cur row += 1 if row == 3: -- cgit v1.1 From 3df33a0f34a3883b6696bff8cc8fcda3c7444a62 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 5 Feb 2018 13:59:57 +0100 Subject: tools/kvm_stat: fix crash when filtering out all non-child trace events When we apply a filter that will only leave child trace events, we receive a ZeroDivisionError when calculating the percentages. In that case, provide percentages based on child events only. To reproduce, run 'kvm_stat -f .*[\(].*'. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index e3f0bec..4e0f282c 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1084,9 +1084,15 @@ class Tui(object): self.screen.clrtobot() stats = self.stats.get(self._display_guests) total = 0. + ctotal = 0. for key, values in stats.items(): if key.find('(') == -1: total += values.value + else: + ctotal += values.value + if total == 0.: + # we don't have any fields, or all non-child events are filtered + total = ctotal if self._sorting == SORT_DEFAULT: def sortkey((_k, v)): -- cgit v1.1 From 1cd8bfb1ed9962be6d80d5020508922aa93653ac Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 5 Feb 2018 13:59:58 +0100 Subject: tools/kvm_stat: print error on invalid regex Entering an invalid regular expression did not produce any indication of an error so far. To reproduce, press 'f' and enter 'foo(' (with an unescaped bracket). Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 4e0f282c..08f8422 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1176,6 +1176,7 @@ class Tui(object): Asks for a valid regex and sets the fields filter accordingly. """ + msg = '' while True: self.screen.erase() self.screen.addstr(0, 0, @@ -1184,6 +1185,7 @@ class Tui(object): self.screen.addstr(2, 0, "Current regex: {0}" .format(self.stats.fields_filter)) + self.screen.addstr(5, 0, msg) self.screen.addstr(3, 0, "New regex: ") curses.echo() regex = self.screen.getstr().decode(ENCODING) @@ -1198,6 +1200,7 @@ class Tui(object): self.refresh_header() return except re.error: + msg = '"' + regex + '": Not a valid regular expression' continue def show_vm_selection_by_pid(self): -- cgit v1.1 From 1fd6a708c8438403dee17eb411cf81ffba13cf43 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 22 Feb 2018 12:16:24 +0100 Subject: tools/kvm_stat: fix debugfs handling Te checks for debugfs assumed that debugfs is always mounted at /sys/kernel/debug - which is likely, but not guaranteed. This is addressed by checking /proc/mounts for the actual location. Furthermore, when debugfs was mounted, but the kvm module not loaded, a misleading error pointing towards debugfs not present was given. To reproduce, (a) run kvm_stat with debugfs mounted at a place different from /sys/kernel/debug (b) run kvm_stat with debugfs mounted but kvm module not loaded Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 08f8422..c5c8e92 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -331,9 +331,6 @@ class perf_event_attr(ctypes.Structure): PERF_TYPE_TRACEPOINT = 2 PERF_FORMAT_GROUP = 1 << 3 -PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' -PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' - class Group(object): """Represents a perf event group.""" @@ -1544,17 +1541,6 @@ Press any other key to refresh statistics immediately. def check_access(options): """Exits if the current user can't access all needed directories.""" - if not os.path.exists('/sys/kernel/debug'): - sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') - sys.exit(1) - - if not os.path.exists(PATH_DEBUGFS_KVM): - sys.stderr.write("Please make sure, that debugfs is mounted and " - "readable by the current user:\n" - "('mount -t debugfs debugfs /sys/kernel/debug')\n" - "Also ensure, that the kvm modules are loaded.\n") - sys.exit(1) - if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or not options.debugfs): sys.stderr.write("Please enable CONFIG_TRACING in your kernel " @@ -1572,7 +1558,33 @@ def check_access(options): return options +def assign_globals(): + global PATH_DEBUGFS_KVM + global PATH_DEBUGFS_TRACING + + debugfs = '' + for line in file('/proc/mounts'): + if line.split(' ')[0] == 'debugfs': + debugfs = line.split(' ')[1] + break + if debugfs == '': + sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in " + "your kernel, mounted and\nreadable by the current " + "user:\n" + "('mount -t debugfs debugfs /sys/kernel/debug')\n") + sys.exit(1) + + PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm') + PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing') + + if not os.path.exists(PATH_DEBUGFS_KVM): + sys.stderr.write("Please make sure that CONFIG_KVM is enabled in " + "your kernel and that the modules are loaded.\n") + sys.exit(1) + + def main(): + assign_globals() options = get_options() options = check_access(options) -- cgit v1.1 From c0e8c21eae616ed8703c1b4b01046a1578ee875c Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 22 Feb 2018 12:16:26 +0100 Subject: tools/kvm_stat: mark private methods as such Helps quite a bit reading the code when it's obvious when a method is intended for internal use only. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 132 ++++++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index c5c8e92..c09b742 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -373,8 +373,8 @@ class Event(object): self.syscall = self.libc.syscall self.name = name self.fd = None - self.setup_event(group, trace_cpu, trace_pid, trace_point, - trace_filter, trace_set) + self._setup_event(group, trace_cpu, trace_pid, trace_point, + trace_filter, trace_set) def __del__(self): """Closes the event's file descriptor. @@ -387,7 +387,7 @@ class Event(object): if self.fd: os.close(self.fd) - def perf_event_open(self, attr, pid, cpu, group_fd, flags): + def _perf_event_open(self, attr, pid, cpu, group_fd, flags): """Wrapper for the sys_perf_evt_open() syscall. Used to set up performance events, returns a file descriptor or -1 @@ -406,7 +406,7 @@ class Event(object): ctypes.c_int(pid), ctypes.c_int(cpu), ctypes.c_int(group_fd), ctypes.c_long(flags)) - def setup_event_attribute(self, trace_set, trace_point): + def _setup_event_attribute(self, trace_set, trace_point): """Returns an initialized ctype perf_event_attr struct.""" id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, @@ -416,8 +416,8 @@ class Event(object): event_attr.config = int(open(id_path).read()) return event_attr - def setup_event(self, group, trace_cpu, trace_pid, trace_point, - trace_filter, trace_set): + def _setup_event(self, group, trace_cpu, trace_pid, trace_point, + trace_filter, trace_set): """Sets up the perf event in Linux. Issues the syscall to register the event in the kernel and @@ -425,7 +425,7 @@ class Event(object): """ - event_attr = self.setup_event_attribute(trace_set, trace_point) + event_attr = self._setup_event_attribute(trace_set, trace_point) # First event will be group leader. group_leader = -1 @@ -434,8 +434,8 @@ class Event(object): if group.events: group_leader = group.events[0].fd - fd = self.perf_event_open(event_attr, trace_pid, - trace_cpu, group_leader, 0) + fd = self._perf_event_open(event_attr, trace_pid, + trace_cpu, group_leader, 0) if fd == -1: err = ctypes.get_errno() raise OSError(err, os.strerror(err), @@ -497,12 +497,12 @@ class TracepointProvider(Provider): """ def __init__(self, pid, fields_filter): self.group_leaders = [] - self.filters = self.get_filters() + self.filters = self._get_filters() self.update_fields(fields_filter) self.pid = pid @staticmethod - def get_filters(): + def _get_filters(): """Returns a dict of trace events, their filter ids and the values that can be filtered. @@ -518,7 +518,7 @@ class TracepointProvider(Provider): filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) return filters - def get_available_fields(self): + def _get_available_fields(self): """Returns a list of available event's of format 'event name(filter name)'. @@ -546,11 +546,11 @@ class TracepointProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" - self.fields = [field for field in self.get_available_fields() + self.fields = [field for field in self._get_available_fields() if self.is_field_wanted(fields_filter, field)] @staticmethod - def get_online_cpus(): + def _get_online_cpus(): """Returns a list of cpu id integers.""" def parse_int_list(list_string): """Returns an int list from a string of comma separated integers and @@ -572,17 +572,17 @@ class TracepointProvider(Provider): cpu_string = cpu_list.readline() return parse_int_list(cpu_string) - def setup_traces(self): + def _setup_traces(self): """Creates all event and group objects needed to be able to retrieve data.""" - fields = self.get_available_fields() + fields = self._get_available_fields() if self._pid > 0: # Fetch list of all threads of the monitored pid, as qemu # starts a thread for each vcpu. path = os.path.join('/proc', str(self._pid), 'task') groupids = self.walkdir(path)[1] else: - groupids = self.get_online_cpus() + groupids = self._get_online_cpus() # The constant is needed as a buffer for python libs, std # streams and other files that the script opens. @@ -660,7 +660,7 @@ class TracepointProvider(Provider): # The garbage collector will get rid of all Event/Group # objects and open files after removing the references. self.group_leaders = [] - self.setup_traces() + self._setup_traces() self.fields = self._fields def read(self, by_guest=0): @@ -689,9 +689,9 @@ class DebugfsProvider(Provider): self.paths = [] self.pid = pid if include_past: - self.restore() + self._restore() - def get_available_fields(self): + def _get_available_fields(self): """"Returns a list of available fields. The fields are all available KVM debugfs files @@ -701,7 +701,7 @@ class DebugfsProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" - self._fields = [field for field in self.get_available_fields() + self._fields = [field for field in self._get_available_fields() if self.is_field_wanted(fields_filter, field)] @property @@ -755,7 +755,7 @@ class DebugfsProvider(Provider): paths.append(dir) for path in paths: for field in self._fields: - value = self.read_field(field, path) + value = self._read_field(field, path) key = path + field if reset == 1: self._baseline[key] = value @@ -776,7 +776,7 @@ class DebugfsProvider(Provider): return results - def read_field(self, field, path): + def _read_field(self, field, path): """Returns the value of a single field from a specific VM.""" try: return int(open(os.path.join(PATH_DEBUGFS_KVM, @@ -791,7 +791,7 @@ class DebugfsProvider(Provider): self._baseline = {} self.read(1) - def restore(self): + def _restore(self): """Reset field counters""" self._baseline = {} self.read(2) @@ -808,13 +808,12 @@ class Stats(object): """ def __init__(self, options): - self.providers = self.get_providers(options) + self.providers = self._get_providers(options) self._pid_filter = options.pid self._fields_filter = options.fields self.values = {} - @staticmethod - def get_providers(options): + def _get_providers(self, options): """Returns a list of data providers depending on the passed options.""" providers = [] @@ -826,7 +825,7 @@ class Stats(object): return providers - def update_provider_filters(self): + def _update_provider_filters(self): """Propagates fields filters to providers.""" # As we reset the counters when updating the fields we can # also clear the cache of old values. @@ -847,7 +846,7 @@ class Stats(object): def fields_filter(self, fields_filter): if fields_filter != self._fields_filter: self._fields_filter = fields_filter - self.update_provider_filters() + self._update_provider_filters() @property def pid_filter(self): @@ -969,7 +968,7 @@ class Tui(object): return res - def print_all_gnames(self, row): + def _print_all_gnames(self, row): """Print a list of all running guests along with their pids.""" self.screen.addstr(row, 2, '%8s %-60s' % ('Pid', 'Guest Name (fuzzy list, might be ' @@ -1032,7 +1031,7 @@ class Tui(object): return name - def update_drilldown(self): + def _update_drilldown(self): """Sets or removes a filter that only allows fields without braces.""" if not self.stats.fields_filter: self.stats.fields_filter = DEFAULT_REGEX @@ -1040,11 +1039,11 @@ class Tui(object): elif self.stats.fields_filter == DEFAULT_REGEX: self.stats.fields_filter = None - def update_pid(self, pid): + def _update_pid(self, pid): """Propagates pid selection to stats object.""" self.stats.pid_filter = pid - def refresh_header(self, pid=None): + def _refresh_header(self, pid=None): """Refreshes the header.""" if pid is None: pid = self.stats.pid_filter @@ -1075,7 +1074,7 @@ class Tui(object): self.screen.addstr(4, 1, 'Collecting data...') self.screen.refresh() - def refresh_body(self, sleeptime): + def _refresh_body(self, sleeptime): row = 3 self.screen.move(row, 0) self.screen.clrtobot() @@ -1124,7 +1123,7 @@ class Tui(object): curses.A_BOLD) self.screen.refresh() - def show_msg(self, text): + def _show_msg(self, text): """Display message centered text and exit on key press""" hint = 'Press any key to continue' curses.cbreak() @@ -1139,7 +1138,7 @@ class Tui(object): curses.A_STANDOUT) self.screen.getkey() - def show_help_interactive(self): + def _show_help_interactive(self): """Display help with list of interactive commands""" msg = (' b toggle events by guests (debugfs only, honors' ' filters)', @@ -1165,9 +1164,9 @@ class Tui(object): self.screen.addstr(row, 0, line) row += 1 self.screen.getkey() - self.refresh_header() + self._refresh_header() - def show_filter_selection(self): + def _show_filter_selection(self): """Draws filter selection mask. Asks for a valid regex and sets the fields filter accordingly. @@ -1189,18 +1188,18 @@ class Tui(object): curses.noecho() if len(regex) == 0: self.stats.fields_filter = DEFAULT_REGEX - self.refresh_header() + self._refresh_header() return try: re.compile(regex) self.stats.fields_filter = regex - self.refresh_header() + self._refresh_header() return except re.error: msg = '"' + regex + '": Not a valid regular expression' continue - def show_vm_selection_by_pid(self): + def _show_vm_selection_by_pid(self): """Draws PID selection mask. Asks for a pid until a valid pid or 0 has been entered. @@ -1216,7 +1215,7 @@ class Tui(object): 'This might limit the shown data to the trace ' 'statistics.') self.screen.addstr(5, 0, msg) - self.print_all_gnames(7) + self._print_all_gnames(7) curses.echo() self.screen.addstr(3, 0, "Pid [0 or pid]: ") @@ -1232,13 +1231,13 @@ class Tui(object): continue else: pid = 0 - self.refresh_header(pid) - self.update_pid(pid) + self._refresh_header(pid) + self._update_pid(pid) break except ValueError: msg = '"' + str(pid) + '": Not a valid pid' - def show_set_update_interval(self): + def _show_set_update_interval(self): """Draws update interval selection mask.""" msg = '' while True: @@ -1268,9 +1267,9 @@ class Tui(object): except ValueError: msg = '"' + str(val) + '": Invalid value' - self.refresh_header() + self._refresh_header() - def show_vm_selection_by_guest_name(self): + def _show_vm_selection_by_guest_name(self): """Draws guest selection mask. Asks for a guest name until a valid guest name or '' is entered. @@ -1286,15 +1285,15 @@ class Tui(object): 'This might limit the shown data to the trace ' 'statistics.') self.screen.addstr(5, 0, msg) - self.print_all_gnames(7) + self._print_all_gnames(7) curses.echo() self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") gname = self.screen.getstr().decode(ENCODING) curses.noecho() if not gname: - self.refresh_header(0) - self.update_pid(0) + self._refresh_header(0) + self._update_pid(0) break else: pids = [] @@ -1311,17 +1310,17 @@ class Tui(object): msg = '"' + gname + '": Multiple matches found, use pid ' \ 'filter instead' continue - self.refresh_header(pids[0]) - self.update_pid(pids[0]) + self._refresh_header(pids[0]) + self._update_pid(pids[0]) break def show_stats(self): """Refreshes the screen and processes user input.""" sleeptime = self._delay_initial - self.refresh_header() + self._refresh_header() start = 0.0 # result based on init value never appears on screen while True: - self.refresh_body(time.time() - start) + self._refresh_body(time.time() - start) curses.halfdelay(int(sleeptime * 10)) start = time.time() sleeptime = self._delay_regular @@ -1330,32 +1329,33 @@ class Tui(object): if char == 'b': self._display_guests = not self._display_guests if self.stats.toggle_display_guests(self._display_guests): - self.show_msg(['Command not available with tracepoints' - ' enabled', 'Restart with debugfs only ' - '(see option \'-d\') and try again!']) + self._show_msg(['Command not available with ' + 'tracepoints enabled', 'Restart with ' + 'debugfs only (see option \'-d\') and ' + 'try again!']) self._display_guests = not self._display_guests - self.refresh_header() + self._refresh_header() if char == 'c': self.stats.fields_filter = DEFAULT_REGEX - self.refresh_header(0) - self.update_pid(0) + self._refresh_header(0) + self._update_pid(0) if char == 'f': curses.curs_set(1) - self.show_filter_selection() + self._show_filter_selection() curses.curs_set(0) sleeptime = self._delay_initial if char == 'g': curses.curs_set(1) - self.show_vm_selection_by_guest_name() + self._show_vm_selection_by_guest_name() curses.curs_set(0) sleeptime = self._delay_initial if char == 'h': - self.show_help_interactive() + self._show_help_interactive() if char == 'o': self._sorting = not self._sorting if char == 'p': curses.curs_set(1) - self.show_vm_selection_by_pid() + self._show_vm_selection_by_pid() curses.curs_set(0) sleeptime = self._delay_initial if char == 'q': @@ -1364,11 +1364,11 @@ class Tui(object): self.stats.reset() if char == 's': curses.curs_set(1) - self.show_set_update_interval() + self._show_set_update_interval() curses.curs_set(0) sleeptime = self._delay_initial if char == 'x': - self.update_drilldown() + self._update_drilldown() # prevents display of current values on next refresh self.stats.get(self._display_guests) except KeyboardInterrupt: -- cgit v1.1 From 516f1190a1e0cce12128a6446e6438745c8de62a Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 22 Feb 2018 12:16:27 +0100 Subject: tools/kvm_stat: eliminate extra guest/pid selection dialog We can do with a single dialog that takes both, pids and guest names. Note that we keep both interactive commands, 'p' and 'g' for now, to avoid confusion among users used to a specific key. While at it, we improve on some minor glitches regarding curses usage, e.g. cursor still visible when not supposed to be. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 110 ++++++++++++++-------------------------- tools/kvm/kvm_stat/kvm_stat.txt | 4 +- 2 files changed, 39 insertions(+), 75 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index c09b742..0d57767 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1041,6 +1041,8 @@ class Tui(object): def _update_pid(self, pid): """Propagates pid selection to stats object.""" + self.screen.addstr(4, 1, 'Updating pid filter...') + self.screen.refresh() self.stats.pid_filter = pid def _refresh_header(self, pid=None): @@ -1144,10 +1146,10 @@ class Tui(object): ' filters)', ' c clear filter', ' f filter by regular expression', - ' g filter by guest name', + ' g filter by guest name/PID', ' h display interactive commands reference', ' o toggle sorting order (Total vs CurAvg/s)', - ' p filter by PID', + ' p filter by guest name/PID', ' q quit', ' r reset stats', ' s set update interval', @@ -1199,44 +1201,6 @@ class Tui(object): msg = '"' + regex + '": Not a valid regular expression' continue - def _show_vm_selection_by_pid(self): - """Draws PID selection mask. - - Asks for a pid until a valid pid or 0 has been entered. - - """ - msg = '' - while True: - self.screen.erase() - self.screen.addstr(0, 0, - 'Show statistics for specific pid.', - curses.A_BOLD) - self.screen.addstr(1, 0, - 'This might limit the shown data to the trace ' - 'statistics.') - self.screen.addstr(5, 0, msg) - self._print_all_gnames(7) - - curses.echo() - self.screen.addstr(3, 0, "Pid [0 or pid]: ") - pid = self.screen.getstr().decode(ENCODING) - curses.noecho() - - try: - if len(pid) > 0: - pid = int(pid) - if pid != 0 and not os.path.isdir(os.path.join('/proc/', - str(pid))): - msg = '"' + str(pid) + '": Not a running process' - continue - else: - pid = 0 - self._refresh_header(pid) - self._update_pid(pid) - break - except ValueError: - msg = '"' + str(pid) + '": Not a valid pid' - def _show_set_update_interval(self): """Draws update interval selection mask.""" msg = '' @@ -1269,17 +1233,17 @@ class Tui(object): msg = '"' + str(val) + '": Invalid value' self._refresh_header() - def _show_vm_selection_by_guest_name(self): + def _show_vm_selection_by_guest(self): """Draws guest selection mask. - Asks for a guest name until a valid guest name or '' is entered. + Asks for a guest name or pid until a valid guest name or '' is entered. """ msg = '' while True: self.screen.erase() self.screen.addstr(0, 0, - 'Show statistics for specific guest.', + 'Show statistics for specific guest or pid.', curses.A_BOLD) self.screen.addstr(1, 0, 'This might limit the shown data to the trace ' @@ -1287,32 +1251,39 @@ class Tui(object): self.screen.addstr(5, 0, msg) self._print_all_gnames(7) curses.echo() - self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") - gname = self.screen.getstr().decode(ENCODING) + curses.curs_set(1) + self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ") + guest = self.screen.getstr().decode(ENCODING) curses.noecho() - if not gname: - self._refresh_header(0) - self._update_pid(0) + pid = 0 + if not guest or guest == '0': break - else: - pids = [] - try: - pids = self.get_pid_from_gname(gname) - except: - msg = '"' + gname + '": Internal error while searching, ' \ - 'use pid filter instead' - continue - if len(pids) == 0: - msg = '"' + gname + '": Not an active guest' + if guest.isdigit(): + if not os.path.isdir(os.path.join('/proc/', guest)): + msg = '"' + guest + '": Not a running process' continue - if len(pids) > 1: - msg = '"' + gname + '": Multiple matches found, use pid ' \ - 'filter instead' - continue - self._refresh_header(pids[0]) - self._update_pid(pids[0]) + pid = int(guest) break + pids = [] + try: + pids = self.get_pid_from_gname(guest) + except: + msg = '"' + guest + '": Internal error while searching, ' \ + 'use pid filter instead' + continue + if len(pids) == 0: + msg = '"' + guest + '": Not an active guest' + continue + if len(pids) > 1: + msg = '"' + guest + '": Multiple matches found, use pid ' \ + 'filter instead' + continue + pid = pids[0] + break + curses.curs_set(0) + self._refresh_header(pid) + self._update_pid(pid) def show_stats(self): """Refreshes the screen and processes user input.""" @@ -1344,20 +1315,13 @@ class Tui(object): self._show_filter_selection() curses.curs_set(0) sleeptime = self._delay_initial - if char == 'g': - curses.curs_set(1) - self._show_vm_selection_by_guest_name() - curses.curs_set(0) + if char == 'g' or char == 'p': + self._show_vm_selection_by_guest() sleeptime = self._delay_initial if char == 'h': self._show_help_interactive() if char == 'o': self._sorting = not self._sorting - if char == 'p': - curses.curs_set(1) - self._show_vm_selection_by_pid() - curses.curs_set(0) - sleeptime = self._delay_initial if char == 'q': break if char == 'r': diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index b5b3810..0811d86 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -35,13 +35,13 @@ INTERACTIVE COMMANDS *f*:: filter by regular expression -*g*:: filter by guest name +*g*:: filter by guest name/PID *h*:: display interactive commands reference *o*:: toggle sorting order (Total vs CurAvg/s) -*p*:: filter by PID +*p*:: filter by guest name/PID *q*:: quit -- cgit v1.1 From 18e8f4100ef14f924514fbd91eb67bd5fa5396b7 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 22 Feb 2018 12:16:28 +0100 Subject: tools/kvm_stat: separate drilldown and fields filtering Drilldown (i.e. toggle display of child trace events) was implemented by overriding the fields filter. This resulted in inconsistencies: E.g. when drilldown was not active, adding a filter that also matches child trace events would not only filter fields according to the filter, but also add in the child trace events matching the filter. E.g. on x86, setting 'kvm_userspace_exit' as the fields filter after startup would result in display of kvm_userspace_exit(DCR), although that wasn't previously present - not exactly what one would expect from a filter. This patch addresses the issue by keeping drilldown and fields filter separate. While at it, we also fix a PEP8 issue by adding a blank line at one place (since we're in the area...). We implement this by adding a framework that also allows to define a taxonomy among the debugfs events to identify child trace events. I.e. drilldown using 'x' can now also work with debugfs. A respective parent- child relationship is only known for S390 at the moment, but could be added adjusting other platforms' ARCH.dbg_is_child() methods accordingly. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 143 +++++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 43 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 0d57767..6b6630e 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -228,6 +228,7 @@ IOCTL_NUMBERS = { } ENCODING = locale.getpreferredencoding(False) +TRACE_FILTER = re.compile(r'^[^\(]*$') class Arch(object): @@ -260,6 +261,11 @@ class Arch(object): return ArchX86(SVM_EXIT_REASONS) return + def tracepoint_is_child(self, field): + if (TRACE_FILTER.match(field)): + return None + return field.split('(', 1)[0] + class ArchX86(Arch): def __init__(self, exit_reasons): @@ -267,6 +273,10 @@ class ArchX86(Arch): self.ioctl_numbers = IOCTL_NUMBERS self.exit_reasons = exit_reasons + def debugfs_is_child(self, field): + """ Returns name of parent if 'field' is a child, None otherwise """ + return None + class ArchPPC(Arch): def __init__(self): @@ -282,6 +292,10 @@ class ArchPPC(Arch): self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 self.exit_reasons = {} + def debugfs_is_child(self, field): + """ Returns name of parent if 'field' is a child, None otherwise """ + return None + class ArchA64(Arch): def __init__(self): @@ -289,6 +303,10 @@ class ArchA64(Arch): self.ioctl_numbers = IOCTL_NUMBERS self.exit_reasons = AARCH64_EXIT_REASONS + def debugfs_is_child(self, field): + """ Returns name of parent if 'field' is a child, None otherwise """ + return None + class ArchS390(Arch): def __init__(self): @@ -296,6 +314,12 @@ class ArchS390(Arch): self.ioctl_numbers = IOCTL_NUMBERS self.exit_reasons = None + def debugfs_is_child(self, field): + """ Returns name of parent if 'field' is a child, None otherwise """ + if field.startswith('instruction_'): + return 'exit_instruction' + + ARCH = Arch.get_arch() @@ -472,6 +496,10 @@ class Event(object): class Provider(object): """Encapsulates functionalities used by all providers.""" + def __init__(self, pid): + self.child_events = False + self.pid = pid + @staticmethod def is_field_wanted(fields_filter, field): """Indicate whether field is valid according to fields_filter.""" @@ -499,7 +527,7 @@ class TracepointProvider(Provider): self.group_leaders = [] self.filters = self._get_filters() self.update_fields(fields_filter) - self.pid = pid + super(TracepointProvider, self).__init__(pid) @staticmethod def _get_filters(): @@ -519,7 +547,7 @@ class TracepointProvider(Provider): return filters def _get_available_fields(self): - """Returns a list of available event's of format 'event name(filter + """Returns a list of available events of format 'event name(filter name)'. All available events have directories under @@ -547,7 +575,8 @@ class TracepointProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" self.fields = [field for field in self._get_available_fields() - if self.is_field_wanted(fields_filter, field)] + if self.is_field_wanted(fields_filter, field) or + ARCH.tracepoint_is_child(field)] @staticmethod def _get_online_cpus(): @@ -668,8 +697,12 @@ class TracepointProvider(Provider): ret = defaultdict(int) for group in self.group_leaders: for name, val in group.read().items(): - if name in self._fields: - ret[name] += val + if name not in self._fields: + continue + parent = ARCH.tracepoint_is_child(name) + if parent: + name += ' ' + parent + ret[name] += val return ret def reset(self): @@ -687,7 +720,7 @@ class DebugfsProvider(Provider): self._baseline = {} self.do_read = True self.paths = [] - self.pid = pid + super(DebugfsProvider, self).__init__(pid) if include_past: self._restore() @@ -702,7 +735,8 @@ class DebugfsProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" self._fields = [field for field in self._get_available_fields() - if self.is_field_wanted(fields_filter, field)] + if self.is_field_wanted(fields_filter, field) or + ARCH.debugfs_is_child(field)] @property def fields(self): @@ -763,14 +797,15 @@ class DebugfsProvider(Provider): self._baseline[key] = 0 if self._baseline.get(key, -1) == -1: self._baseline[key] = value - increment = (results.get(field, 0) + value - - self._baseline.get(key, 0)) - if by_guest: - pid = key.split('-')[0] - if pid in results: - results[pid] += increment - else: - results[pid] = increment + parent = ARCH.debugfs_is_child(field) + if parent: + field = field + ' ' + parent + else: + if by_guest: + field = key.split('-')[0] # set 'field' to 'pid' + increment = value - self._baseline.get(key, 0) + if field in results: + results[field] += increment else: results[field] = increment @@ -812,6 +847,7 @@ class Stats(object): self._pid_filter = options.pid self._fields_filter = options.fields self.values = {} + self._child_events = False def _get_providers(self, options): """Returns a list of data providers depending on the passed options.""" @@ -860,12 +896,29 @@ class Stats(object): for provider in self.providers: provider.pid = self._pid_filter + @property + def child_events(self): + return self._child_events + + @child_events.setter + def child_events(self, val): + self._child_events = val + for provider in self.providers: + provider.child_events = val + def get(self, by_guest=0): """Returns a dict with field -> (value, delta to last value) of all - provider data.""" + provider data. + Key formats: + * plain: 'key' is event name + * child-parent: 'key' is in format ' ' + * pid: 'key' is the pid of the guest, and the record contains the + aggregated event data + These formats are generated by the providers, and handled in class TUI. + """ for provider in self.providers: new = provider.read(by_guest=by_guest) - for key in new if by_guest else provider.fields: + for key in new: oldval = self.values.get(key, EventStat(0, 0)).value newval = new.get(key, 0) newdelta = newval - oldval @@ -898,10 +951,10 @@ class Stats(object): self.get(to_pid) return 0 + DELAY_DEFAULT = 3.0 MAX_GUEST_NAME_LEN = 48 MAX_REGEX_LEN = 44 -DEFAULT_REGEX = r'^[^\(]*$' SORT_DEFAULT = 0 @@ -1031,14 +1084,6 @@ class Tui(object): return name - def _update_drilldown(self): - """Sets or removes a filter that only allows fields without braces.""" - if not self.stats.fields_filter: - self.stats.fields_filter = DEFAULT_REGEX - - elif self.stats.fields_filter == DEFAULT_REGEX: - self.stats.fields_filter = None - def _update_pid(self, pid): """Propagates pid selection to stats object.""" self.screen.addstr(4, 1, 'Updating pid filter...') @@ -1060,8 +1105,7 @@ class Tui(object): .format(pid, gname), curses.A_BOLD) else: self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) - if self.stats.fields_filter and self.stats.fields_filter \ - != DEFAULT_REGEX: + if self.stats.fields_filter: regex = self.stats.fields_filter if len(regex) > MAX_REGEX_LEN: regex = regex[:MAX_REGEX_LEN] + '...' @@ -1077,6 +1121,9 @@ class Tui(object): self.screen.refresh() def _refresh_body(self, sleeptime): + def is_child_field(field): + return field.find('(') != -1 + row = 3 self.screen.move(row, 0) self.screen.clrtobot() @@ -1084,7 +1131,11 @@ class Tui(object): total = 0. ctotal = 0. for key, values in stats.items(): - if key.find('(') == -1: + if self._display_guests: + if self.get_gname_from_pid(key): + total += values.value + continue + if not key.find(' ') != -1: total += values.value else: ctotal += values.value @@ -1101,19 +1152,26 @@ class Tui(object): # sort by overall value return v.value + sorted_items = sorted(stats.items(), key=sortkey, reverse=True) + + # print events tavg = 0 - for key, values in sorted(stats.items(), key=sortkey, reverse=True): + for key, values in sorted_items: if row >= self.screen.getmaxyx()[0] - 1: break - if not values.value and not values.delta: - break + if values == (0, 0): + continue + if not self.stats.child_events and key.find(' ') != -1: + continue if values.value is not None: cur = int(round(values.delta / sleeptime)) if values.delta else '' if self._display_guests: key = self.get_gname_from_pid(key) - self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % - (key, values.value, values.value * 100 / total, - cur)) + if not key: + continue + self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key + .split(' ')[0], values.value, + values.value * 100 / total, cur)) if cur != '' and key.find('(') == -1: tavg += cur row += 1 @@ -1189,7 +1247,7 @@ class Tui(object): regex = self.screen.getstr().decode(ENCODING) curses.noecho() if len(regex) == 0: - self.stats.fields_filter = DEFAULT_REGEX + self.stats.fields_filter = '' self._refresh_header() return try: @@ -1307,7 +1365,7 @@ class Tui(object): self._display_guests = not self._display_guests self._refresh_header() if char == 'c': - self.stats.fields_filter = DEFAULT_REGEX + self.stats.fields_filter = '' self._refresh_header(0) self._update_pid(0) if char == 'f': @@ -1332,9 +1390,7 @@ class Tui(object): curses.curs_set(0) sleeptime = self._delay_initial if char == 'x': - self._update_drilldown() - # prevents display of current values on next refresh - self.stats.get(self._display_guests) + self.stats.child_events = not self.stats.child_events except KeyboardInterrupt: break except curses.error: @@ -1348,7 +1404,8 @@ def batch(stats): time.sleep(1) s = stats.get() for key, values in sorted(s.items()): - print('%-42s%10d%10d' % (key, values.value, values.delta)) + print('%-42s%10d%10d' % (key.split(' ')[0], values.value, + values.delta)) except KeyboardInterrupt: pass @@ -1359,7 +1416,7 @@ def log(stats): def banner(): for key in keys: - print(key, end=' ') + print(key.split(' ')[0], end=' ') print() def statline(): @@ -1470,7 +1527,7 @@ Press any other key to refresh statistics immediately. ) optparser.add_option('-f', '--fields', action='store', - default=DEFAULT_REGEX, + default='', dest='fields', help='''fields to display (regex) "-f help" for a list of available events''', -- cgit v1.1 From df72ecfc790fa01de1c41f836ff51d12f9c40318 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 22 Feb 2018 12:16:29 +0100 Subject: tools/kvm_stat: group child events indented after parent We keep the current logic that sorts all events (parent and child), but re-shuffle the events afterwards, grouping the children after the respective parent. Note that the percentage column for child events gives the percentage of the parent's total. Since we rework the logic anyway, we modify the total average calculation to use the raw numbers instead of the (rounded) averages. Note that this can result in differing numbers (between total average and the sum of the individual averages) due to rounding errors. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 89 ++++++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 30 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 6b6630e..862c997 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1124,6 +1124,45 @@ class Tui(object): def is_child_field(field): return field.find('(') != -1 + def insert_child(sorted_items, child, values, parent): + num = len(sorted_items) + for i in range(0, num): + # only add child if parent is present + if parent.startswith(sorted_items[i][0]): + sorted_items.insert(i + 1, (' ' + child, values)) + + def get_sorted_events(self, stats): + """ separate parent and child events """ + if self._sorting == SORT_DEFAULT: + def sortkey((_k, v)): + # sort by (delta value, overall value) + return (v.delta, v.value) + else: + def sortkey((_k, v)): + # sort by overall value + return v.value + + childs = [] + sorted_items = [] + # we can't rule out child events to appear prior to parents even + # when sorted - separate out all children first, and add in later + for key, values in sorted(stats.items(), key=sortkey, + reverse=True): + if values == (0, 0): + continue + if key.find(' ') != -1: + if not self.stats.child_events: + continue + childs.insert(0, (key, values)) + else: + sorted_items.append((key, values)) + if self.stats.child_events: + for key, values in childs: + (child, parent) = key.split(' ') + insert_child(sorted_items, child, values, parent) + + return sorted_items + row = 3 self.screen.move(row, 0) self.screen.clrtobot() @@ -1143,44 +1182,34 @@ class Tui(object): # we don't have any fields, or all non-child events are filtered total = ctotal - if self._sorting == SORT_DEFAULT: - def sortkey((_k, v)): - # sort by (delta value, overall value) - return (v.delta, v.value) - else: - def sortkey((_k, v)): - # sort by overall value - return v.value - - sorted_items = sorted(stats.items(), key=sortkey, reverse=True) - # print events tavg = 0 - for key, values in sorted_items: - if row >= self.screen.getmaxyx()[0] - 1: + tcur = 0 + for key, values in get_sorted_events(self, stats): + if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0): break - if values == (0, 0): - continue - if not self.stats.child_events and key.find(' ') != -1: - continue - if values.value is not None: - cur = int(round(values.delta / sleeptime)) if values.delta else '' - if self._display_guests: - key = self.get_gname_from_pid(key) - if not key: - continue - self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key - .split(' ')[0], values.value, - values.value * 100 / total, cur)) - if cur != '' and key.find('(') == -1: - tavg += cur + if self._display_guests: + key = self.get_gname_from_pid(key) + if not key: + continue + cur = int(round(values.delta / sleeptime)) if values.delta else '' + if key[0] != ' ': + if values.delta: + tcur += values.delta + ptotal = values.value + ltotal = total + else: + ltotal = ptotal + self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key, + values.value, + values.value * 100 / float(ltotal), cur)) row += 1 if row == 3: self.screen.addstr(4, 1, 'No matching events reported yet') else: + tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' self.screen.addstr(row, 1, '%-40s %10d %8s' % - ('Total', total, tavg if tavg else ''), - curses.A_BOLD) + ('Total', total, tavg), curses.A_BOLD) self.screen.refresh() def _show_msg(self, text): -- cgit v1.1 From 6789af030a462708f937137e05eb12ea009fb348 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 22 Feb 2018 12:16:30 +0100 Subject: tools/kvm_stat: print 'Total' line for multiple events only The 'Total' line looks a bit weird when we have a single event only. This can happen e.g. due to filters. Therefore suppress when there's only a single event in the output. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 862c997..5898c22 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1206,7 +1206,7 @@ class Tui(object): row += 1 if row == 3: self.screen.addstr(4, 1, 'No matching events reported yet') - else: + if row > 4: tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' self.screen.addstr(row, 1, '%-40s %10d %8s' % ('Total', total, tavg), curses.A_BOLD) -- cgit v1.1 From 076467490b8176eb96eddc548a14d4135c7b5852 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 22 Feb 2018 13:05:41 +0100 Subject: kvm: fix warning for CONFIG_HAVE_KVM_EVENTFD builds Move the kvm_arch_irq_routing_update() prototype outside of ifdef CONFIG_HAVE_KVM_EVENTFD guards to fix the following sparse warning: arch/s390/kvm/../../../virt/kvm/irqchip.c:171:28: warning: symbol 'kvm_arch_irq_routing_update' was not declared. Should it be static? Signed-off-by: Sebastian Ott Acked-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ac0062b..84b9c50 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1105,7 +1105,6 @@ static inline void kvm_irq_routing_update(struct kvm *kvm) { } #endif -void kvm_arch_irq_routing_update(struct kvm *kvm); static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { @@ -1114,6 +1113,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ +void kvm_arch_irq_routing_update(struct kvm *kvm); + static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* -- cgit v1.1 From f75e4924f0152be747bf04c9d16bb23fd8baf5f9 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 22 Feb 2018 13:04:39 +0100 Subject: kvm: fix warning for non-x86 builds Fix the following sparse warning by moving the prototype of kvm_arch_mmu_notifier_invalidate_range() to linux/kvm_host.h . CHECK arch/s390/kvm/../../../virt/kvm/kvm_main.c arch/s390/kvm/../../../virt/kvm/kvm_main.c:138:13: warning: symbol 'kvm_arch_mmu_notifier_invalidate_range' was not declared. Should it be static? Signed-off-by: Sebastian Ott Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 --- include/linux/kvm_host.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd6f57a..0a9e330 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1464,7 +1464,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); - #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84b9c50..6930c63 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1273,4 +1273,7 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + #endif -- cgit v1.1 From fe2a3027e74e40a3ece3a4c1e4e51403090a907a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 1 Feb 2018 22:16:21 +0100 Subject: KVM: x86: fix backward migration with async_PF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guests on new hypersiors might set KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT bit when enabling async_PF, but this bit is reserved on old hypervisors, which results in a failure upon migration. To avoid breaking different cases, we are checking for CPUID feature bit before enabling the feature and nothing else. Fixes: 52a5c155cf79 ("KVM: async_pf: Let guest support delivery of async_pf from guest mode") Cc: Reviewed-by: Wanpeng Li Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/cpuid.txt | 4 ++++ Documentation/virtual/kvm/msr.txt | 3 ++- arch/x86/include/uapi/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 8 ++++---- arch/x86/kvm/cpuid.c | 3 ++- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index dcab6dc..87a7506 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -58,6 +58,10 @@ KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit || || before enabling paravirtualized || || tlb flush. ------------------------------------------------------------------------------ +KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit + || || can be enabled by setting bit 2 + || || when writing to msr 0x4b564d02 +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 1ebecc1..f3f0d57 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 when asynchronous page faults are enabled on the vcpu 0 when disabled. Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults - are delivered to L1 as #PF vmexits. + are delivered to L1 as #PF vmexits. Bit 2 can be set only if + KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID. First 4 byte of 64 byte memory location will be written to by the hypervisor at the time of asynchronous page fault (APF) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 7a2ade4..6cfa9c8 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -26,6 +26,7 @@ #define KVM_FEATURE_PV_EOI 6 #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4e37d1a..971babe 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) #endif pa |= KVM_ASYNC_PF_ENABLED; - /* Async page fault support for L1 hypervisor is optional */ - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a0c5a69..b671fc2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | - (1 << KVM_FEATURE_PV_TLB_FLUSH); + (1 << KVM_FEATURE_PV_TLB_FLUSH) | + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); -- cgit v1.1 From afdc3f588850a6fbc996205ee2d472eb4426afb3 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 17 Jan 2018 11:46:54 +0800 Subject: x86/kvm: Make parse_no_xxx __init for kvm The early_param() is only called during kernel initialization, So Linux marks the functions of it with __init macro to save memory. But it forgot to mark the parse_no_kvmapf/stealacc/kvmclock_vsyscall, So, Make them __init as well. Cc: Paolo Bonzini Cc: rkrcmar@redhat.com Cc: kvm@vger.kernel.org Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Juergen Gross Cc: x86@kernel.org Signed-off-by: Dou Liyang Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 971babe..ee7d5c9 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -49,7 +49,7 @@ static int kvmapf = 1; -static int parse_no_kvmapf(char *arg) +static int __init parse_no_kvmapf(char *arg) { kvmapf = 0; return 0; @@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg) early_param("no-kvmapf", parse_no_kvmapf); static int steal_acc = 1; -static int parse_no_stealacc(char *arg) +static int __init parse_no_stealacc(char *arg) { steal_acc = 0; return 0; @@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); static int kvmclock_vsyscall = 1; -static int parse_no_kvmclock_vsyscall(char *arg) +static int __init parse_no_kvmclock_vsyscall(char *arg) { kvmclock_vsyscall = 0; return 0; -- cgit v1.1 From 4f2f61fc507176edd65826fbedc8987dea29b9d5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 4 Feb 2018 22:57:58 -0800 Subject: KVM: X86: Avoid traversing all the cpus for pv tlb flush when steal time is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid traversing all the cpus for pv tlb flush when steal time is disabled since pv tlb flush depends on the field in steal time for shared data. Cc: Paolo Bonzini Cc: Radim KrÄmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index ee7d5c9..bc1a272 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -545,7 +545,8 @@ static void __init kvm_guest_init(void) pv_time_ops.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void) { int cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); -- cgit v1.1 From e5699f56bc91a286f006b0728085e0b4e8f5749b Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 15 Jan 2018 07:32:02 -0600 Subject: crypto: ccp: Fix sparse, use plain integer as NULL pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix sparse warning: Using plain integer as NULL pointer. Replaces assignment of 0 to pointer with NULL assignment. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Borislav Petkov Cc: Herbert Xu Cc: Gary Hook Cc: Tom Lendacky Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- drivers/crypto/ccp/psp-dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index fcfa5b1..b3afb6c 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -211,7 +211,7 @@ static int __sev_platform_shutdown_locked(int *error) { int ret; - ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, 0, error); + ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); if (ret) return ret; @@ -271,7 +271,7 @@ static int sev_ioctl_do_reset(struct sev_issue_cmd *argp) return rc; } - return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, 0, &argp->error); + return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error); } static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) @@ -299,7 +299,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) return rc; } - return __sev_do_cmd_locked(cmd, 0, &argp->error); + return __sev_do_cmd_locked(cmd, NULL, &argp->error); } static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) @@ -624,7 +624,7 @@ EXPORT_SYMBOL_GPL(sev_guest_decommission); int sev_guest_df_flush(int *error) { - return sev_do_cmd(SEV_CMD_DF_FLUSH, 0, error); + return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error); } EXPORT_SYMBOL_GPL(sev_guest_df_flush); -- cgit v1.1 From 45d0be876308bf2f858559e84455219eadd9ddc7 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 15 Jan 2018 07:32:04 -0600 Subject: include: psp-sev: Capitalize invalid length enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1d57b17c60ff ("crypto: ccp: Define SEV userspace ioctl and command id") added the invalid length enum but we missed capitalizing it. Fixes: 1d57b17c60ff (crypto: ccp: Define SEV userspace ioctl ...) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Borislav Petkov Cc: Tom Lendacky CC: Gary R Hook Cc: linux-kernel@vger.kernel.org Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- include/uapi/linux/psp-sev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 3d77fe9..9008f31 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -42,7 +42,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, - SEV_RET_INVALID_len, + SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, SEV_RET_POLICY_FAILURE, -- cgit v1.1 From 3e233385ef4a217a2812115ed84d4be36eb16817 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 23 Feb 2018 12:36:50 -0600 Subject: KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the access_ok() to validate the input before issuing the SEV command does not buy us anything in this case. If userland is giving us a garbage pointer then copy_to_user() will catch it when we try to return the measurement. Suggested-by: Al Viro Fixes: 0d0736f76347 (KVM: SVM: Add support for KVM_SEV_LAUNCH_MEASURE ...) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Borislav Petkov Cc: Tom Lendacky Cc: linux-kernel@vger.kernel.org Cc: Joerg Roedel Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a..ca69d53 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6236,16 +6236,18 @@ e_free: static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { + void __user *measure = (void __user *)(uintptr_t)argp->data; struct kvm_sev_info *sev = &kvm->arch.sev_info; struct sev_data_launch_measure *data; struct kvm_sev_launch_measure params; + void __user *p = NULL; void *blob = NULL; int ret; if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, measure, sizeof(params))) return -EFAULT; data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -6256,17 +6258,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - if (params.uaddr) { + p = (void __user *)(uintptr_t)params.uaddr; + if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) { ret = -EINVAL; goto e_free; } - if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { - ret = -EFAULT; - goto e_free; - } - ret = -ENOMEM; blob = kmalloc(params.len, GFP_KERNEL); if (!blob) @@ -6290,13 +6288,13 @@ cmd: goto e_free_blob; if (blob) { - if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) + if (copy_to_user(p, blob, params.len)) ret = -EFAULT; } done: params.len = data->len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + if (copy_to_user(measure, ¶ms, sizeof(params))) ret = -EFAULT; e_free_blob: kfree(blob); -- cgit v1.1 From 7607b7174405aec7441ff6c970833c463114040a Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 19 Feb 2018 10:14:44 -0600 Subject: KVM: SVM: install RSM intercept MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RSM instruction is used by the SMM handler to return from SMM mode. Currently, rsm causes a #UD - which results in instruction fetch, decode, and emulate. By installing the RSM intercept we can avoid the instruction fetch since we know that #VMEXIT was due to rsm. The patch is required for the SEV guest, because in case of SEV guest memory is encrypted with guest-specific key and hypervisor will not able to fetch the instruction bytes from the guest memory. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Joerg Roedel Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ca69d53..4aeb665 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -300,6 +300,8 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static u8 rsm_ins_bytes[] = "\x0f\xaa"; + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1383,6 +1385,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RSM); if (!kvm_mwait_in_guest()) { set_intercept(svm, INTERCEPT_MONITOR); @@ -3699,6 +3702,12 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +static int rsm_interception(struct vcpu_svm *svm) +{ + return x86_emulate_instruction(&svm->vcpu, 0, 0, + rsm_ins_bytes, 2) == EMULATE_DONE; +} + static int rdpmc_interception(struct vcpu_svm *svm) { int err; @@ -4541,7 +4550,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = npf_interception, - [SVM_EXIT_RSM] = emulate_on_interception, + [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, }; -- cgit v1.1 From 9c5e0afaf15788bcbd1c3469da701ac3da826886 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 19 Feb 2018 10:13:25 -0600 Subject: KVM: SVM: Fix SEV LAUNCH_SECRET command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SEV LAUNCH_SECRET command fails with error code 'invalid param' because we missed filling the guest and header system physical address while issuing the command. Fixes: 9f5b5b950aa9 (KVM: SVM: Add support for SEV LAUNCH_SECRET command) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Borislav Petkov Cc: Tom Lendacky Cc: linux-kernel@vger.kernel.org Cc: Joerg Roedel Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4aeb665..3d8377f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6604,7 +6604,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) struct page **pages; void *blob, *hdr; unsigned long n; - int ret; + int ret, offset; if (!sev_guest(kvm)) return -ENOTTY; @@ -6630,6 +6630,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!data) goto e_unpin_memory; + offset = params.guest_uaddr & (PAGE_SIZE - 1); + data->guest_address = __sme_page_pa(pages[0]) + offset; + data->guest_len = params.guest_len; + blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); if (IS_ERR(blob)) { ret = PTR_ERR(blob); @@ -6644,8 +6648,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) ret = PTR_ERR(hdr); goto e_free_blob; } - data->trans_address = __psp_pa(blob); - data->trans_len = params.trans_len; + data->hdr_address = __psp_pa(hdr); + data->hdr_len = params.hdr_len; data->handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); -- cgit v1.1 From 3b821409632ab778d46e807516b457dfa72736ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Feb 2018 20:47:17 -0500 Subject: lock_parent() needs to recheck if dentry got __dentry_kill'ed under it In case when dentry passed to lock_parent() is protected from freeing only by the fact that it's on a shrink list and trylock of parent fails, we could get hit by __dentry_kill() (and subsequent dentry_kill(parent)) between unlocking dentry and locking presumed parent. We need to recheck that dentry is alive once we lock both it and parent *and* postpone rcu_read_unlock() until after that point. Otherwise we could return a pointer to struct dentry that already is rcu-scheduled for freeing, with ->d_lock held on it; caller's subsequent attempt to unlock it can end up with memory corruption. Cc: stable@vger.kernel.org # 3.12+, counting backports Signed-off-by: Al Viro --- fs/dcache.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 7c38f39..32aaab2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -647,11 +647,16 @@ again: spin_unlock(&parent->d_lock); goto again; } - rcu_read_unlock(); - if (parent != dentry) + if (parent != dentry) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - else + if (unlikely(dentry->d_lockref.count < 0)) { + spin_unlock(&parent->d_lock); + parent = NULL; + } + } else { parent = NULL; + } + rcu_read_unlock(); return parent; } -- cgit v1.1 From c0c6bb2322964bd264b4ddedaa5776f40c709f0c Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Tue, 20 Feb 2018 18:08:08 +0530 Subject: ARM: dts: imx6dl: Include correct dtsi file for Engicam i.CoreM6 DualLite/Solo RQS This patch fixes the wrongly included dtsi file which was breaking mainline support for Engicam i.CoreM6 DualLite/Solo RQS. As per the board name, the correct file should be imx6dl.dtsi instead of imx6q.dtsi Reported-by: Michael Trimarchi Suggested-by: Jagan Teki Signed-off-by: Shyam Saini Reviewed-by: Fabio Estevam Fixes: 7a9caba55a61 ("ARM: dts: imx6dl: Add Engicam i.CoreM6 DualLite/Solo RQS initial support") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6dl-icore-rqs.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6dl-icore-rqs.dts b/arch/arm/boot/dts/imx6dl-icore-rqs.dts index cf42c2f..1281bc3 100644 --- a/arch/arm/boot/dts/imx6dl-icore-rqs.dts +++ b/arch/arm/boot/dts/imx6dl-icore-rqs.dts @@ -42,7 +42,7 @@ /dts-v1/; -#include "imx6q.dtsi" +#include "imx6dl.dtsi" #include "imx6qdl-icore-rqs.dtsi" / { -- cgit v1.1 From 1ba8f9d308174e647b864c36209b4d7934d99888 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 22 Feb 2018 14:20:35 +0100 Subject: ALSA: hda: Add a power_save blacklist On some boards setting power_save to a non 0 value leads to clicking / popping sounds when ever we enter/leave powersaving mode. Ideally we would figure out how to avoid these sounds, but that is not always feasible. This commit adds a blacklist for devices where powersaving is known to cause problems and disables it on these devices. Note I tried to put this blacklist in userspace first: https://github.com/systemd/systemd/pull/8128 But the systemd maintainers rightfully pointed out that it would be impossible to then later remove entries once we actually find a way to make power-saving work on listed boards without issues. Having this list in the kernel will allow removal of the blacklist entry in the same commit which fixes the clicks / plops. The blacklist only applies to the default power_save module-option value, if a user explicitly sets the module-option then the blacklist is not used. [ added an ifdef CONFIG_PM for the build error -- tiwai] BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1525104 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198611 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c71dcac..96143df 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -181,7 +181,7 @@ static const struct kernel_param_ops param_ops_xint = { }; #define param_check_xint param_check_int -static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; +static int power_save = -1; module_param(power_save, xint, 0644); MODULE_PARM_DESC(power_save, "Automatic power-saving timeout " "(in second, 0 = disable)."); @@ -2186,6 +2186,24 @@ out_free: return err; } +#ifdef CONFIG_PM +/* On some boards setting power_save to a non 0 value leads to clicking / + * popping sounds when ever we enter/leave powersaving mode. Ideally we would + * figure out how to avoid these sounds, but that is not always feasible. + * So we keep a list of devices where we disable powersaving as its known + * to causes problems on these devices. + */ +static struct snd_pci_quirk power_save_blacklist[] = { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ + SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ + SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), + /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ + SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), + {} +}; +#endif /* CONFIG_PM */ + /* number of codec slots for each chipset: 0 = default slots (i.e. 4) */ static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = { [AZX_DRIVER_NVIDIA] = 8, @@ -2198,6 +2216,7 @@ static int azx_probe_continue(struct azx *chip) struct hdac_bus *bus = azx_bus(chip); struct pci_dev *pci = chip->pci; int dev = chip->dev_index; + int val; int err; hda->probe_continued = 1; @@ -2278,7 +2297,22 @@ static int azx_probe_continue(struct azx *chip) chip->running = 1; azx_add_card_list(chip); - snd_hda_set_power_save(&chip->bus, power_save * 1000); + + val = power_save; +#ifdef CONFIG_PM + if (val == -1) { + const struct snd_pci_quirk *q; + + val = CONFIG_SND_HDA_POWER_SAVE_DEFAULT; + q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist); + if (q && val) { + dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n", + q->subvendor, q->subdevice); + val = 0; + } + } +#endif /* CONFIG_PM */ + snd_hda_set_power_save(&chip->bus, val * 1000); if (azx_has_pm_runtime(chip) || hda->use_vga_switcheroo) pm_runtime_put_autosuspend(&pci->dev); -- cgit v1.1 From 240a8af929c7c57dcde28682725b29cf8474e8e5 Mon Sep 17 00:00:00 2001 From: Erik Veijola Date: Fri, 23 Feb 2018 14:06:52 +0200 Subject: ALSA: usb-audio: Add a quirck for B&W PX headphones The capture interface doesn't work and the playback interface only supports 48 kHz sampling rate even though it advertises more rates. Signed-off-by: Erik Veijola Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 5025204..754e632 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3325,4 +3325,51 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } }, +{ + /* + * Bower's & Wilkins PX headphones only support the 48 kHz sample rate + * even though it advertises more. The capture interface doesn't work + * even on windows. + */ + USB_DEVICE(0x19b5, 0x0021), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + /* Capture */ + { + .ifnum = 1, + .type = QUIRK_IGNORE_INTERFACE, + }, + /* Playback */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .attributes = UAC_EP_CS_ATTR_FILL_MAX | + UAC_EP_CS_ATTR_SAMPLE_RATE, + .endpoint = 0x03, + .ep_attr = USB_ENDPOINT_XFER_ISOC, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { + 48000 + } + } + }, + } + } +}, + #undef USB_DEVICE_VENDOR_SPEC -- cgit v1.1 From 105976f517791aed3b11f8f53b308a2069d42055 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 Feb 2018 23:36:56 +0800 Subject: blk-mq: don't call io sched's .requeue_request when requeueing rq to ->dispatch __blk_mq_requeue_request() covers two cases: - one is that the requeued request is added to hctx->dispatch, such as blk_mq_dispatch_rq_list() - another case is that the request is requeued to io scheduler, such as blk_mq_requeue_request(). We should call io sched's .requeue_request callback only for the 2nd case. Cc: Paolo Valente Cc: Omar Sandoval Fixes: bd166ef183c2 ("blk-mq-sched: add framework for MQ capable IO schedulers") Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Acked-by: Paolo Valente Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3574927..16e83e6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -712,7 +712,6 @@ static void __blk_mq_requeue_request(struct request *rq) trace_block_rq_requeue(q, rq); wbt_requeue(q->rq_wb, &rq->issue_stat); - blk_mq_sched_requeue_request(rq); if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) { blk_mq_rq_update_state(rq, MQ_RQ_IDLE); @@ -725,6 +724,9 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) { __blk_mq_requeue_request(rq); + /* this request will be re-inserted to io scheduler queue */ + blk_mq_sched_requeue_request(rq); + BUG_ON(blk_queued_rq(rq)); blk_mq_add_to_requeue_list(rq, true, kick_requeue_list); } -- cgit v1.1 From ba989a01469d027861e55c8f1121edadef757797 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 23 Feb 2018 23:36:57 +0800 Subject: block: kyber: fix domain token leak during requeue When requeuing request, the domain token should have been freed before re-inserting the request to io scheduler. Otherwise, the assigned domain token will be leaked, and IO hang can be caused. Cc: Paolo Valente Cc: Omar Sandoval Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index f95c607..0d6d25e3 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -833,6 +833,7 @@ static struct elevator_type kyber_sched = { .limit_depth = kyber_limit_depth, .prepare_request = kyber_prepare_request, .finish_request = kyber_finish_request, + .requeue_request = kyber_finish_request, .completed_request = kyber_completed_request, .dispatch_request = kyber_dispatch_request, .has_work = kyber_has_work, -- cgit v1.1 From 3d4d5d618639c3155cfce57101d619a0935434d2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 25 Feb 2018 06:00:11 -0500 Subject: radix tree test suite: Fix build - Add an empty linux/compiler_types.h (now being included by kconfig.h) - Add __GFP_ZERO - Add kzalloc - Test __GFP_DIRECT_RECLAIM instead of __GFP_NOWARN Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/linux.c | 11 +++++++++-- tools/testing/radix-tree/linux/compiler_types.h | 0 tools/testing/radix-tree/linux/gfp.h | 1 + tools/testing/radix-tree/linux/slab.h | 6 ++++++ 4 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 tools/testing/radix-tree/linux/compiler_types.h diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 6903ccf..44a0d1a 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -29,7 +29,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) { struct radix_tree_node *node; - if (flags & __GFP_NOWARN) + if (!(flags & __GFP_DIRECT_RECLAIM)) return NULL; pthread_mutex_lock(&cachep->lock); @@ -73,10 +73,17 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void *kmalloc(size_t size, gfp_t gfp) { - void *ret = malloc(size); + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = malloc(size); uatomic_inc(&nr_allocated); if (kmalloc_verbose) printf("Allocating %p from malloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, size); return ret; } diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h new file mode 100644 index 0000000..e69de29 diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index e9fff59..e3201cc 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -11,6 +11,7 @@ #define __GFP_IO 0x40u #define __GFP_FS 0x80u #define __GFP_NOWARN 0x200u +#define __GFP_ZERO 0x8000u #define __GFP_ATOMIC 0x80000u #define __GFP_ACCOUNT 0x100000u #define __GFP_DIRECT_RECLAIM 0x400000u diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 979baee..a037def 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -3,6 +3,7 @@ #define SLAB_H #include +#include #define SLAB_HWCACHE_ALIGN 1 #define SLAB_PANIC 2 @@ -11,6 +12,11 @@ void *kmalloc(size_t size, gfp_t); void kfree(void *); +static inline void *kzalloc(size_t size, gfp_t gfp) +{ + return kmalloc(size, gfp | __GFP_ZERO); +} + void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); void kmem_cache_free(struct kmem_cache *cachep, void *objp); -- cgit v1.1 From de9647efeaa9f4e8b08c002e09757fd9c55ff901 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 22 Feb 2018 13:58:42 -0600 Subject: platform/x86: intel-vbtn: Only activate tablet mode switch on 2-in-1's Some laptops such as the XPS 9360 support the intel-vbtn INT33D6 interface but don't initialize the bit that intel-vbtn uses to represent switching tablet mode. By running this only on real 2-in-1's it shouldn't cause false positives. Fixes: 30323fb6d5 ("Support tablet mode switch") Reported-by: Jeremy Cline Signed-off-by: Mario Limonciello Tested-by: Jeremy Cline Tested-by: Darren Hart (VMware) Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/intel-vbtn.c | 46 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index b703d6f..8173307 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -97,9 +98,35 @@ out_unknown: dev_dbg(&device->dev, "unknown event index 0x%x\n", event); } -static int intel_vbtn_probe(struct platform_device *device) +static void detect_tablet_mode(struct platform_device *device) { + const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE); + struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev); + acpi_handle handle = ACPI_HANDLE(&device->dev); struct acpi_buffer vgbs_output = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + acpi_status status; + int m; + + if (!(chassis_type && strcmp(chassis_type, "31") == 0)) + goto out; + + status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output); + if (ACPI_FAILURE(status)) + goto out; + + obj = vgbs_output.pointer; + if (!(obj && obj->type == ACPI_TYPE_INTEGER)) + goto out; + + m = !(obj->integer.value & TABLET_MODE_FLAG); + input_report_switch(priv->input_dev, SW_TABLET_MODE, m); +out: + kfree(vgbs_output.pointer); +} + +static int intel_vbtn_probe(struct platform_device *device) +{ acpi_handle handle = ACPI_HANDLE(&device->dev); struct intel_vbtn_priv *priv; acpi_status status; @@ -122,22 +149,7 @@ static int intel_vbtn_probe(struct platform_device *device) return err; } - /* - * VGBS being present and returning something means we have - * a tablet mode switch. - */ - status = acpi_evaluate_object(handle, "VGBS", NULL, &vgbs_output); - if (ACPI_SUCCESS(status)) { - union acpi_object *obj = vgbs_output.pointer; - - if (obj && obj->type == ACPI_TYPE_INTEGER) { - int m = !(obj->integer.value & TABLET_MODE_FLAG); - - input_report_switch(priv->input_dev, SW_TABLET_MODE, m); - } - } - - kfree(vgbs_output.pointer); + detect_tablet_mode(device); status = acpi_install_notify_handler(handle, ACPI_DEVICE_NOTIFY, -- cgit v1.1 From 015555fd4d2930bc0c86952c46ad88b3392f66e4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 14:55:54 +0000 Subject: fs: dcache: Avoid livelock between d_alloc_parallel and __d_add If d_alloc_parallel runs concurrently with __d_add, it is possible for d_alloc_parallel to continuously retry whilst i_dir_seq has been incremented to an odd value by __d_add: CPU0: __d_add n = start_dir_add(dir); cmpxchg(&dir->i_dir_seq, n, n + 1) == n CPU1: d_alloc_parallel retry: seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; hlist_bl_lock(b); bit_spin_lock(0, (unsigned long *)b); // Always succeeds CPU0: __d_lookup_done(dentry) hlist_bl_lock bit_spin_lock(0, (unsigned long *)b); // Never succeeds CPU1: if (unlikely(parent->d_inode->i_dir_seq != seq)) { hlist_bl_unlock(b); goto retry; } Since the simple bit_spin_lock used to implement hlist_bl_lock does not provide any fairness guarantees, then CPU1 can starve CPU0 of the lock and prevent it from reaching end_dir_add(dir), therefore CPU1 cannot exit its retry loop because the sequence number always has the bottom bit set. This patch resolves the livelock by not taking hlist_bl_lock in d_alloc_parallel if the sequence counter is odd, since any subsequent masked comparison with i_dir_seq will fail anyway. Cc: Peter Zijlstra Cc: Al Viro Reported-by: Naresh Madhusudana Acked-by: Peter Zijlstra (Intel) Reviewed-by: Matthew Wilcox Signed-off-by: Will Deacon Signed-off-by: Al Viro --- fs/dcache.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index 32aaab2..bde3b66 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2479,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, retry: rcu_read_lock(); - seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; + seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { @@ -2500,6 +2500,12 @@ retry: rcu_read_unlock(); goto retry; } + + if (unlikely(seq & 1)) { + rcu_read_unlock(); + goto retry; + } + hlist_bl_lock(b); if (unlikely(parent->d_inode->i_dir_seq != seq)) { hlist_bl_unlock(b); -- cgit v1.1 From 8cc07c808c9d595e81cbe5aad419b7769eb2e5c9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 19 Feb 2018 14:55:55 +0000 Subject: fs: dcache: Use READ_ONCE when accessing i_dir_seq i_dir_seq is subject to concurrent modification by a cmpxchg or store-release operation, so ensure that the relaxed access in d_alloc_parallel uses READ_ONCE. Reported-by: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Al Viro --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index bde3b66..8945e6c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2507,7 +2507,7 @@ retry: } hlist_bl_lock(b); - if (unlikely(parent->d_inode->i_dir_seq != seq)) { + if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; -- cgit v1.1 From 43a521238aca0e24d50add1db125a61bda2a3527 Mon Sep 17 00:00:00 2001 From: Lidong Zhong Date: Tue, 23 Jan 2018 23:06:12 +0800 Subject: md-cluster: choose correct label when clustered layout is not supported r10conf is already successfully allocated before checking the layout Signed-off-by: Lidong Zhong Reviewed-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9e9441f..93fa947 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3784,7 +3784,7 @@ static int raid10_run(struct mddev *mddev) if (fc > 1 || fo > 0) { pr_err("only near layout is supported by clustered" " raid10\n"); - goto out; + goto out_free_conf; } } -- cgit v1.1 From 8876391e440ba615b10eef729576e111f0315f87 Mon Sep 17 00:00:00 2001 From: BingJing Chang Date: Thu, 22 Feb 2018 13:34:46 +0800 Subject: md: fix a potential deadlock of raid5/raid10 reshape There is a potential deadlock if mount/umount happens when raid5_finish_reshape() tries to grow the size of emulated disk. How the deadlock happens? 1) The raid5 resync thread finished reshape (expanding array). 2) The mount or umount thread holds VFS sb->s_umount lock and tries to write through critical data into raid5 emulated block device. So it waits for raid5 kernel thread handling stripes in order to finish it I/Os. 3) In the routine of raid5 kernel thread, md_check_recovery() will be called first in order to reap the raid5 resync thread. That is, raid5_finish_reshape() will be called. In this function, it will try to update conf and call VFS revalidate_disk() to grow the raid5 emulated block device. It will try to acquire VFS sb->s_umount lock. The raid5 kernel thread cannot continue, so no one can handle mount/ umount I/Os (stripes). Once the write-through I/Os cannot be finished, mount/umount will not release sb->s_umount lock. The deadlock happens. The raid5 kernel thread is an emulated block device. It is responible to handle I/Os (stripes) from upper layers. The emulated block device should not request any I/Os on itself. That is, it should not call VFS layer functions. (If it did, it will try to acquire VFS locks to guarantee the I/Os sequence.) So we have the resync thread to send resync I/O requests and to wait for the results. For solving this potential deadlock, we can put the size growth of the emulated block device as the final step of reshape thread. 2017/12/29: Thanks to Guoqing Jiang , we confirmed that there is the same deadlock issue in raid10. It's reproducible and can be fixed by this patch. For raid10.c, we can remove the similar code to prevent deadlock as well since they has been called before. Reported-by: Alex Wu Reviewed-by: Alex Wu Reviewed-by: Chung-Chiang Cheng Signed-off-by: BingJing Chang Signed-off-by: Shaohua Li --- drivers/md/md.c | 13 +++++++++++++ drivers/md/raid10.c | 8 +------- drivers/md/raid5.c | 8 +------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index ba152dd..254e44e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8569,6 +8569,19 @@ void md_do_sync(struct md_thread *thread) set_mask_bits(&mddev->sb_flags, 0, BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS)); + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + !test_bit(MD_RECOVERY_INTR, &mddev->recovery) && + mddev->delta_disks > 0 && + mddev->pers->finish_reshape && + mddev->pers->size && + mddev->queue) { + mddev_lock_nointr(mddev); + md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0)); + mddev_unlock(mddev); + set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + } + spin_lock(&mddev->lock); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { /* We completed so min/max setting can be forgotten if used. */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 93fa947..c5e6c60 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4832,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev) return; if (mddev->delta_disks > 0) { - sector_t size = raid10_size(mddev, 0, 0); - md_set_array_sectors(mddev, size); if (mddev->recovery_cp > mddev->resync_max_sectors) { mddev->recovery_cp = mddev->resync_max_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } - mddev->resync_max_sectors = size; - if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } + mddev->resync_max_sectors = mddev->array_sectors; } else { int d; rcu_read_lock(); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e3b0f79..b5d2601 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -8000,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev) if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - if (mddev->delta_disks > 0) { - md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } - } else { + if (mddev->delta_disks <= 0) { int d; spin_lock_irq(&conf->device_lock); mddev->degraded = raid5_calc_degraded(conf); -- cgit v1.1 From 3de59bb9d551428cbdc76a9ea57883f82e350b4d Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Sat, 24 Feb 2018 12:05:56 +0800 Subject: md/raid1: fix NULL pointer dereference In handle_write_finished(), if r1_bio->bios[m] != NULL, it thinks the corresponding conf->mirrors[m].rdev is also not NULL. But, it is not always true. Even if some io hold replacement rdev(i.e. rdev->nr_pending.count > 0), raid1_remove_disk() can also set the rdev as NULL. That means, bios[m] != NULL, but mirrors[m].rdev is NULL, resulting in NULL pointer dereference in handle_write_finished and sync_request_write. This patch can fix BUGs as follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000140 IP: [] raid1d+0x2bd/0xfc0 PGD 12ab52067 PUD 12f587067 PMD 0 Oops: 0000 [#1] SMP CPU: 1 PID: 2008 Comm: md3_raid1 Not tainted 4.1.44+ #130 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 Call Trace: ? schedule+0x37/0x90 ? prepare_to_wait_event+0x83/0xf0 md_thread+0x144/0x150 ? wake_atomic_t_function+0x70/0x70 ? md_start_sync+0xf0/0xf0 kthread+0xd8/0xf0 ? kthread_worker_fn+0x160/0x160 ret_from_fork+0x42/0x70 ? kthread_worker_fn+0x160/0x160 BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8 IP: sync_request_write+0x9e/0x980 PGD 800000007c518067 P4D 800000007c518067 PUD 8002b067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 24 PID: 2549 Comm: md3_raid1 Not tainted 4.15.0+ #118 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 Call Trace: ? sched_clock+0x5/0x10 ? sched_clock_cpu+0xc/0xb0 ? flush_pending_writes+0x3a/0xd0 ? pick_next_task_fair+0x4d5/0x5f0 ? __switch_to+0xa2/0x430 raid1d+0x65a/0x870 ? find_pers+0x70/0x70 ? find_pers+0x70/0x70 ? md_thread+0x11c/0x160 md_thread+0x11c/0x160 ? finish_wait+0x80/0x80 kthread+0x111/0x130 ? kthread_create_worker_on_cpu+0x70/0x70 ? do_syscall_64+0x6f/0x190 ? SyS_exit_group+0x10/0x10 ret_from_fork+0x35/0x40 Reviewed-by: NeilBrown Signed-off-by: Yufen Yu Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f978edd..fe872dc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) struct md_rdev *repl = conf->mirrors[conf->raid_disks + number].rdev; freeze_array(conf, 0); + if (atomic_read(&repl->nr_pending)) { + /* It means that some queued IO of retry_list + * hold repl. Thus, we cannot set replacement + * as NULL, avoiding rdev NULL pointer + * dereference in sync_request_write and + * handle_write_finished. + */ + err = -EBUSY; + unfreeze_array(conf); + goto abort; + } clear_bit(Replacement, &repl->flags); p->rdev = repl; conf->mirrors[conf->raid_disks + number].rdev = NULL; -- cgit v1.1 From abd6360591d3f8259f41c34e31ac4826dfe621b8 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 23 Jan 2018 10:59:49 +0100 Subject: batman-adv: fix packet checksum in receive path eth_type_trans() internally calls skb_pull(), which does not adjust the skb checksum; skb_postpull_rcsum() is necessary to avoid log spam of the form "bat0: hw csum failure" when packets with CHECKSUM_COMPLETE are received. Note that in usual setups, packets don't reach batman-adv with CHECKSUM_COMPLETE (I assume NICs bail out of checksumming when they see batadv's ethtype?), which is why the log messages do not occur on every system using batman-adv. I could reproduce this issue by stacking batman-adv on top of a VXLAN interface. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Tested-by: Maximilian Wilhelm Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 900c5ce..367a81f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -459,13 +459,7 @@ void batadv_interface_rx(struct net_device *soft_iface, /* skb->dev & skb->pkt_type are set here */ skb->protocol = eth_type_trans(skb, soft_iface); - - /* should not be necessary anymore as we use skb_pull_rcsum() - * TODO: please verify this and remove this TODO - * -- Dec 21st 2009, Simon Wunderlich - */ - - /* skb->ip_summed = CHECKSUM_UNNECESSARY; */ + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_RX); batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, -- cgit v1.1 From 3bf2a09da956b43ecfaa630a2ef9a477f991a46a Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 23 Jan 2018 10:59:50 +0100 Subject: batman-adv: invalidate checksum on fragment reassembly A more sophisticated implementation could try to combine fragment checksums when all fragments have CHECKSUM_COMPLETE and are split at even offsets. For now, we just set ip_summed to CHECKSUM_NONE to avoid "hw csum failure" warnings in the kernel log when fragmented frames are received. In consequence, skb_pull_rcsum() can be replaced with skb_pull(). Note that in usual setups, packets don't reach batman-adv with CHECKSUM_COMPLETE (I assume NICs bail out of checksumming when they see batadv's ethtype?), which is why the log messages do not occur on every system using batman-adv. I could reproduce this issue by stacking batman-adv on top of a VXLAN interface. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Tested-by: Maximilian Wilhelm Signed-off-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 22dde42..5afe641 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -288,7 +288,8 @@ batadv_frag_merge_packets(struct hlist_head *chain) /* Move the existing MAC header to just before the payload. (Override * the fragment header.) */ - skb_pull_rcsum(skb_out, hdr_size); + skb_pull(skb_out, hdr_size); + skb_out->ip_summed = CHECKSUM_NONE; memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN); skb_set_mac_header(skb_out, -ETH_HLEN); skb_reset_network_header(skb_out); -- cgit v1.1 From 8ae56822812ddedc26a152ab1916eb30120b4748 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Feb 2018 12:49:32 +0100 Subject: netfilter: ipt_CLUSTERIP: put config struct if we can't increment ct refcount This needs to put() the entry to avoid a resource leak in error path. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4b02ab3..4c8cfd3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -496,12 +496,15 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) return PTR_ERR(config); } } - cipinfo->config = config; ret = nf_ct_netns_get(par->net, par->family); - if (ret < 0) + if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); + clusterip_config_entry_put(par->net, config); + clusterip_config_put(config); + return ret; + } if (!par->net->xt.clusterip_deprecated_warning) { pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " @@ -509,6 +512,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) par->net->xt.clusterip_deprecated_warning = true; } + cipinfo->config = config; return ret; } -- cgit v1.1 From 1a9da5937386dbe553ffcf6c65d985bd48c347c5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Feb 2018 12:49:33 +0100 Subject: netfilter: ipt_CLUSTERIP: put config instead of freeing it Once struct is added to per-netns list it becomes visible to other cpus, so we cannot use kfree(). Also delay setting entries refcount to 1 until after everything is initialised so that when we call clusterip_config_put() in this spot entries is still zero. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4c8cfd3..8a8ae61 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -232,7 +232,6 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; refcount_set(&c->refcount, 1); - refcount_set(&c->entries, 1); spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { @@ -263,8 +262,10 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, c->notifier.notifier_call = clusterip_netdev_event; err = register_netdevice_notifier(&c->notifier); - if (!err) + if (!err) { + refcount_set(&c->entries, 1); return c; + } #ifdef CONFIG_PROC_FS proc_remove(c->pde); @@ -273,7 +274,7 @@ err: spin_lock_bh(&cn->lock); list_del_rcu(&c->list); spin_unlock_bh(&cn->lock); - kfree(c); + clusterip_config_put(c); return ERR_PTR(err); } -- cgit v1.1 From b078556aecd791b0e5cb3a59f4c3a14273b52121 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2018 08:10:17 +0100 Subject: netfilter: ipv6: fix use-after-free Write in nf_nat_ipv6_manip_pkt l4proto->manip_pkt() can cause reallocation of skb head so pointer to the ipv6 header must be reloaded. Reported-and-tested-by: Fixes: 58a317f1061c89 ("netfilter: ipv6: add IPv6 NAT support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index bed57ee..6b7f075 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -99,6 +99,10 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, target, maniptype)) return false; + + /* must reload, offset might have changed */ + ipv6h = (void *)skb->data + iphdroff; + manip_addr: if (maniptype == NF_NAT_MANIP_SRC) ipv6h->saddr = target->src.u3.in6; -- cgit v1.1 From c4585a2823edf4d1326da44d1524ecbfda26bb37 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2018 03:01:45 +0100 Subject: netfilter: bridge: ebt_among: add missing match size checks ebt_among is special, it has a dynamic match size and is exempt from the central size checks. Therefore it must check that the size of the match structure provided from userspace is sane by making sure em->match_size is at least the minimum size of the expected structure. The module has such a check, but its only done after accessing a structure that might be out of bounds. tested with: ebtables -A INPUT ... \ --among-dst fe:fe:fe:fe:fe:fe --among-dst fe:fe:fe:fe:fe:fe --among-src fe:fe:fe:fe:ff:f,fe:fe:fe:fe:fe:fb,fe:fe:fe:fe:fc:fd,fe:fe:fe:fe:fe:fd,fe:fe:fe:fe:fe:fe --among-src fe:fe:fe:fe:ff:f,fe:fe:fe:fe:fe:fa,fe:fe:fe:fe:fe:fd,fe:fe:fe:fe:fe:fe,fe:fe:fe:fe:fe:fe Reported-by: Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_among.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index ce7152a..c5afb42 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -172,18 +172,35 @@ ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } +static bool poolsize_invalid(const struct ebt_mac_wormhash *w) +{ + return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple)); +} + static int ebt_among_mt_check(const struct xt_mtchk_param *par) { const struct ebt_among_info *info = par->matchinfo; const struct ebt_entry_match *em = container_of(par->matchinfo, const struct ebt_entry_match, data); - int expected_length = sizeof(struct ebt_among_info); + unsigned int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; + if (expected_length > em->match_size) + return -EINVAL; + wh_dst = ebt_among_wh_dst(info); - wh_src = ebt_among_wh_src(info); + if (poolsize_invalid(wh_dst)) + return -EINVAL; + expected_length += ebt_mac_wormhash_size(wh_dst); + if (expected_length > em->match_size) + return -EINVAL; + + wh_src = ebt_among_wh_src(info); + if (poolsize_invalid(wh_src)) + return -EINVAL; + expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { -- cgit v1.1 From fc6a5d0601c5ac1d02f283a46f60b87b2033e5ca Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2018 01:24:53 +0100 Subject: netfilter: ebtables: convert BUG_ONs to WARN_ONs All of these conditions are not fatal and should have been WARN_ONs from the get-go. Convert them to WARN_ONs and bail out. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 02c4b40..61f8787 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1641,7 +1641,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, int off = ebt_compat_match_offset(match, m->match_size); compat_uint_t msize = m->match_size - off; - BUG_ON(off >= m->match_size); + if (WARN_ON(off >= m->match_size)) + return -EINVAL; if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) || put_user(msize, &cm->match_size)) @@ -1671,7 +1672,8 @@ static int compat_target_to_user(struct ebt_entry_target *t, int off = xt_compat_target_offset(target); compat_uint_t tsize = t->target_size - off; - BUG_ON(off >= t->target_size); + if (WARN_ON(off >= t->target_size)) + return -EINVAL; if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) || put_user(tsize, &cm->match_size)) @@ -1902,7 +1904,8 @@ static int ebt_buf_add(struct ebt_entries_buf_state *state, if (state->buf_kern_start == NULL) goto count_only; - BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len); + if (WARN_ON(state->buf_kern_offset + sz > state->buf_kern_len)) + return -EINVAL; memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); @@ -1915,7 +1918,8 @@ static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) { char *b = state->buf_kern_start; - BUG_ON(b && state->buf_kern_offset > state->buf_kern_len); + if (WARN_ON(b && state->buf_kern_offset > state->buf_kern_len)) + return -EINVAL; if (b != NULL && sz > 0) memset(b + state->buf_kern_offset, 0, sz); @@ -1992,8 +1996,10 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, pad = XT_ALIGN(size_kern) - size_kern; if (pad > 0 && dst) { - BUG_ON(state->buf_kern_len <= pad); - BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad); + if (WARN_ON(state->buf_kern_len <= pad)) + return -EINVAL; + if (WARN_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad)) + return -EINVAL; memset(dst + size_kern, 0, pad); } return off + match_size; @@ -2043,7 +2049,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (ret < 0) return ret; - BUG_ON(ret < match32->match_size); + if (WARN_ON(ret < match32->match_size)) + return -EINVAL; growth += ret - match32->match_size; growth += ebt_compat_entry_padsize(); @@ -2140,7 +2147,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, startoff = state->buf_user_offset - startoff; - BUG_ON(*total < startoff); + if (WARN_ON(*total < startoff)) + return -EINVAL; *total -= startoff; return 0; } @@ -2267,7 +2275,8 @@ static int compat_do_replace(struct net *net, void __user *user, state.buf_kern_len = size64; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); - BUG_ON(ret < 0); /* parses same data again */ + if (WARN_ON(ret < 0)) + goto out_unlock; vfree(entries_tmp); tmp.entries_size = size64; -- cgit v1.1 From b71812168571fa55e44cdd0254471331b9c4c4c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2018 01:24:15 +0100 Subject: netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets We need to make sure the offsets are not out of range of the total size. Also check that they are in ascending order. The WARN_ON triggered by syzkaller (it sets panic_on_warn) is changed to also bail out, no point in continuing parsing. Briefly tested with simple ruleset of -A INPUT --limit 1/s' --log plus jump to custom chains using 32bit ebtables binary. Reported-by: Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 61f8787..254ef9f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2060,7 +2060,9 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - WARN_ON(type == EBT_COMPAT_TARGET && size_left); + if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + return -EINVAL; + match32 = (struct compat_ebt_entry_mwt *) buf; } @@ -2116,6 +2118,15 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, * * offsets are relative to beginning of struct ebt_entry (i.e., 0). */ + for (i = 0; i < 4 ; ++i) { + if (offsets[i] >= *total) + return -EINVAL; + if (i == 0) + continue; + if (offsets[i-1] > offsets[i]) + return -EINVAL; + } + for (i = 0, j = 1 ; j < 4 ; j++, i++) { struct compat_ebt_entry_mwt *match32; unsigned int size; -- cgit v1.1 From f4b7ac5ec37d0b6b183677d8b3f10576b18945fd Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 25 Feb 2018 18:18:52 +0100 Subject: netfilter: nf_flow_table: fix checksum when handling DNAT Add a missing call to csum_replace4 like on SNAT. Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_flow_table_ipv4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index 25d2975..282b9cc 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -111,6 +111,7 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, default: return -1; } + csum_replace4(&iph->check, addr, new_addr); return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); } -- cgit v1.1 From 10d570284258a30dc104c50787c5289ec49f3d23 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 19 Feb 2018 14:08:52 +0100 Subject: batman-adv: Ignore invalid batadv_iv_gw during netlink send The function batadv_iv_gw_dump stops the processing loop when batadv_iv_gw_dump_entry returns a non-0 return code. This should only happen when the buffer is full. Otherwise, an empty message may be returned by batadv_gw_dump. This empty message will then stop the netlink dumping of gateway entries. At worst, not a single entry is returned to userspace even when plenty of possible gateways exist. Fixes: efb766af06e3 ("batman-adv: add B.A.T.M.A.N. IV bat_gw_dump implementations") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 79e3263..8f64439 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2729,7 +2729,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; struct batadv_gw_node *curr_gw; - int ret = -EINVAL; + int ret = 0; void *hdr; router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); -- cgit v1.1 From 011c935fceae5252619ef730baa610c655281dda Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 19 Feb 2018 14:08:53 +0100 Subject: batman-adv: Ignore invalid batadv_v_gw during netlink send The function batadv_v_gw_dump stops the processing loop when batadv_v_gw_dump_entry returns a non-0 return code. This should only happen when the buffer is full. Otherwise, an empty message may be returned by batadv_gw_dump. This empty message will then stop the netlink dumping of gateway entries. At worst, not a single entry is returned to userspace even when plenty of possible gateways exist. Fixes: b71bb6f924fe ("batman-adv: add B.A.T.M.A.N. V bat_gw_dump implementations") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 27e165a..c74f813 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -928,7 +928,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; struct batadv_gw_node *curr_gw; - int ret = -EINVAL; + int ret = 0; void *hdr; router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); -- cgit v1.1 From b0264ecdfeab5f889b02ec54af7ca8cc1c245e2f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 24 Feb 2018 12:03:36 +0100 Subject: batman-adv: Fix netlink dumping of BLA claims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function batadv_bla_claim_dump_bucket must be able to handle non-complete dumps of a single bucket. It tries to do that by saving the latest dumped index in *idx_skip to inform the caller about the current state. But the caller only assumes that buckets were not completely dumped when the return code is non-zero. This function must therefore also return a non-zero index when the dumping of an entry failed. Otherwise the caller will just skip all remaining buckets. And the function must also reset *idx_skip back to zero when it finished a bucket. Otherwise it will skip the same number of entries in the next bucket as the previous one had. Fixes: 04f3f5bf1883 ("batman-adv: add B.A.T.M.A.N. Dump BLA claims via netlink") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index fad4785..20b548e 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2161,22 +2161,25 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, { struct batadv_bla_claim *claim; int idx = 0; + int ret = 0; rcu_read_lock(); hlist_for_each_entry_rcu(claim, head, hash_entry) { if (idx++ < *idx_skip) continue; - if (batadv_bla_claim_dump_entry(msg, portid, seq, - primary_if, claim)) { + + ret = batadv_bla_claim_dump_entry(msg, portid, seq, + primary_if, claim); + if (ret) { *idx_skip = idx - 1; goto unlock; } } - *idx_skip = idx; + *idx_skip = 0; unlock: rcu_read_unlock(); - return 0; + return ret; } /** -- cgit v1.1 From fce672db548ff19e76a08a32a829544617229bc2 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 24 Feb 2018 12:03:37 +0100 Subject: batman-adv: Fix netlink dumping of BLA backbones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function batadv_bla_backbone_dump_bucket must be able to handle non-complete dumps of a single bucket. It tries to do that by saving the latest dumped index in *idx_skip to inform the caller about the current state. But the caller only assumes that buckets were not completely dumped when the return code is non-zero. This function must therefore also return a non-zero index when the dumping of an entry failed. Otherwise the caller will just skip all remaining buckets. And the function must also reset *idx_skip back to zero when it finished a bucket. Otherwise it will skip the same number of entries in the next bucket as the previous one had. Fixes: ea4152e11716 ("batman-adv: add backbone table netlink support") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 20b548e..b1a0837 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2394,22 +2394,25 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, { struct batadv_bla_backbone_gw *backbone_gw; int idx = 0; + int ret = 0; rcu_read_lock(); hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (idx++ < *idx_skip) continue; - if (batadv_bla_backbone_dump_entry(msg, portid, seq, - primary_if, backbone_gw)) { + + ret = batadv_bla_backbone_dump_entry(msg, portid, seq, + primary_if, backbone_gw); + if (ret) { *idx_skip = idx - 1; goto unlock; } } - *idx_skip = idx; + *idx_skip = 0; unlock: rcu_read_unlock(); - return 0; + return ret; } /** -- cgit v1.1 From 2412d897c2c34ab5a9834a2dc472512d96e485ef Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 17 Feb 2018 04:18:15 +0900 Subject: netfilter: increase IPSTATS_MIB_CSUMERRORS stat In the ip_rcv, IPSTATS_MIB_CSUMERRORS is increased when checksum error is occurred. bridge netfilter routine should increase IPSTATS_MIB_CSUMERRORS. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 27f1d4f..9b16eaf 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - goto inhdr_error; + goto csum_error; len = ntohs(iph->tot_len); if (skb->len < len) { @@ -236,6 +236,8 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb) */ return 0; +csum_error: + __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: -- cgit v1.1 From 47b7e7f82802dced3ac73658bf4b77584a63063f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 15 Feb 2018 00:23:05 +0100 Subject: netfilter: don't set F_IFACE on ipv6 fib lookups "fib" starts to behave strangely when an ipv6 default route is added - the FIB lookup returns a route using 'oif' in this case. This behaviour was inherited from ip6tables rpfilter so change this as well. Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1221 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_rpfilter.c | 4 ---- net/ipv6/netfilter/nft_fib_ipv6.c | 12 ++---------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 94deb69..91ed25a 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -48,10 +48,6 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - if ((flags & XT_RPFILTER_LOOSE) == 0) { - fl6.flowi6_oif = dev->ifindex; - lookup_flags |= RT6_LOOKUP_F_IFACE; - } rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); if (rt->dst.error) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index cc5174c..62fc84d 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -180,7 +180,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, } *dest = 0; - again: rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); if (rt->dst.error) goto put_rt_err; @@ -189,15 +188,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; - if (oif && oif != rt->rt6i_idev->dev) { - /* multipath route? Try again with F_IFACE */ - if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) { - lookup_flags |= RT6_LOOKUP_F_IFACE; - fl6.flowi6_oif = oif->ifindex; - ip6_rt_put(rt); - goto again; - } - } + if (oif && oif != rt->rt6i_idev->dev) + goto put_rt_err; switch (priv->result) { case NFT_FIB_RESULT_OIF: -- cgit v1.1 From f22e08932c2960f29b5e828e745c9f3fb7c1bb86 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 26 Dec 2017 15:14:01 +0100 Subject: batman-adv: Fix internal interface indices types batman-adv uses internal indices for each enabled and active interface. It is currently used by the B.A.T.M.A.N. IV algorithm to identifify the correct position in the ogm_cnt bitmaps. The type for the number of enabled interfaces (which defines the next interface index) was set to char. This type can be (depending on the architecture) either signed (limiting batman-adv to 127 active slave interfaces) or unsigned (limiting batman-adv to 255 active slave interfaces). This limit was not correctly checked when an interface was enabled and thus an overflow happened. This was only catched on systems with the signed char type when the B.A.T.M.A.N. IV code tried to resize its counter arrays with a negative size. The if_num interface index was only a s16 and therefore significantly smaller than the ifindex (int) used by the code net code. Both &batadv_hard_iface->if_num and &batadv_priv->num_ifaces must be (unsigned) int to support the same number of slave interfaces as the net core code. And the interface activation code must check the number of active slave interfaces to avoid integer overflows. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 24 ++++++++++++++---------- net/batman-adv/hard-interface.c | 9 +++++++-- net/batman-adv/originator.c | 4 ++-- net/batman-adv/originator.h | 4 ++-- net/batman-adv/types.h | 11 ++++++----- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 8f64439..99abead 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -157,7 +157,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, - int max_if_num) + unsigned int max_if_num) { void *data_ptr; size_t old_size; @@ -201,7 +201,8 @@ unlock: */ static void batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t chunk_size; size_t if_offset; @@ -239,7 +240,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t if_offset; void *data_ptr; @@ -276,7 +278,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); @@ -311,7 +314,8 @@ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; - int size, hash_added; + int hash_added; + size_t size; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) @@ -893,7 +897,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) u32 i; size_t word_index; u8 *w; - int if_num; + unsigned int if_num; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -1023,7 +1027,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - int if_num; + unsigned int if_num; u8 sum_orig, sum_neigh; u8 *neigh_addr; u8 tq_avg; @@ -1182,7 +1186,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int if_num; + unsigned int if_num; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; unsigned int tq_iface_penalty; @@ -1702,9 +1706,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (is_my_orig) { unsigned long *word; - int offset; + size_t offset; s32 bit_pos; - s16 if_num; + unsigned int if_num; u8 *weight; orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 5f186bf..68b54a3 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -763,6 +763,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + if (bat_priv->num_ifaces >= UINT_MAX) { + ret = -ENOSPC; + goto err_dev; + } + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface, NULL, NULL, NULL); if (ret) @@ -876,7 +881,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) { + if (bat_priv->num_ifaces == 0) { batadv_gw_check_client_stop(bat_priv); if (autodel == BATADV_IF_CLEANUP_AUTO) @@ -912,7 +917,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (ret) goto free_if; - hard_iface->if_num = -1; + hard_iface->if_num = 0; hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 58a7d92..7478242 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1569,7 +1569,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) * Return: 0 on success or negative error number in case of failure */ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_algo_ops *bao = bat_priv->algo_ops; @@ -1611,7 +1611,7 @@ err: * Return: 0 on success or negative error number in case of failure */ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 8e543a3..15d896b 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -73,9 +73,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index bb15784..a5aa6d6 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -167,7 +167,7 @@ struct batadv_hard_iface { struct list_head list; /** @if_num: identificator of the interface */ - s16 if_num; + unsigned int if_num; /** @if_status: status of the interface for batman-adv */ char if_status; @@ -1596,7 +1596,7 @@ struct batadv_priv { atomic_t batman_queue_left; /** @num_ifaces: number of interfaces assigned to this mesh interface */ - char num_ifaces; + unsigned int num_ifaces; /** @mesh_obj: kobject for sysfs mesh subdirectory */ struct kobject *mesh_obj; @@ -2186,15 +2186,16 @@ struct batadv_algo_orig_ops { * orig_node due to a new hard-interface being added into the mesh * (optional) */ - int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); + int (*add_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num); /** * @del_if: ask the routing algorithm to apply the needed changes to the * orig_node due to an hard-interface being removed from the mesh * (optional) */ - int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, - int del_if_num); + int (*del_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num, unsigned int del_if_num); #ifdef CONFIG_BATMAN_ADV_DEBUGFS /** @print: print the originator table (optional) */ -- cgit v1.1 From 7d98386d55a5afaa65de77e1e9197edeb8a42079 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Feb 2018 11:49:07 -0800 Subject: netfilter: use skb_to_full_sk in ip6_route_me_harder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason, Florian forgot to apply to ip6_route_me_harder the fix that went in commit 29e09229d9f2 ("netfilter: use skb_to_full_sk in ip_route_me_harder") Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener")  Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d95ceca..531d695 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -21,18 +21,19 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); + struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; struct flowi6 fl6 = { - .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .flowi6_oif = sk ? sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, - .flowi6_uid = sock_net_uid(net, skb->sk), + .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, .saddr = iph->saddr, }; int err; - dst = ip6_route_output(net, skb->sk, &fl6); + dst = ip6_route_output(net, sk, &fl6); err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -50,7 +51,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_set(skb, dst); -- cgit v1.1 From 1fdb926974695d3dbc05a429bafa266fdd16510e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 Feb 2018 09:06:18 +0100 Subject: Bluetooth: btusb: Use DMI matching for QCA reset_resume quirking Commit 61f5acea8737 ("Bluetooth: btusb: Restore QCA Rome suspend/resume fix with a "rewritten" version") applied the USB_QUIRK_RESET_RESUME to all QCA USB Bluetooth modules. But it turns out that the resume problems are not caused by the QCA Rome chipset, on most platforms it resumes fine. The resume problems are actually a platform problem (likely the platform cutting all power when suspended). The USB_QUIRK_RESET_RESUME quirk also disables runtime suspend, so by matching on usb-ids, we're causing all boards with these chips to use extra power, to fix resume problems which only happen on some boards. This commit fixes this by applying the quirk based on DMI matching instead of on usb-ids, so that we match the platform and not the chipset. Here is the /sys/kernel/debug/usb/devices for the Bluetooth module: T: Bus=01 Lev=01 Prnt=01 Port=07 Cnt=04 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=e300 Rev= 0.01 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Fixes: 61f5acea8737 ("Bluetooth: btusb: Restore QCA Rome suspend/resume..") Cc: stable@vger.kernel.org Cc: Brian Norris Cc: Kai-Heng Feng Reported-and-tested-by: Kevin Fenzi Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 2a55380..60bf04b 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -21,6 +21,7 @@ * */ +#include #include #include #include @@ -379,6 +380,21 @@ static const struct usb_device_id blacklist_table[] = { { } /* Terminating entry */ }; +/* The Bluetooth USB module build into some devices needs to be reset on resume, + * this is a problem with the platform (likely shutting off all power) not with + * the module itself. So we use a DMI list to match known broken platforms. + */ +static const struct dmi_system_id btusb_needs_reset_resume_table[] = { + { + /* Lenovo Yoga 920 (QCA Rome device 0cf3:e300) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 920"), + }, + }, + {} +}; + #define BTUSB_MAX_ISOC_FRAMES 10 #define BTUSB_INTR_RUNNING 0 @@ -2945,6 +2961,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; + if (dmi_check_system(btusb_needs_reset_resume_table)) + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; + #ifdef CONFIG_PM err = btusb_config_oob_wake(hdev); if (err) @@ -3031,12 +3050,6 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_ROME) { data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; - - /* QCA Rome devices lose their updated firmware over suspend, - * but the USB hub doesn't notice any status change. - * explicitly request a device reset on resume. - */ - interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; } #ifdef CONFIG_BT_HCIBTUSB_RTL -- cgit v1.1 From ab2f336cb7e629de74d8af06bcaf6b15e4230e19 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 25 Feb 2018 15:10:52 +0100 Subject: Bluetooth: hci_bcm: Make shutdown and device wake GPIO optional According to the devicetree binding the shutdown and device wake GPIOs are optional. Since commit 3e81a4ca51a1 ("Bluetooth: hci_bcm: Mandate presence of shutdown and device wake GPIO") this driver won't probe anymore on Raspberry Pi 3 and Zero W (no device wake GPIO connected). So fix this regression by reverting this commit partially. Fixes: 3e81a4ca51a1 ("Bluetooth: hci_bcm: Mandate presence of shutdown and device wake GPIO") Signed-off-by: Stefan Wahren Reviewed-by: Lukas Wunner Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 0438a64..6314dfb 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -922,12 +922,13 @@ static int bcm_get_resources(struct bcm_device *dev) dev->clk = devm_clk_get(dev->dev, NULL); - dev->device_wakeup = devm_gpiod_get(dev->dev, "device-wakeup", - GPIOD_OUT_LOW); + dev->device_wakeup = devm_gpiod_get_optional(dev->dev, "device-wakeup", + GPIOD_OUT_LOW); if (IS_ERR(dev->device_wakeup)) return PTR_ERR(dev->device_wakeup); - dev->shutdown = devm_gpiod_get(dev->dev, "shutdown", GPIOD_OUT_LOW); + dev->shutdown = devm_gpiod_get_optional(dev->dev, "shutdown", + GPIOD_OUT_LOW); if (IS_ERR(dev->shutdown)) return PTR_ERR(dev->shutdown); -- cgit v1.1 From 192b2e742c06af399e8eecb4a1726520bfccece8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 26 Feb 2018 13:17:07 +1100 Subject: selftests/powerpc: Skip tm-trap if transactional memory is not enabled Some processor revisions do not support transactional memory, and additionally kernel support can be disabled. In either case the tm-trap test should be skipped, otherwise it will fail with a SIGILL. Fixes: a08082f8e4e1 ("powerpc/selftests: Check endianness on trap in TM") Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/tm-trap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index 5d92c23..179d592 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -255,6 +255,8 @@ int tm_trap_test(void) struct sigaction trap_sa; + SKIP_IF(!have_htm()); + trap_sa.sa_flags = SA_SIGINFO; trap_sa.sa_sigaction = trap_signal_handler; sigaction(SIGTRAP, &trap_sa, NULL); -- cgit v1.1 From 4a3928c6f8a53fa1aed28ccba227742486e8ddcb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Feb 2018 18:50:41 -0800 Subject: Linux 4.16-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d9cf3a4..659a778 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.1 From f25a2dfc20e3a3ed8fe6618c331799dd7bd01190 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Thu, 15 Feb 2018 19:13:41 +0800 Subject: nvme-pci: Fix nvme queue cleanup if IRQ setup fails This patch fixes nvme queue cleanup if requesting an IRQ handler for the queue's vector fails. It does this by resetting the cq_vector to the uninitialized value of -1 so it is ignored for a controller reset. Signed-off-by: Jianchao Wang [changelog updates, removed misc whitespace changes] Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6fe7af0..022b070 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1452,7 +1452,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) nvmeq->cq_vector = qid - 1; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) - return result; + goto release_vector; result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) @@ -1466,9 +1466,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; release_sq: + dev->online_queues--; adapter_delete_sq(dev, qid); release_cq: adapter_delete_cq(dev, qid); + release_vector: + nvmeq->cq_vector = -1; return result; } -- cgit v1.1 From 80b79e31c4195731464d96716f15716f38a555eb Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Thu, 22 Feb 2018 17:12:17 +0100 Subject: drm/sun4i: Enable the output on the pins (tcon0) I noticed that with 4.16-rc1 LVDS output on A83T based TBS A711 tablet doesn't work (there's output but it's garbled). I compared some older patches for LVDS support with the mainlined ones and this change is missing from mainline Linux. I don't know what the register does exactly and the harcoded register value doesn't inspire much confidence that it will work in a general case, so I'm sending this RFC. This patch fixes the issue on A83T. Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180222161217.23904-1-megous@megous.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 3c15cf2..51740dd 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -335,6 +335,9 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon, regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG, SUN4I_TCON_GCTL_IOMAP_MASK, SUN4I_TCON_GCTL_IOMAP_TCON0); + + /* Enable the output on the pins */ + regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, 0xe0000000); } static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, -- cgit v1.1 From 5a3386790a172cf738194e1574f631cd43c6140a Mon Sep 17 00:00:00 2001 From: Yong Deng Date: Mon, 26 Feb 2018 10:43:52 +0800 Subject: ASoC: sun4i-i2s: Fix RX slot number of SUN8I I2S's RX slot number of SUN8I should be shifted 4 bit to left. Fixes: 7d2993811a1e ("ASoC: sun4i-i2s: Add support for H3") Signed-off-by: Yong Deng Reviewed-by: Chen-Yu Tsai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sunxi/sun4i-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index dca1143..a4aa931 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -104,7 +104,7 @@ #define SUN8I_I2S_CHAN_CFG_REG 0x30 #define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM_MASK GENMASK(6, 4) -#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan) (chan - 1) +#define SUN8I_I2S_CHAN_CFG_RX_SLOT_NUM(chan) ((chan - 1) << 4) #define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM_MASK GENMASK(2, 0) #define SUN8I_I2S_CHAN_CFG_TX_SLOT_NUM(chan) (chan - 1) -- cgit v1.1 From 79d103a565d16b1893d990b2ee5e0fe71767759f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Feb 2018 10:20:27 +0100 Subject: drm/sun4i: Protect the TCON pixel clocks Both TCON clocks are very sensitive to clock changes, since any change might lead to improper timings. Make sure our rate is never changed. Tested-by: Giulio Benetti Reviewed-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/d5224d2e81ecf73dc09f234e580ada52c00eaee3.1519204731.git-series.maxime.ripard@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 51740dd..b396011 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -260,7 +260,7 @@ static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon, const struct drm_display_mode *mode) { /* Configure the dot clock */ - clk_set_rate(tcon->dclk, mode->crtc_clock * 1000); + clk_set_rate_exclusive(tcon->dclk, mode->crtc_clock * 1000); /* Set the resolution */ regmap_write(tcon->regs, SUN4I_TCON0_BASIC0_REG, @@ -421,7 +421,7 @@ static void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, WARN_ON(!tcon->quirks->has_channel_1); /* Configure the dot clock */ - clk_set_rate(tcon->sclk1, mode->crtc_clock * 1000); + clk_set_rate_exclusive(tcon->sclk1, mode->crtc_clock * 1000); /* Adjust clock delay */ clk_delay = sun4i_tcon_get_clk_delay(mode, 1); -- cgit v1.1 From b5095f24e791c2d05da7cbb3d99e2b420b36a273 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Tue, 6 Feb 2018 00:25:16 +0800 Subject: ovl: fix ptr_ret.cocci warnings fs/overlayfs/export.c:459:10-16: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Fixes: 4b91c30a5a19 ("ovl: lookup connected ancestor of dir in inode cache") CC: Amir Goldstein Signed-off-by: Fengguang Wu Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 97a916e..87bd414 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -620,7 +620,7 @@ static struct dentry *ovl_lookup_real(struct super_block *sb, if (err == -ECHILD) { this = ovl_lookup_real_ancestor(sb, real, layer); - err = IS_ERR(this) ? PTR_ERR(this) : 0; + err = PTR_ERR_OR_ZERO(this); } if (!err) { dput(connected); -- cgit v1.1 From f287eb9013ccf199cbfa4eabd80c36fedfc15a73 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 26 Feb 2018 11:36:14 +0000 Subject: clocksource/drivers/fsl_ftm_timer: Fix error return checking The error checks on freq for a negative error return always fails because freq is unsigned and can never be negative. Fix this by making freq a signed long. Detected with Coccinelle: drivers/clocksource/fsl_ftm_timer.c:287:5-9: WARNING: Unsigned expression compared with zero: freq <= 0 drivers/clocksource/fsl_ftm_timer.c:291:5-9: WARNING: Unsigned expression compared with zero: freq <= 0 Fixes: 2529c3a33079 ("clocksource: Add Freescale FlexTimer Module (FTM) timer support") Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Daniel Lezcano Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20180226113614.3092-1-colin.king@canonical.com --- drivers/clocksource/fsl_ftm_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c index 3ee7e6f..846d18d 100644 --- a/drivers/clocksource/fsl_ftm_timer.c +++ b/drivers/clocksource/fsl_ftm_timer.c @@ -281,7 +281,7 @@ static int __init __ftm_clk_init(struct device_node *np, char *cnt_name, static unsigned long __init ftm_clk_init(struct device_node *np) { - unsigned long freq; + long freq; freq = __ftm_clk_init(np, "ftm-evt-counter-en", "ftm-evt"); if (freq <= 0) -- cgit v1.1 From 753e8abc36b2c966caea075db0c845563c8a19bf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 23 Feb 2018 18:04:48 +0000 Subject: arm64: mm: fix thinko in non-global page table attribute check The routine pgattr_change_is_safe() was extended in commit 4e6020565596 ("arm64: mm: Permit transitioning from Global to Non-Global without BBM") to permit changing the nG attribute from not set to set, but did so in a way that inadvertently disallows such changes if other permitted attribute changes take place at the same time. So update the code to take this into account. Fixes: 4e6020565596 ("arm64: mm: Permit transitioning from Global to ...") Cc: # 4.14.x- Acked-by: Mark Rutland Reviewed-by: Marc Zyngier Acked-by: Will Deacon Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 84a019f..8c704f1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -108,7 +108,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; + static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; /* creating or taking down mappings is always safe */ if (old == 0 || new == 0) @@ -118,9 +118,9 @@ static bool pgattr_change_is_safe(u64 old, u64 new) if ((old | new) & PTE_CONT) return false; - /* Transitioning from Global to Non-Global is safe */ - if (((old ^ new) == PTE_NG) && (new & PTE_NG)) - return true; + /* Transitioning from Non-Global to Global is unsafe */ + if (old & ~new & PTE_NG) + return false; return ((old ^ new) & ~mask) == 0; } -- cgit v1.1 From 71db96ddfa72671bd43cacdcc99ca178d90ba267 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 26 Feb 2018 15:36:38 +0100 Subject: ALSA: hda - Fix pincfg at resume on Lenovo T470 dock We've added a quirk to enable the recent Lenovo dock support, where it overwrites the pin configs of NID 0x17 and 19, not only updating the pin config cache. It works right after the boot, but the problem is that the pin configs are occasionally cleared when the machine goes to PM. Meanwhile the quirk writes the pin configs only at the pre-probe, so this won't be applied any longer. For addressing that issue, this patch moves the code to overwrite the pin configs into HDA_FIXUP_ACT_INIT section so that it's always applied at both probe and resume time. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195161 Fixes: 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform") Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ce28f7c..b9c93fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4997,13 +4997,14 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; + snd_hda_apply_pincfgs(codec, pincfgs); + } else if (action == HDA_FIXUP_ACT_INIT) { /* Enable DOCK device */ snd_hda_codec_write(codec, 0x17, 0, AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0); /* Enable DOCK device */ snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_CONFIG_DEFAULT_BYTES_3, 0); - snd_hda_apply_pincfgs(codec, pincfgs); } } -- cgit v1.1 From 6ef0bc6ddee1f62310877a1d53b1ea1d0d8e51a2 Mon Sep 17 00:00:00 2001 From: Zhi Zhang Date: Wed, 24 Jan 2018 21:24:33 +0800 Subject: ceph: flush dirty caps of unlinked inode ASAP Client should release unlinked inode from its cache ASAP. But client can't release inode with dirty caps. Link: http://tracker.ceph.com/issues/22886 Signed-off-by: Zhi Zhang Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 26 ++++++++++++++++++++++++++ fs/ceph/dir.c | 28 +++++----------------------- fs/ceph/super.h | 2 +- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 6582c45..0e5bd3e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3965,6 +3965,32 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode) } /* + * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it + * looks like the link count will hit 0, drop any other caps (other + * than PIN) we don't specifically want (due to the file still being + * open). + */ +int ceph_drop_caps_for_unlink(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; + + spin_lock(&ci->i_ceph_lock); + if (inode->i_nlink == 1) { + drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); + + ci->i_ceph_flags |= CEPH_I_NODELAY; + if (__ceph_caps_dirty(ci)) { + struct ceph_mds_client *mdsc = + ceph_inode_to_client(inode)->mdsc; + __cap_delay_requeue_front(mdsc, ci); + } + } + spin_unlock(&ci->i_ceph_lock); + return drop; +} + +/* * Helpers for embedding cap and dentry lease releases into mds * requests. * diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0c43468..f1d9c6c 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1003,26 +1003,6 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, } /* - * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it - * looks like the link count will hit 0, drop any other caps (other - * than PIN) we don't specifically want (due to the file still being - * open). - */ -static int drop_caps_for_unlink(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; - - spin_lock(&ci->i_ceph_lock); - if (inode->i_nlink == 1) { - drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); - ci->i_ceph_flags |= CEPH_I_NODELAY; - } - spin_unlock(&ci->i_ceph_lock); - return drop; -} - -/* * rmdir and unlink are differ only by the metadata op code */ static int ceph_unlink(struct inode *dir, struct dentry *dentry) @@ -1056,7 +1036,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; - req->r_inode_drop = drop_caps_for_unlink(inode); + req->r_inode_drop = ceph_drop_caps_for_unlink(inode); err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) d_delete(dentry); @@ -1104,8 +1084,10 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_RDCACHE on source inode (mds will lock it) */ req->r_old_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; - if (d_really_is_positive(new_dentry)) - req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry)); + if (d_really_is_positive(new_dentry)) { + req->r_inode_drop = + ceph_drop_caps_for_unlink(d_inode(new_dentry)); + } err = ceph_mdsc_do_request(mdsc, old_dir, req); if (!err && !req->r_reply_info.head->is_dentry) { /* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 21b2e5b..1c2086e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -987,7 +987,7 @@ extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); - +extern int ceph_drop_caps_for_unlink(struct inode *inode); extern int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force); extern int ceph_encode_dentry_release(void **p, struct dentry *dn, -- cgit v1.1 From 937441f3a3158d5510ca8cc78a82453f57a96365 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 6 Feb 2018 08:25:55 +0800 Subject: libceph, ceph: avoid memory leak when specifying same option several times When parsing string option, in order to avoid memory leak we need to carefully free it first in case of specifying same option several times. Signed-off-by: Chengguang Xu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 2 ++ net/ceph/ceph_common.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a62d2a9..bfc85b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -225,6 +225,7 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; break; case Opt_mds_namespace: + kfree(fsopt->mds_namespace); fsopt->mds_namespace = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, GFP_KERNEL); @@ -232,6 +233,7 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; break; case Opt_fscache_uniq: + kfree(fsopt->fscache_uniq); fsopt->fscache_uniq = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, GFP_KERNEL); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 1e492ef..4d4c822 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -418,6 +418,7 @@ ceph_parse_options(char *options, const char *dev_name, opt->flags |= CEPH_OPT_FSID; break; case Opt_name: + kfree(opt->name); opt->name = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, GFP_KERNEL); @@ -427,6 +428,9 @@ ceph_parse_options(char *options, const char *dev_name, } break; case Opt_secret: + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); if (!opt->key) { err = -ENOMEM; @@ -437,6 +441,9 @@ ceph_parse_options(char *options, const char *dev_name, goto out; break; case Opt_key: + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); if (!opt->key) { err = -ENOMEM; -- cgit v1.1 From 18106734b512664a8541026519ce4b862498b6c3 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 9 Feb 2018 20:40:59 +0800 Subject: ceph: fix dentry leak when failing to init debugfs When failing from ceph_fs_debugfs_init() in ceph_real_mount(), there is lack of dput of root_dentry and it causes slab errors, so change the calling order of ceph_fs_debugfs_init() and open_root_dentry() and do some cleanups to avoid this issue. Signed-off-by: Chengguang Xu Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bfc85b2..1c470b4 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -838,7 +838,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) int err; unsigned long started = jiffies; /* note the start time */ struct dentry *root; - int first = 0; /* first vfsmount for this super_block */ dout("mount start %p\n", fsc); mutex_lock(&fsc->client->mount_mutex); @@ -863,17 +862,17 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) path = fsc->mount_options->server_path + 1; dout("mount opening path %s\n", path); } + + err = ceph_fs_debugfs_init(fsc); + if (err < 0) + goto out; + root = open_root_dentry(fsc, path, started); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; } fsc->sb->s_root = dget(root); - first = 1; - - err = ceph_fs_debugfs_init(fsc); - if (err < 0) - goto fail; } else { root = dget(fsc->sb->s_root); } @@ -883,11 +882,6 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_unlock(&fsc->client->mount_mutex); return root; -fail: - if (first) { - dput(fsc->sb->s_root); - fsc->sb->s_root = NULL; - } out: mutex_unlock(&fsc->client->mount_mutex); return ERR_PTR(err); -- cgit v1.1 From d1fe96c0e4de78ba0cd336ea3df3b850d06b9b9a Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 2 Feb 2018 10:23:24 -0500 Subject: ovl: redirect_dir=nofollow should not follow redirect for opaque lower redirect_dir=nofollow should not follow a redirect. But in a specific configuration it can still follow it. For example try this. $ mkdir -p lower0 lower1/foo upper work merged $ touch lower1/foo/lower-file.txt $ setfattr -n "trusted.overlay.opaque" -v "y" lower1/foo $ mount -t overlay -o lowerdir=lower1:lower0,workdir=work,upperdir=upper,redirect_dir=on none merged $ cd merged $ mv foo foo-renamed $ umount merged # mount again. This time with redirect_dir=nofollow $ mount -t overlay -o lowerdir=lower1:lower0,workdir=work,upperdir=upper,redirect_dir=nofollow none merged $ ls merged/foo-renamed/ # This lists lower-file.txt, while it should not have. Basically, we are doing redirect check after we check for d.stop. And if this is not last lower, and we find an opaque lower, d.stop will be set. ovl_lookup_single() if (!d->last && ovl_is_opaquedir(this)) { d->stop = d->opaque = true; goto out; } To fix this, first check redirect is allowed. And after that check if d.stop has been set or not. Signed-off-by: Vivek Goyal Fixes: 438c84c2f0c7 ("ovl: don't follow redirects if redirect_dir=off") Cc: #v4.15 Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index de3e6da..70fcfcc 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -913,9 +913,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, stack[ctr].layer = lower.layer; ctr++; - if (d.stop) - break; - /* * Following redirects can have security consequences: it's like * a symlink into the lower layer without the permission checks. @@ -933,6 +930,9 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, goto out_put; } + if (d.stop) + break; + if (d.redirect && d.redirect[0] == '/' && poe != roe) { poe = roe; /* Find the current layer on the root dentry */ -- cgit v1.1 From d9c10e5b8863cfb6886d1640386455075c6e979d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 12:51:43 +0100 Subject: direct-io: Fix sleep in atomic due to sync AIO Commit e864f39569f4 "fs: add RWF_DSYNC aand RWF_SYNC" added additional way for direct IO to become synchronous and thus trigger fsync from the IO completion handler. Then commit 9830f4be159b "fs: Use RWF_* flags for AIO operations" allowed these flags to be set for AIO as well. However that commit forgot to update the condition checking whether the IO completion handling should be defered to a workqueue and thus AIO DIO with RWF_[D]SYNC set will call fsync() from IRQ context resulting in sleep in atomic. Fix the problem by checking directly iocb flags (the same way as it is done in dio_complete()) instead of checking all conditions that could lead to IO being synchronous. CC: Christoph Hellwig CC: Goldwyn Rodrigues CC: stable@vger.kernel.org Reported-by: Mark Rutland Tested-by: Mark Rutland Fixes: 9830f4be159b29399d107bffb99e0132bc5aedd4 Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/direct-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index a0ca9e4..1357ef5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1274,8 +1274,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, */ if (dio->is_async && iov_iter_rw(iter) == WRITE) { retval = 0; - if ((iocb->ki_filp->f_flags & O_DSYNC) || - IS_SYNC(iocb->ki_filp->f_mapping->host)) + if (iocb->ki_flags & IOCB_DSYNC) retval = dio_set_defer_completion(dio); else if (!dio->inode->i_sb->s_dio_done_wq) { /* -- cgit v1.1 From 68d2059be660944152ba667e43c3b4ec225974bc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Feb 2018 17:22:59 +0000 Subject: xen/pvcalls: fix null pointer dereference on map->sock Currently if map is null then a potential null pointer deference occurs when calling sock_release on map->sock. I believe the actual intention was to call sock_release on sock instead. Fix this. Fixes: 5db4d286a8ef ("xen/pvcalls: implement connect command") Signed-off-by: Colin Ian King Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/pvcalls-back.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 156e5ae..b1092fb 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -416,7 +416,7 @@ static int pvcalls_back_connect(struct xenbus_device *dev, sock); if (!map) { ret = -EFAULT; - sock_release(map->sock); + sock_release(sock); } out: -- cgit v1.1 From 7dbdd16a79a9d27d7dca0a49029fc8966dcfecc5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 26 Feb 2018 11:30:40 -0500 Subject: media: vb2: Makefile: place vb2-trace together with vb2-core We don't want a separate module for vb2-trace. That fixes this warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/common/videobuf2/vb2-trace.o When building as module. While here, add a SPDX header. Reported-by: Stephen Rothwell Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile index 067badb..77bebe8b 100644 --- a/drivers/media/common/videobuf2/Makefile +++ b/drivers/media/common/videobuf2/Makefile @@ -1,11 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +videobuf2-common-objs := videobuf2-core.o -obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-core.o +ifeq ($(CONFIG_TRACEPOINTS),y) + videobuf2-common-objs += vb2-trace.o +endif + +obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-common.o obj-$(CONFIG_VIDEOBUF2_V4L2) += videobuf2-v4l2.o obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o obj-$(CONFIG_VIDEOBUF2_DVB) += videobuf2-dvb.o -ifeq ($(CONFIG_TRACEPOINTS),y) - obj-$(CONFIG_VIDEOBUF2_CORE) += vb2-trace.o -endif -- cgit v1.1 From ab4af60534107c55b00fa462eca0385dcef92384 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 20 Feb 2018 11:17:28 +0100 Subject: riscv/barrier: Define __smp_{mb,rmb,wmb} Introduce __smp_{mb,rmb,wmb}, and rely on the generic definitions for smp_{mb,rmb,wmb}. A first consequence is that smp_{mb,rmb,wmb} map to a compiler barrier on !SMP (while their definition remains unchanged on SMP). As a further consequence, smp_load_acquire and smp_store_release have "fence rw,rw" instead of "fence iorw,iorw". Signed-off-by: Andrea Parri Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index c0319cb..5510366 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -34,9 +34,9 @@ #define wmb() RISCV_FENCE(ow,ow) /* These barriers do not need to enforce ordering on devices, just memory. */ -#define smp_mb() RISCV_FENCE(rw,rw) -#define smp_rmb() RISCV_FENCE(r,r) -#define smp_wmb() RISCV_FENCE(w,w) +#define __smp_mb() RISCV_FENCE(rw,rw) +#define __smp_rmb() RISCV_FENCE(r,r) +#define __smp_wmb() RISCV_FENCE(w,w) /* * This is a very specific barrier: it's currently only used in two places in -- cgit v1.1 From d52987b524ccd2e2165ddad9bcc087a6c3f5332c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 13:01:37 +0100 Subject: genhd: Fix leaked module reference for NVME devices Commit 8ddcd653257c "block: introduce GENHD_FL_HIDDEN" added handling of hidden devices to get_gendisk() but forgot to drop module reference which is also acquired by get_disk(). Drop the reference as necessary. Arguably the function naming here is misleading as put_disk() is *not* the counterpart of get_disk() but let's fix that in the follow up commit since that will be more intrusive. Fixes: 8ddcd653257c18a669fcb75ee42c37054908e0d6 CC: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/genhd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index 88a53c1..5098bff 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -817,7 +817,10 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) { + struct module *owner = disk->fops->owner; + put_disk(disk); + module_put(owner); disk = NULL; } return disk; -- cgit v1.1 From 3079c22ea815775837a4f389ce2f7e1e7b202e09 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 13:01:38 +0100 Subject: genhd: Rename get_disk() to get_disk_and_module() Rename get_disk() to get_disk_and_module() to make sure what the function does. It's not a great name but at least it is now clear that put_disk() is not it's counterpart. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/genhd.c | 10 ++++------ drivers/block/amiflop.c | 2 +- drivers/block/ataflop.c | 2 +- drivers/block/brd.c | 2 +- drivers/block/floppy.c | 2 +- drivers/block/loop.c | 2 +- drivers/block/swim.c | 2 +- drivers/block/z2ram.c | 2 +- drivers/ide/ide-probe.c | 2 +- include/linux/genhd.h | 2 +- 10 files changed, 13 insertions(+), 15 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 5098bff..21b2843 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -547,7 +547,7 @@ static int exact_lock(dev_t devt, void *data) { struct gendisk *p = data; - if (!get_disk(p)) + if (!get_disk_and_module(p)) return -1; return 0; } @@ -809,7 +809,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) spin_lock_bh(&ext_devt_lock); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - if (part && get_disk(part_to_disk(part))) { + if (part && get_disk_and_module(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); } @@ -1456,7 +1456,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) } EXPORT_SYMBOL(__alloc_disk_node); -struct kobject *get_disk(struct gendisk *disk) +struct kobject *get_disk_and_module(struct gendisk *disk) { struct module *owner; struct kobject *kobj; @@ -1474,15 +1474,13 @@ struct kobject *get_disk(struct gendisk *disk) return kobj; } - -EXPORT_SYMBOL(get_disk); +EXPORT_SYMBOL(get_disk_and_module); void put_disk(struct gendisk *disk) { if (disk) kobject_put(&disk_to_dev(disk)->kobj); } - EXPORT_SYMBOL(put_disk); static void set_disk_ro_uevent(struct gendisk *gd, int ro) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index e5aa62f..3aaf6af 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1758,7 +1758,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) if (unit[drive].type->code == FD_NODRIVE) return NULL; *part = 0; - return get_disk(unit[drive].gendisk); + return get_disk_and_module(unit[drive].gendisk); } static int __init amiga_floppy_probe(struct platform_device *pdev) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 8bc3b9f..dfb2c26 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1917,7 +1917,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS) return NULL; *part = 0; - return get_disk(unit[drive].disk); + return get_disk_and_module(unit[drive].disk); } static int __init atari_floppy_init (void) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 8028a3a..deea78e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -456,7 +456,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) mutex_lock(&brd_devices_mutex); brd = brd_init_one(MINOR(dev) / max_part, &new); - kobj = brd ? get_disk(brd->brd_disk) : NULL; + kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL; mutex_unlock(&brd_devices_mutex); if (new) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index eae484a..8ec7235 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4505,7 +4505,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) return NULL; *part = 0; - return get_disk(disks[drive]); + return get_disk_and_module(disks[drive]); } static int __init do_floppy_init(void) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d5fe720..87855b5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1922,7 +1922,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) if (err < 0) kobj = NULL; else - kobj = get_disk(lo->lo_disk); + kobj = get_disk_and_module(lo->lo_disk); mutex_unlock(&loop_index_mutex); *part = 0; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 84434d3..64e066e 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -799,7 +799,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data) return NULL; *part = 0; - return get_disk(swd->unit[drive].disk); + return get_disk_and_module(swd->unit[drive].disk); } static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 41c95c9..8f9130a 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -332,7 +332,7 @@ static const struct block_device_operations z2_fops = static struct kobject *z2_find(dev_t dev, int *part, void *data) { *part = 0; - return get_disk(z2ram_gendisk); + return get_disk_and_module(z2ram_gendisk); } static struct request_queue *z2_queue; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 17fd55a..caa20eb 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -928,7 +928,7 @@ static int exact_lock(dev_t dev, void *data) { struct gendisk *p = data; - if (!get_disk(p)) + if (!get_disk_and_module(p)) return -1; return 0; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5e35310..8e11b93 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -600,7 +600,7 @@ extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -extern struct kobject *get_disk(struct gendisk *disk); +extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, struct module *module, -- cgit v1.1 From 9df6c29912315186fef1c79cc15b758ace84175b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 13:01:39 +0100 Subject: genhd: Add helper put_disk_and_module() Add a proper counterpart to get_disk_and_module() - put_disk_and_module(). Currently it is opencoded in several places. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 11 ++--------- block/genhd.c | 20 ++++++++++++++++---- fs/block_dev.c | 19 +++++-------------- include/linux/genhd.h | 1 + 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4117524..c2033a2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -812,7 +812,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct gendisk *disk; struct request_queue *q; struct blkcg_gq *blkg; - struct module *owner; unsigned int major, minor; int key_len, part, ret; char *body; @@ -904,9 +903,7 @@ fail_unlock: spin_unlock_irq(q->queue_lock); rcu_read_unlock(); fail: - owner = disk->fops->owner; - put_disk(disk); - module_put(owner); + put_disk_and_module(disk); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue @@ -931,13 +928,9 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); void blkg_conf_finish(struct blkg_conf_ctx *ctx) __releases(ctx->disk->queue->queue_lock) __releases(rcu) { - struct module *owner; - spin_unlock_irq(ctx->disk->queue->queue_lock); rcu_read_unlock(); - owner = ctx->disk->fops->owner; - put_disk(ctx->disk); - module_put(owner); + put_disk_and_module(ctx->disk); } EXPORT_SYMBOL_GPL(blkg_conf_finish); diff --git a/block/genhd.c b/block/genhd.c index 21b2843..4c05904 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -817,10 +817,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); + put_disk_and_module(disk); disk = NULL; } return disk; @@ -1483,6 +1480,21 @@ void put_disk(struct gendisk *disk) } EXPORT_SYMBOL(put_disk); +/* + * This is a counterpart of get_disk_and_module() and thus also of + * get_gendisk(). + */ +void put_disk_and_module(struct gendisk *disk) +{ + if (disk) { + struct module *owner = disk->fops->owner; + + put_disk(disk); + module_put(owner); + } +} +EXPORT_SYMBOL(put_disk_and_module); + static void set_disk_ro_uevent(struct gendisk *gd, int ro) { char event[] = "DISK_RO=1"; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4a181fc..1dbbf84 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1111,8 +1111,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, else whole = bdgrab(bdev); - module_put(disk->fops->owner); - put_disk(disk); + put_disk_and_module(disk); if (!whole) return ERR_PTR(-ENOMEM); @@ -1407,7 +1406,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct module *owner; int ret; int partno; int perm = 0; @@ -1433,7 +1431,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out; - owner = disk->fops->owner; disk_block_events(disk); mutex_lock_nested(&bdev->bd_mutex, for_part); @@ -1463,8 +1460,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_queue = NULL; mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); - put_disk(disk); - module_put(owner); + put_disk_and_module(disk); goto restart; } } @@ -1525,8 +1521,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_unlock_bdev; } /* only one opener holds refs to the module and disk */ - put_disk(disk); - module_put(owner); + put_disk_and_module(disk); } bdev->bd_openers++; if (for_part) @@ -1546,8 +1541,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); - put_disk(disk); - module_put(owner); + put_disk_and_module(disk); out: bdput(bdev); @@ -1770,8 +1764,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) disk->fops->release(disk, mode); } if (!bdev->bd_openers) { - struct module *owner = disk->fops->owner; - disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; @@ -1779,8 +1771,7 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) victim = bdev->bd_contains; bdev->bd_contains = NULL; - put_disk(disk); - module_put(owner); + put_disk_and_module(disk); } mutex_unlock(&bdev->bd_mutex); bdput(bdev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 8e11b93..7f5906f 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -602,6 +602,7 @@ extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); +extern void put_disk_and_module(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, struct module *module, struct kobject *(*probe)(dev_t, int *, void *), -- cgit v1.1 From 897366537fb65e87755b822360c230354c3fc73b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 13:01:40 +0100 Subject: genhd: Fix use after free in __blkdev_get() When two blkdev_open() calls race with device removal and recreation, __blkdev_get() can use looked up gendisk after it is freed: CPU0 CPU1 CPU2 del_gendisk(disk); bdev_unhash_inode(inode); blkdev_open() blkdev_open() bdev = bd_acquire(inode); - creates and returns new inode bdev = bd_acquire(inode); - returns the same inode __blkdev_get(devt) __blkdev_get(devt) disk = get_gendisk(devt); - got structure of device going away disk = get_gendisk(devt); - got new device structure if (!bdev->bd_openers) { does the first open } if (!bdev->bd_openers) - false } else { put_disk_and_module(disk) - remember this was old device - this was last ref and disk is now freed } disk_unblock_events(disk); -> oops Fix the problem by making sure we drop reference to disk in __blkdev_get() only after we are really done with it. Reported-by: Hou Tao Tested-by: Hou Tao Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/block_dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 1dbbf84..fe41a76 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1409,6 +1409,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) int ret; int partno; int perm = 0; + bool first_open = false; if (mode & FMODE_READ) perm |= MAY_READ; @@ -1435,6 +1436,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_block_events(disk); mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { + first_open = true; bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; @@ -1520,14 +1522,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (ret) goto out_unlock_bdev; } - /* only one opener holds refs to the module and disk */ - put_disk_and_module(disk); } bdev->bd_openers++; if (for_part) bdev->bd_part_count++; mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); + /* only one opener holds refs to the module and disk */ + if (!first_open) + put_disk_and_module(disk); return 0; out_clear: -- cgit v1.1 From 56c0908c855afbb2bdda17c15d2879949a091ad3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 13:01:41 +0100 Subject: genhd: Fix BUG in blkdev_open() When two blkdev_open() calls for a partition race with device removal and recreation, we can hit BUG_ON(!bd_may_claim(bdev, whole, holder)) in blkdev_open(). The race can happen as follows: CPU0 CPU1 CPU2 del_gendisk() bdev_unhash_inode(part1); blkdev_open(part1, O_EXCL) blkdev_open(part1, O_EXCL) bdev = bd_acquire() bdev = bd_acquire() blkdev_get(bdev) bd_start_claiming(bdev) - finds old inode 'whole' bd_prepare_to_claim() -> 0 bdev_unhash_inode(whole); blkdev_get(bdev); bd_start_claiming(bdev) - finds new inode 'whole' bd_prepare_to_claim() - this also succeeds as we have different 'whole' here... - bad things happen now as we have two exclusive openers of the same bdev The problem here is that block device opens can see various intermediate states while gendisk is shutting down and then being recreated. We fix the problem by introducing new lookup_sem in gendisk that synchronizes gendisk deletion with get_gendisk() and furthermore by making sure that get_gendisk() does not return gendisk that is being (or has been) deleted. This makes sure that once we ever manage to look up newly created bdev inode, we are also guaranteed that following get_gendisk() will either return failure (and we fail open) or it returns gendisk for the new device and following bdget_disk() will return new bdev inode (i.e., blkdev_open() follows the path as if it is completely run after new device is created). Reported-and-analyzed-by: Hou Tao Tested-by: Hou Tao Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/genhd.c | 21 ++++++++++++++++++++- include/linux/genhd.h | 1 + 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index 4c05904..9656f9e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -717,6 +717,11 @@ void del_gendisk(struct gendisk *disk) blk_integrity_del(disk); disk_del_events(disk); + /* + * Block lookups of the disk until all bdevs are unhashed and the + * disk is marked as dead (GENHD_FL_UP cleared). + */ + down_write(&disk->lookup_sem); /* invalidate stuff */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); @@ -731,6 +736,7 @@ void del_gendisk(struct gendisk *disk) bdev_unhash_inode(disk_devt(disk)); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; + up_write(&disk->lookup_sem); if (!(disk->flags & GENHD_FL_HIDDEN)) sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -816,9 +822,21 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) spin_unlock_bh(&ext_devt_lock); } - if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) { + if (!disk) + return NULL; + + /* + * Synchronize with del_gendisk() to not return disk that is being + * destroyed. + */ + down_read(&disk->lookup_sem); + if (unlikely((disk->flags & GENHD_FL_HIDDEN) || + !(disk->flags & GENHD_FL_UP))) { + up_read(&disk->lookup_sem); put_disk_and_module(disk); disk = NULL; + } else { + up_read(&disk->lookup_sem); } return disk; } @@ -1418,6 +1436,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) kfree(disk); return NULL; } + init_rwsem(&disk->lookup_sem); disk->node_id = node_id; if (disk_expand_part_tbl(disk, 0)) { free_part_stats(&disk->part0); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7f5906f..c826b0b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -198,6 +198,7 @@ struct gendisk { void *private_data; int flags; + struct rw_semaphore lookup_sem; struct kobject *slave_dir; struct timer_rand_state *random; -- cgit v1.1 From 560e7cb2f3c7f09bbfb36cd0b900e24fddd20282 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 26 Feb 2018 13:01:42 +0100 Subject: blockdev: Avoid two active bdev inodes for one device When blkdev_open() races with device removal and creation it can happen that unhashed bdev inode gets associated with newly created gendisk like: CPU0 CPU1 blkdev_open() bdev = bd_acquire() del_gendisk() bdev_unhash_inode(bdev); remove device create new device with the same number __blkdev_get() disk = get_gendisk() - gets reference to gendisk of the new device Now another blkdev_open() will not find original 'bdev' as it got unhashed, create a new one and associate it with the same 'disk' at which point problems start as we have two independent page caches for one device. Fix the problem by verifying that the bdev inode didn't get unhashed before we acquired gendisk reference. That way we make sure gendisk can get associated only with visible bdev inodes. Tested-by: Hou Tao Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/block_dev.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index fe41a76..fe09ef9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1058,6 +1058,27 @@ retry: return 0; } +static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) +{ + struct gendisk *disk = get_gendisk(bdev->bd_dev, partno); + + if (!disk) + return NULL; + /* + * Now that we hold gendisk reference we make sure bdev we looked up is + * not stale. If it is, it means device got removed and created before + * we looked up gendisk and we fail open in such case. Associating + * unhashed bdev with newly created gendisk could lead to two bdevs + * (and thus two independent caches) being associated with one device + * which is bad. + */ + if (inode_unhashed(bdev->bd_inode)) { + put_disk_and_module(disk); + return NULL; + } + return disk; +} + /** * bd_start_claiming - start claiming a block device * @bdev: block device of interest @@ -1094,7 +1115,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, * @bdev might not have been initialized properly yet, look up * and grab the outer block device the hard way. */ - disk = get_gendisk(bdev->bd_dev, &partno); + disk = bdev_get_gendisk(bdev, &partno); if (!disk) return ERR_PTR(-ENXIO); @@ -1429,7 +1450,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) restart: ret = -ENXIO; - disk = get_gendisk(bdev->bd_dev, &partno); + disk = bdev_get_gendisk(bdev, &partno); if (!disk) goto out; -- cgit v1.1 From 76a6abdb2513ad4ea0ded55d2c66160491f2e848 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 23 Feb 2018 17:45:43 +0000 Subject: l2tp: don't use inet_shutdown on tunnel destroy Previously, if a tunnel was closed, we called inet_shutdown to mark the socket as unconnected such that userspace would get errors and then close the socket. This could race with userspace closing the socket. Instead, leave userspace to close the socket in its own time (our tunnel will be detached anyway). BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 IP: __lock_acquire+0x263/0x1630 PGD 0 P4D 0 Oops: 0000 [#1] SMP KASAN Modules linked in: CPU: 2 PID: 42 Comm: kworker/u8:2 Not tainted 4.15.0-rc7+ #129 Workqueue: l2tp l2tp_tunnel_del_work RIP: 0010:__lock_acquire+0x263/0x1630 RSP: 0018:ffff88001a37fc70 EFLAGS: 00010002 RAX: 0000000000000001 RBX: 0000000000000088 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88001a37fd18 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 00000000000076fd R12: 00000000000000a0 R13: ffff88001a3722c0 R14: 0000000000000001 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88001ad00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000a0 CR3: 000000001730b000 CR4: 00000000000006e0 Call Trace: ? __lock_acquire+0xc77/0x1630 ? console_trylock+0x11/0xa0 lock_acquire+0x117/0x230 ? lock_sock_nested+0x3a/0xa0 _raw_spin_lock_bh+0x3a/0x50 ? lock_sock_nested+0x3a/0xa0 lock_sock_nested+0x3a/0xa0 inet_shutdown+0x33/0xf0 l2tp_tunnel_del_work+0x60/0xef process_one_work+0x1ea/0x5f0 ? process_one_work+0x162/0x5f0 worker_thread+0x48/0x3e0 ? trace_hardirqs_on+0xd/0x10 kthread+0x108/0x140 ? process_one_work+0x5f0/0x5f0 ? kthread_stop+0x2a0/0x2a0 ret_from_fork+0x24/0x30 Code: 00 41 81 ff ff 1f 00 00 0f 87 7a 13 00 00 45 85 f6 49 8b 85 68 08 00 00 0f 84 ae 03 00 00 c7 44 24 18 00 00 00 00 e9 f0 00 00 00 <49> 81 3c 24 80 93 3f 83 b8 00 00 00 00 44 0f 44 c0 83 fe 01 0f RIP: __lock_acquire+0x263/0x1630 RSP: ffff88001a37fc70 CR2: 00000000000000a0 Fixes: 309795f4bec2d ("l2tp: Add netlink control API for L2TP") Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 194a748..9cd2a99 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1327,17 +1327,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work) sock = sk->sk_socket; - /* If the tunnel socket was created by userspace, then go through the - * inet layer to shut the socket down, and let userspace close it. - * Otherwise, if we created the socket directly within the kernel, use + /* If the tunnel socket was created within the kernel, use * the sk API to release it here. - * In either case the tunnel resources are freed in the socket - * destructor when the tunnel socket goes away. */ - if (tunnel->fd >= 0) { - if (sock) - inet_shutdown(sock, 2); - } else { + if (tunnel->fd < 0) { if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); -- cgit v1.1 From 225eb26489d05c679a4c4197ffcb81c81e9dcaf4 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 23 Feb 2018 17:45:44 +0000 Subject: l2tp: don't use inet_shutdown on ppp session destroy Previously, if a ppp session was closed, we called inet_shutdown to mark the socket as unconnected such that userspace would get errors and then close the socket. This could race with userspace closing the socket. Instead, leave userspace to close the socket in its own time (our session will be detached anyway). BUG: KASAN: use-after-free in inet_shutdown+0x5d/0x1c0 Read of size 4 at addr ffff880010ea3ac0 by task syzbot_347bd5ac/8296 CPU: 3 PID: 8296 Comm: syzbot_347bd5ac Not tainted 4.16.0-rc1+ #91 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack+0x101/0x157 ? inet_shutdown+0x5d/0x1c0 print_address_description+0x78/0x260 ? inet_shutdown+0x5d/0x1c0 kasan_report+0x240/0x360 __asan_load4+0x78/0x80 inet_shutdown+0x5d/0x1c0 ? pppol2tp_show+0x80/0x80 pppol2tp_session_close+0x68/0xb0 l2tp_tunnel_closeall+0x199/0x210 ? udp_v6_flush_pending_frames+0x90/0x90 l2tp_udp_encap_destroy+0x6b/0xc0 ? l2tp_tunnel_del_work+0x2e0/0x2e0 udpv6_destroy_sock+0x8c/0x90 sk_common_release+0x47/0x190 udp_lib_close+0x15/0x20 inet_release+0x85/0xd0 inet6_release+0x43/0x60 sock_release+0x53/0x100 ? sock_alloc_file+0x260/0x260 sock_close+0x1b/0x20 __fput+0x19f/0x380 ____fput+0x1a/0x20 task_work_run+0xd2/0x110 exit_to_usermode_loop+0x18d/0x190 do_syscall_64+0x389/0x3b0 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x7fe240a45259 RSP: 002b:00007fe241132df8 EFLAGS: 00000297 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007fe240a45259 RDX: 00007fe240a45259 RSI: 0000000000000000 RDI: 00000000000000a5 RBP: 00007fe241132e20 R08: 00007fe241133700 R09: 0000000000000000 R10: 00007fe241133700 R11: 0000000000000297 R12: 0000000000000000 R13: 00007ffc49aff84f R14: 0000000000000000 R15: 00007fe241141040 Allocated by task 8331: save_stack+0x43/0xd0 kasan_kmalloc+0xad/0xe0 kasan_slab_alloc+0x12/0x20 kmem_cache_alloc+0x144/0x3e0 sock_alloc_inode+0x22/0x130 alloc_inode+0x3d/0xf0 new_inode_pseudo+0x1c/0x90 sock_alloc+0x30/0x110 __sock_create+0xaa/0x4c0 SyS_socket+0xbe/0x130 do_syscall_64+0x128/0x3b0 entry_SYSCALL_64_after_hwframe+0x26/0x9b Freed by task 8314: save_stack+0x43/0xd0 __kasan_slab_free+0x11a/0x170 kasan_slab_free+0xe/0x10 kmem_cache_free+0x88/0x2b0 sock_destroy_inode+0x49/0x50 destroy_inode+0x77/0xb0 evict+0x285/0x340 iput+0x429/0x530 dentry_unlink_inode+0x28c/0x2c0 __dentry_kill+0x1e3/0x2f0 dput.part.21+0x500/0x560 dput+0x24/0x30 __fput+0x2aa/0x380 ____fput+0x1a/0x20 task_work_run+0xd2/0x110 exit_to_usermode_loop+0x18d/0x190 do_syscall_64+0x389/0x3b0 entry_SYSCALL_64_after_hwframe+0x26/0x9b Fixes: fd558d186df2c ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 59f246d..2d2955e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -420,16 +420,6 @@ abort: */ static void pppol2tp_session_close(struct l2tp_session *session) { - struct sock *sk; - - BUG_ON(session->magic != L2TP_SESSION_MAGIC); - - sk = pppol2tp_session_get_sock(session); - if (sk) { - if (sk->sk_socket) - inet_shutdown(sk->sk_socket, SEND_SHUTDOWN); - sock_put(sk); - } } /* Really kill the session socket. (Called from sock_put() if -- cgit v1.1 From d00fa9adc528c1b0e64d532556764852df8bd7b9 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 23 Feb 2018 17:45:45 +0000 Subject: l2tp: fix races with tunnel socket close The tunnel socket tunnel->sock (struct sock) is accessed when preparing a new ppp session on a tunnel at pppol2tp_session_init. If the socket is closed by a thread while another is creating a new session, the threads race. In pppol2tp_connect, the tunnel object may be created if the pppol2tp socket is associated with the special session_id 0 and the tunnel socket is looked up using the provided fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel socket to prevent it being destroyed during pppol2tp_connect since this may itself may race with the socket being destroyed. Doing sockfd_lookup in pppol2tp_connect isn't sufficient to prevent tunnel->sock going away either because a given tunnel socket fd may be reused between calls to pppol2tp_connect. Instead, have l2tp_tunnel_create sock_hold the tunnel socket before it does sockfd_put. This ensures that the tunnel's socket is always extant while the tunnel object exists. Hold a ref on the socket until the tunnel is destroyed and ensure that all tunnel destroy paths go through a common function (l2tp_tunnel_delete) since this will do the final sock_put to release the tunnel socket. Since the tunnel's socket is now guaranteed to exist if the tunnel exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel to derive the tunnel from the socket since this is always sk_user_data. Also, sessions no longer sock_hold the tunnel socket since sessions already hold a tunnel ref and the tunnel sock will not be freed until the tunnel is freed. Removing these sock_holds in l2tp_session_register avoids a possible sock leak in the pppol2tp_connect error path if l2tp_session_register succeeds but attaching a ppp channel fails. The pppol2tp_connect error path could have been fixed instead and have the sock ref dropped when the session is freed, but doing a sock_put of the tunnel socket when the session is freed would require a new session_free callback. It is simpler to just remove the sock_hold of the tunnel socket in l2tp_session_register, now that the tunnel socket lifetime is guaranteed. Finally, some init code in l2tp_tunnel_create is reordered to ensure that the new tunnel object's refcount is set and the tunnel socket ref is taken before the tunnel socket destructor callbacks are set. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:pppol2tp_session_init+0x1d6/0x500 RSP: 0018:ffff88001377fb40 EFLAGS: 00010212 RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228 RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002 R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000 R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76 FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0 Call Trace: pppol2tp_connect+0xd18/0x13c0 ? pppol2tp_session_create+0x170/0x170 ? __might_fault+0x115/0x1d0 ? lock_downgrade+0x860/0x860 ? __might_fault+0xe5/0x1d0 ? security_socket_connect+0x8e/0xc0 SYSC_connect+0x1b6/0x310 ? SYSC_bind+0x280/0x280 ? __do_page_fault+0x5d1/0xca0 ? up_read+0x1f/0x40 ? __do_page_fault+0x3c8/0xca0 SyS_connect+0x29/0x30 ? SyS_accept+0x40/0x40 do_syscall_64+0x1e0/0x730 ? trace_hardirqs_off_thunk+0x1a/0x1c entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7ffb3e376259 RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259 RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004 RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60 R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000 Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16 Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close") Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 117 +++++++++++++++------------------------------------ net/l2tp/l2tp_core.h | 23 +--------- net/l2tp/l2tp_ip.c | 10 ++--- net/l2tp/l2tp_ip6.c | 8 ++-- 4 files changed, 42 insertions(+), 116 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 9cd2a99..0fa53ea 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -136,51 +136,6 @@ l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) } -/* Lookup the tunnel socket, possibly involving the fs code if the socket is - * owned by userspace. A struct sock returned from this function must be - * released using l2tp_tunnel_sock_put once you're done with it. - */ -static struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) -{ - int err = 0; - struct socket *sock = NULL; - struct sock *sk = NULL; - - if (!tunnel) - goto out; - - if (tunnel->fd >= 0) { - /* Socket is owned by userspace, who might be in the process - * of closing it. Look the socket up using the fd to ensure - * consistency. - */ - sock = sockfd_lookup(tunnel->fd, &err); - if (sock) - sk = sock->sk; - } else { - /* Socket is owned by kernelspace */ - sk = tunnel->sock; - sock_hold(sk); - } - -out: - return sk; -} - -/* Drop a reference to a tunnel socket obtained via. l2tp_tunnel_sock_put */ -static void l2tp_tunnel_sock_put(struct sock *sk) -{ - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); - if (tunnel) { - if (tunnel->fd >= 0) { - /* Socket is owned by userspace */ - sockfd_put(sk->sk_socket); - } - sock_put(sk); - } - sock_put(sk); -} - /* Session hash list. * The session_id SHOULD be random according to RFC2661, but several * L2TP implementations (Cisco and Microsoft) use incrementing @@ -193,6 +148,13 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } +void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) +{ + sock_put(tunnel->sock); + /* the tunnel is freed in the socket destructor */ +} +EXPORT_SYMBOL(l2tp_tunnel_free); + /* Lookup a tunnel. A new reference is held on the returned tunnel. */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) { @@ -345,13 +307,11 @@ int l2tp_session_register(struct l2tp_session *session, } l2tp_tunnel_inc_refcount(tunnel); - sock_hold(tunnel->sock); hlist_add_head_rcu(&session->global_hlist, g_head); spin_unlock_bh(&pn->l2tp_session_hlist_lock); } else { l2tp_tunnel_inc_refcount(tunnel); - sock_hold(tunnel->sock); } hlist_add_head(&session->hlist, head); @@ -969,7 +929,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_sock_to_tunnel(sk); + tunnel = l2tp_tunnel(sk); if (tunnel == NULL) goto pass_up; @@ -977,13 +937,10 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) tunnel->name, skb->len); if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook)) - goto pass_up_put; + goto pass_up; - sock_put(sk); return 0; -pass_up_put: - sock_put(sk); pass_up: return 1; } @@ -1214,7 +1171,6 @@ static void l2tp_tunnel_destruct(struct sock *sk) l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); - /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: @@ -1237,12 +1193,11 @@ static void l2tp_tunnel_destruct(struct sock *sk) list_del_rcu(&tunnel->list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - tunnel->sock = NULL; - l2tp_tunnel_dec_refcount(tunnel); - /* Call the original destructor */ if (sk->sk_destruct) (*sk->sk_destruct)(sk); + + kfree_rcu(tunnel, rcu); end: return; } @@ -1303,30 +1258,22 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); - if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); - } + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); + + if (tunnel) + l2tp_tunnel_delete(tunnel); } /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { - struct l2tp_tunnel *tunnel = NULL; - struct socket *sock = NULL; - struct sock *sk = NULL; - - tunnel = container_of(work, struct l2tp_tunnel, del_work); + struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel, + del_work); + struct sock *sk = tunnel->sock; + struct socket *sock = sk->sk_socket; l2tp_tunnel_closeall(tunnel); - sk = l2tp_tunnel_sock_lookup(tunnel); - if (!sk) - goto out; - - sock = sk->sk_socket; - /* If the tunnel socket was created within the kernel, use * the sk API to release it here. */ @@ -1337,8 +1284,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } } - l2tp_tunnel_sock_put(sk); -out: + /* drop initial ref */ + l2tp_tunnel_dec_refcount(tunnel); + + /* drop workqueue ref */ l2tp_tunnel_dec_refcount(tunnel); } @@ -1591,13 +1540,22 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 sk->sk_user_data = tunnel; } + /* Bump the reference count. The tunnel context is deleted + * only when this drops to zero. A reference is also held on + * the tunnel socket to ensure that it is not released while + * the tunnel is extant. Must be done before sk_destruct is + * set. + */ + refcount_set(&tunnel->ref_count, 1); + sock_hold(sk); + tunnel->sock = sk; + tunnel->fd = fd; + /* Hook on the tunnel socket destructor so that we can cleanup * if the tunnel socket goes away. */ tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; - tunnel->sock = sk; - tunnel->fd = fd; lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); sk->sk_allocation = GFP_ATOMIC; @@ -1607,11 +1565,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - - /* Bump the reference count. The tunnel context is deleted - * only when this drops to zero. Must be done before list insertion - */ - refcount_set(&tunnel->ref_count, 1); spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); @@ -1652,8 +1605,6 @@ void l2tp_session_free(struct l2tp_session *session) if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - sock_put(tunnel->sock); - session->tunnel = NULL; l2tp_tunnel_dec_refcount(tunnel); } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9bbee90..a1aa955 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -214,27 +214,8 @@ static inline void *l2tp_session_priv(struct l2tp_session *session) return &session->priv[0]; } -static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk) -{ - struct l2tp_tunnel *tunnel; - - if (sk == NULL) - return NULL; - - sock_hold(sk); - tunnel = (struct l2tp_tunnel *)(sk->sk_user_data); - if (tunnel == NULL) { - sock_put(sk); - goto out; - } - - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - -out: - return tunnel; -} - struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); +void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, @@ -283,7 +264,7 @@ static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel) static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) { if (refcount_dec_and_test(&tunnel->ref_count)) - kfree_rcu(tunnel, rcu); + l2tp_tunnel_free(tunnel); } /* Session reference counts. Incremented when code obtains a reference diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ff61124..3428fba 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -234,17 +234,13 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + struct l2tp_tunnel *tunnel = sk->sk_user_data; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); - if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); - } - - sk_refcnt_debug_dec(sk); + if (tunnel) + l2tp_tunnel_delete(tunnel); } static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 1923446..6f009ea 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -248,16 +248,14 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { - struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + struct l2tp_tunnel *tunnel = sk->sk_user_data; lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - if (tunnel) { - l2tp_tunnel_closeall(tunnel); - sock_put(sk); - } + if (tunnel) + l2tp_tunnel_delete(tunnel); inet6_destroy_sock(sk); } -- cgit v1.1 From d02ba2a6110c530a32926af8ad441111774d2893 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 23 Feb 2018 17:45:46 +0000 Subject: l2tp: fix race in pppol2tp_release with session object destroy pppol2tp_release uses call_rcu to put the final ref on its socket. But the session object doesn't hold a ref on the session socket so may be freed while the pppol2tp_put_sk RCU callback is scheduled. Fix this by having the session hold a ref on its socket until the session is destroyed. It is this ref that is dropped via call_rcu. Sessions are also deleted via l2tp_tunnel_closeall. This must now also put the final ref via call_rcu. So move the call_rcu call site into pppol2tp_session_close so that this happens in both destroy paths. A common destroy path should really be implemented, perhaps with l2tp_tunnel_closeall calling l2tp_session_delete like pppol2tp_release does, but this will be looked at later. ODEBUG: activate active (active state 1) object type: rcu_head hint: (null) WARNING: CPU: 3 PID: 13407 at lib/debugobjects.c:291 debug_print_object+0x166/0x220 Modules linked in: CPU: 3 PID: 13407 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #38 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:debug_print_object+0x166/0x220 RSP: 0018:ffff880013647a00 EFLAGS: 00010082 RAX: dffffc0000000008 RBX: 0000000000000003 RCX: ffffffff814d3333 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff88001a59f6d0 RBP: ffff880013647a40 R08: 0000000000000000 R09: 0000000000000001 R10: ffff8800136479a8 R11: 0000000000000000 R12: 0000000000000001 R13: ffffffff86161420 R14: ffffffff85648b60 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88001a580000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020e77000 CR3: 0000000006022000 CR4: 00000000000006e0 Call Trace: debug_object_activate+0x38b/0x530 ? debug_object_assert_init+0x3b0/0x3b0 ? __mutex_unlock_slowpath+0x85/0x8b0 ? pppol2tp_session_destruct+0x110/0x110 __call_rcu.constprop.66+0x39/0x890 ? __call_rcu.constprop.66+0x39/0x890 call_rcu_sched+0x17/0x20 pppol2tp_release+0x2c7/0x440 ? fcntl_setlk+0xca0/0xca0 ? sock_alloc_file+0x340/0x340 sock_release+0x92/0x1e0 sock_close+0x1b/0x20 __fput+0x296/0x6e0 ____fput+0x1a/0x20 task_work_run+0x127/0x1a0 do_exit+0x7f9/0x2ce0 ? SYSC_connect+0x212/0x310 ? mm_update_next_owner+0x690/0x690 ? up_read+0x1f/0x40 ? __do_page_fault+0x3c8/0xca0 do_group_exit+0x10d/0x330 ? do_group_exit+0x330/0x330 SyS_exit_group+0x22/0x30 do_syscall_64+0x1e0/0x730 ? trace_hardirqs_off_thunk+0x1a/0x1c entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7f362e471259 RSP: 002b:00007ffe389abe08 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f362e471259 RDX: 00007f362e471259 RSI: 000000000000002e RDI: 0000000000000000 RBP: 00007ffe389abe30 R08: 0000000000000000 R09: 00007f362e944270 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60 R13: 00007ffe389abf50 R14: 0000000000000000 R15: 0000000000000000 Code: 8d 3c dd a0 8f 64 85 48 89 fa 48 c1 ea 03 80 3c 02 00 75 7b 48 8b 14 dd a0 8f 64 85 4c 89 f6 48 c7 c7 20 85 64 85 e 8 2a 55 14 ff <0f> 0b 83 05 ad 2a 68 04 01 48 83 c4 18 5b 41 5c 41 5d 41 5e 41 Fixes: ee40fb2e1eb5b ("l2tp: protect sock pointer of struct pppol2tp_session with RCU") Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 2d2955e..3b02f24 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -416,10 +416,28 @@ abort: * Session (and tunnel control) socket create/destroy. *****************************************************************************/ +static void pppol2tp_put_sk(struct rcu_head *head) +{ + struct pppol2tp_session *ps; + + ps = container_of(head, typeof(*ps), rcu); + sock_put(ps->__sk); +} + /* Called by l2tp_core when a session socket is being closed. */ static void pppol2tp_session_close(struct l2tp_session *session) { + struct pppol2tp_session *ps; + + ps = l2tp_session_priv(session); + mutex_lock(&ps->sk_lock); + ps->__sk = rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock)); + RCU_INIT_POINTER(ps->sk, NULL); + if (ps->__sk) + call_rcu(&ps->rcu, pppol2tp_put_sk); + mutex_unlock(&ps->sk_lock); } /* Really kill the session socket. (Called from sock_put() if @@ -439,14 +457,6 @@ static void pppol2tp_session_destruct(struct sock *sk) } } -static void pppol2tp_put_sk(struct rcu_head *head) -{ - struct pppol2tp_session *ps; - - ps = container_of(head, typeof(*ps), rcu); - sock_put(ps->__sk); -} - /* Called when the PPPoX socket (session) is closed. */ static int pppol2tp_release(struct socket *sock) @@ -470,26 +480,17 @@ static int pppol2tp_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; + /* If the socket is associated with a session, + * l2tp_session_delete will call pppol2tp_session_close which + * will drop the session's ref on the socket. + */ session = pppol2tp_sock_to_session(sk); - - if (session != NULL) { - struct pppol2tp_session *ps; - + if (session) { l2tp_session_delete(session); - - ps = l2tp_session_priv(session); - mutex_lock(&ps->sk_lock); - ps->__sk = rcu_dereference_protected(ps->sk, - lockdep_is_held(&ps->sk_lock)); - RCU_INIT_POINTER(ps->sk, NULL); - mutex_unlock(&ps->sk_lock); - call_rcu(&ps->rcu, pppol2tp_put_sk); - - /* Rely on the sock_put() call at the end of the function for - * dropping the reference held by pppol2tp_sock_to_session(). - * The last reference will be dropped by pppol2tp_put_sk(). - */ + /* drop the ref obtained by pppol2tp_sock_to_session */ + sock_put(sk); } + release_sock(sk); /* This will delete the session context via @@ -786,6 +787,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, out_no_ppp: /* This is how we get the session context from the socket. */ + sock_hold(sk); sk->sk_user_data = session; rcu_assign_pointer(ps->sk, sk); mutex_unlock(&ps->sk_lock); -- cgit v1.1 From 28f5bfb819195ad9c2eb9486babe7b0e4efe925f Mon Sep 17 00:00:00 2001 From: James Chapman Date: Fri, 23 Feb 2018 17:45:47 +0000 Subject: l2tp: fix tunnel lookup use-after-free race l2tp_tunnel_get walks the tunnel list to find a matching tunnel instance and if a match is found, its refcount is increased before returning the tunnel pointer. But when tunnel objects are destroyed, they are on the tunnel list after their refcount hits zero. Fix this by moving the code that removes the tunnel from the tunnel list from the tunnel socket destructor into in the l2tp_tunnel_delete path, before the tunnel refcount is decremented. refcount_t: increment on 0; use-after-free. WARNING: CPU: 3 PID: 13507 at lib/refcount.c:153 refcount_inc+0x47/0x50 Modules linked in: CPU: 3 PID: 13507 Comm: syzbot_6e6a5ec8 Not tainted 4.16.0-rc2+ #36 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 RIP: 0010:refcount_inc+0x47/0x50 RSP: 0018:ffff8800136ffb20 EFLAGS: 00010286 RAX: dffffc0000000008 RBX: ffff880017068e68 RCX: ffffffff814d3333 RDX: 0000000000000000 RSI: ffff88001a59f6d8 RDI: ffff88001a59f6d8 RBP: ffff8800136ffb28 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8800136ffab0 R11: 0000000000000000 R12: ffff880017068e50 R13: 0000000000000000 R14: ffff8800174da800 R15: 0000000000000004 FS: 00007f403ab1e700(0000) GS:ffff88001a580000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000205fafd2 CR3: 0000000016770000 CR4: 00000000000006e0 Call Trace: l2tp_tunnel_get+0x2dd/0x4e0 pppol2tp_connect+0x428/0x13c0 ? pppol2tp_session_create+0x170/0x170 ? __might_fault+0x115/0x1d0 ? lock_downgrade+0x860/0x860 ? __might_fault+0xe5/0x1d0 ? security_socket_connect+0x8e/0xc0 SYSC_connect+0x1b6/0x310 ? SYSC_bind+0x280/0x280 ? __do_page_fault+0x5d1/0xca0 ? up_read+0x1f/0x40 ? __do_page_fault+0x3c8/0xca0 SyS_connect+0x29/0x30 ? SyS_accept+0x40/0x40 do_syscall_64+0x1e0/0x730 ? trace_hardirqs_off_thunk+0x1a/0x1c entry_SYSCALL_64_after_hwframe+0x42/0xb7 RIP: 0033:0x7f403a42f259 RSP: 002b:00007f403ab1dee8 EFLAGS: 00000296 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00000000205fafe4 RCX: 00007f403a42f259 RDX: 000000000000002e RSI: 00000000205fafd2 RDI: 0000000000000004 RBP: 00007f403ab1df20 R08: 00007f403ab1e700 R09: 0000000000000000 R10: 00007f403ab1e700 R11: 0000000000000296 R12: 0000000000000000 R13: 00007ffc81906cbf R14: 0000000000000000 R15: 00007f403ab2b040 Code: 3b ff 5b 5d c3 e8 ca 5f 3b ff 80 3d 49 8e 66 04 00 75 ea e8 bc 5f 3b ff 48 c7 c7 60 69 64 85 c6 05 34 8e 66 04 01 e8 59 49 15 ff <0f> 0b eb ce 0f 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 53 49 Fixes: f8ccac0e44934 ("l2tp: put tunnel socket release on a workqueue") Reported-and-tested-by: syzbot+19c09769f14b48810113@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+347bd5acde002e353a36@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+6e6a5ec8de31a94cd015@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+9df43faf09bd400f2993@syzkaller.appspotmail.com Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0fa53ea..83421c6 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1164,7 +1164,6 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); - struct l2tp_net *pn; if (tunnel == NULL) goto end; @@ -1187,12 +1186,6 @@ static void l2tp_tunnel_destruct(struct sock *sk) sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; - /* Remove the tunnel struct from the tunnel list */ - pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - /* Call the original destructor */ if (sk->sk_destruct) (*sk->sk_destruct)(sk); @@ -1271,6 +1264,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) del_work); struct sock *sk = tunnel->sock; struct socket *sock = sk->sk_socket; + struct l2tp_net *pn; l2tp_tunnel_closeall(tunnel); @@ -1284,6 +1278,12 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } } + /* Remove the tunnel struct from the tunnel list */ + pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_del_rcu(&tunnel->list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + /* drop initial ref */ l2tp_tunnel_dec_refcount(tunnel); -- cgit v1.1 From 5b4c845ea4f4b86c43096eb924354c83a2e26f3c Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sun, 25 Feb 2018 12:17:31 -0800 Subject: xfs: fix potential memory leak in mount option parsing When specifying string type mount option (e.g., logdev) several times in a mount, current option parsing may cause memory leak. Hence, call kfree for previous one in this case. Signed-off-by: Chengguang Xu Reviewed-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 7aba628..93588ea 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -250,6 +250,7 @@ xfs_parseargs( return -EINVAL; break; case Opt_logdev: + kfree(mp->m_logname); mp->m_logname = match_strdup(args); if (!mp->m_logname) return -ENOMEM; @@ -258,6 +259,7 @@ xfs_parseargs( xfs_warn(mp, "%s option not allowed on this system", p); return -EINVAL; case Opt_rtdev: + kfree(mp->m_rtname); mp->m_rtname = match_strdup(args); if (!mp->m_rtname) return -ENOMEM; -- cgit v1.1 From 13a55372b64e00e564a08d785ca87bd9d454ba30 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 26 Feb 2018 13:41:47 -0500 Subject: ARM: orion5x: Revert commit 4904dbda41c8. It is not valid for orion5x to use mac_pton(). First of all, the orion5x buffer is not NULL terminated. mac_pton() has no business operating on non-NULL terminated buffers because only the caller can know that this is valid and in what manner it is ok to parse this NULL'less buffer. Second of all, orion5x operates on an __iomem pointer, which cannot be dereferenced using normal C pointer operations. Accesses to such areas much be performed with the proper iomem accessors. Fixes: 4904dbda41c8 ("ARM: orion5x: use mac_pton() helper") Signed-off-by: David S. Miller --- arch/arm/mach-orion5x/Kconfig | 3 -- arch/arm/mach-orion5x/dns323-setup.c | 53 ++++++++++++++++++++++++++++++++++-- arch/arm/mach-orion5x/tsx09-common.c | 49 ++++++++++++++++++++++++++++++--- 3 files changed, 95 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig index 2a7bb6c..a810f4d 100644 --- a/arch/arm/mach-orion5x/Kconfig +++ b/arch/arm/mach-orion5x/Kconfig @@ -58,7 +58,6 @@ config MACH_KUROBOX_PRO config MACH_DNS323 bool "D-Link DNS-323" - select GENERIC_NET_UTILS select I2C_BOARDINFO if I2C help Say 'Y' here if you want your kernel to support the @@ -66,7 +65,6 @@ config MACH_DNS323 config MACH_TS209 bool "QNAP TS-109/TS-209" - select GENERIC_NET_UTILS help Say 'Y' here if you want your kernel to support the QNAP TS-109/TS-209 platform. @@ -101,7 +99,6 @@ config MACH_LINKSTATION_LS_HGL config MACH_TS409 bool "QNAP TS-409" - select GENERIC_NET_UTILS help Say 'Y' here if you want your kernel to support the QNAP TS-409 platform. diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index cd483bf..d13344b 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; +/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these + * functions be kept somewhere? + */ +static int __init dns323_parse_hex_nibble(char n) +{ + if (n >= '0' && n <= '9') + return n - '0'; + + if (n >= 'A' && n <= 'F') + return n - 'A' + 10; + + if (n >= 'a' && n <= 'f') + return n - 'a' + 10; + + return -1; +} + +static int __init dns323_parse_hex_byte(const char *b) +{ + int hi; + int lo; + + hi = dns323_parse_hex_nibble(b[0]); + lo = dns323_parse_hex_nibble(b[1]); + + if (hi < 0 || lo < 0) + return -1; + + return (hi << 4) | lo; +} + static int __init dns323_read_mac_addr(void) { u_int8_t addr[6]; - void __iomem *mac_page; + int i; + char *mac_page; /* MAC address is stored as a regular ol' string in /dev/mtdblock4 * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80). @@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void) if (!mac_page) return -ENOMEM; - if (!mac_pton((__force const char *) mac_page, addr)) - goto error_fail; + /* Sanity check the string we're looking at */ + for (i = 0; i < 5; i++) { + if (*(mac_page + (i * 3) + 2) != ':') { + goto error_fail; + } + } + + for (i = 0; i < 6; i++) { + int byte; + + byte = dns323_parse_hex_byte(mac_page + (i * 3)); + if (byte < 0) { + goto error_fail; + } + + addr[i] = byte; + } iounmap(mac_page); printk("DNS-323: Found ethernet MAC address: %pM\n", addr); diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c index 8977498..905d4f2 100644 --- a/arch/arm/mach-orion5x/tsx09-common.c +++ b/arch/arm/mach-orion5x/tsx09-common.c @@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; +static int __init qnap_tsx09_parse_hex_nibble(char n) +{ + if (n >= '0' && n <= '9') + return n - '0'; + + if (n >= 'A' && n <= 'F') + return n - 'A' + 10; + + if (n >= 'a' && n <= 'f') + return n - 'a' + 10; + + return -1; +} + +static int __init qnap_tsx09_parse_hex_byte(const char *b) +{ + int hi; + int lo; + + hi = qnap_tsx09_parse_hex_nibble(b[0]); + lo = qnap_tsx09_parse_hex_nibble(b[1]); + + if (hi < 0 || lo < 0) + return -1; + + return (hi << 4) | lo; +} + static int __init qnap_tsx09_check_mac_addr(const char *addr_str) { u_int8_t addr[6]; + int i; - if (!mac_pton(addr_str, addr)) - return -1; + for (i = 0; i < 6; i++) { + int byte; + + /* + * Enforce "xx:xx:xx:xx:xx:xx\n" format. + */ + if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n')) + return -1; + + byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3)); + if (byte < 0) + return -1; + addr[i] = byte; + } printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr); @@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size) unsigned long addr; for (addr = mem_base; addr < (mem_base + size); addr += 1024) { - void __iomem *nor_page; + char *nor_page; int ret = 0; nor_page = ioremap(addr, 1024); if (nor_page != NULL) { - ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page); + ret = qnap_tsx09_check_mac_addr(nor_page); iounmap(nor_page); } -- cgit v1.1 From 0c5661ecc5dd7ce296870a3eb7b62b1b280a5e89 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 23 Feb 2018 12:39:41 -0800 Subject: ixgbe: fix crash in build_skb Rx code path Add check for build_skb enabled ring in ixgbe_dma_sync_frag(). In that case &skb_shinfo(skb)->frags[0] may not always be set which can lead to a crash. Instead we derive the page offset from skb->data. Fixes: 42073d91a214 ("ixgbe: Have the CPU take ownership of the buffers sooner") CC: stable Reported-by: Ambarish Soman Suggested-by: Alexander Duyck Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0da5aa2..9fc063a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1888,6 +1888,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE, IXGBE_RX_DMA_ATTR); + } else if (ring_uses_build_skb(rx_ring)) { + unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + offset, + skb_headlen(skb), + DMA_FROM_DEVICE); } else { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; -- cgit v1.1 From f249be4d2c275fe2b98e389f471af75f758e5a59 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 24 Feb 2018 11:32:24 +0800 Subject: Revert "tuntap: add missing xdp flush" This reverts commit 762c330d670e3d4b795cf7a8d761866fdd1eef49. The reason is we try to batch packets for devmap which causes calling xdp_do_flush() in the process context. Simply disabling preemption may not work since process may move among processors which lead xdp_do_flush() to miss some flushes on some processors. So simply revert the patch, a follow-up patch will add the xdp flush correctly. Reported-by: Christoffer Dall Fixes: 762c330d670e ("tuntap: add missing xdp flush") Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b52258c..2823a4a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -181,7 +181,6 @@ struct tun_file { struct tun_struct *detached; struct ptr_ring tx_ring; struct xdp_rxq_info xdp_rxq; - int xdp_pending_pkts; }; struct tun_flow_entry { @@ -1662,7 +1661,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, case XDP_REDIRECT: get_page(alloc_frag->page); alloc_frag->offset += buflen; - ++tfile->xdp_pending_pkts; err = xdp_do_redirect(tun->dev, &xdp, xdp_prog); if (err) goto err_redirect; @@ -1984,11 +1982,6 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) result = tun_get_user(tun, tfile, NULL, from, file->f_flags & O_NONBLOCK, false); - if (tfile->xdp_pending_pkts) { - tfile->xdp_pending_pkts = 0; - xdp_do_flush_map(); - } - tun_put(tun); return result; } @@ -2325,13 +2318,6 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, m->msg_flags & MSG_DONTWAIT, m->msg_flags & MSG_MORE); - - if (tfile->xdp_pending_pkts >= NAPI_POLL_WEIGHT || - !(m->msg_flags & MSG_MORE)) { - tfile->xdp_pending_pkts = 0; - xdp_do_flush_map(); - } - tun_put(tun); return ret; } @@ -3163,7 +3149,6 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); - tfile->xdp_pending_pkts = 0; return 0; } -- cgit v1.1 From 23e43f07f896f8578318cfcc9466f1e8b8ab21b6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 24 Feb 2018 11:32:25 +0800 Subject: tuntap: disable preemption during XDP processing Except for tuntap, all other drivers' XDP was implemented at NAPI poll() routine in a bh. This guarantees all XDP operation were done at the same CPU which is required by e.g BFP_MAP_TYPE_PERCPU_ARRAY. But for tuntap, we do it in process context and we try to protect XDP processing by RCU reader lock. This is insufficient since CONFIG_PREEMPT_RCU can preempt the RCU reader critical section which breaks the assumption that all XDP were processed in the same CPU. Fixing this by simply disabling preemption during XDP processing. Fixes: 761876c857cb ("tap: XDP support") Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2823a4a..63d39fe6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1642,6 +1642,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, else *skb_xdp = 0; + preempt_disable(); rcu_read_lock(); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog && !*skb_xdp) { @@ -1665,6 +1666,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, if (err) goto err_redirect; rcu_read_unlock(); + preempt_enable(); return NULL; case XDP_TX: xdp_xmit = true; @@ -1686,6 +1688,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, skb = build_skb(buf, buflen); if (!skb) { rcu_read_unlock(); + preempt_enable(); return ERR_PTR(-ENOMEM); } @@ -1698,10 +1701,12 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, skb->dev = tun->dev; generic_xdp_tx(skb, xdp_prog); rcu_read_unlock(); + preempt_enable(); return NULL; } rcu_read_unlock(); + preempt_enable(); return skb; @@ -1709,6 +1714,7 @@ err_redirect: put_page(alloc_frag->page); err_xdp: rcu_read_unlock(); + preempt_enable(); this_cpu_inc(tun->pcpu_stats->rx_dropped); return NULL; } -- cgit v1.1 From 1bb4f2e868a2891ab8bc668b8173d6ccb8c4ce6f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 24 Feb 2018 11:32:26 +0800 Subject: tuntap: correctly add the missing XDP flush We don't flush batched XDP packets through xdp_do_flush_map(), this will cause packets stall at TX queue. Consider we don't do XDP on NAPI poll(), the only possible fix is to call xdp_do_flush_map() immediately after xdp_do_redirect(). Note, this in fact won't try to batch packets through devmap, we could address in the future. Reported-by: Christoffer Dall Fixes: 761876c857cb ("tap: XDP support") Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 63d39fe6..7433bb2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1663,6 +1663,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, get_page(alloc_frag->page); alloc_frag->offset += buflen; err = xdp_do_redirect(tun->dev, &xdp, xdp_prog); + xdp_do_flush_map(); if (err) goto err_redirect; rcu_read_unlock(); -- cgit v1.1 From 9c72258870a95671aa301e21ea6639d1d3ec4111 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 26 Jan 2018 16:58:06 -0800 Subject: blktrace_api.h: fix comment for struct blk_user_trace_setup 'struct blk_user_trace_setup' is passed to BLKTRACESETUP, not BLKTRACESTART. Signed-off-by: Eric Biggers Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index 20d1490d..3c50e07 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -131,7 +131,7 @@ enum { #define BLKTRACE_BDEV_SIZE 32 /* - * User setup structure passed with BLKTRACESTART + * User setup structure passed with BLKTRACESETUP */ struct blk_user_trace_setup { char name[BLKTRACE_BDEV_SIZE]; /* output */ -- cgit v1.1 From b6c3bad1ba83af1062a7ff6986d9edc4f3d7fc8e Mon Sep 17 00:00:00 2001 From: Denis Du Date: Sat, 24 Feb 2018 16:51:42 -0500 Subject: hdlc_ppp: carrier detect ok, don't turn off negotiation Sometimes when physical lines have a just good noise to make the protocol handshaking fail, but the carrier detect still good. Then after remove of the noise, nobody will trigger this protocol to be start again to cause the link to never come back. The fix is when the carrier is still on, not terminate the protocol handshaking. Signed-off-by: Denis Du Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index afeca6b..ab8b3cb 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -574,7 +574,10 @@ static void ppp_timer(struct timer_list *t) ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0, 0, NULL); proto->restart_counter--; - } else + } else if (netif_carrier_ok(proto->dev)) + ppp_cp_event(proto->dev, proto->pid, TO_GOOD, 0, 0, + 0, NULL); + else ppp_cp_event(proto->dev, proto->pid, TO_BAD, 0, 0, 0, NULL); break; -- cgit v1.1 From 4b0ad07653ee94182e2d8f21404242c9e83ad0b4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 26 Feb 2018 14:39:30 -0500 Subject: idr: Fix handling of IDs above INT_MAX Khalid reported that the kernel selftests are currently failing: selftests: test_bpf.sh ======================================== test_bpf: [FAIL] not ok 1..8 selftests: test_bpf.sh [FAIL] He bisected it to 6ce711f2750031d12cec91384ac5cfa0a485b60a ("idr: Make 1-based IDRs more efficient"). The root cause is doing a signed comparison in idr_alloc_u32() instead of an unsigned comparison. I went looking for any similar problems and found a couple (which would each result in the failure to warn in two situations that aren't supposed to happen). I knocked up a few test-cases to prove that I was right and added them to the test-suite. Reported-by: Khalid Aziz Tested-by: Khalid Aziz Signed-off-by: Matthew Wilcox --- lib/idr.c | 13 +++++----- tools/testing/radix-tree/idr-test.c | 52 +++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 99ec5bc..823b813 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -36,8 +36,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, { struct radix_tree_iter iter; void __rcu **slot; - int base = idr->idr_base; - int id = *nextid; + unsigned int base = idr->idr_base; + unsigned int id = *nextid; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; @@ -204,10 +204,11 @@ int idr_for_each(const struct idr *idr, radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret; + unsigned long id = iter.index + base; - if (WARN_ON_ONCE(iter.index > INT_MAX)) + if (WARN_ON_ONCE(id > INT_MAX)) break; - ret = fn(iter.index + base, rcu_dereference_raw(*slot), data); + ret = fn(id, rcu_dereference_raw(*slot), data); if (ret) return ret; } @@ -230,8 +231,8 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; - int base = idr->idr_base; - int id = *nextid; + unsigned long base = idr->idr_base; + unsigned long id = *nextid; id = (id < base) ? 0 : id - base; slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 44ef9eb..6c645eb 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -178,6 +178,55 @@ void idr_get_next_test(int base) idr_destroy(&idr); } +int idr_u32_cb(int id, void *ptr, void *data) +{ + BUG_ON(id < 0); + BUG_ON(ptr != DUMMY_PTR); + return 0; +} + +void idr_u32_test1(struct idr *idr, u32 handle) +{ + static bool warned = false; + u32 id = handle; + int sid = 0; + void *ptr; + + BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL)); + BUG_ON(id != handle); + BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC); + BUG_ON(id != handle); + if (!warned && id > INT_MAX) + printk("vvv Ignore these warnings\n"); + ptr = idr_get_next(idr, &sid); + if (id > INT_MAX) { + BUG_ON(ptr != NULL); + BUG_ON(sid != 0); + } else { + BUG_ON(ptr != DUMMY_PTR); + BUG_ON(sid != id); + } + idr_for_each(idr, idr_u32_cb, NULL); + if (!warned && id > INT_MAX) { + printk("^^^ Warnings over\n"); + warned = true; + } + BUG_ON(idr_remove(idr, id) != DUMMY_PTR); + BUG_ON(!idr_is_empty(idr)); +} + +void idr_u32_test(int base) +{ + DEFINE_IDR(idr); + idr_init_base(&idr, base); + idr_u32_test1(&idr, 10); + idr_u32_test1(&idr, 0x7fffffff); + idr_u32_test1(&idr, 0x80000000); + idr_u32_test1(&idr, 0x80000001); + idr_u32_test1(&idr, 0xffe00000); + idr_u32_test1(&idr, 0xffffffff); +} + void idr_checks(void) { unsigned long i; @@ -248,6 +297,9 @@ void idr_checks(void) idr_get_next_test(0); idr_get_next_test(1); idr_get_next_test(4); + idr_u32_test(4); + idr_u32_test(1); + idr_u32_test(0); } /* -- cgit v1.1 From d40bc96257fe070796c63934913f95cc183016b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Feb 2018 10:52:46 -0800 Subject: test_bpf: add a schedule point test_bpf() is taking 1.6 seconds nowadays, it is time to add a schedule point in it. Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index b4e2234..e6f5506 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -24,6 +24,7 @@ #include #include #include +#include /* General test specific settings */ #define MAX_SUBTESTS 3 @@ -6582,6 +6583,7 @@ static __init int test_bpf(void) struct bpf_prog *fp; int err; + cond_resched(); if (exclude_test(i)) continue; -- cgit v1.1 From c77f5fbbefc04612755117775e8555c2a7006cac Mon Sep 17 00:00:00 2001 From: Ramon Fried Date: Sun, 25 Feb 2018 09:49:37 +0200 Subject: qrtr: add MODULE_ALIAS macro to smd Added MODULE_ALIAS("rpmsg:IPCRTR") to ensure qrtr-smd and qrtr will load when IPCRTR channel is detected. Signed-off-by: Ramon Fried Signed-off-by: David S. Miller --- net/qrtr/smd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c index 50615d5..9cf089b 100644 --- a/net/qrtr/smd.c +++ b/net/qrtr/smd.c @@ -114,5 +114,6 @@ static struct rpmsg_driver qcom_smd_qrtr_driver = { module_rpmsg_driver(qcom_smd_qrtr_driver); +MODULE_ALIAS("rpmsg:IPCRTR"); MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); MODULE_LICENSE("GPL v2"); -- cgit v1.1 From 3a291aa11898bc9577c16339f108aac02ba0d109 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 1 Feb 2018 23:13:45 +0300 Subject: DT: net: renesas,ravb: document R8A77980 bindings Renesas R-Car V3H (R8A77980) SoC has the R-Car gen3 compatible EtherAVB device, so document the SoC specific bindings. Signed-off-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index c902261..92fd4b2 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -18,6 +18,7 @@ Required properties: - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A7796 SoC. - "renesas,etheravb-r8a77970" for the R8A77970 SoC. + - "renesas,etheravb-r8a77980" for the R8A77980 SoC. - "renesas,etheravb-r8a77995" for the R8A77995 SoC. - "renesas,etheravb-rcar-gen3" as a fallback for the above R-Car Gen3 devices. -- cgit v1.1 From 0e5a82efda872c2469c210957d7d4161ef8f4391 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 25 Feb 2018 21:59:06 +0200 Subject: bridge: Fix VLAN reference count problem When a VLAN is added on a port, a reference is taken on the corresponding master VLAN entry. If it does not already exist, then it is created and a reference taken. However, in the second case a reference is not really taken when CONFIG_REFCOUNT_FULL is enabled as refcount_inc() is replaced by refcount_inc_not_zero(). Fix this by using refcount_set() on a newly created master VLAN entry. Fixes: 251277598596 ("net, bridge: convert net_bridge_vlan.refcnt from atomic_t to refcount_t") Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 5193527..9896f49 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -168,6 +168,8 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid masterv = br_vlan_find(vg, vid); if (WARN_ON(!masterv)) return NULL; + refcount_set(&masterv->refcnt, 1); + return masterv; } refcount_inc(&masterv->refcnt); -- cgit v1.1 From 4e994776e7bdc3402347f8ea7f8c1b73137bf3e3 Mon Sep 17 00:00:00 2001 From: Thomas Winter Date: Mon, 26 Feb 2018 10:28:10 +1300 Subject: ip_tunnel: Do not use mark in skb by default This reverts commit 5c38bd1b82e1f76f9fa96c1e61c9897cabf1ce45. skb->mark contains the mark the encapsulated traffic which can result in incorrect routing decisions being made such as routing loops if the route chosen is via tunnel itself. The correct method should be to use tunnel->fwmark. Signed-off-by: Thomas Winter Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d786a84..6d21068 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -710,16 +710,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - if (tunnel->fwmark) { - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); - } - else { - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - skb->mark); - } + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; -- cgit v1.1 From 9d4949b4935831be10534d5432bf611285a572a5 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 26 Feb 2018 18:50:35 +0200 Subject: dax: ->direct_access does not sleep anymore In Patch: [7a862fb] brd: remove dax support Dan Williams has removed the only might_sleep implementation of ->direct_access. So we no longer need to check for it. CC: Dan Williams Signed-off-by: Boaz Harrosh Signed-off-by: Dan Williams --- drivers/dax/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 473af69..ecdc292 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -246,12 +246,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, { long avail; - /* - * The device driver is allowed to sleep, in order to make the - * memory directly accessible. - */ - might_sleep(); - if (!dax_dev) return -EOPNOTSUPP; -- cgit v1.1 From 230f5a8969d8345fc9bbe3683f068246cf1be4b8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Feb 2018 17:08:01 -0800 Subject: dax: fix vma_is_fsdax() helper Gerd reports that ->i_mode may contain other bits besides S_IFCHR. Use S_ISCHR() instead. Otherwise, get_user_pages_longterm() may fail on device-dax instances when those are meant to be explicitly allowed. Fixes: 2bb6d2837083 ("mm: introduce get_user_pages_longterm") Cc: Reported-by: Gerd Rausch Acked-by: Jane Chu Reported-by: Haozhong Zhang Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a81556..79c4139 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3198,7 +3198,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma) if (!vma_is_dax(vma)) return false; inode = file_inode(vma->vm_file); - if (inode->i_mode == S_IFCHR) + if (S_ISCHR(inode->i_mode)) return false; /* device-dax */ return true; } -- cgit v1.1 From 133390fe497b8c3b63e84383300c03a13b007e08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2018 21:38:58 +0100 Subject: ARM: omap2: set CONFIG_LIRC=y in defconfig The CONFIG_LIRC symbol has changed from 'tristate' to 'bool, so we now get a warning for omap2plus_defconfig: arch/arm/configs/omap2plus_defconfig:322:warning: symbol value 'm' invalid for LIRC This changes the file to mark the symbol as built-in to get rid of the warning. Fixes: a60d64b15c20 ("media: lirc: lirc interface should not be a raw decoder") Signed-off-by: Arnd Bergmann --- arch/arm/configs/omap2plus_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 2f145c4..92674f2 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -319,7 +319,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_RC_CORE=m CONFIG_MEDIA_CONTROLLER=y CONFIG_VIDEO_V4L2_SUBDEV_API=y -CONFIG_LIRC=m +CONFIG_LIRC=y CONFIG_RC_DEVICES=y CONFIG_IR_RX51=m CONFIG_V4L_PLATFORM_DRIVERS=y -- cgit v1.1 From 29d1d52b06fbf4b26b592310bff7c4fe3ffaca07 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Feb 2018 14:08:14 +0100 Subject: ARM: dts: Set D-Link DNS-313 SATA to muxmode 0 This stops the driver from trying to probe the ATA slave interface. The vendor code enables the slave interface but the driver in the vendor tree does not make use of it. Setting it to muxmode 0 disables the slave interface: the hardware only has the master interface connected to the one harddrive slot anyways. Without this change booting takes excessive time, so it is very annoying to end users. Fixes: dd5c0561db75 ("ARM: dts: Add basic devicetree for D-Link DNS-313") Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/gemini-dlink-dns-313.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index 08568ce..da8bb9d 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -269,7 +269,7 @@ sata: sata@46000000 { /* The ROM uses this muxmode */ - cortina,gemini-ata-muxmode = <3>; + cortina,gemini-ata-muxmode = <0>; cortina,gemini-enable-sata-bridge; status = "okay"; }; -- cgit v1.1 From c37406e05d1e541df40b8b81c4bd40753fcaf414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 26 Feb 2018 14:51:13 -0600 Subject: PCI: Allow release of resources that were never assigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is entirely possible that the BIOS wasn't able to assign resources to a device. In this case don't crash in pci_release_resource() when we try to resize the resource. Fixes: 8bb705e3e79d ("PCI: Add pci_resize_resource() for resizing BARs") Signed-off-by: Christian König Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko CC: stable@vger.kernel.org # v4.15+ --- drivers/pci/setup-res.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 369d48d..3654472 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -401,6 +401,10 @@ void pci_release_resource(struct pci_dev *dev, int resno) struct resource *res = dev->resource + resno; pci_info(dev, "BAR %d: releasing %pR\n", resno, res); + + if (!res->parent) + return; + release_resource(res); res->end = resource_size(res) - 1; res->start = 0; -- cgit v1.1 From 9326fdf3fbdfbc3c78de001969df8256913d98e7 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 26 Feb 2018 13:11:03 +0000 Subject: cpufreq: scpi: invoke frequency-invariance setter function Commit 343a8d17fa8d (cpufreq: scpi: remove arm_big_little dependency) changed the cpufreq driver on juno from arm_big_little to scpi. The scpi set_target function does not call the frequency-invariance setter function arch_set_freq_scale() like the arm_big_little set_target function does. As a result the task scheduler load and utilization signals are not frequency-invariant on this platform anymore. Fix this by adding a call to arch_set_freq_scale() into scpi_cpufreq_set_target(). Fixes: 343a8d17fa8d (cpufreq: scpi: remove arm_big_little dependency) Signed-off-by: Dietmar Eggemann Acked-by: Sudeep Holla Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/scpi-cpufreq.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index c32a833..d300a16 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -51,15 +51,23 @@ static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) static int scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) { + unsigned long freq = policy->freq_table[index].frequency; struct scpi_data *priv = policy->driver_data; - u64 rate = policy->freq_table[index].frequency * 1000; + u64 rate = freq * 1000; int ret; ret = clk_set_rate(priv->clk, rate); - if (!ret && (clk_get_rate(priv->clk) != rate)) - ret = -EIO; - return ret; + if (ret) + return ret; + + if (clk_get_rate(priv->clk) != rate) + return -EIO; + + arch_set_freq_scale(policy->related_cpus, freq, + policy->cpuinfo.max_freq); + + return 0; } static int -- cgit v1.1 From 5c8b2623f6b425d48bdd5a66d7f7dc666b219613 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 23 Feb 2018 15:54:42 +0000 Subject: cpufreq: scpi: Fix incorrect arm_big_little config dependency Commit 343a8d17fa8d (cpufreq: scpi: remove arm_big_little dependency) removed the SCPI cpufreq dependency on arm_big_little cpufreq driver. However the Kconfig entry still depends on ARM_BIG_LITTLE_CPUFREQ which is clearly wrong. This patch removes that unnecessary Kconfig dependency. Fixes: 343a8d17fa8d (cpufreq: scpi: remove arm_big_little dependency) Reported-by: Quentin Perret Acked-by: Viresh Kumar Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 3a88e33..fb586e0 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -44,10 +44,10 @@ config ARM_DT_BL_CPUFREQ config ARM_SCPI_CPUFREQ tristate "SCPI based CPUfreq driver" - depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI + depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI help - This adds the CPUfreq driver support for ARM big.LITTLE platforms - using SCPI protocol for CPU power management. + This adds the CPUfreq driver support for ARM platforms using SCPI + protocol for CPU power management. This driver uses SCPI Message Protocol driver to interact with the firmware providing the CPU DVFS functionality. -- cgit v1.1 From 067b25a5639b10dfdd41ce6b4d4140fe84d0a8e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Wed, 7 Feb 2018 11:24:31 -0600 Subject: selftests/futex: Fix line continuation in Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Makefile lacks a couple of line continuation backslashes in an `if' clause, which produces an error when make versions prior to 4.x are used for building the tests. $ make make[1]: Entering directory `/[...]/linux/tools/testing/selftests/futex' /bin/sh: -c: line 5: syntax error: unexpected end of file make[1]: *** [all] Error 1 make[1]: Leaving directory `/[...]/linux/tools/testing/selftests/futex' make: *** [all] Error 2 Signed-off-by: Daniel Díaz Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index cea4adc..a63e845 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -12,9 +12,9 @@ all: BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ - if [ -e $$DIR/$(TEST_PROGS) ]; then - rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; - fi + if [ -e $$DIR/$(TEST_PROGS) ]; then \ + rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ + fi \ done override define RUN_TESTS -- cgit v1.1 From 16c513b13477b8da7958e8112bf23cd59b87a7c1 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 13 Feb 2018 10:45:57 -0700 Subject: selftests: memory-hotplug: silence test command echo Silence the following command being printed while running test. ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" Signed-off-by: Shuah Khan --- tools/testing/selftests/memory-hotplug/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 86636d2..183b468 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -4,7 +4,7 @@ all: include ../lib.mk TEST_PROGS := mem-on-off-test.sh -override RUN_TESTS := ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" +override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" override EMIT_TESTS := echo "$(RUN_TESTS)" run_full_test: -- cgit v1.1 From f6869826de700bce59e2cef14974f99836e34e4f Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 13 Feb 2018 10:48:29 -0700 Subject: selftests: vm: update .gitignore with new test Update .gitignore with new test. Signed-off-by: Shuah Khan --- tools/testing/selftests/vm/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 63c94d7..342c7bc 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -11,3 +11,4 @@ mlock-intersect-test mlock-random-test virtual_address_range gup_benchmark +va_128TBswitch -- cgit v1.1 From 6bb320ca4a4a7b5b3db8c8d7250cc40002046878 Mon Sep 17 00:00:00 2001 From: Jeremy Boone Date: Thu, 8 Feb 2018 12:32:06 -0800 Subject: tpm_tis: fix potential buffer overruns caused by bit glitches on the bus Discrete TPMs are often connected over slow serial buses which, on some platforms, can have glitches causing bit flips. In all the driver _recv() functions, we need to use a u32 to unmarshal the response size, otherwise a bit flip of the 31st bit would cause the expected variable to go negative, which would then try to read a huge amount of data. Also sanity check that the expected amount of data is large enough for the TPM header. Signed-off-by: Jeremy Boone Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm_tis_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 183a5f5..da074e3 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -270,7 +270,8 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) { struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev); int size = 0; - int expected, status; + int status; + u32 expected; if (count < TPM_HEADER_SIZE) { size = -EIO; @@ -285,7 +286,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) } expected = be32_to_cpu(*(__be32 *) (buf + 2)); - if (expected > count) { + if (expected > count || expected < TPM_HEADER_SIZE) { size = -EIO; goto out; } -- cgit v1.1 From f9d4d9b5a5ef2f017bc344fb65a58a902517173b Mon Sep 17 00:00:00 2001 From: Jeremy Boone Date: Thu, 8 Feb 2018 12:31:16 -0800 Subject: tpm_i2c_nuvoton: fix potential buffer overruns caused by bit glitches on the bus Discrete TPMs are often connected over slow serial buses which, on some platforms, can have glitches causing bit flips. In all the driver _recv() functions, we need to use a u32 to unmarshal the response size, otherwise a bit flip of the 31st bit would cause the expected variable to go negative, which would then try to read a huge amount of data. Also sanity check that the expected amount of data is large enough for the TPM header. Signed-off-by: Jeremy Boone Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm_i2c_nuvoton.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index c642877..caa86b1 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -281,7 +281,11 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) struct device *dev = chip->dev.parent; struct i2c_client *client = to_i2c_client(dev); s32 rc; - int expected, status, burst_count, retries, size = 0; + int status; + int burst_count; + int retries; + int size = 0; + u32 expected; if (count < TPM_HEADER_SIZE) { i2c_nuvoton_ready(chip); /* return to idle */ @@ -323,7 +327,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) * to machine native */ expected = be32_to_cpu(*(__be32 *) (buf + 2)); - if (expected > count) { + if (expected > count || expected < size) { dev_err(dev, "%s() expected > count\n", __func__); size = -EIO; continue; -- cgit v1.1 From 9b8cb28d7c62568a5916bdd7ea1c9176d7f8f2ed Mon Sep 17 00:00:00 2001 From: Jeremy Boone Date: Thu, 8 Feb 2018 12:30:01 -0800 Subject: tpm_i2c_infineon: fix potential buffer overruns caused by bit glitches on the bus Discrete TPMs are often connected over slow serial buses which, on some platforms, can have glitches causing bit flips. In all the driver _recv() functions, we need to use a u32 to unmarshal the response size, otherwise a bit flip of the 31st bit would cause the expected variable to go negative, which would then try to read a huge amount of data. Also sanity check that the expected amount of data is large enough for the TPM header. Signed-off-by: Jeremy Boone Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm_i2c_infineon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index c1dd39e..6116cd0 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -473,7 +473,8 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) { int size = 0; - int expected, status; + int status; + u32 expected; if (count < TPM_HEADER_SIZE) { size = -EIO; @@ -488,7 +489,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) } expected = be32_to_cpu(*(__be32 *)(buf + 2)); - if ((size_t) expected > count) { + if (((size_t) expected > count) || (expected < TPM_HEADER_SIZE)) { size = -EIO; goto out; } -- cgit v1.1 From 6d24cd186d9fead3722108dec1b1c993354645ff Mon Sep 17 00:00:00 2001 From: Jeremy Boone Date: Thu, 8 Feb 2018 12:29:09 -0800 Subject: tpm: st33zp24: fix potential buffer overruns caused by bit glitches on the bus Discrete TPMs are often connected over slow serial buses which, on some platforms, can have glitches causing bit flips. In all the driver _recv() functions, we need to use a u32 to unmarshal the response size, otherwise a bit flip of the 31st bit would cause the expected variable to go negative, which would then try to read a huge amount of data. Also sanity check that the expected amount of data is large enough for the TPM header. Signed-off-by: Jeremy Boone Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/st33zp24/st33zp24.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c index 4d1dc8b..f95b9c7 100644 --- a/drivers/char/tpm/st33zp24/st33zp24.c +++ b/drivers/char/tpm/st33zp24/st33zp24.c @@ -457,7 +457,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf, size_t count) { int size = 0; - int expected; + u32 expected; if (!chip) return -EBUSY; @@ -474,7 +474,7 @@ static int st33zp24_recv(struct tpm_chip *chip, unsigned char *buf, } expected = be32_to_cpu(*(__be32 *)(buf + 2)); - if (expected > count) { + if (expected > count || expected < TPM_HEADER_SIZE) { size = -EIO; goto out; } -- cgit v1.1 From 3be23274755ee85771270a23af7691dc9b3a95db Mon Sep 17 00:00:00 2001 From: Jeremy Boone Date: Thu, 8 Feb 2018 12:28:08 -0800 Subject: tpm: fix potential buffer overruns caused by bit glitches on the bus Discrete TPMs are often connected over slow serial buses which, on some platforms, can have glitches causing bit flips. If a bit does flip it could cause an overrun if it's in one of the size parameters, so sanity check that we're not overrunning the provided buffer when doing a memcpy(). Signed-off-by: Jeremy Boone Cc: stable@vger.kernel.org Signed-off-by: James Bottomley Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm-interface.c | 4 ++++ drivers/char/tpm/tpm2-cmd.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 76df4fb..9e80a95 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -1190,6 +1190,10 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max) break; recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len); + if (recd > num_bytes) { + total = -EFAULT; + break; + } rlength = be32_to_cpu(tpm_cmd.header.out.length); if (rlength < offsetof(struct tpm_getrandom_out, rng_data) + diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index c17e753..a700f8f 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -683,6 +683,10 @@ static int tpm2_unseal_cmd(struct tpm_chip *chip, if (!rc) { data_len = be16_to_cpup( (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]); + if (data_len < MIN_KEY_SIZE || data_len > MAX_KEY_SIZE + 1) { + rc = -EFAULT; + goto out; + } rlength = be32_to_cpu(((struct tpm2_cmd *)&buf) ->header.out.length); -- cgit v1.1 From 4c27bf3c5b7434ccb9ab962301da661c26b467a4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 25 Feb 2018 19:12:10 -0800 Subject: r8152: fix tx packets accounting r8152 driver handles TSO packets (limited to ~16KB) quite well, but pretends each TSO logical packet is a single packet on the wire. There is also some error since headers are accounted once, but error rate is small enough that we do not care. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 958b2e8..86f7196 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1794,7 +1794,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) tx_data += len; agg->skb_len += len; - agg->skb_num++; + agg->skb_num += skb_shinfo(skb)->gso_segs ?: 1; dev_kfree_skb_any(skb); -- cgit v1.1 From d269176e766c71c998cb75b4ea8cbc321cc0019d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 26 Feb 2018 22:00:47 +0100 Subject: bpf, ppc64: fix out of bounds access in tail call While working on 16338a9b3ac3 ("bpf, arm64: fix out of bounds access in tail call") I noticed that ppc64 JIT is partially affected as well. While the bound checking is correctly performed as unsigned comparison, the register with the index value however, is never truncated into 32 bit space, so e.g. a index value of 0x100000000ULL with a map of 1 element would pass with PPC_CMPLW() whereas we later on continue with the full 64 bit register value. Therefore, as we do in interpreter and other JITs truncate the value to 32 bit initially in order to fix access. Fixes: ce0761419fae ("powerpc/bpf: Implement support for tail calls") Signed-off-by: Daniel Borkmann Reviewed-by: Naveen N. Rao Tested-by: Naveen N. Rao Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0a34b0c..0ef3d95 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -240,6 +240,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * goto out; */ PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); + PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); PPC_BCC(COND_GE, out); -- cgit v1.1 From 32fc71875127498bf99cc648e96400ee0895edf7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Feb 2018 13:16:04 +0100 Subject: netfilter: nf_tables: return EBUSY if device already belongs to flowtable If the netdevice is already part of a flowtable, return EBUSY. I cannot find a valid usecase for having two flowtables bound to the same netdevice. We can still have two flowtable where the device set is disjoint. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8b9fe30..43acdee 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5037,9 +5037,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); const struct nf_flowtable_type *type; + struct nft_flowtable *flowtable, *ft; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct nft_flowtable *flowtable; struct nft_table *table; struct nft_ctx ctx; int err, i, k; @@ -5099,6 +5099,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err3; for (i = 0; i < flowtable->ops_len; i++) { + if (!flowtable->ops[i].dev) + continue; + + list_for_each_entry(ft, &table->flowtables, list) { + for (k = 0; k < ft->ops_len; k++) { + if (!ft->ops[k].dev) + continue; + + if (flowtable->ops[i].dev == ft->ops[k].dev && + flowtable->ops[i].pf == ft->ops[k].pf) { + err = -EBUSY; + goto err4; + } + } + } + err = nf_register_net_hook(net, &flowtable->ops[i]); if (err < 0) goto err4; -- cgit v1.1 From e603ea4ba778846b5b2203546f0c6056ec198b16 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 26 Feb 2018 13:16:05 +0100 Subject: netfilter: nf_tables: missing attribute validation in nf_tables_delflowtable() Return -EINVAL is mandatory attributes are missing. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 43acdee..2b5aa78 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5161,6 +5161,11 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_ctx ctx; + if (!nla[NFTA_FLOWTABLE_TABLE] || + (!nla[NFTA_FLOWTABLE_NAME] && + !nla[NFTA_FLOWTABLE_HANDLE])) + return -EINVAL; + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask); if (IS_ERR(table)) -- cgit v1.1 From 9a191b114906457c4b2494c474f58ae4142d4e67 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 21 Feb 2018 11:50:03 +1000 Subject: virtio-gpu: fix ioctl and expose the fixed status to userspace. This exposes to mesa that it can use the fixed ioctl for querying later cap sets, cap set 1 is forever frozen in time. Signed-off-by: Dave Airlie Link: http://patchwork.freedesktop.org/patch/msgid/20180221015003.22884-1-airlied@gmail.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 17 +++++++++++------ include/uapi/drm/virtgpu_drm.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5720a0d..677ac16 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -197,6 +197,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data, case VIRTGPU_PARAM_3D_FEATURES: value = vgdev->has_virgl_3d == true ? 1 : 0; break; + case VIRTGPU_PARAM_CAPSET_QUERY_FIX: + value = 1; + break; default: return -EINVAL; } @@ -472,7 +475,7 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, { struct virtio_gpu_device *vgdev = dev->dev_private; struct drm_virtgpu_get_caps *args = data; - int size; + unsigned size, host_caps_size; int i; int found_valid = -1; int ret; @@ -481,6 +484,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, if (vgdev->num_capsets == 0) return -ENOSYS; + /* don't allow userspace to pass 0 */ + if (args->size == 0) + return -EINVAL; + spin_lock(&vgdev->display_info_lock); for (i = 0; i < vgdev->num_capsets; i++) { if (vgdev->capsets[i].id == args->cap_set_id) { @@ -496,11 +503,9 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, return -EINVAL; } - size = vgdev->capsets[found_valid].max_size; - if (args->size > size) { - spin_unlock(&vgdev->display_info_lock); - return -EINVAL; - } + host_caps_size = vgdev->capsets[found_valid].max_size; + /* only copy to user the minimum of the host caps size or the guest caps size */ + size = min(args->size, host_caps_size); list_for_each_entry(cache_ent, &vgdev->cap_cache, head) { if (cache_ent->id == args->cap_set_id && diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index 91a31ff..9a781f0 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer { }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ +#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ struct drm_virtgpu_getparam { __u64 param; -- cgit v1.1 From 6662ae6af82df10259a70c7569b4c12ea7f3ba93 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Feb 2018 09:11:00 +0100 Subject: gpiolib: Keep returning EPROBE_DEFER when we should Commits c85823390215 ("gpio: of: Support SPI nonstandard GPIO properties") and 6a537d48461d ("gpio: of: Support regulator nonstandard GPIO properties") have introduced a regression in the way error codes from of_get_named_gpiod_flags are handled. Previously, those errors codes were returned immediately, but the two commits mentioned above are now overwriting the error pointer, meaning that whatever value has been returned will be dropped in favor of whatever the two new functions will return. This might not be a big deal except for EPROBE_DEFER, on which GPIOlib customers will depend on, and that will now be returned as an hard error which means that they will not probe anymore, instead of gently deferring their probe. Since EPROBE_DEFER basically means that we have found a valid property but there was no GPIO controller registered to handle it, fix this issues by returning it as soon as we encounter it. Fixes: c85823390215 ("gpio: of: Support SPI nonstandard GPIO properties") Fixes: 6a537d48461d ("gpio: of: Support regulator nonstandard GPIO properties") Signed-off-by: Maxime Ripard [Fold in fix to the fix] Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 564bb7a..0ee5dc7 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -241,6 +241,19 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, &of_flags); + /* + * -EPROBE_DEFER in our case means that we found a + * valid GPIO property, but no controller has been + * registered so far. + * + * This means we don't need to look any further for + * alternate name conventions, and we should really + * preserve the return code for our user to be able to + * retry probing later. + */ + if (IS_ERR(desc) && PTR_ERR(desc) == -EPROBE_DEFER) + return desc; + if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT)) break; } -- cgit v1.1 From ce27fb2c56db6ccfe8099343bb4afdab15e77e7b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 13 Feb 2018 14:08:14 +0800 Subject: gpio: Handle deferred probing in of_find_gpio() properly of_get_named_gpiod_flags() used directly in of_find_gpio() or indirectly through of_find_spi_gpio() or of_find_regulator_gpio() can return -EPROBE_DEFER. This gets overwritten by the subsequent of_find_*_gpio() calls. This patch fixes this by trying of_find_spi_gpio() or of_find_regulator_gpio() only if deferred probing was not requested by the previous of_get_named_gpiod_flags() call. Fixes: 6a537d48461d ("gpio: of: Support regulator nonstandard GPIO properties") Fixes: c85823390215 ("gpio: of: Support SPI nonstandard GPIO properties") Signed-off-by: Chen-Yu Tsai [Augmented to fit with Maxime's patch] Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 0ee5dc7..84e5a9d 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -263,7 +263,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, desc = of_find_spi_gpio(dev, con_id, &of_flags); /* Special handling for regulator GPIOs if used */ - if (IS_ERR(desc)) + if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) desc = of_find_regulator_gpio(dev, con_id, &of_flags); if (IS_ERR(desc)) -- cgit v1.1 From f8870ae6e2d6be75b1accc2db981169fdfbea7ab Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 14 Feb 2018 15:57:43 +0200 Subject: mmc: sdhci-pci: Fix S0i3 for Intel BYT-based controllers Tuning can leave the IP in an active state (Buffer Read Enable bit set) which prevents the entry to low power states (i.e. S0i3). Data reset will clear it. Generally tuning is followed by a data transfer which will anyway sort out the state, so it is rare that S0i3 is actually prevented. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 6d1a983..82c4f05 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -654,9 +654,36 @@ static void byt_read_dsm(struct sdhci_pci_slot *slot) slot->chip->rpm_retune = intel_host->d3_retune; } -static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) +static int intel_execute_tuning(struct mmc_host *mmc, u32 opcode) +{ + int err = sdhci_execute_tuning(mmc, opcode); + struct sdhci_host *host = mmc_priv(mmc); + + if (err) + return err; + + /* + * Tuning can leave the IP in an active state (Buffer Read Enable bit + * set) which prevents the entry to low power states (i.e. S0i3). Data + * reset will clear it. + */ + sdhci_reset(host, SDHCI_RESET_DATA); + + return 0; +} + +static void byt_probe_slot(struct sdhci_pci_slot *slot) { + struct mmc_host_ops *ops = &slot->host->mmc_host_ops; + byt_read_dsm(slot); + + ops->execute_tuning = intel_execute_tuning; +} + +static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) +{ + byt_probe_slot(slot); slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR | MMC_CAP_CMD_DURING_TFR | @@ -779,7 +806,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot) { int err; - byt_read_dsm(slot); + byt_probe_slot(slot); err = ni_set_max_freq(slot); if (err) @@ -792,7 +819,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot) static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot) { - byt_read_dsm(slot); + byt_probe_slot(slot); slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD | MMC_CAP_NONREMOVABLE | MMC_CAP_WAIT_WHILE_BUSY; return 0; @@ -800,7 +827,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot) static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) { - byt_read_dsm(slot); + byt_probe_slot(slot); slot->host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_AGGRESSIVE_PM | MMC_CAP_CD_WAKE; slot->cd_idx = 0; -- cgit v1.1 From c14376de3a1befa70d9811ca2872d47367b48767 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 26 Feb 2018 15:44:20 +0100 Subject: printk: Wake klogd when passing console_lock owner wake_klogd is a local variable in console_unlock(). The information is lost when the console_lock owner using the busy wait added by the commit dbdda842fe96f8932 ("printk: Add console owner and waiter logic to load balance console writes"). The following race is possible: CPU0 CPU1 console_unlock() for (;;) /* calling console for last message */ printk() log_store() log_next_seq++; /* see new message */ if (seen_seq != log_next_seq) { wake_klogd = true; seen_seq = log_next_seq; } console_lock_spinning_enable(); if (console_trylock_spinning()) /* spinning */ if (console_lock_spinning_disable_and_check()) { printk_safe_exit_irqrestore(flags); return; console_unlock() if (seen_seq != log_next_seq) { /* already seen */ /* nothing to do */ Result: Nobody would wakeup klogd. One solution would be to make a global variable from wake_klogd. But then we would need to manipulate it under a lock or so. This patch wakes klogd also when console_lock is passed to the spinning waiter. It looks like the right way to go. Also userspace should have a chance to see and store any "flood" of messages. Note that the very late klogd wake up was a historic solution. It made sense on single CPU systems or when sys_syslog() operations were synchronized using the big kernel lock like in v2.1.113. But it is questionable these days. Fixes: dbdda842fe96f8932 ("printk: Add console owner and waiter logic to load balance console writes") Link: http://lkml.kernel.org/r/20180226155734.dzwg3aovqnwtvkoy@pathway.suse.cz Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Tejun Heo Suggested-by: Sergey Senozhatsky Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index db4b9b8..4d81864 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2397,7 +2397,7 @@ skip: if (console_lock_spinning_disable_and_check()) { printk_safe_exit_irqrestore(flags); - return; + goto out; } printk_safe_exit_irqrestore(flags); @@ -2430,6 +2430,7 @@ skip: if (retry && console_trylock()) goto again; +out: if (wake_klogd) wake_up_klogd(); } -- cgit v1.1 From a78872363614367c3f37e3a5b4181c7a6b207b37 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Sun, 25 Feb 2018 13:39:56 +0100 Subject: cfg80211: add missing dependency to CFG80211 suboptions New options introduced by the patch this fixes are still enabled even if CFG80211 is disabled. .config: # CONFIG_CFG80211 is not set CONFIG_CFG80211_REQUIRE_SIGNED_REGDB=y CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS=y # CONFIG_LIB80211 is not set When CFG80211_REQUIRE_SIGNED_REGDB is enabled, it selects SYSTEM_DATA_VERIFICATION which selects SYSTEM_TRUSTED_KEYRING that need extract-cert tool. extract-cert needs some openssl headers to be installed on the build machine. Instead of adding missing "depends on CFG80211", it's easier to use a 'if' block around all options related to CFG80211, so do that. Fixes: 90a53e4432b1 ("cfg80211: implement regdb signature checking") Signed-off-by: Romain Naour [touch up commit message a bit] Signed-off-by: Johannes Berg --- net/wireless/Kconfig | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 1abcc4f..4172204 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -34,9 +34,10 @@ config CFG80211 When built as a module it will be called cfg80211. +if CFG80211 + config NL80211_TESTMODE bool "nl80211 testmode command" - depends on CFG80211 help The nl80211 testmode command helps implementing things like factory calibration or validation tools for wireless chips. @@ -51,7 +52,6 @@ config NL80211_TESTMODE config CFG80211_DEVELOPER_WARNINGS bool "enable developer warnings" - depends on CFG80211 default n help This option enables some additional warnings that help @@ -68,7 +68,7 @@ config CFG80211_DEVELOPER_WARNINGS config CFG80211_CERTIFICATION_ONUS bool "cfg80211 certification onus" - depends on CFG80211 && EXPERT + depends on EXPERT default n ---help--- You should disable this option unless you are both capable @@ -159,7 +159,6 @@ config CFG80211_REG_RELAX_NO_IR config CFG80211_DEFAULT_PS bool "enable powersave by default" - depends on CFG80211 default y help This option enables powersave mode by default. @@ -170,7 +169,6 @@ config CFG80211_DEFAULT_PS config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" - depends on CFG80211 depends on DEBUG_FS ---help--- You can enable this if you want debugfs entries for cfg80211. @@ -180,7 +178,6 @@ config CFG80211_DEBUGFS config CFG80211_CRDA_SUPPORT bool "support CRDA" if EXPERT default y - depends on CFG80211 help You should enable this option unless you know for sure you have no need for it, for example when using internal regdb (above) or the @@ -190,7 +187,6 @@ config CFG80211_CRDA_SUPPORT config CFG80211_WEXT bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT - depends on CFG80211 select WEXT_CORE default y if CFG80211_WEXT_EXPORT help @@ -199,11 +195,12 @@ config CFG80211_WEXT config CFG80211_WEXT_EXPORT bool - depends on CFG80211 help Drivers should select this option if they require cfg80211's wext compatibility symbols to be exported. +endif # CFG80211 + config LIB80211 tristate default n -- cgit v1.1 From 325501d9360eb42c7c51e6daa0d733844c1e790b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 23 Feb 2018 13:44:19 +0100 Subject: mmc: dw_mmc-k3: Fix out-of-bounds access through DT alias The hs_timing_cfg[] array is indexed using a value derived from the "mshcN" alias in DT, which may lead to an out-of-bounds access. Fix this by adding a range check. Fixes: 361c7fe9b02eee7e ("mmc: dw_mmc-k3: add sd support for hi3660") Signed-off-by: Geert Uytterhoeven Reviewed-by: Shawn Lin Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-k3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c index 73fd75c..75ae580 100644 --- a/drivers/mmc/host/dw_mmc-k3.c +++ b/drivers/mmc/host/dw_mmc-k3.c @@ -135,6 +135,9 @@ static int dw_mci_hi6220_parse_dt(struct dw_mci *host) if (priv->ctrl_id < 0) priv->ctrl_id = 0; + if (priv->ctrl_id >= TIMING_MODE) + return -EINVAL; + host->priv = priv; return 0; } -- cgit v1.1 From a4faa4929ed3be15e2d500d2405f992f6dedc8eb Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Sat, 24 Feb 2018 14:17:22 +0800 Subject: mmc: dw_mmc: Factor out dw_mci_init_slot_caps Factor out dw_mci_init_slot_caps to consolidate parsing all differents types of capabilities from host contrllers. No functional change intended. Signed-off-by: Shawn Lin Fixes: 800d78bfccb3 ("mmc: dw_mmc: add support for implementation specific callbacks") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 73 ++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 0aa3997..4033cf9 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2778,12 +2778,50 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static int dw_mci_init_slot_caps(struct dw_mci_slot *slot) +{ + struct dw_mci *host = slot->host; + const struct dw_mci_drv_data *drv_data = host->drv_data; + struct mmc_host *mmc = slot->mmc; + int ctrl_id; + + if (host->pdata->caps) + mmc->caps = host->pdata->caps; + + /* + * Support MMC_CAP_ERASE by default. + * It needs to use trim/discard/erase commands. + */ + mmc->caps |= MMC_CAP_ERASE; + + if (host->pdata->pm_caps) + mmc->pm_caps = host->pdata->pm_caps; + + if (host->dev->of_node) { + ctrl_id = of_alias_get_id(host->dev->of_node, "mshc"); + if (ctrl_id < 0) + ctrl_id = 0; + } else { + ctrl_id = to_platform_device(host->dev)->id; + } + if (drv_data && drv_data->caps) + mmc->caps |= drv_data->caps[ctrl_id]; + + if (host->pdata->caps2) + mmc->caps2 = host->pdata->caps2; + + /* Process SDIO IRQs through the sdio_irq_work. */ + if (mmc->caps & MMC_CAP_SDIO_IRQ) + mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; + + return 0; +} + static int dw_mci_init_slot(struct dw_mci *host) { struct mmc_host *mmc; struct dw_mci_slot *slot; - const struct dw_mci_drv_data *drv_data = host->drv_data; - int ctrl_id, ret; + int ret; u32 freq[2]; mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), host->dev); @@ -2817,38 +2855,13 @@ static int dw_mci_init_slot(struct dw_mci *host) if (!mmc->ocr_avail) mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; - if (host->pdata->caps) - mmc->caps = host->pdata->caps; - - /* - * Support MMC_CAP_ERASE by default. - * It needs to use trim/discard/erase commands. - */ - mmc->caps |= MMC_CAP_ERASE; - - if (host->pdata->pm_caps) - mmc->pm_caps = host->pdata->pm_caps; - - if (host->dev->of_node) { - ctrl_id = of_alias_get_id(host->dev->of_node, "mshc"); - if (ctrl_id < 0) - ctrl_id = 0; - } else { - ctrl_id = to_platform_device(host->dev)->id; - } - if (drv_data && drv_data->caps) - mmc->caps |= drv_data->caps[ctrl_id]; - - if (host->pdata->caps2) - mmc->caps2 = host->pdata->caps2; - ret = mmc_of_parse(mmc); if (ret) goto err_host_allocated; - /* Process SDIO IRQs through the sdio_irq_work. */ - if (mmc->caps & MMC_CAP_SDIO_IRQ) - mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; + ret = dw_mci_init_slot_caps(slot); + if (ret) + goto err_host_allocated; /* Useful defaults if platform data is unset. */ if (host->use_dma == TRANS_MODE_IDMAC) { -- cgit v1.1 From 0d84b9e5631d923744767dc6608672df906dd092 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Sat, 24 Feb 2018 14:17:23 +0800 Subject: mmc: dw_mmc: Fix out-of-bounds access for slot's caps Add num_caps field for dw_mci_drv_data to validate the controller id from DT alias and non-DT ways. Reported-by: Geert Uytterhoeven Signed-off-by: Shawn Lin Fixes: 800d78bfccb3 ("mmc: dw_mmc: add support for implementation specific callbacks") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-exynos.c | 1 + drivers/mmc/host/dw_mmc-k3.c | 1 + drivers/mmc/host/dw_mmc-rockchip.c | 1 + drivers/mmc/host/dw_mmc-zx.c | 1 + drivers/mmc/host/dw_mmc.c | 9 ++++++++- drivers/mmc/host/dw_mmc.h | 2 ++ 6 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 3502679..fa41d94 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -487,6 +487,7 @@ static unsigned long exynos_dwmmc_caps[4] = { static const struct dw_mci_drv_data exynos_drv_data = { .caps = exynos_dwmmc_caps, + .num_caps = ARRAY_SIZE(exynos_dwmmc_caps), .init = dw_mci_exynos_priv_init, .set_ios = dw_mci_exynos_set_ios, .parse_dt = dw_mci_exynos_parse_dt, diff --git a/drivers/mmc/host/dw_mmc-k3.c b/drivers/mmc/host/dw_mmc-k3.c index 75ae580..89cdb3d 100644 --- a/drivers/mmc/host/dw_mmc-k3.c +++ b/drivers/mmc/host/dw_mmc-k3.c @@ -210,6 +210,7 @@ static int dw_mci_hi6220_execute_tuning(struct dw_mci_slot *slot, u32 opcode) static const struct dw_mci_drv_data hi6220_data = { .caps = dw_mci_hi6220_caps, + .num_caps = ARRAY_SIZE(dw_mci_hi6220_caps), .switch_voltage = dw_mci_hi6220_switch_voltage, .set_ios = dw_mci_hi6220_set_ios, .parse_dt = dw_mci_hi6220_parse_dt, diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index a3f1c2b..3392952 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -319,6 +319,7 @@ static const struct dw_mci_drv_data rk2928_drv_data = { static const struct dw_mci_drv_data rk3288_drv_data = { .caps = dw_mci_rk3288_dwmmc_caps, + .num_caps = ARRAY_SIZE(dw_mci_rk3288_dwmmc_caps), .set_ios = dw_mci_rk3288_set_ios, .execute_tuning = dw_mci_rk3288_execute_tuning, .parse_dt = dw_mci_rk3288_parse_dt, diff --git a/drivers/mmc/host/dw_mmc-zx.c b/drivers/mmc/host/dw_mmc-zx.c index d38e94a..c06b539 100644 --- a/drivers/mmc/host/dw_mmc-zx.c +++ b/drivers/mmc/host/dw_mmc-zx.c @@ -195,6 +195,7 @@ static unsigned long zx_dwmmc_caps[3] = { static const struct dw_mci_drv_data zx_drv_data = { .caps = zx_dwmmc_caps, + .num_caps = ARRAY_SIZE(zx_dwmmc_caps), .execute_tuning = dw_mci_zx_execute_tuning, .prepare_hs400_tuning = dw_mci_zx_prepare_hs400_tuning, .parse_dt = dw_mci_zx_parse_dt, diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 4033cf9..a850f8d 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2804,8 +2804,15 @@ static int dw_mci_init_slot_caps(struct dw_mci_slot *slot) } else { ctrl_id = to_platform_device(host->dev)->id; } - if (drv_data && drv_data->caps) + + if (drv_data && drv_data->caps) { + if (ctrl_id >= drv_data->num_caps) { + dev_err(host->dev, "invalid controller id %d\n", + ctrl_id); + return -EINVAL; + } mmc->caps |= drv_data->caps[ctrl_id]; + } if (host->pdata->caps2) mmc->caps2 = host->pdata->caps2; diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index e3124f0..1424bd4 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -543,6 +543,7 @@ struct dw_mci_slot { /** * dw_mci driver data - dw-mshc implementation specific driver data. * @caps: mmc subsystem specified capabilities of the controller(s). + * @num_caps: number of capabilities specified by @caps. * @init: early implementation specific initialization. * @set_ios: handle bus specific extensions. * @parse_dt: parse implementation specific device tree properties. @@ -554,6 +555,7 @@ struct dw_mci_slot { */ struct dw_mci_drv_data { unsigned long *caps; + u32 num_caps; int (*init)(struct dw_mci *host); void (*set_ios)(struct dw_mci *host, struct mmc_ios *ios); int (*parse_dt)(struct dw_mci *host); -- cgit v1.1 From 5b43df8b4c1a7f0c3fbf793c9566068e6b1e570c Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 23 Feb 2018 16:47:25 +0800 Subject: mmc: dw_mmc: Avoid accessing registers in runtime suspended state cat /sys/kernel/debug/mmc0/regs will hang up the system since it's in runtime suspended state, so the genpd and biu_clk is off. This patch fixes this problem by calling pm_runtime_get_sync to wake it up before reading the registers. Fixes: e9ed8835e990 ("mmc: dw_mmc: add runtime PM callback") Cc: Signed-off-by: Shawn Lin Reviewed-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index a850f8d..d9b4ace 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -165,6 +165,8 @@ static int dw_mci_regs_show(struct seq_file *s, void *v) { struct dw_mci *host = s->private; + pm_runtime_get_sync(host->dev); + seq_printf(s, "STATUS:\t0x%08x\n", mci_readl(host, STATUS)); seq_printf(s, "RINTSTS:\t0x%08x\n", mci_readl(host, RINTSTS)); seq_printf(s, "CMD:\t0x%08x\n", mci_readl(host, CMD)); @@ -172,6 +174,8 @@ static int dw_mci_regs_show(struct seq_file *s, void *v) seq_printf(s, "INTMASK:\t0x%08x\n", mci_readl(host, INTMASK)); seq_printf(s, "CLKENA:\t0x%08x\n", mci_readl(host, CLKENA)); + pm_runtime_put_autosuspend(host->dev); + return 0; } -- cgit v1.1 From 3a574919f0cc15a46ec14c3e5e08300908991915 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 27 Feb 2018 11:49:09 +0100 Subject: mmc: core: Avoid hanging to claim host for mmc via some nested calls As the block layer, since the conversion to blkmq, claims the host using a context, a following nested call to mmc_claim_host(), which isn't using a context, may hang. Calling mmc_interrupt_hpi() and mmc_read_bkops_status() via the mmc block layer, may suffer from this problem, as these functions are calling mmc_claim|release_host(). Let's fix the problem by removing the calls to mmc_claim|release_host() from the above mentioned functions and instead make the callers responsible of claiming/releasing the host. As a matter of fact, the existing callers already deals with it. Fixes: 81196976ed94 ("mmc: block: Add blk-mq support") Reported-by: Dmitry Osipenko Suggested-by: Adrian Hunter Tested-by: Dmitry Osipenko Signed-off-by: Ulf Hansson Acked-by: Adrian Hunter Reviewed-by: Shawn Lin --- drivers/mmc/core/mmc_ops.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 908e4db..42d6aa8 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -848,7 +848,6 @@ int mmc_interrupt_hpi(struct mmc_card *card) return 1; } - mmc_claim_host(card->host); err = mmc_send_status(card, &status); if (err) { pr_err("%s: Get card status fail\n", mmc_hostname(card->host)); @@ -890,7 +889,6 @@ int mmc_interrupt_hpi(struct mmc_card *card) } while (!err); out: - mmc_release_host(card->host); return err; } @@ -932,9 +930,7 @@ static int mmc_read_bkops_status(struct mmc_card *card) int err; u8 *ext_csd; - mmc_claim_host(card->host); err = mmc_get_ext_csd(card, &ext_csd); - mmc_release_host(card->host); if (err) return err; -- cgit v1.1 From d716d9b702bb759dd6fb50804f10a174bd156d71 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 14 Feb 2018 18:40:12 +0900 Subject: dmaengine: rcar-dmac: fix max_chunk_size for R-Car Gen3 According to R-Car Gen3 Rev.0.80 manual, the DMATCR can be set to 16,777,215 as maximum. So, this patch fixes the max_chunk_size for safety on all of SoCs. Otherwise, a system may hang if the DMATCR is set to 0 on R-Car Gen3. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index e3ff162..d0cacdb 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -917,7 +917,7 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl, rcar_dmac_chan_configure_desc(chan, desc); - max_chunk_size = (RCAR_DMATCR_MASK + 1) << desc->xfer_shift; + max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift; /* * Allocate and fill the transfer chunk descriptors. We own the only -- cgit v1.1 From b9d17175aeb984eba10d98b623b92488e9c8ece0 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 26 Feb 2018 10:59:53 +0100 Subject: devlink: Compare to size_new in case of resource child validation The current implementation checks the combined size of the children with the 'size' of the parent. The correct behavior is to check the combined size vs the pending change and to compare vs the 'size_new'. Fixes: d9f9b9a4d05f ("devlink: Add support for resource abstraction") Signed-off-by: Arkadi Sharshevsky Tested-by: Yuval Mintz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 18d385e..92aad7c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2332,7 +2332,7 @@ devlink_resource_validate_children(struct devlink_resource *resource) list_for_each_entry(child_resource, &resource->resource_list, list) parts_size += child_resource->size_new; - if (parts_size > resource->size) + if (parts_size > resource->size_new) size_valid = false; out: resource->size_valid = size_valid; -- cgit v1.1 From c7272c2f1229125f74f22dcdd59de9bbd804f1c8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 26 Feb 2018 16:13:43 +0100 Subject: net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68 According to RFC 1191 sections 3 and 4, ICMP frag-needed messages indicating an MTU below 68 should be rejected: A host MUST never reduce its estimate of the Path MTU below 68 octets. and (talking about ICMP frag-needed's Next-Hop MTU field): This field will never contain a value less than 68, since every router "must be able to forward a datagram of 68 octets without fragmentation". Furthermore, by letting net.ipv4.route.min_pmtu be set to negative values, we can end up with a very large PMTU when (-1) is cast into u32. Let's also make ip_rt_min_pmtu a u32, since it's only ever compared to unsigned ints. Reported-by: Jianlin Shi Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4f44d8..9548437 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -128,10 +128,13 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; -static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; +static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; + +static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; + /* * Interface to generic destination cache. */ @@ -2933,7 +2936,8 @@ static struct ctl_table ipv4_route_table[] = { .data = &ip_rt_min_pmtu, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &ip_min_valid_pmtu, }, { .procname = "min_adv_mss", -- cgit v1.1 From 3d18e4f19f37062a0f2cbcf3ac17eaabdde04704 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 26 Feb 2018 18:25:42 +0200 Subject: devlink: Fix resource coverity errors Fix resource coverity errors. Fixes: d9f9b9a4d05f ("devlink: Add support for resource abstraction") Signed-off-by: Arkadi Sharshevsky Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 92aad7c..7b1076d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1695,10 +1695,11 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, goto nla_put_failure; if (table->resource_valid) { - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, - table->resource_id, DEVLINK_ATTR_PAD); - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, - table->resource_units, DEVLINK_ATTR_PAD); + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID, + table->resource_id, DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS, + table->resource_units, DEVLINK_ATTR_PAD)) + goto nla_put_failure; } if (devlink_dpipe_matches_put(table, skb)) goto nla_put_failure; @@ -2372,20 +2373,22 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb, return 0; } -static void +static int devlink_resource_size_params_put(struct devlink_resource *resource, struct sk_buff *skb) { struct devlink_resource_size_params *size_params; size_params = resource->size_params; - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, - size_params->size_granularity, DEVLINK_ATTR_PAD); - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, - size_params->size_max, DEVLINK_ATTR_PAD); - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, - size_params->size_min, DEVLINK_ATTR_PAD); - nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit); + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, + size_params->size_granularity, DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, + size_params->size_max, DEVLINK_ATTR_PAD) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN, + size_params->size_min, DEVLINK_ATTR_PAD) || + nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit)) + return -EMSGSIZE; + return 0; } static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, @@ -2409,10 +2412,12 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, resource->size_new, DEVLINK_ATTR_PAD); if (resource->resource_ops && resource->resource_ops->occ_get) - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC, - resource->resource_ops->occ_get(devlink), - DEVLINK_ATTR_PAD); - devlink_resource_size_params_put(resource, skb); + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC, + resource->resource_ops->occ_get(devlink), + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (devlink_resource_size_params_put(resource, skb)) + goto nla_put_failure; if (list_empty(&resource->resource_list)) goto out; -- cgit v1.1 From 0373ca74831b0f93cd4cdbf7ad3aec3c33a479a5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 23 Feb 2018 09:38:28 +0530 Subject: cpufreq: s3c24xx: Fix broken s3c_cpufreq_init() commit a307a1e6bc0d "cpufreq: s3c: use cpufreq_generic_init()" accidentally broke cpufreq on s3c2410 and s3c2412. These two platforms don't have a CPU frequency table and used to skip calling cpufreq_table_validate_and_show() for them. But with the above commit, we started calling it unconditionally and that will eventually fail as the frequency table pointer is NULL. Fix this by calling cpufreq_table_validate_and_show() conditionally again. Fixes: a307a1e6bc0d "cpufreq: s3c: use cpufreq_generic_init()" Cc: 3.13+ # v3.13+ Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/s3c24xx-cpufreq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index 7b596fa..6bebc1f 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -351,7 +351,13 @@ struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name) static int s3c_cpufreq_init(struct cpufreq_policy *policy) { policy->clk = clk_arm; - return cpufreq_generic_init(policy, ftab, cpu_cur.info->latency); + + policy->cpuinfo.transition_latency = cpu_cur.info->latency; + + if (ftab) + return cpufreq_table_validate_and_show(policy, ftab); + + return 0; } static int __init s3c_cpufreq_initclks(void) -- cgit v1.1 From 1b22bcad7e397252ecc9a8c471334f70b46820fc Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 26 Feb 2018 20:14:04 +0100 Subject: tipc: correct initial value for group congestion flag In commit 60c253069632 ("tipc: fix race between poll() and setsockopt()") we introduced a pointer from struct tipc_group to the 'group_is_connected' flag in struct tipc_sock, so that this field can be checked without dereferencing the group pointer of the latter struct. The initial value for this flag is correctly set to 'false' when a group is created, but we miss the case when no group is created at all, in which case the initial value should be 'true'. This has the effect that SOCK_RDM/DGRAM sockets sending datagrams never receive POLLOUT if they request so. This commit corrects this bug. Fixes: 60c253069632 ("tipc: fix race between poll() and setsockopt()") Reported-by: Hoang Le Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 1 + net/tipc/socket.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/tipc/group.c b/net/tipc/group.c index 122162a..04e516d 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -189,6 +189,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; grp->open = group_is_open; + *grp->open = false; filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, filter, &grp->subid)) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b0323ec..7dfa9fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -473,6 +473,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->group_is_open = true; atomic_set(&tsk->dupl_rcvcnt, 0); /* Start out with safe limits until we receive an advertised window */ -- cgit v1.1 From 02aa8a8b2b84531fa78b9a486d9b2a0700f7bc08 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Tue, 27 Feb 2018 09:49:29 -0800 Subject: bcache: correct flash only vols (check all uuids) Commit 2831231d4c3f ("bcache: reduce cache_set devices iteration by devices_max_used") adds c->devices_max_used to reduce iteration of c->uuids elements, this value is updated in bcache_device_attach(). But for flash only volume, when calling flash_devs_run(), the function bcache_device_attach() is not called yet and c->devices_max_used is not updated. The unexpected result is, the flash only volume won't be run by flash_devs_run(). This patch fixes the issue by iterate all c->uuids elements in flash_devs_run(). c->devices_max_used will be updated properly when bcache_device_attach() gets called. [mlyle: commit subject edited for character limit] Fixes: 2831231d4c3f ("bcache: reduce cache_set devices iteration by devices_max_used") Reported-by: Tang Junhui Signed-off-by: Coly Li Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3128957..4d1d8df 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1274,7 +1274,7 @@ static int flash_devs_run(struct cache_set *c) struct uuid_entry *u; for (u = c->uuids; - u < c->uuids + c->devices_max_used && !ret; + u < c->uuids + c->nr_uuids && !ret; u++) if (UUID_FLASH_ONLY(u)) ret = flash_dev_run(c, u); -- cgit v1.1 From 60eb34ec5526e264c2bbaea4f7512d714d791caf Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Tue, 27 Feb 2018 09:49:30 -0800 Subject: bcache: fix kcrashes with fio in RAID5 backend dev Kernel crashed when run fio in a RAID5 backend bcache device, the call trace is bellow: [ 440.012034] kernel BUG at block/blk-ioc.c:146! [ 440.012696] invalid opcode: 0000 [#1] SMP NOPTI [ 440.026537] CPU: 2 PID: 2205 Comm: md127_raid5 Not tainted 4.15.0 #8 [ 440.027441] Hardware name: HP ProLiant MicroServer Gen8, BIOS J06 07/16 /2015 [ 440.028615] RIP: 0010:put_io_context+0x8b/0x90 [ 440.029246] RSP: 0018:ffffa8c882b43af8 EFLAGS: 00010246 [ 440.029990] RAX: 0000000000000000 RBX: ffffa8c88294fca0 RCX: 0000000000 0f4240 [ 440.031006] RDX: 0000000000000004 RSI: 0000000000000286 RDI: ffffa8c882 94fca0 [ 440.032030] RBP: ffffa8c882b43b10 R08: 0000000000000003 R09: ffff949cb8 0c1700 [ 440.033206] R10: 0000000000000104 R11: 000000000000b71c R12: 00000000000 01000 [ 440.034222] R13: 0000000000000000 R14: ffff949cad84db70 R15: ffff949cb11 bd1e0 [ 440.035239] FS: 0000000000000000(0000) GS:ffff949cba280000(0000) knlGS: 0000000000000000 [ 440.060190] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 440.084967] CR2: 00007ff0493ef000 CR3: 00000002f1e0a002 CR4: 00000000001 606e0 [ 440.110498] Call Trace: [ 440.135443] bio_disassociate_task+0x1b/0x60 [ 440.160355] bio_free+0x1b/0x60 [ 440.184666] bio_put+0x23/0x30 [ 440.208272] search_free+0x23/0x40 [bcache] [ 440.231448] cached_dev_write_complete+0x31/0x70 [bcache] [ 440.254468] closure_put+0xb6/0xd0 [bcache] [ 440.277087] request_endio+0x30/0x40 [bcache] [ 440.298703] bio_endio+0xa1/0x120 [ 440.319644] handle_stripe+0x418/0x2270 [raid456] [ 440.340614] ? load_balance+0x17b/0x9c0 [ 440.360506] handle_active_stripes.isra.58+0x387/0x5a0 [raid456] [ 440.380675] ? __release_stripe+0x15/0x20 [raid456] [ 440.400132] raid5d+0x3ed/0x5d0 [raid456] [ 440.419193] ? schedule+0x36/0x80 [ 440.437932] ? schedule_timeout+0x1d2/0x2f0 [ 440.456136] md_thread+0x122/0x150 [ 440.473687] ? wait_woken+0x80/0x80 [ 440.491411] kthread+0x102/0x140 [ 440.508636] ? find_pers+0x70/0x70 [ 440.524927] ? kthread_associate_blkcg+0xa0/0xa0 [ 440.541791] ret_from_fork+0x35/0x40 [ 440.558020] Code: c2 48 00 5b 41 5c 41 5d 5d c3 48 89 c6 4c 89 e7 e8 bb c2 48 00 48 8b 3d bc 36 4b 01 48 89 de e8 7c f7 e0 ff 5b 41 5c 41 5d 5d c3 <0f> 0b 0f 1f 00 0f 1f 44 00 00 55 48 8d 47 b8 48 89 e5 41 57 41 [ 440.610020] RIP: put_io_context+0x8b/0x90 RSP: ffffa8c882b43af8 [ 440.628575] ---[ end trace a1fd79d85643a73e ]-- All the crash issue happened when a bypass IO coming, in such scenario s->iop.bio is pointed to the s->orig_bio. In search_free(), it finishes the s->orig_bio by calling bio_complete(), and after that, s->iop.bio became invalid, then kernel would crash when calling bio_put(). Maybe its upper layer's faulty, since bio should not be freed before we calling bio_put(), but we'd better calling bio_put() first before calling bio_complete() to notify upper layer ending this bio. This patch moves bio_complete() under bio_put() to avoid kernel crash. [mlyle: fixed commit subject for character limits] Reported-by: Matthias Ferdinand Tested-by: Matthias Ferdinand Signed-off-by: Tang Junhui Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 1a46b41..6422846 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -659,11 +659,11 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio) static void search_free(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); - bio_complete(s); if (s->iop.bio) bio_put(s->iop.bio); + bio_complete(s); closure_debug_destroy(cl); mempool_free(s, s->d->c->search); } -- cgit v1.1 From 0e0d5002f8c047de92a41340cc67c39267eb9559 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Feb 2018 17:58:18 +0100 Subject: netfilter: nf_tables: use the right index from flowtable error path Use the right loop index, not the number of devices in the array that we need to remove, the following message uncovered the problem: [ 5437.044119] hook not found, pf 5 num 0 [ 5437.044140] WARNING: CPU: 2 PID: 24983 at net/netfilter/core.c:376 __nf_unregister_net_hook+0x250/0x280 Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b5aa78..558593e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5136,7 +5136,7 @@ err5: i = flowtable->ops_len; err4: for (k = i - 1; k >= 0; k--) - nf_unregister_net_hook(net, &flowtable->ops[i]); + nf_unregister_net_hook(net, &flowtable->ops[k]); kfree(flowtable->ops); err3: -- cgit v1.1 From 9c2c2e62df3fa30fb13fbeb7512a4eede729383b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 27 Feb 2018 01:56:06 +0100 Subject: net: phy: Restore phy_resume() locking assumption commit f5e64032a799 ("net: phy: fix resume handling") changes the locking semantics for phy_resume() such that the caller now needs to hold the phy mutex. Not all call sites were adopted to this new semantic, resulting in warnings from the added WARN_ON(!mutex_is_locked(&phydev->lock)). Rather than change the semantics, add a __phy_resume() and restore the old behavior of phy_resume(). Reported-by: Heiner Kallweit Fixes: f5e64032a799 ("net: phy: fix resume handling") Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- drivers/net/phy/phy_device.c | 18 +++++++++++++----- include/linux/phy.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e3e29c2..a6f924f 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -819,7 +819,7 @@ void phy_start(struct phy_device *phydev) break; case PHY_HALTED: /* if phy was suspended, bring the physical link up again */ - phy_resume(phydev); + __phy_resume(phydev); /* make sure interrupts are re-enabled for the PHY */ if (phy_interrupt_is_valid(phydev)) { diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d39ae77..478405e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -135,9 +135,7 @@ static int mdio_bus_phy_resume(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) goto no_resume; - mutex_lock(&phydev->lock); ret = phy_resume(phydev); - mutex_unlock(&phydev->lock); if (ret < 0) return ret; @@ -1041,9 +1039,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (err) goto error; - mutex_lock(&phydev->lock); phy_resume(phydev); - mutex_unlock(&phydev->lock); phy_led_triggers_register(phydev); return err; @@ -1172,7 +1168,7 @@ int phy_suspend(struct phy_device *phydev) } EXPORT_SYMBOL(phy_suspend); -int phy_resume(struct phy_device *phydev) +int __phy_resume(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); int ret = 0; @@ -1189,6 +1185,18 @@ int phy_resume(struct phy_device *phydev) return ret; } +EXPORT_SYMBOL(__phy_resume); + +int phy_resume(struct phy_device *phydev) +{ + int ret; + + mutex_lock(&phydev->lock); + ret = __phy_resume(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} EXPORT_SYMBOL(phy_resume); int phy_loopback(struct phy_device *phydev, bool enable) diff --git a/include/linux/phy.h b/include/linux/phy.h index 5a0c3e5..d706953 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -924,6 +924,7 @@ void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); +int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); -- cgit v1.1 From ffc2b6ee417435605ee8bb1eb4c8f02e9ff4b4a5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 27 Feb 2018 19:19:39 +0800 Subject: ip_gre: fix IFLA_MTU ignored on NEWLINK It's safe to remove the setting of dev's needed_headroom and mtu in __gre_tunnel_init, as discussed in [1], ip_tunnel_newlink can do it properly. Now Eric noticed that it could cover the mtu value set in do_setlink when creating a ip_gre dev. It makes IFLA_MTU param not take effect. So this patch is to remove them to make IFLA_MTU work, as in other ipv4 tunnels. [1]: https://patchwork.ozlabs.org/patch/823504/ Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: Eric Garver Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 45d97e9..0901de4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -970,9 +970,6 @@ static void __gre_tunnel_init(struct net_device *dev) t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; - dev->mtu = ETH_DATA_LEN - t_hlen - 4; - dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; @@ -1290,8 +1287,6 @@ static int erspan_tunnel_init(struct net_device *dev) erspan_hdr_len(tunnel->erspan_ver); t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; - dev->mtu = ETH_DATA_LEN - t_hlen - 4; dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; -- cgit v1.1 From a6aa80446234ec0ad38eecdb8efc59e91daae565 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 27 Feb 2018 19:19:40 +0800 Subject: ip6_tunnel: fix IFLA_MTU ignored on NEWLINK Commit 128bb975dc3c ("ip6_gre: init dev->mtu and dev->hard_header_len correctly") fixed IFLA_MTU ignored on NEWLINK for ip6_gre. The same mtu fix is also needed for ip6_tunnel. Note that dev->hard_header_len setting for ip6_tunnel works fine, no need to fix it. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4b15fe9..6e0f21e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1982,14 +1982,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, { struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - struct ip6_tnl *nt, *t; struct ip_tunnel_encap ipencap; + struct ip6_tnl *nt, *t; + int err; nt = netdev_priv(dev); if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { - int err = ip6_tnl_encap_setup(nt, &ipencap); - + err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) return err; } @@ -2005,7 +2005,11 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, return -EEXIST; } - return ip6_tnl_create2(dev); + err = ip6_tnl_create2(dev); + if (!err && tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + + return err; } static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], -- cgit v1.1 From 2b3957c34b6d7f03544b12ebbf875eee430745db Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 27 Feb 2018 19:19:41 +0800 Subject: sit: fix IFLA_MTU ignored on NEWLINK Commit 128bb975dc3c ("ip6_gre: init dev->mtu and dev->hard_header_len correctly") fixed IFLA_MTU ignored on NEWLINK for ip6_gre. The same mtu fix is also needed for sit. Note that dev->hard_header_len setting for sit works fine, no need to fix it. sit is actually ipv4 tunnel, it can't call ip6_tnl_change_mtu to set mtu. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/sit.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3a1775a..0195598 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1578,6 +1578,13 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + + if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len) + dev->mtu = mtu; + } + #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) err = ipip6_tunnel_update_6rd(nt, &ip6rd); -- cgit v1.1 From 55ea874306ea28e6be9e07b7e89bbb9fb674e8eb Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 27 Feb 2018 14:58:16 +0300 Subject: sh_eth: uninline TSU register accessors We have uninlined the sh_eth_{read|write}() functions introduced in the commit 4a55530f38e ("net: sh_eth: modify the definitions of register"). Now remove *inline* from sh_eth_tsu_{read|write}() as well and move these functions from the header to the driver itself. This saves 684 more bytes of object code (ARM gcc 4.8.5)... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 11 +++++++++++ drivers/net/ethernet/renesas/sh_eth.h | 11 ----------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 92dcf87..14c839b 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -439,6 +439,17 @@ static void sh_eth_modify(struct net_device *ndev, int enum_index, u32 clear, enum_index); } +static void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data, + int enum_index) +{ + iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]); +} + +static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index) +{ + return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]); +} + static bool sh_eth_is_gether(struct sh_eth_private *mdp) { return mdp->reg_offset == sh_eth_offset_gigabit; diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index a6753cc..e5fe701 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -567,15 +567,4 @@ static inline void *sh_eth_tsu_get_offset(struct sh_eth_private *mdp, return mdp->tsu_addr + mdp->reg_offset[enum_index]; } -static inline void sh_eth_tsu_write(struct sh_eth_private *mdp, u32 data, - int enum_index) -{ - iowrite32(data, mdp->tsu_addr + mdp->reg_offset[enum_index]); -} - -static inline u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index) -{ - return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]); -} - #endif /* #ifndef __SH_ETH_H__ */ -- cgit v1.1 From c113187d38ff85dc302a1bb55864b203ebb2ba10 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Tue, 27 Feb 2018 14:18:39 +0200 Subject: tls: Use correct sk->sk_prot for IPV6 The tls ulp overrides sk->prot with a new tls specific proto structs. The tls specific structs were previously based on the ipv4 specific tcp_prot sturct. As a result, attaching the tls ulp to an ipv6 tcp socket replaced some ipv6 callback with the ipv4 equivalents. This patch adds ipv6 tls proto structs and uses them when attached to ipv6 sockets. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Boris Pismenny Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- net/tls/tls_main.c | 52 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e9b4b53..d824d54 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -46,16 +46,26 @@ MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); enum { + TLSV4, + TLSV6, + TLS_NUM_PROTS, +}; + +enum { TLS_BASE_TX, TLS_SW_TX, TLS_NUM_CONFIG, }; -static struct proto tls_prots[TLS_NUM_CONFIG]; +static struct proto *saved_tcpv6_prot; +static DEFINE_MUTEX(tcpv6_prot_mutex); +static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG]; static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) { - sk->sk_prot = &tls_prots[ctx->tx_conf]; + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + + sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf]; } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -453,8 +463,21 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int tls_init(struct sock *sk) { + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; struct inet_connection_sock *icsk = inet_csk(sk); struct tls_context *ctx; int rc = 0; @@ -479,6 +502,17 @@ static int tls_init(struct sock *sk) ctx->getsockopt = sk->sk_prot->getsockopt; ctx->sk_proto_close = sk->sk_prot->close; + /* Build IPv6 TLS whenever the address of tcpv6_prot changes */ + if (ip_ver == TLSV6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); + } + ctx->tx_conf = TLS_BASE_TX; update_sk_prot(sk, ctx); out: @@ -493,21 +527,9 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; -static void build_protos(struct proto *prot, struct proto *base) -{ - prot[TLS_BASE_TX] = *base; - prot[TLS_BASE_TX].setsockopt = tls_setsockopt; - prot[TLS_BASE_TX].getsockopt = tls_getsockopt; - prot[TLS_BASE_TX].close = tls_sk_proto_close; - - prot[TLS_SW_TX] = prot[TLS_BASE_TX]; - prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; - prot[TLS_SW_TX].sendpage = tls_sw_sendpage; -} - static int __init tls_register(void) { - build_protos(tls_prots, &tcp_prot); + build_protos(tls_prots[TLSV4], &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); -- cgit v1.1 From 8ca88b5486cd87ac4fbda94f0a8ac5f36eb71c4b Mon Sep 17 00:00:00 2001 From: Bassem Boubaker Date: Tue, 27 Feb 2018 14:04:44 +0100 Subject: cdc_ether: flag the Cinterion PLS8 modem by gemalto as WWAN The Cinterion PL8 is an LTE modem with 2 possible WWAN interfaces. The modem is controlled via AT commands through the exposed TTYs. AT^SWWAN write command can be used to activate or deactivate a WWAN connection for a PDP context defined with AT+CGDCONT. UE supports two WWAN adapter. Both WWAN adapters can be activated a the same time Signed-off-by: Bassem Boubaker Acked-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 05dca3e..fff4b13 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -896,6 +896,12 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { + /* Cinterion PLS8 modem by GEMALTO */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0061, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long) &cdc_info, -- cgit v1.1 From 0979962f5490abe75b3e2befb07a564fa0cf631b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Feb 2018 11:14:55 -0600 Subject: nbd: fix return value in error handling path It seems that the proper value to return in this particular case is the one contained into variable new_index instead of ret. Addresses-Coverity-ID: 1465148 ("Copy-paste error") Fixes: e46c7287b1c2 ("nbd: add a basic netlink interface") Reviewed-by: Omar Sandoval Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5f2a424..86258b0 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1591,7 +1591,7 @@ again: if (new_index < 0) { mutex_unlock(&nbd_index_mutex); printk(KERN_ERR "nbd: failed to add new device\n"); - return ret; + return new_index; } nbd = idr_find(&nbd_index_idr, new_index); } -- cgit v1.1 From 0a5aff64f20d92c5a6e9aeed7b5950b0b817bcd9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Feb 2018 17:00:35 -0800 Subject: ARM: dts: NSP: Fix amount of RAM on BCM958625HR Jon attempted to fix the amount of RAM on the BCM958625HR in commit c53beb47f621 ("ARM: dts: NSP: Correct RAM amount for BCM958625HR board") but it seems like we tripped over some poorly documented schematics. The top-level page of the schematics says the board has 2GB, but when you end-up scrolling to page 6, you see two chips of 4GBit (512MB) but what the bootloader really initializes only 512MB, any attempt to use more than that results in data aborts. Fix this again back to 512MB. Fixes: c53beb47f621 ("ARM: dts: NSP: Correct RAM amount for BCM958625HR board") Acked-by: Jon Mason Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm958625hr.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index 6a44b80..f0e2008 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -49,7 +49,7 @@ memory { device_type = "memory"; - reg = <0x60000000 0x80000000>; + reg = <0x60000000 0x20000000>; }; gpio-restart { -- cgit v1.1 From 808b7de86a0c19582a7efce4c80d6b4e1da7f370 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 24 Feb 2018 15:15:21 +0100 Subject: ARM: dts: bcm283x: Fix unit address of local_intc This patch fixes the following DTC warning (requires W=1): Node /soc/local_intc simple-bus unit address format error, expected "40000000" Signed-off-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm2836.dtsi | 2 +- arch/arm/boot/dts/bcm2837.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi index 1dfd764..e47f2e9 100644 --- a/arch/arm/boot/dts/bcm2836.dtsi +++ b/arch/arm/boot/dts/bcm2836.dtsi @@ -9,7 +9,7 @@ <0x40000000 0x40000000 0x00001000>; dma-ranges = <0xc0000000 0x00000000 0x3f000000>; - local_intc: local_intc { + local_intc: local_intc@40000000 { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index efa7d33..7704bb0 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -8,7 +8,7 @@ <0x40000000 0x40000000 0x00001000>; dma-ranges = <0xc0000000 0x00000000 0x3f000000>; - local_intc: local_intc { + local_intc: local_intc@40000000 { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; -- cgit v1.1 From 2944866ac5b37e24ce1e646d2aaf472148e4a43e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 24 Feb 2018 15:15:22 +0100 Subject: ARM: dts: bcm283x: Move arm-pmu out of soc node The ARM PMU doesn't have a reg address, so fix the following DTC warning (requires W=1): Node /soc/arm-pmu missing or empty reg/ranges property Signed-off-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm2835.dtsi | 6 +++--- arch/arm/boot/dts/bcm2836.dtsi | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/bcm2835.dtsi b/arch/arm/boot/dts/bcm2835.dtsi index 0e3d2a5..a5c3824 100644 --- a/arch/arm/boot/dts/bcm2835.dtsi +++ b/arch/arm/boot/dts/bcm2835.dtsi @@ -18,10 +18,10 @@ soc { ranges = <0x7e000000 0x20000000 0x02000000>; dma-ranges = <0x40000000 0x00000000 0x20000000>; + }; - arm-pmu { - compatible = "arm,arm1176-pmu"; - }; + arm-pmu { + compatible = "arm,arm1176-pmu"; }; }; diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi index e47f2e9..c933e84 100644 --- a/arch/arm/boot/dts/bcm2836.dtsi +++ b/arch/arm/boot/dts/bcm2836.dtsi @@ -16,12 +16,12 @@ #interrupt-cells = <2>; interrupt-parent = <&local_intc>; }; + }; - arm-pmu { - compatible = "arm,cortex-a7-pmu"; - interrupt-parent = <&local_intc>; - interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; - }; + arm-pmu { + compatible = "arm,cortex-a7-pmu"; + interrupt-parent = <&local_intc>; + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; }; timer { -- cgit v1.1 From 5a23699a39abc5328921a81b89383d088f6ba9cc Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 27 Feb 2018 17:01:18 +0000 Subject: ALSA: control: Fix memory corruption risk in snd_ctl_elem_read The patch "ALSA: control: code refactoring for ELEM_READ/ELEM_WRITE operations" introduced a potential for kernel memory corruption due to an incorrect if statement allowing non-readable controls to fall through and call the get function. For TLV controls a driver can omit SNDRV_CTL_ELEM_ACCESS_READ to ensure that only the TLV get function can be called. Instead the normal get() can be invoked unexpectedly and as the driver expects that this will only be called for controls <= 512 bytes, potentially try to copy >512 bytes into the 512 byte return array, so corrupting kernel memory. The problem is an attempt to refactor the snd_ctl_elem_read function to invert the logic so that it conditionally aborted if the control is unreadable instead of conditionally executing. But the if statement wasn't inverted correctly. The correct inversion of if (a && !b) is if (!a || b) Fixes: becf9e5d553c2 ("ALSA: control: code refactoring for ELEM_READ/ELEM_WRITE operations") Signed-off-by: Richard Fitzgerald Cc: Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 0b3026d..8a77620 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -889,7 +889,7 @@ static int snd_ctl_elem_read(struct snd_card *card, index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; - if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) && kctl->get == NULL) + if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) return -EPERM; snd_ctl_build_ioff(&control->id, kctl, index_offset); -- cgit v1.1 From 350144069abf351c743d766b2fba9cb9b7cd32a1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Feb 2018 08:36:06 +0100 Subject: ALSA: x86: Fix missing spinlock and mutex initializations The commit change for supporting the multiple ports moved involved some code shuffling, and there the initializations of spinlock and mutex in snd_intelhad object were dropped mistakenly. This patch adds the missing initializations again for each port. Fixes: b4eb0d522fcb ("ALSA: x86: Split snd_intelhad into card and PCM specific structures") Cc: Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 96115c4..cec62f9 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1835,6 +1835,8 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) ctx->port = single_port ? -1 : port; ctx->pipe = -1; + spin_lock_init(&ctx->had_spinlock); + mutex_init(&ctx->mutex); INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS, -- cgit v1.1 From c77a6edb6d4d35204673cad7389c317bfb17492e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Feb 2018 08:46:00 +0100 Subject: ALSA: x86: Fix potential crash at error path When LPE audio driver gets some error at probing, it may lead to a crash because of canceling the pending work in hdmi_lpe_audio_free(), since some of ports might be still not initialized. For assuring the proper free of each port, initialize all ports at the beginning of the probe. Fixes: b4eb0d522fcb ("ALSA: x86: Split snd_intelhad into card and PCM specific structures") Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index cec62f9..4ed9d0c 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1751,6 +1751,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; struct snd_intelhad_card *card_ctx; + struct snd_intelhad *ctx; struct snd_pcm *pcm; struct intel_hdmi_lpe_audio_pdata *pdata; int irq; @@ -1795,6 +1796,21 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) platform_set_drvdata(pdev, card_ctx); + card_ctx->num_pipes = pdata->num_pipes; + card_ctx->num_ports = single_port ? 1 : pdata->num_ports; + + for_each_port(card_ctx, port) { + ctx = &card_ctx->pcm_ctx[port]; + ctx->card_ctx = card_ctx; + ctx->dev = card_ctx->dev; + ctx->port = single_port ? -1 : port; + ctx->pipe = -1; + + spin_lock_init(&ctx->had_spinlock); + mutex_init(&ctx->mutex); + INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + } + dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n", __func__, (unsigned int)res_mmio->start, (unsigned int)res_mmio->end); @@ -1827,18 +1843,9 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) card_ctx->num_ports = single_port ? 1 : pdata->num_ports; for_each_port(card_ctx, port) { - struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; int i; - ctx->card_ctx = card_ctx; - ctx->dev = card_ctx->dev; - ctx->port = single_port ? -1 : port; - ctx->pipe = -1; - - spin_lock_init(&ctx->had_spinlock); - mutex_init(&ctx->mutex); - INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); - + ctx = &card_ctx->pcm_ctx[port]; ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS, MAX_CAP_STREAMS, &pcm); if (ret) -- cgit v1.1 From 084a804e01205bcd74cd0849bc72cb5c88f8e648 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 27 Feb 2018 12:41:41 +0200 Subject: usb: dwc3: Fix lock-up on ID change during system suspend/resume To reproduce the lock up do the following - connect otg host adapter and a USB device to the dual-role port so that it is in host mode. - suspend to mem. - disconnect otg adapter. - resume the system. If we call dwc3_host_exit() before tasks are thawed xhci_plat_remove() seems to lock up at the second usb_remove_hcd() call. To work around this we queue the _dwc3_set_mode() work on the system_freezable_wq. Fixes: 41ce1456e1db ("usb: dwc3: core: make dwc3_set_mode() work properly") Cc: # v4.12+ Suggested-by: Manu Gautam Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index f1d838a..e94bf91 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -175,7 +175,7 @@ void dwc3_set_mode(struct dwc3 *dwc, u32 mode) dwc->desired_dr_role = mode; spin_unlock_irqrestore(&dwc->lock, flags); - queue_work(system_power_efficient_wq, &dwc->drd_work); + queue_work(system_freezable_wq, &dwc->drd_work); } u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) -- cgit v1.1 From 377999caf72233af4abebb511359647f312c4e6e Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Tue, 16 Jan 2018 17:06:15 +0100 Subject: dt-bindings: at24: sort manufacturers alphabetically Makes them easier to find. Signed-off-by: Peter Rosin Acked-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/eeprom/at24.txt b/Documentation/devicetree/bindings/eeprom/at24.txt index 1812c84..abfae1b 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.txt +++ b/Documentation/devicetree/bindings/eeprom/at24.txt @@ -38,9 +38,9 @@ Required properties: "catalyst", "microchip", + "nxp", "ramtron", "renesas", - "nxp", "st", Some vendors use different model names for chips which are just -- cgit v1.1 From 9bd82b1a4418d9b7db000bf557ed608f2872b7c9 Mon Sep 17 00:00:00 2001 From: Baegjae Sung Date: Wed, 28 Feb 2018 16:06:04 +0900 Subject: nvme-multipath: fix sysfs dangerously created links If multipathing is enabled, each NVMe subsystem creates a head namespace (e.g., nvme0n1) and multiple private namespaces (e.g., nvme0c0n1 and nvme0c1n1) in sysfs. When creating links for private namespaces, links of head namespace are used, so the namespace creation order must be followed (e.g., nvme0n1 -> nvme0c1n1). If the order is not followed, links of sysfs will be incomplete or kernel panic will occur. The kernel panic was: kernel BUG at fs/sysfs/symlink.c:27! Call Trace: nvme_mpath_add_disk_links+0x5d/0x80 [nvme_core] nvme_validate_ns+0x5c2/0x850 [nvme_core] nvme_scan_work+0x1af/0x2d0 [nvme_core] Correct order Context A Context B nvme0n1 nvme0c0n1 nvme0c1n1 Incorrect order Context A Context B nvme0c1n1 nvme0n1 nvme0c0n1 The nvme_mpath_add_disk (for creating head namespace) is called just before the nvme_mpath_add_disk_links (for creating private namespaces). In nvme_mpath_add_disk, the first context acquires the lock of subsystem and creates a head namespace, and other contexts do nothing by checking GENHD_FL_UP of a head namespace after waiting to acquire the lock. We verified the code with or without multipathing using three vendors of dual-port NVMe SSDs. Signed-off-by: Baegjae Sung Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 12 +++--------- drivers/nvme/host/multipath.c | 15 ++++++++++----- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f431c32..6088ea1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2835,7 +2835,7 @@ out: } static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, - struct nvme_id_ns *id, bool *new) + struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; bool is_shared = id->nmic & (1 << 0); @@ -2851,8 +2851,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, ret = PTR_ERR(head); goto out_unlock; } - - *new = true; } else { struct nvme_ns_ids ids; @@ -2864,8 +2862,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, ret = -EINVAL; goto out_unlock; } - - *new = false; } list_add_tail(&ns->siblings, &head->list); @@ -2936,7 +2932,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT; - bool new = true; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -2962,7 +2957,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (id->ncap == 0) goto out_free_id; - if (nvme_init_ns_head(ns, nsid, id, &new)) + if (nvme_init_ns_head(ns, nsid, id)) goto out_free_id; nvme_setup_streams_ns(ctrl, ns); @@ -3028,8 +3023,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) pr_warn("%s: failed to register lightnvm sysfs group for identification\n", ns->disk->disk_name); - if (new) - nvme_mpath_add_disk(ns->head); + nvme_mpath_add_disk(ns->head); nvme_mpath_add_disk_links(ns); return; out_unlink_ns: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3b211d9..b7e5c6d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -198,11 +198,16 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head) { if (!head->disk) return; - device_add_disk(&head->subsys->dev, head->disk); - if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group)) - pr_warn("%s: failed to create sysfs group for identification\n", - head->disk->disk_name); + + mutex_lock(&head->subsys->lock); + if (!(head->disk->flags & GENHD_FL_UP)) { + device_add_disk(&head->subsys->dev, head->disk); + if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, + &nvme_ns_id_attr_group)) + pr_warn("%s: failed to create sysfs group for identification\n", + head->disk->disk_name); + } + mutex_unlock(&head->subsys->lock); } void nvme_mpath_add_disk_links(struct nvme_ns *ns) -- cgit v1.1 From d7789f5bcdb298c4a302db471b1b20f74a20de95 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 28 Feb 2018 10:31:10 +0000 Subject: ASoC: wm_adsp: For TLV controls only register TLV get/set Normal 512-byte get/set of a TLV isn't supported but we were registering the normal get/set anyway and relying on omitting the SNDRV_CTL_ELEM_ACCESS_[READ|WRITE] flags to prevent them being called. Trouble is if this gets broken in the core ALSA code - as it has been since at least 4.14 - the standard get/set can be called unexpectedly and corrupt memory. There's no point providing functions that won't be called and it's a trivial change. The benefit is that if the ALSA core gets broken again we get a big fat immediate NULL dereference instead of a memory corruption timebomb. Signed-off-by: Richard Fitzgerald Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_adsp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 66e32f5..989d093 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1204,12 +1204,14 @@ static int wmfw_add_ctl(struct wm_adsp *dsp, struct wm_coeff_ctl *ctl) kcontrol->put = wm_coeff_put_acked; break; default: - kcontrol->get = wm_coeff_get; - kcontrol->put = wm_coeff_put; - - ctl->bytes_ext.max = ctl->len; - ctl->bytes_ext.get = wm_coeff_tlv_get; - ctl->bytes_ext.put = wm_coeff_tlv_put; + if (kcontrol->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + ctl->bytes_ext.max = ctl->len; + ctl->bytes_ext.get = wm_coeff_tlv_get; + ctl->bytes_ext.put = wm_coeff_tlv_put; + } else { + kcontrol->get = wm_coeff_get; + kcontrol->put = wm_coeff_put; + } break; } -- cgit v1.1 From 64c3f648c25d108f346fdc96c15180c6b7d250e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 23 Feb 2018 12:55:59 -0800 Subject: powerpc/boot: Fix random libfdt related build errors Once in a while I see build errors similar to the following when building images from a clean tree. Building powerpc:virtex-ml507:44x/virtex5_defconfig ... failed ------------ Error log: arch/powerpc/boot/treeboot-akebono.c:37:20: fatal error: libfdt.h: No such file or directory Building powerpc:bamboo:smpdev:44x/bamboo_defconfig ... failed ------------ Error log: arch/powerpc/boot/treeboot-akebono.c:37:20: fatal error: libfdt.h: No such file or directory arch/powerpc/boot/treeboot-currituck.c:35:20: fatal error: libfdt.h: No such file or directory Rebuilds will succeed. Turns out that several source files in arch/powerpc/boot/ include libfdt.h, but Makefile dependencies are incomplete. Let's fix that. Signed-off-by: Guenter Roeck Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ef6549e..26d5d2a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \ + treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ -- cgit v1.1 From b7abbd5a3533a31a1e7d4696ea275df543440c51 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 28 Feb 2018 15:15:56 +1100 Subject: selftests/powerpc: Fix missing clean of pmu/lib.o The tm-resched-dscr test links against pmu/lib.o, but we don't have a rule to clean pmu/lib.o. This can lead to a build break if you build for big endian and then little, or vice versa. Fix it by making tm-resched-dscr depend on pmu/lib.c, causing the code to be built directly in, meaning no .o is generated. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index a234539..5c72ff9 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -16,7 +16,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 -$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o +$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 -- cgit v1.1 From 28b0f8a6962a24ed21737578f3b1b07424635c9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Feb 2018 07:38:08 -0800 Subject: tty: make n_tty_read() always abort if hangup is in progress A tty is hung up by __tty_hangup() setting file->f_op to hung_up_tty_fops, which is skipped on ttys whose write operation isn't tty_write(). This means that, for example, /dev/console whose write op is redirected_tty_write() is never actually marked hung up. Because n_tty_read() uses the hung up status to decide whether to abort the waiting readers, the lack of hung-up marking can lead to the following scenario. 1. A session contains two processes. The leader and its child. The child ignores SIGHUP. 2. The leader exits and starts disassociating from the controlling terminal (/dev/console). 3. __tty_hangup() skips setting f_op to hung_up_tty_fops. 4. SIGHUP is delivered and ignored. 5. tty_ldisc_hangup() is invoked. It wakes up the waits which should clear the read lockers of tty->ldisc_sem. 6. The reader wakes up but because tty_hung_up_p() is false, it doesn't abort and goes back to sleep while read-holding tty->ldisc_sem. 7. The leader progresses to tty_ldisc_lock() in tty_ldisc_hangup() and is now stuck in D sleep indefinitely waiting for tty->ldisc_sem. The following is Alan's explanation on why some ttys aren't hung up. http://lkml.kernel.org/r/20171101170908.6ad08580@alans-desktop 1. It broke the serial consoles because they would hang up and close down the hardware. With tty_port that *should* be fixable properly for any cases remaining. 2. The console layer was (and still is) completely broken and doens't refcount properly. So if you turn on console hangups it breaks (as indeed does freeing consoles and half a dozen other things). As neither can be fixed quickly, this patch works around the problem by introducing a new flag, TTY_HUPPING, which is used solely to tell n_tty_read() that hang-up is in progress for the console and the readers should be aborted regardless of the hung-up status of the device. The following is a sample hung task warning caused by this issue. INFO: task agetty:2662 blocked for more than 120 seconds. Not tainted 4.11.3-dbg-tty-lockup-02478-gfd6c7ee-dirty #28 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 0 2662 1 0x00000086 Call Trace: __schedule+0x267/0x890 schedule+0x36/0x80 schedule_timeout+0x23c/0x2e0 ldsem_down_write+0xce/0x1f6 tty_ldisc_lock+0x16/0x30 tty_ldisc_hangup+0xb3/0x1b0 __tty_hangup+0x300/0x410 disassociate_ctty+0x6c/0x290 do_exit+0x7ef/0xb00 do_group_exit+0x3f/0xa0 get_signal+0x1b3/0x5d0 do_signal+0x28/0x660 exit_to_usermode_loop+0x46/0x86 do_syscall_64+0x9c/0xb0 entry_SYSCALL64_slow_path+0x25/0x25 The following is the repro. Run "$PROG /dev/console". The parent process hangs in D state. #include #include #include #include #include #include #include #include #include #include #include #include int main(int argc, char **argv) { struct sigaction sact = { .sa_handler = SIG_IGN }; struct timespec ts1s = { .tv_sec = 1 }; pid_t pid; int fd; if (argc < 2) { fprintf(stderr, "test-hung-tty /dev/$TTY\n"); return 1; } /* fork a child to ensure that it isn't already the session leader */ pid = fork(); if (pid < 0) { perror("fork"); return 1; } if (pid > 0) { /* top parent, wait for everyone */ while (waitpid(-1, NULL, 0) >= 0) ; if (errno != ECHILD) perror("waitpid"); return 0; } /* new session, start a new session and set the controlling tty */ if (setsid() < 0) { perror("setsid"); return 1; } fd = open(argv[1], O_RDWR); if (fd < 0) { perror("open"); return 1; } if (ioctl(fd, TIOCSCTTY, 1) < 0) { perror("ioctl"); return 1; } /* fork a child, sleep a bit and exit */ pid = fork(); if (pid < 0) { perror("fork"); return 1; } if (pid > 0) { nanosleep(&ts1s, NULL); printf("Session leader exiting\n"); exit(0); } /* * The child ignores SIGHUP and keeps reading from the controlling * tty. Because SIGHUP is ignored, the child doesn't get killed on * parent exit and the bug in n_tty makes the read(2) block the * parent's control terminal hangup attempt. The parent ends up in * D sleep until the child is explicitly killed. */ sigaction(SIGHUP, &sact, NULL); printf("Child reading tty\n"); while (1) { char buf[1024]; if (read(fd, buf, sizeof(buf)) < 0) { perror("read"); return 1; } } return 0; } Signed-off-by: Tejun Heo Cc: Alan Cox Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 6 ++++++ drivers/tty/tty_io.c | 9 +++++++++ include/linux/tty.h | 1 + 3 files changed, 16 insertions(+) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 5c0e59e..cbe98bc 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2180,6 +2180,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, } if (tty_hung_up_p(file)) break; + /* + * Abort readers for ttys which never actually + * get hung up. See __tty_hangup(). + */ + if (test_bit(TTY_HUPPING, &tty->flags)) + break; if (!timeout) break; if (file->f_flags & O_NONBLOCK) { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index eb9133b..63114ea 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -586,6 +586,14 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) return; } + /* + * Some console devices aren't actually hung up for technical and + * historical reasons, which can lead to indefinite interruptible + * sleep in n_tty_read(). The following explicitly tells + * n_tty_read() to abort readers. + */ + set_bit(TTY_HUPPING, &tty->flags); + /* inuse_filps is protected by the single tty lock, this really needs to change if we want to flush the workqueue with the lock held */ @@ -640,6 +648,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) * from the ldisc side, which is now guaranteed. */ set_bit(TTY_HUPPED, &tty->flags); + clear_bit(TTY_HUPPING, &tty->flags); tty_unlock(tty); if (f) diff --git a/include/linux/tty.h b/include/linux/tty.h index 0a6c71e..47f8af2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -364,6 +364,7 @@ struct tty_file_private { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ +#define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ -- cgit v1.1 From fd63a8903a2c40425a9811c3371dd4d0f42c0ad3 Mon Sep 17 00:00:00 2001 From: Jonas Danielsson Date: Mon, 29 Jan 2018 12:39:15 +0100 Subject: tty/serial: atmel: add new version check for usart On our at91sam9260 based board the usart0 and usart1 ports report their versions (ATMEL_US_VERSION) as 0x10302. This version is not included in the current checks in the driver. Signed-off-by: Jonas Danielsson Acked-by: Richard Genoud Acked-by: Nicolas Ferre Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index df46a9e..e287fe8 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1734,6 +1734,7 @@ static void atmel_get_ip_name(struct uart_port *port) switch (version) { case 0x302: case 0x10213: + case 0x10302: dev_dbg(port->dev, "This version is usart\n"); atmel_port->has_frac_baudrate = true; atmel_port->has_hw_timer = true; -- cgit v1.1 From e7f3e99cb1a667d04d60d02957fbed58b50d4e5a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 Feb 2018 20:39:13 +0200 Subject: serial: 8250_pci: Don't fail on multiport card class Do not fail on multiport cards in serial_pci_is_class_communication(). It restores behaviour for SUNIX multiport cards, that enumerated by class and have a custom board data. Moreover it allows users to reenumerate port-by-port from user space. Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") Reported-by: Nikola Ciprich Signed-off-by: Andy Shevchenko Tested-by: Nikola Ciprich Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 54adf8d..d580625a 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3387,11 +3387,9 @@ static int serial_pci_is_class_communication(struct pci_dev *dev) /* * If it is not a communications device or the programming * interface is greater than 6, give up. - * - * (Should we try to make guesses for multiport serial devices - * later?) */ if ((((dev->class >> 8) != PCI_CLASS_COMMUNICATION_SERIAL) && + ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MULTISERIAL) && ((dev->class >> 8) != PCI_CLASS_COMMUNICATION_MODEM)) || (dev->class & 0xff) > 6) return -ENODEV; @@ -3428,6 +3426,12 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) { int num_iomem, num_port, first_port = -1, i; + /* + * Should we try to make guesses for multiport serial devices later? + */ + if ((dev->class >> 8) == PCI_CLASS_COMMUNICATION_MULTISERIAL) + return -ENODEV; + num_iomem = num_port = 0; for (i = 0; i < PCI_NUM_BAR_RESOURCES; i++) { if (pci_resource_flags(dev, i) & IORESOURCE_IO) { -- cgit v1.1 From 714569064adee3c114a2a6490735b94abe269068 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 3 Feb 2018 12:27:23 +0100 Subject: serial: core: mark port as initialized in autoconfig This is a followup on 44117a1d1732 ("serial: core: mark port as initialized after successful IRQ change"). Nikola has been using autoconfig via setserial and reported a crash similar to what I fixed in the earlier mentioned commit. Here I do the same fixup for the autoconfig. I wasn't sure that this is the right approach. Nikola confirmed that it fixes his crash. Fixes: b3b576461864 ("tty: serial_core: convert uart_open to use tty_port_open") Link: http://lkml.kernel.org/r/20180131072000.GD1853@localhost.localdomain Reported-by: Nikola Ciprich Tested-by: Nikola Ciprich Cc: Signed-off-by: Sebastian Andrzej Siewior Tested-by: Nikola Ciprich Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index c8dde56..35b9201 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1144,6 +1144,8 @@ static int uart_do_autoconfig(struct tty_struct *tty,struct uart_state *state) uport->ops->config_port(uport, flags); ret = uart_startup(tty, state, 1); + if (ret == 0) + tty_port_set_initialized(port, true); if (ret > 0) ret = 0; } -- cgit v1.1 From 1f66dd36bb18437397ea0d7882c52f7e3c476e15 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 13 Feb 2018 17:09:08 +0800 Subject: earlycon: add reg-offset to physical address before mapping It will get the wrong virtual address because port->mapbase is not added the correct reg-offset yet. We have to update it before earlycon_map() is called Signed-off-by: Greentime Hu Acked-by: Arnd Bergmann Cc: Peter Hurley Cc: stable@vger.kernel.org Fixes: 088da2a17619 ("of: earlycon: Initialize port fields from DT properties") Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/earlycon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index 870e84f..a242783 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -245,11 +245,12 @@ int __init of_setup_earlycon(const struct earlycon_id *match, } port->mapbase = addr; port->uartclk = BASE_BAUD * 16; - port->membase = earlycon_map(port->mapbase, SZ_4K); val = of_get_flat_dt_prop(node, "reg-offset", NULL); if (val) port->mapbase += be32_to_cpu(*val); + port->membase = earlycon_map(port->mapbase, SZ_4K); + val = of_get_flat_dt_prop(node, "reg-shift", NULL); if (val) port->regshift = be32_to_cpu(*val); -- cgit v1.1 From 9f2068f35729948bde84d87a40d135015911345d Mon Sep 17 00:00:00 2001 From: Nikola Ciprich Date: Tue, 13 Feb 2018 15:04:46 +0100 Subject: serial: 8250_pci: Add Brainboxes UC-260 4 port serial device Add PCI ids for two variants of Brainboxes UC-260 quad port PCI serial cards. Suggested-by: Andy Shevchenko Signed-off-by: Nikola Ciprich Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index d580625a..a93f77a 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4703,6 +4703,17 @@ static const struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, /* + * BrainBoxes UC-260 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0D21, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0E34, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + /* * Perle PCI-RAS cards */ { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, -- cgit v1.1 From 7842055bfce4bf0170d0f61df8b2add8399697be Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Thu, 15 Feb 2018 13:02:27 +0100 Subject: serial: sh-sci: prevent lockup on full TTY buffers When the TTY buffers fill up to the configured maximum, a system lockup occurs: [ 598.820128] INFO: rcu_preempt detected stalls on CPUs/tasks: [ 598.825796] 0-...!: (1 GPs behind) idle=5a6/2/0 softirq=1974/1974 fqs=1 [ 598.832577] (detected by 3, t=62517 jiffies, g=296, c=295, q=126) [ 598.838755] Task dump for CPU 0: [ 598.841977] swapper/0 R running task 0 0 0 0x00000022 [ 598.849023] Call trace: [ 598.851476] __switch_to+0x98/0xb0 [ 598.854870] (null) This can be prevented by doing a dummy read of the RX data register. This issue affects both HSCIF and SCIF ports. Reported for R-Car H3 ES2.0; reproduced and fixed on H3 ES1.1. Probably affects other R-Car platforms as well. Reported-by: Yoshihiro Shimoda Signed-off-by: Ulrich Hecht Reviewed-by: Geert Uytterhoeven Cc: stable Tested-by: Nguyen Viet Dung Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 7257c07..44adf9d 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -885,6 +885,8 @@ static void sci_receive_chars(struct uart_port *port) /* Tell the rest of the system the news. New characters! */ tty_flip_buffer_push(tport); } else { + /* TTY buffers full; read from RX reg to prevent lockup */ + serial_port_in(port, SCxRDR); serial_port_in(port, SCxSR); /* dummy read */ sci_clear_SCxSR(port, SCxSR_RDxF_CLEAR(port)); } -- cgit v1.1 From 5d7f77ec72d10c421bc33958f06a5583f2d27ed6 Mon Sep 17 00:00:00 2001 From: phil eichinger Date: Mon, 19 Feb 2018 10:24:15 +0100 Subject: serial: imx: fix bogus dev_err MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only one of the two is really required, not both: * have_rtscts or * have_rtsgpio In imx_rs485_config() this is done correctly, so RS485 is working, just the error message is false. Signed-off-by: Phil Eichinger Reviewed-by: Fabio Estevam Fixes: b8f3bff057b0 ("serial: imx: Support common rs485 binding for RTS polarity" Acked-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 1d7ca38..a33c685 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2093,7 +2093,7 @@ static int serial_imx_probe(struct platform_device *pdev) uart_get_rs485_mode(&pdev->dev, &sport->port.rs485); if (sport->port.rs485.flags & SER_RS485_ENABLED && - (!sport->have_rtscts || !sport->have_rtsgpio)) + (!sport->have_rtscts && !sport->have_rtsgpio)) dev_err(&pdev->dev, "no RTS control, disabling rs485\n"); imx_rs485_config(&sport->port, &sport->port.rs485); -- cgit v1.1 From 5753405e27f8fe4c42c1537d3ddbd9e058e54cdc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 28 Feb 2018 10:56:10 +0100 Subject: clocksource/drivers/mips-gic-timer: Use correct shift count to extract data __gic_clocksource_init() extracts the GIC_CONFIG_COUNTBITS field from read_gic_config() by right shifting the register value. The shift count is determined by the most significant bit (__fls) of the bitmask which is wrong as it shifts out the complete bitfield. Use the least significant bit (__ffs) instead to shift the bitfield down to bit 0. Fixes: e07127a077c7 ("clocksource: mips-gic-timer: Use new GIC accessor functions") Signed-off-by: Felix Fietkau Signed-off-by: Thomas Gleixner Cc: daniel.lezcano@linaro.org Cc: paul.burton@imgtec.com Link: https://lkml.kernel.org/r/20180228095610.50341-1-nbd@nbd.name --- drivers/clocksource/mips-gic-timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index 65e18c8..986b679 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -166,7 +166,7 @@ static int __init __gic_clocksource_init(void) /* Set clocksource mask. */ count_width = read_gic_config() & GIC_CONFIG_COUNTBITS; - count_width >>= __fls(GIC_CONFIG_COUNTBITS); + count_width >>= __ffs(GIC_CONFIG_COUNTBITS); count_width *= 4; count_width += 32; gic_clocksource.mask = CLOCKSOURCE_MASK(count_width); -- cgit v1.1 From a4f538573cd72e7961f4ec5eb13c171f5add58ec Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 21 Feb 2018 11:31:31 -0800 Subject: clocksource/drivers/arc_timer: Update some comments TIMER0 interrupt ACK is different for ARC700 and HS3x cores. This came to light in some internal discussions and it is nice to have this documented rather than digging up the PRM (Programmers Reference Manual). Signed-off-by: Vineet Gupta Signed-off-by: Thomas Gleixner Cc: Daniel Lezcano Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Link: https://lkml.kernel.org/r/1519241491-12570-1-git-send-email-vgupta@synopsys.com --- drivers/clocksource/arc_timer.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c index 4927355..471b428 100644 --- a/drivers/clocksource/arc_timer.c +++ b/drivers/clocksource/arc_timer.c @@ -251,9 +251,14 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id) int irq_reenable = clockevent_state_periodic(evt); /* - * Any write to CTRL reg ACks the interrupt, we rewrite the - * Count when [N]ot [H]alted bit. - * And re-arm it if perioid by [I]nterrupt [E]nable bit + * 1. ACK the interrupt + * - For ARC700, any write to CTRL reg ACKs it, so just rewrite + * Count when [N]ot [H]alted bit. + * - For HS3x, it is a bit subtle. On taken count-down interrupt, + * IP bit [3] is set, which needs to be cleared for ACK'ing. + * The write below can only update the other two bits, hence + * explicitly clears IP bit + * 2. Re-arm interrupt if periodic by writing to IE bit [0] */ write_aux_reg(ARC_REG_TIMER0_CTRL, irq_reenable | TIMER_CTRL_NH); -- cgit v1.1 From cb097be7036aa325adba33d8c41fe77b980b0e77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 25 Feb 2018 08:50:56 -0800 Subject: x86/refcounts: Switch to UD2 for exceptions As done in commit 3b3a371cc9bc ("x86/debug: Use UD2 for WARN()"), this switches to UD2 from UD0 to keep disassembly readable. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20180225165056.GA11719@beast --- arch/x86/include/asm/refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index d651711..4cf11d8 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -17,7 +17,7 @@ #define _REFCOUNT_EXCEPTION \ ".pushsection .text..refcount\n" \ "111:\tlea %[counter], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD0 "\n" \ + "112:\t" ASM_UD2 "\n" \ ASM_UNREACHABLE \ ".popsection\n" \ "113:\n" \ -- cgit v1.1 From a368d7fd2a3c6babb852fe974018dd97916bcd3b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Feb 2018 04:11:21 -0700 Subject: x86/entry/64: Add instruction suffix Omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream gas in the future (mine does already). Add the single missing suffix here. Signed-off-by: Jan Beulich Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/5A93F96902000078001ABAC8@prv-mh.provo.novell.com --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d5c7f18..805f527 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -55,7 +55,7 @@ END(native_usergs_sysret64) .macro TRACE_IRQS_FLAGS flags:req #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, \flags /* interrupts off? */ + btl $9, \flags /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: -- cgit v1.1 From 22636f8c9511245cb3c8412039f1dd95afb3aa59 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Feb 2018 04:11:51 -0700 Subject: x86/asm: Add instruction suffixes to bitops Omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream gas in the future (mine does already). Add the missing suffixes here. Note that for 64-bit this means some operations change from being 32-bit to 64-bit. Signed-off-by: Jan Beulich Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/5A93F98702000078001ABACC@prv-mh.provo.novell.com --- arch/x86/include/asm/bitops.h | 29 ++++++++++++++++------------- arch/x86/include/asm/percpu.h | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 3fa0398..9f645ba 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX "bts %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); } } @@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr) */ static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { - asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); } /** @@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btr %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); } static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) @@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long * */ static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { - asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); } /** @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btc %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), + *addr, "Ir", nr, "%0", c); } /** @@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * { bool oldbit; - asm("bts %2,%1" + asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), + *addr, "Ir", nr, "%0", c); } /** @@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long { bool oldbit; - asm volatile("btr %2,%1" + asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon { bool oldbit; - asm volatile("btc %2,%1" + asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); @@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), + *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) @@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l { bool oldbit; - asm volatile("bt %2,%1" + asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ba3c523..a06b073 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, { bool oldbit; - asm volatile("bt "__percpu_arg(2)",%1" + asm volatile("btl "__percpu_arg(2)",%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); -- cgit v1.1 From b08e5fd90bfc7553d36fa42a03fb7f5e82d252eb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 26 Feb 2018 16:10:56 +0000 Subject: arm_pmu: Use disable_irq_nosync when disabling SPI in CPU teardown hook Commit 6de3f79112cc ("arm_pmu: explicitly enable/disable SPIs at hotplug") moved all of the arm_pmu IRQ enable/disable calls to the CPU hotplug hooks, regardless of whether they are implemented as PPIs or SPIs. This can lead to us sleeping from atomic context due to disable_irq blocking: | BUG: sleeping function called from invalid context at kernel/irq/manage.c:112 | in_atomic(): 1, irqs_disabled(): 128, pid: 15, name: migration/1 | no locks held by migration/1/15. | irq event stamp: 192 | hardirqs last enabled at (191): [<00000000803c2507>] | _raw_spin_unlock_irq+0x2c/0x4c | hardirqs last disabled at (192): [<000000007f57ad28>] multi_cpu_stop+0x9c/0x140 | softirqs last enabled at (0): [<0000000004ee1b58>] | copy_process.isra.77.part.78+0x43c/0x1504 | softirqs last disabled at (0): [< (null)>] (null) | CPU: 1 PID: 15 Comm: migration/1 Not tainted 4.16.0-rc3-salvator-x #1651 | Hardware name: Renesas Salvator-X board based on r8a7796 (DT) | Call trace: | dump_backtrace+0x0/0x140 | show_stack+0x14/0x1c | dump_stack+0xb4/0xf0 | ___might_sleep+0x1fc/0x218 | __might_sleep+0x70/0x80 | synchronize_irq+0x40/0xa8 | disable_irq+0x20/0x2c | arm_perf_teardown_cpu+0x80/0xac Since the interrupt is always CPU-affine and this code is running with interrupts disabled, we can just use disable_irq_nosync as we know there isn't a concurrent invocation of the handler to worry about. Fixes: 6de3f79112cc ("arm_pmu: explicitly enable/disable SPIs at hotplug") Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Acked-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 0c2ed11..f63db34 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -638,7 +638,7 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) if (irq_is_percpu_devid(irq)) disable_percpu_irq(irq); else - disable_irq(irq); + disable_irq_nosync(irq); } per_cpu(cpu_armpmu, cpu) = NULL; -- cgit v1.1 From 71c208dd54ab971036d83ff6d9837bae4976e623 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 26 Feb 2018 15:08:18 +0100 Subject: x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend Older Xen versions (4.5 and before) might have problems migrating pv guests with MSR_IA32_SPEC_CTRL having a non-zero value. So before suspending zero that MSR and restore it after being resumed. Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Reviewed-by: Jan Beulich Cc: stable@vger.kernel.org Cc: xen-devel@lists.xenproject.org Cc: boris.ostrovsky@oracle.com Link: https://lkml.kernel.org/r/20180226140818.4849-1-jgross@suse.com --- arch/x86/xen/suspend.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9f96cc..1d83152 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -15,6 +18,8 @@ #include "mmu.h" #include "pmu.h" +static DEFINE_PER_CPU(u64, spec_ctrl); + void xen_arch_pre_suspend(void) { xen_save_time_memory_area(); @@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); + /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) return; @@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data) static void xen_vcpu_notify_suspend(void *data) { + u64 tmp; + tick_suspend_local(); + + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { + rdmsrl(MSR_IA32_SPEC_CTRL, tmp); + this_cpu_write(spec_ctrl, tmp); + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + } } void xen_arch_resume(void) -- cgit v1.1 From 1402fd8ed7e5bda1b3e7613b70780b0db392d1e6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 28 Feb 2018 07:19:21 -0600 Subject: objtool: Fix another switch table detection issue Continue the switch table detection whack-a-mole. Add a check to distinguish KASAN data reads from switch data reads. The switch jump tables in .rodata have relocations associated with them. This fixes the following warning: crypto/asymmetric_keys/x509_cert_parser.o: warning: objtool: x509_note_pkey_algo()+0xa4: sibling call from callable instruction with modified stack frame Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Arnd Bergmann Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/d7c8853022ad47d158cb81e953a40469fc08a95e.1519784382.git.jpoimboe@redhat.com --- tools/objtool/check.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 472e64e..46c1d23 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -925,7 +925,11 @@ static struct rela *find_switch_table(struct objtool_file *file, if (find_symbol_containing(file->rodata, text_rela->addend)) continue; - return find_rela_by_dest(file->rodata, text_rela->addend); + rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); + if (!rodata_rela) + continue; + + return rodata_rela; } return NULL; -- cgit v1.1 From 590399ddf9561f2ed0839311c8ae1be21597ba68 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 27 Feb 2018 17:33:10 +0200 Subject: gianfar: Fix Rx byte accounting for ndev stats Don't include in the Rx bytecount of the packet sent up the stack: the FCB (frame control block), and the padding bytes inserted by the controller into the frame payload, nor the FCS. All these are being pulled out of the skb by gfar_process_frame(). This issue is old, likely from the driver's beginnings, however it was amplified by recent: commit d903ec77118c ("gianfar: simplify FCS handling and fix memory leak") which basically added the FCS to the Rx bytecount, and so brought this to my attention. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index f5c87bd..f27f9ba 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3063,9 +3063,6 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb) if (ndev->features & NETIF_F_RXCSUM) gfar_rx_checksum(skb, fcb); - /* Tell the skb what kind of packet this is */ - skb->protocol = eth_type_trans(skb, ndev); - /* There's need to check for NETIF_F_HW_VLAN_CTAG_RX here. * Even if vlan rx accel is disabled, on some chips * RXFCB_VLN is pseudo randomly set. @@ -3136,13 +3133,15 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) continue; } + gfar_process_frame(ndev, skb); + /* Increment the number of packets */ total_pkts++; total_bytes += skb->len; skb_record_rx_queue(skb, rx_queue->qindex); - gfar_process_frame(ndev, skb); + skb->protocol = eth_type_trans(skb, ndev); /* Send the packet up the stack */ napi_gro_receive(&rx_queue->grp->napi_rx, skb); -- cgit v1.1 From 12472af89632beb1ed8dea29d4efe208ca05b06a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 27 Feb 2018 18:58:12 +0100 Subject: s390/qeth: fix overestimated count of buffer elements qeth_get_elements_for_range() doesn't know how to handle a 0-length range (ie. start == end), and returns 1 when it should return 0. Such ranges occur on TSO skbs, where the L2/L3/L4 headers (and thus all of the skb's linear data) are skipped when mapping the skb into regular buffer elements. This overestimation may cause several performance-related issues: 1. sub-optimal IO buffer selection, where the next buffer gets selected even though the skb would actually still fit into the current buffer. 2. forced linearization, if the element count for a non-linear skb exceeds QETH_MAX_BUFFER_ELEMENTS. Rather than modifying qeth_get_elements_for_range() and adding overhead to every caller, fix up those callers that are in risk of passing a 0-length range. Fixes: 2863c61334aa ("qeth: refactor calculation of SBALE count") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 10 ++++++---- drivers/s390/net/qeth_l3_main.c | 11 ++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index ca72f33..30457fc 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3898,10 +3898,12 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, int extra_elems, int data_offset) { - int elements = qeth_get_elements_for_range( - (addr_t)skb->data + data_offset, - (addr_t)skb->data + skb_headlen(skb)) + - qeth_get_elements_for_frags(skb); + addr_t end = (addr_t)skb->data + skb_headlen(skb); + int elements = qeth_get_elements_for_frags(skb); + addr_t start = (addr_t)skb->data + data_offset; + + if (start != end) + elements += qeth_get_elements_for_range(start, end); if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) { QETH_DBF_MESSAGE(2, "Invalid size of IP packet " diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b0c888e8..3421893 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2450,11 +2450,12 @@ static void qeth_tso_fill_header(struct qeth_card *card, static int qeth_l3_get_elements_no_tso(struct qeth_card *card, struct sk_buff *skb, int extra_elems) { - addr_t tcpdptr = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb); - int elements = qeth_get_elements_for_range( - tcpdptr, - (addr_t)skb->data + skb_headlen(skb)) + - qeth_get_elements_for_frags(skb); + addr_t start = (addr_t)tcp_hdr(skb) + tcp_hdrlen(skb); + addr_t end = (addr_t)skb->data + skb_headlen(skb); + int elements = qeth_get_elements_for_frags(skb); + + if (start != end) + elements += qeth_get_elements_for_range(start, end); if ((elements + extra_elems) > QETH_MAX_BUFFER_ELEMENTS(card)) { QETH_DBF_MESSAGE(2, -- cgit v1.1 From 98d823ab1fbdcb13abc25b420f9bb71bade42056 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 27 Feb 2018 18:58:13 +0100 Subject: s390/qeth: fix IP removal on offline cards If the HW is not reachable, then none of the IPs in qeth's internal table has been registered with the HW yet. So when deleting such an IP, there's no need to stage it for deregistration - just drop it from the table. This fixes the "add-delete-add" scenario on an offline card, where the the second "add" merely increments the IP's use count. But as the IP is still set to DISP_ADDR_DELETE from the previous "delete" step, l3_recover_ip() won't register it with the HW when the card goes online. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 3421893..34481b5 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -173,12 +173,8 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) if (addr->in_progress) return -EINPROGRESS; - if (!qeth_card_hw_is_reachable(card)) { - addr->disp_flag = QETH_DISP_ADDR_DELETE; - return 0; - } - - rc = qeth_l3_deregister_addr_entry(card, addr); + if (qeth_card_hw_is_reachable(card)) + rc = qeth_l3_deregister_addr_entry(card, addr); hash_del(&addr->hnode); kfree(addr); @@ -321,11 +317,7 @@ static void qeth_l3_recover_ip(struct qeth_card *card) spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { - if (addr->disp_flag == QETH_DISP_ADDR_DELETE) { - qeth_l3_deregister_addr_entry(card, addr); - hash_del(&addr->hnode); - kfree(addr); - } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) { + if (addr->disp_flag == QETH_DISP_ADDR_ADD) { if (addr->proto == QETH_PROT_IPV4) { addr->in_progress = 1; spin_unlock_bh(&card->ip_lock); -- cgit v1.1 From 14d066c3531a87f727968cacd85bd95c75f59843 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 27 Feb 2018 18:58:14 +0100 Subject: s390/qeth: fix double-free on IP add/remove race Registering an IPv4 address with the HW takes quite a while, so we temporarily drop the ip_htable lock. Any concurrent add/remove of the same IP adjusts the IP's use count, and (on remove) is then blocked by addr->in_progress. After the register call has completed, we check the use count for concurrently attempted add/remove calls - and possibly straight-away deregister the IP again. This happens via l3_delete_ip(), which 1) looks up the queried IP in the htable (getting a reference to the *same* queried object), 2) deregisters the IP from the HW, and 3) frees the IP object. The caller in l3_add_ip() then does a second free on the same object. For this case, skip all the extra checks and lookups in l3_delete_ip() and just deregister & free the IP object ourselves. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 34481b5..77cdb4f 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -237,7 +237,8 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) (rc == IPA_RC_LAN_OFFLINE)) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; if (addr->ref_counter < 1) { - qeth_l3_delete_ip(card, addr); + qeth_l3_deregister_addr_entry(card, addr); + hash_del(&addr->hnode); kfree(addr); } } else { -- cgit v1.1 From 4964c66fd49b2e2342da35358f2ff74614bcbaee Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 27 Feb 2018 18:58:15 +0100 Subject: Revert "s390/qeth: fix using of ref counter for rxip addresses" This reverts commit cb816192d986f7596009dedcf2201fe2e5bc2aa7. The issue this attempted to fix never actually occurs. l3_add_rxip() checks (via l3_ip_from_hash()) if the requested address was previously added to the card. If so, it returns -EEXIST and doesn't call l3_add_ip(). As a result, the "address exists" path in l3_add_ip() is never taken for rxip addresses, and this patch had no effect. Fixes: cb816192d986 ("s390/qeth: fix using of ref counter for rxip addresses") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 77cdb4f..4d8826f 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -167,8 +167,7 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) return -ENOENT; addr->ref_counter--; - if (addr->ref_counter > 0 && (addr->type == QETH_IP_TYPE_NORMAL || - addr->type == QETH_IP_TYPE_RXIP)) + if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0) return rc; if (addr->in_progress) return -EINPROGRESS; @@ -246,9 +245,8 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) kfree(addr); } } else { - if (addr->type == QETH_IP_TYPE_NORMAL || - addr->type == QETH_IP_TYPE_RXIP) - addr->ref_counter++; + if (addr->type == QETH_IP_TYPE_NORMAL) + addr->ref_counter++; } return rc; -- cgit v1.1 From c5c48c58b259bb8f0482398370ee539d7a12df3e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 27 Feb 2018 18:58:16 +0100 Subject: s390/qeth: fix IP address lookup for L3 devices Current code ("qeth_l3_ip_from_hash()") matches a queried address object against objects in the IP table by IP address, Mask/Prefix Length and MAC address ("qeth_l3_ipaddrs_is_equal()"). But what callers actually require is either a) "is this IP address registered" (ie. match by IP address only), before adding a new address. b) or "is this address object registered" (ie. match all relevant attributes), before deleting an address. Right now 1. the ADD path is too strict in its lookup, and eg. doesn't detect conflicts between an existing NORMAL address and a new VIPA address (because the NORMAL address will have mask != 0, while VIPA has a mask == 0), 2. the DELETE path is not strict enough, and eg. allows del_rxip() to delete a VIPA address as long as the IP address matches. Fix all this by adding helpers (_addr_match_ip() and _addr_match_all()) that do the appropriate checking. Note that the ADD path for NORMAL addresses is special, as qeth keeps track of how many times such an address is in use (and there is no immediate way of returning errors to the caller). So when a requested NORMAL address _fully_ matches an existing one, it's not considered a conflict and we merely increment the refcount. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3.h | 34 ++++++++++++++- drivers/s390/net/qeth_l3_main.c | 91 +++++++++++++++++++---------------------- 2 files changed, 74 insertions(+), 51 deletions(-) diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index bdd45f4..498fe9a 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -40,8 +40,40 @@ struct qeth_ipaddr { unsigned int pfxlen; } a6; } u; - }; + +static inline bool qeth_l3_addr_match_ip(struct qeth_ipaddr *a1, + struct qeth_ipaddr *a2) +{ + if (a1->proto != a2->proto) + return false; + if (a1->proto == QETH_PROT_IPV6) + return ipv6_addr_equal(&a1->u.a6.addr, &a2->u.a6.addr); + return a1->u.a4.addr == a2->u.a4.addr; +} + +static inline bool qeth_l3_addr_match_all(struct qeth_ipaddr *a1, + struct qeth_ipaddr *a2) +{ + /* Assumes that the pair was obtained via qeth_l3_addr_find_by_ip(), + * so 'proto' and 'addr' match for sure. + * + * For ucast: + * - 'mac' is always 0. + * - 'mask'/'pfxlen' for RXIP/VIPA is always 0. For NORMAL, matching + * values are required to avoid mixups in takeover eligibility. + * + * For mcast, + * - 'mac' is mapped from the IP, and thus always matches. + * - 'mask'/'pfxlen' is always 0. + */ + if (a1->type != a2->type) + return false; + if (a1->proto == QETH_PROT_IPV6) + return a1->u.a6.pfxlen == a2->u.a6.pfxlen; + return a1->u.a4.mask == a2->u.a4.mask; +} + static inline u64 qeth_l3_ipaddr_hash(struct qeth_ipaddr *addr) { u64 ret = 0; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4d8826f..962a04b 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -67,6 +67,24 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr, qeth_l3_ipaddr6_to_string(addr, buf); } +static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card, + struct qeth_ipaddr *query) +{ + u64 key = qeth_l3_ipaddr_hash(query); + struct qeth_ipaddr *addr; + + if (query->is_multicast) { + hash_for_each_possible(card->ip_mc_htable, addr, hnode, key) + if (qeth_l3_addr_match_ip(addr, query)) + return addr; + } else { + hash_for_each_possible(card->ip_htable, addr, hnode, key) + if (qeth_l3_addr_match_ip(addr, query)) + return addr; + } + return NULL; +} + static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len) { int i, j; @@ -120,34 +138,6 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, return rc; } -inline int -qeth_l3_ipaddrs_is_equal(struct qeth_ipaddr *addr1, struct qeth_ipaddr *addr2) -{ - return addr1->proto == addr2->proto && - !memcmp(&addr1->u, &addr2->u, sizeof(addr1->u)) && - ether_addr_equal_64bits(addr1->mac, addr2->mac); -} - -static struct qeth_ipaddr * -qeth_l3_ip_from_hash(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) -{ - struct qeth_ipaddr *addr; - - if (tmp_addr->is_multicast) { - hash_for_each_possible(card->ip_mc_htable, addr, - hnode, qeth_l3_ipaddr_hash(tmp_addr)) - if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr)) - return addr; - } else { - hash_for_each_possible(card->ip_htable, addr, - hnode, qeth_l3_ipaddr_hash(tmp_addr)) - if (qeth_l3_ipaddrs_is_equal(tmp_addr, addr)) - return addr; - } - - return NULL; -} - int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) { int rc = 0; @@ -162,8 +152,8 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8); } - addr = qeth_l3_ip_from_hash(card, tmp_addr); - if (!addr) + addr = qeth_l3_find_addr_by_ip(card, tmp_addr); + if (!addr || !qeth_l3_addr_match_all(addr, tmp_addr)) return -ENOENT; addr->ref_counter--; @@ -185,6 +175,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) { int rc = 0; struct qeth_ipaddr *addr; + char buf[40]; QETH_CARD_TEXT(card, 4, "addip"); @@ -195,8 +186,20 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) QETH_CARD_HEX(card, 4, ((char *)&tmp_addr->u.a6.addr) + 8, 8); } - addr = qeth_l3_ip_from_hash(card, tmp_addr); - if (!addr) { + addr = qeth_l3_find_addr_by_ip(card, tmp_addr); + if (addr) { + if (tmp_addr->type != QETH_IP_TYPE_NORMAL) + return -EADDRINUSE; + if (qeth_l3_addr_match_all(addr, tmp_addr)) { + addr->ref_counter++; + return 0; + } + qeth_l3_ipaddr_to_string(tmp_addr->proto, (u8 *)&tmp_addr->u, + buf); + dev_warn(&card->gdev->dev, + "Registering IP address %s failed\n", buf); + return -EADDRINUSE; + } else { addr = qeth_l3_get_addr_buffer(tmp_addr->proto); if (!addr) return -ENOMEM; @@ -244,11 +247,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) hash_del(&addr->hnode); kfree(addr); } - } else { - if (addr->type == QETH_IP_TYPE_NORMAL) - addr->ref_counter++; } - return rc; } @@ -634,12 +633,7 @@ int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto, return -ENOMEM; spin_lock_bh(&card->ip_lock); - - if (qeth_l3_ip_from_hash(card, ipaddr)) - rc = -EEXIST; - else - rc = qeth_l3_add_ip(card, ipaddr); - + rc = qeth_l3_add_ip(card, ipaddr); spin_unlock_bh(&card->ip_lock); kfree(ipaddr); @@ -704,12 +698,7 @@ int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto, return -ENOMEM; spin_lock_bh(&card->ip_lock); - - if (qeth_l3_ip_from_hash(card, ipaddr)) - rc = -EEXIST; - else - rc = qeth_l3_add_ip(card, ipaddr); - + rc = qeth_l3_add_ip(card, ipaddr); spin_unlock_bh(&card->ip_lock); kfree(ipaddr); @@ -1230,8 +1219,9 @@ qeth_l3_add_mc_to_hash(struct qeth_card *card, struct in_device *in4_dev) tmp->u.a4.addr = be32_to_cpu(im4->multiaddr); tmp->is_multicast = 1; - ipm = qeth_l3_ip_from_hash(card, tmp); + ipm = qeth_l3_find_addr_by_ip(card, tmp); if (ipm) { + /* for mcast, by-IP match means full match */ ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING; } else { ipm = qeth_l3_get_addr_buffer(QETH_PROT_IPV4); @@ -1310,8 +1300,9 @@ static void qeth_l3_add_mc6_to_hash(struct qeth_card *card, sizeof(struct in6_addr)); tmp->is_multicast = 1; - ipm = qeth_l3_ip_from_hash(card, tmp); + ipm = qeth_l3_find_addr_by_ip(card, tmp); if (ipm) { + /* for mcast, by-IP match means full match */ ipm->disp_flag = QETH_DISP_ADDR_DO_NOTHING; continue; } -- cgit v1.1 From d22ffb5a712f9211ffd104c38fc17cbfb1b5e2b0 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 27 Feb 2018 18:58:17 +0100 Subject: s390/qeth: fix IPA command submission race If multiple IPA commands are build & sent out concurrently, fill_ipacmd_header() may assign a seqno value to a command that's different from what send_control_data() later assigns to this command's reply. This is due to other commands passing through send_control_data(), and incrementing card->seqno.ipa along the way. So one IPA command has no reply that's waiting for its seqno, while some other IPA command has multiple reply objects waiting for it. Only one of those waiting replies wins, and the other(s) times out and triggers a recovery via send_ipa_cmd(). Fix this by making sure that the same seqno value is assigned to a command and its reply object. Do so immediately before submitting the command & while holding the irq_pending "lock", to produce nicely ascending seqnos. As a side effect, *all* IPA commands now use a reply object that's waiting for its actual seqno. Previously, early IPA commands that were submitted while the card was still DOWN used the "catch-all" IDX seqno. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 30457fc..c8b308c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2134,24 +2134,25 @@ int qeth_send_control_data(struct qeth_card *card, int len, } reply->callback = reply_cb; reply->param = reply_param; - if (card->state == CARD_STATE_DOWN) - reply->seqno = QETH_IDX_COMMAND_SEQNO; - else - reply->seqno = card->seqno.ipa++; + init_waitqueue_head(&reply->wait_q); - spin_lock_irqsave(&card->lock, flags); - list_add_tail(&reply->list, &card->cmd_waiter_list); - spin_unlock_irqrestore(&card->lock, flags); while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ; - qeth_prepare_control_data(card, len, iob); if (IS_IPA(iob->data)) { cmd = __ipa_cmd(iob); + cmd->hdr.seqno = card->seqno.ipa++; + reply->seqno = cmd->hdr.seqno; event_timeout = QETH_IPA_TIMEOUT; } else { + reply->seqno = QETH_IDX_COMMAND_SEQNO; event_timeout = QETH_TIMEOUT; } + qeth_prepare_control_data(card, len, iob); + + spin_lock_irqsave(&card->lock, flags); + list_add_tail(&reply->list, &card->cmd_waiter_list); + spin_unlock_irqrestore(&card->lock, flags); timeout = jiffies + event_timeout; @@ -2933,7 +2934,7 @@ static void qeth_fill_ipacmd_header(struct qeth_card *card, memset(cmd, 0, sizeof(struct qeth_ipa_cmd)); cmd->hdr.command = command; cmd->hdr.initiator = IPA_CMD_INITIATOR_HOST; - cmd->hdr.seqno = card->seqno.ipa; + /* cmd->hdr.seqno is set by qeth_send_control_data() */ cmd->hdr.adapter_type = qeth_get_ipa_adp_type(card->info.link_type); cmd->hdr.rel_adapter_no = (__u8) card->info.portno; if (card->options.layer2) -- cgit v1.1 From d4131f09770d9b7471c9da65e6ecd2477746ac5c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 27 Feb 2018 14:15:01 -0800 Subject: tcp: revert F-RTO middle-box workaround This reverts commit cc663f4d4c97b7297fb45135ab23cfd508b35a77. While fixing some broken middle-boxes that modifies receive window fields, it does not address middle-boxes that strip off SACK options. The best solution is to fully revert this patch and the root F-RTO enhancement. Fixes: cc663f4d4c97 ("tcp: restrict F-RTO to work-around broken middle-boxes") Reported-by: Teodor Milkov Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 575d3c1..cd8ea97 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1909,7 +1909,6 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sk_buff *skb; - bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ bool mark_lost; @@ -1968,17 +1967,15 @@ void tcp_enter_loss(struct sock *sk) tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); - /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous - * loss recovery is underway except recurring timeout(s) on - * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing - * - * In theory F-RTO can be used repeatedly during loss recovery. - * In practice this interacts badly with broken middle-boxes that - * falsely raise the receive window, which results in repeated - * timeouts and stop-and-go behavior. + /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO + * if a previous recovery is underway, otherwise it may incorrectly + * call a timeout spurious if some previously retransmitted packets + * are s/acked (sec 3.2). We do not apply that retriction since + * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS + * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO + * on PTMU discovery to avoid sending new data. */ tp->frto = net->ipv4.sysctl_tcp_frto && - (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } -- cgit v1.1 From fc68e171d376c322e6777a3d7ac2f0278b68b17f Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 27 Feb 2018 14:15:02 -0800 Subject: tcp: revert F-RTO extension to detect more spurious timeouts This reverts commit 89fe18e44f7ee5ab1c90d0dff5835acee7751427. While the patch could detect more spurious timeouts, it could cause poor TCP performance on broken middle-boxes that modifies TCP packets (e.g. receive window, SACK options). Since the performance gain is much smaller compared to the potential loss. The best solution is to fully revert the change. Fixes: 89fe18e44f7e ("tcp: extend F-RTO to catch more spurious timeouts") Reported-by: Teodor Milkov Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cd8ea97..8d48054 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1909,6 +1909,7 @@ void tcp_enter_loss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sk_buff *skb; + bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ bool mark_lost; @@ -1967,15 +1968,12 @@ void tcp_enter_loss(struct sock *sk) tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); - /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO - * if a previous recovery is underway, otherwise it may incorrectly - * call a timeout spurious if some previously retransmitted packets - * are s/acked (sec 3.2). We do not apply that retriction since - * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS - * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO - * on PTMU discovery to avoid sending new data. + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing */ tp->frto = net->ipv4.sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } @@ -2628,18 +2626,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, tcp_try_undo_loss(sk, false)) return; - /* The ACK (s)acks some never-retransmitted data meaning not all - * the data packets before the timeout were lost. Therefore we - * undo the congestion window and state. This is essentially - * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since - * a retransmitted skb is permantly marked, we can apply such an - * operation even if F-RTO was not used. - */ - if ((flag & FLAG_ORIG_SACK_ACKED) && - tcp_try_undo_loss(sk, tp->undo_marker)) - return; - if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + if ((flag & FLAG_ORIG_SACK_ACKED) && + tcp_try_undo_loss(sk, true)) + return; + if (after(tp->snd_nxt, tp->high_seq)) { if (flag & FLAG_DATA_SACKED || is_dupack) tp->frto = 0; /* Step 3.a. loss was real */ -- cgit v1.1 From a27fd7a8ed3856faaf5a2ff1c8c5f00c0667aaa0 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Tue, 27 Feb 2018 18:32:18 -0500 Subject: tcp: purge write queue upon RST When the connection is reset, there is no point in keeping the packets on the write queue until the connection is closed. RFC 793 (page 70) and RFC 793-bis (page 64) both suggest purging the write queue upon RST: https://tools.ietf.org/html/draft-ietf-tcpm-rfc793bis-07 Moreover, this is essential for a correct MSG_ZEROCOPY implementation, because userspace cannot call close(fd) before receiving zerocopy signals even when the connection is reset. Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Signed-off-by: Soheil Hassas Yeganeh Reviewed-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8d48054..9a1b3c1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3992,6 +3992,7 @@ void tcp_reset(struct sock *sk) /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); + tcp_write_queue_purge(sk); tcp_done(sk); if (!sock_flag(sk, SOCK_DEAD)) -- cgit v1.1 From 9960d7669eaa42e82a2f4393adf549191de2e587 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2018 08:39:20 -0800 Subject: test_bpf: reduce MAX_TESTRUNS For tests that are using the maximal number of BPF instruction, each run takes 20 usec. Looping 10,000 times on them totals 200 ms, which is bad when the loop is not preemptible. test_bpf: #264 BPF_MAXINSNS: Call heavy transformations jited:1 19248 18548 PASS test_bpf: #269 BPF_MAXINSNS: ld_abs+get_processor_id jited:1 20896 PASS Lets divide by ten the number of iterations, so that max latency is 20ms. We could use need_resched() to break the loop earlier if we believe 20 ms is too much. Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index e6f5506..2efb213 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -28,7 +28,7 @@ /* General test specific settings */ #define MAX_SUBTESTS 3 -#define MAX_TESTRUNS 10000 +#define MAX_TESTRUNS 1000 #define MAX_DATA 128 #define MAX_INSNS 512 #define MAX_K 0xffffFFFF -- cgit v1.1 From ecc832758a654e375924ebf06a4ac971acb5ce60 Mon Sep 17 00:00:00 2001 From: Joey Pabalinas Date: Tue, 27 Feb 2018 22:05:53 -1000 Subject: net/tcp/illinois: replace broken algorithm reference link The link to the pdf containing the algorithm description is now a dead link; it seems http://www.ifp.illinois.edu/~srikant/ has been moved to https://sites.google.com/a/illinois.edu/srikant/ and none of the original papers can be found there... I have replaced it with the only working copy I was able to find. n.b. there is also a copy available at: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.296.6350&rep=rep1&type=pdf However, this seems to only be a *cached* version, so I am unsure exactly how reliable that link can be expected to remain over time and have decided against using that one. Signed-off-by: Joey Pabalinas 1 file changed, 1 insertion(+), 1 deletion(-) Signed-off-by: David S. Miller --- net/ipv4/tcp_illinois.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 7c84357..faddf4f 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -6,7 +6,7 @@ * The algorithm is described in: * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm * for High-Speed Networks" - * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf + * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf * * Implemented from description in paper and ns-2 simulation. * Copyright (C) 2007 Stephen Hemminger -- cgit v1.1 From bffd2b61670feef18d2535e9b53364d270a1c991 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 24 Jan 2018 17:31:45 +0200 Subject: nvmet: fix PSDT field check in command format PSDT field section according to NVM_Express-1.3: "This field specifies whether PRPs or SGLs are used for any data transfer associated with the command. PRPs shall be used for all Admin commands for NVMe over PCIe. SGLs shall be used for all Admin and I/O commands for NVMe over Fabrics. This field shall be set to 01b for NVMe over Fabrics 1.0 implementations. Suggested-by: Idan Burstein Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 0bd7371..a78029e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -520,9 +520,12 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, goto fail; } - /* either variant of SGLs is fine, as we don't support metadata */ - if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF && - (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) { + /* + * For fabrics, PSDT field shall describe metadata pointer (MPTR) that + * contains an address of a single contiguous physical buffer that is + * byte aligned. + */ + if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto fail; } -- cgit v1.1 From 028091f82eefd5e84f81cef81a7673016ecbe78b Mon Sep 17 00:00:00 2001 From: Sebastian Panceac Date: Wed, 28 Feb 2018 11:40:49 +0200 Subject: x86/platform/intel-mid: Handle Intel Edison reboot correctly When the Intel Edison module is powered with 3.3V, the reboot command makes the module stuck. If the module is powered at a greater voltage, like 4.4V (as the Edison Mini Breakout board does), reboot works OK. The official Intel Edison BSP sends the IPCMSG_COLD_RESET message to the SCU by default. The IPCMSG_COLD_BOOT which is used by the upstream kernel is only sent when explicitely selected on the kernel command line. Use IPCMSG_COLD_RESET unconditionally which makes reboot work independent of the power supply voltage. [ tglx: Massaged changelog ] Fixes: bda7b072de99 ("x86/platform/intel-mid: Implement power off sequence") Signed-off-by: Sebastian Panceac Signed-off-by: Thomas Gleixner Acked-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1519810849-15131-1-git-send-email-sebastian@resin.io --- arch/x86/platform/intel-mid/intel-mid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 2c67bae..fb1df94 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -79,7 +79,7 @@ static void intel_mid_power_off(void) static void intel_mid_reboot(void) { - intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); + intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); } static unsigned long __init intel_mid_calibrate_tsc(void) -- cgit v1.1 From 4e09ff5362843dff3accfa84c805c7f3a99de9cd Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 28 Feb 2018 18:20:04 +0800 Subject: virtio-net: disable NAPI only when enabled during XDP set We try to disable NAPI to prevent a single XDP TX queue being used by multiple cpus. But we don't check if device is up (NAPI is enabled), this could result stall because of infinite wait in napi_disable(). Fixing this by checking device state through netif_running() before. Fixes: 4941d472bf95b ("virtio-net: do not reset during XDP set") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9bb9e56..2d54123 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2185,8 +2185,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } /* Make sure NAPI is not using any XDP TX queues for RX. */ - for (i = 0; i < vi->max_queue_pairs; i++) - napi_disable(&vi->rq[i].napi); + if (netif_running(dev)) + for (i = 0; i < vi->max_queue_pairs; i++) + napi_disable(&vi->rq[i].napi); netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); @@ -2205,7 +2206,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, } if (old_prog) bpf_prog_put(old_prog); - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (netif_running(dev)) + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); } return 0; -- cgit v1.1 From cbba07a726fb6b52b48d24ffd27e36f7278864e0 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 28 Feb 2018 12:44:07 +0100 Subject: net/smc: use a constant for control message length The sizeof(struct smc_cdc_msg) evaluates to 48 bytes instead of the required 44 bytes. We need to use the constant value of SMC_WR_TX_SIZE to set and check the control message length. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 2 +- net/smc/smc_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 3cd086e..b42395d 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -269,7 +269,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved)) return; /* short message */ - if (cdc->len != sizeof(*cdc)) + if (cdc->len != SMC_WR_TX_SIZE) return; /* invalid message */ smc_cdc_msg_recv(cdc, link, wc->wr_id); } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2424c71..053f0e6 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -465,7 +465,7 @@ create: rc = smc_link_determine_gid(conn->lgr); } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; - conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg); + conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&conn->acurs_lock); #endif -- cgit v1.1 From 2be922f31606f114119f48de3207d122a90e7357 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 28 Feb 2018 12:44:08 +0100 Subject: net/smc: use link_id of server in confirm link reply The CONFIRM LINK reply message must contain the link_id sent by the server. And set the link_id explicitly when initializing the link. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 1 + net/smc/smc_llc.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 053f0e6..645dd22 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -177,6 +177,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, lnk = &lgr->lnk[SMC_SINGLE_LINK]; /* initialize link */ + lnk->link_id = SMC_SINGLE_LINK; lnk->smcibdev = smcibdev; lnk->ibport = ibport; lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 92fe4cc..b4aa4fc 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -92,7 +92,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], memcpy(confllc->sender_mac, mac, ETH_ALEN); memcpy(confllc->sender_gid, gid, SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); - /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */ + confllc->link_num = link->link_id; memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); confllc->max_links = SMC_LINKS_PER_LGR_MAX; /* send llc message */ -- cgit v1.1 From a5dcb73b96a9d21431048bdaac02d9e96f386da3 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 28 Feb 2018 12:44:09 +0100 Subject: net/smc: fix NULL pointer dereference on sock_create_kern() error path when sock_create_kern(..., a) returns an error, 'a' might not be a valid pointer, so it shouldn't be dereferenced to read a->sk->sk_sndbuf and and a->sk->sk_rcvbuf; not doing that caused the following crash: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 4254 Comm: syzkaller919713 Not tainted 4.16.0-rc1+ #18 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:smc_create+0x14e/0x300 net/smc/af_smc.c:1410 RSP: 0018:ffff8801b06afbc8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8801b63457c0 RCX: ffffffff85a3e746 RDX: 0000000000000004 RSI: 00000000ffffffff RDI: 0000000000000020 RBP: ffff8801b06afbf0 R08: 00000000000007c0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff8801b6345c08 R14: 00000000ffffffe9 R15: ffffffff8695ced0 FS: 0000000001afb880(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000040 CR3: 00000001b0721004 CR4: 00000000001606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __sock_create+0x4d4/0x850 net/socket.c:1285 sock_create net/socket.c:1325 [inline] SYSC_socketpair net/socket.c:1409 [inline] SyS_socketpair+0x1c0/0x6f0 net/socket.c:1366 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x4404b9 RSP: 002b:00007fff44ab6908 EFLAGS: 00000246 ORIG_RAX: 0000000000000035 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004404b9 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 000000000000002b RBP: 00007fff44ab6910 R08: 0000000000000002 R09: 00007fff44003031 R10: 0000000020000040 R11: 0000000000000246 R12: ffffffffffffffff R13: 0000000000000006 R14: 0000000000000000 R15: 0000000000000000 Code: 48 c1 ea 03 80 3c 02 00 0f 85 b3 01 00 00 4c 8b a3 48 04 00 00 48 b8 00 00 00 00 00 fc ff df 49 8d 7c 24 20 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 82 01 00 00 4d 8b 7c 24 20 48 b8 00 00 00 00 RIP: smc_create+0x14e/0x300 net/smc/af_smc.c:1410 RSP: ffff8801b06afbc8 Fixes: cd6851f30386 smc: remote memory buffers (RMBs) Reported-and-tested-by: syzbot+aa0227369be2dcc26ebe@syzkaller.appspotmail.com Signed-off-by: Davide Caratti Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da1a5cd..8cc9783 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1406,8 +1406,10 @@ static int smc_create(struct net *net, struct socket *sock, int protocol, smc->use_fallback = false; /* assume rdma capability first */ rc = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &smc->clcsock); - if (rc) + if (rc) { sk_common_release(sk); + goto out; + } smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); -- cgit v1.1 From 2ddc94c76cc4ccaf51b478315912b38dfdde1afc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 28 Feb 2018 13:12:08 +0100 Subject: mlxsw: core: Fix flex keys scratchpad offset conflict IP_TTL, IP_ECN and IP_DSCP are using the same offset within the scratchpad as L4 ports. Fix this by shifting all up. Fixes: 5f57e0909136 ("mlxsw: acl: Add ip ttl acl element") Fixes: i80d0fe4710c ("mlxsw: acl: Add ip tos acl element") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index f6963b0..122506d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -107,20 +107,20 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), - MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8), - MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2), - MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6), - MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), - MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), - MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x20, 8), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x28, 8), - MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x30, 8), MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x20, 0, 32), + MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x24, 0, 32), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x20, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x28, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x30, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x38, 8), }; -#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x38 +#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40 struct mlxsw_afk_element_inst { /* element instance in actual block */ const struct mlxsw_afk_element_info *info; -- cgit v1.1 From 77d270967c5f723e5910dd073962b6372d7ef466 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 28 Feb 2018 13:12:09 +0100 Subject: mlxsw: spectrum: Fix handling of resource_size_param Current code uses global variables, adjusts them and passes pointer down to devlink. With every other mlxsw_core instance, the previously passed pointer values are rewritten. Fix this by de-globalize the variables and also memcpy size_params during devlink resource registration. Also, introduce a convenient size_param_init helper. Fixes: ef3116e5403e ("mlxsw: spectrum: Register KVD resources with devlink") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 75 +++++++++++++------------- include/net/devlink.h | 18 +++++-- net/core/devlink.c | 7 +-- 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3dcc58d..c364a1a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4207,13 +4207,12 @@ static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = { .size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate, }; -static struct devlink_resource_size_params mlxsw_sp_kvd_size_params; -static struct devlink_resource_size_params mlxsw_sp_linear_size_params; -static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params; -static struct devlink_resource_size_params mlxsw_sp_hash_double_size_params; - static void -mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core) +mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core, + struct devlink_resource_size_params *kvd_size_params, + struct devlink_resource_size_params *linear_size_params, + struct devlink_resource_size_params *hash_double_size_params, + struct devlink_resource_size_params *hash_single_size_params) { u32 single_size_min = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE); @@ -4222,37 +4221,35 @@ mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core) u32 kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); u32 linear_size_min = 0; - /* KVD top resource */ - mlxsw_sp_kvd_size_params.size_min = kvd_size; - mlxsw_sp_kvd_size_params.size_max = kvd_size; - mlxsw_sp_kvd_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY; - mlxsw_sp_kvd_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY; - - /* Linear part init */ - mlxsw_sp_linear_size_params.size_min = linear_size_min; - mlxsw_sp_linear_size_params.size_max = kvd_size - single_size_min - - double_size_min; - mlxsw_sp_linear_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY; - mlxsw_sp_linear_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY; - - /* Hash double part init */ - mlxsw_sp_hash_double_size_params.size_min = double_size_min; - mlxsw_sp_hash_double_size_params.size_max = kvd_size - single_size_min - - linear_size_min; - mlxsw_sp_hash_double_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY; - mlxsw_sp_hash_double_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY; - - /* Hash single part init */ - mlxsw_sp_hash_single_size_params.size_min = single_size_min; - mlxsw_sp_hash_single_size_params.size_max = kvd_size - double_size_min - - linear_size_min; - mlxsw_sp_hash_single_size_params.size_granularity = MLXSW_SP_KVD_GRANULARITY; - mlxsw_sp_hash_single_size_params.unit = DEVLINK_RESOURCE_UNIT_ENTRY; + devlink_resource_size_params_init(kvd_size_params, kvd_size, kvd_size, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(linear_size_params, linear_size_min, + kvd_size - single_size_min - + double_size_min, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(hash_double_size_params, + double_size_min, + kvd_size - single_size_min - + linear_size_min, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(hash_single_size_params, + single_size_min, + kvd_size - double_size_min - + linear_size_min, + MLXSW_SP_KVD_GRANULARITY, + DEVLINK_RESOURCE_UNIT_ENTRY); } static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core) { struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params hash_single_size_params; + struct devlink_resource_size_params hash_double_size_params; + struct devlink_resource_size_params linear_size_params; + struct devlink_resource_size_params kvd_size_params; u32 kvd_size, single_size, double_size, linear_size; const struct mlxsw_config_profile *profile; int err; @@ -4261,13 +4258,17 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core) if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE)) return -EIO; - mlxsw_sp_resource_size_params_prepare(mlxsw_core); + mlxsw_sp_resource_size_params_prepare(mlxsw_core, &kvd_size_params, + &linear_size_params, + &hash_double_size_params, + &hash_single_size_params); + kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE); err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD, true, kvd_size, MLXSW_SP_RESOURCE_KVD, DEVLINK_RESOURCE_ID_PARENT_TOP, - &mlxsw_sp_kvd_size_params, + &kvd_size_params, &mlxsw_sp_resource_kvd_ops); if (err) return err; @@ -4277,7 +4278,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core) false, linear_size, MLXSW_SP_RESOURCE_KVD_LINEAR, MLXSW_SP_RESOURCE_KVD, - &mlxsw_sp_linear_size_params, + &linear_size_params, &mlxsw_sp_resource_kvd_linear_ops); if (err) return err; @@ -4291,7 +4292,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core) false, double_size, MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, MLXSW_SP_RESOURCE_KVD, - &mlxsw_sp_hash_double_size_params, + &hash_double_size_params, &mlxsw_sp_resource_kvd_hash_double_ops); if (err) return err; @@ -4301,7 +4302,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core) false, single_size, MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, MLXSW_SP_RESOURCE_KVD, - &mlxsw_sp_hash_single_size_params, + &hash_single_size_params, &mlxsw_sp_resource_kvd_hash_single_ops); if (err) return err; diff --git a/include/net/devlink.h b/include/net/devlink.h index 6545b03..4de35ed 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -257,6 +257,18 @@ struct devlink_resource_size_params { enum devlink_resource_unit unit; }; +static inline void +devlink_resource_size_params_init(struct devlink_resource_size_params *size_params, + u64 size_min, u64 size_max, + u64 size_granularity, + enum devlink_resource_unit unit) +{ + size_params->size_min = size_min; + size_params->size_max = size_max; + size_params->size_granularity = size_granularity; + size_params->unit = unit; +} + /** * struct devlink_resource - devlink resource * @name: name of the resource @@ -278,7 +290,7 @@ struct devlink_resource { u64 size_new; bool size_valid; struct devlink_resource *parent; - struct devlink_resource_size_params *size_params; + struct devlink_resource_size_params size_params; struct list_head list; struct list_head resource_list; const struct devlink_resource_ops *resource_ops; @@ -402,7 +414,7 @@ int devlink_resource_register(struct devlink *devlink, u64 resource_size, u64 resource_id, u64 parent_resource_id, - struct devlink_resource_size_params *size_params, + const struct devlink_resource_size_params *size_params, const struct devlink_resource_ops *resource_ops); void devlink_resources_unregister(struct devlink *devlink, struct devlink_resource *resource); @@ -556,7 +568,7 @@ devlink_resource_register(struct devlink *devlink, u64 resource_size, u64 resource_id, u64 parent_resource_id, - struct devlink_resource_size_params *size_params, + const struct devlink_resource_size_params *size_params, const struct devlink_resource_ops *resource_ops) { return 0; diff --git a/net/core/devlink.c b/net/core/devlink.c index 7b1076d..2f2307d9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2379,7 +2379,7 @@ devlink_resource_size_params_put(struct devlink_resource *resource, { struct devlink_resource_size_params *size_params; - size_params = resource->size_params; + size_params = &resource->size_params; if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN, size_params->size_granularity, DEVLINK_ATTR_PAD) || nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX, @@ -3156,7 +3156,7 @@ int devlink_resource_register(struct devlink *devlink, u64 resource_size, u64 resource_id, u64 parent_resource_id, - struct devlink_resource_size_params *size_params, + const struct devlink_resource_size_params *size_params, const struct devlink_resource_ops *resource_ops) { struct devlink_resource *resource; @@ -3199,7 +3199,8 @@ int devlink_resource_register(struct devlink *devlink, resource->id = resource_id; resource->resource_ops = resource_ops; resource->size_valid = true; - resource->size_params = size_params; + memcpy(&resource->size_params, size_params, + sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); out: -- cgit v1.1 From 9d45deb04c59b628b21fc5014aff4f9a1d38f969 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 28 Feb 2018 13:12:10 +0100 Subject: mlxsw: spectrum: Treat IPv6 unregistered multicast as broadcast When multicast snooping is enabled, the Linux bridge resorts to flooding unregistered multicast packets to all ports only in case it did not detect a querier in the network. The above condition is not reflected to underlying drivers, which is especially problematic in IPv6 environments, as multicast snooping is enabled by default and since neighbour solicitation packets might be treated as unregistered multicast packets in case there is no corresponding MDB entry. Until the Linux bridge reflects its querier state to underlying drivers, simply treat unregistered multicast packets as broadcast and allow them to reach their destination. Fixes: 9df552ef3e21 ("mlxsw: spectrum: Improve IPv6 unregistered multicast flooding") Signed-off-by: Ido Schimmel Reported-by: David Ahern Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index bbd238e..54262af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -112,11 +112,11 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, }; static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, - [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, }; static const int *mlxsw_sp_packet_type_sfgc_types[] = { -- cgit v1.1 From b3529af6bb0d4fe72defdd539712ceffaa054fb3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 28 Feb 2018 13:12:11 +0100 Subject: spectrum: Reference count VLAN entries One of the basic construct in the device is a port-VLAN pair, which can be bound to a FID or a RIF in order to direct packets to the bridge or the router, respectively. Since not all the netdevs are configured with a VLAN (e.g., sw1p1 vs. sw1p1.10), VID 1 is used to represent these and thus this VID can be used by both upper devices of mlxsw ports and by the driver itself. However, this VID is not reference counted and therefore might be freed prematurely, which can result in various WARNINGs. For example: $ ip link add name br0 type bridge vlan_filtering 1 $ teamd -t team0 -d -c '{"runner": {"name": "lacp"}}' $ ip link set dev team0 master br0 $ ip link set dev enp1s0np1 master team0 $ ip address add 192.0.2.1/24 dev enp1s0np1 The enslavement to team0 will fail because team0 already has an upper and thus vlan_vids_del_by_dev() will be executed as part of team's error path which will delete VID 1 from enp1s0np1 (added by br0 as PVID). The WARNING will be generated when the driver will realize it can't find VID 1 on the port and bind it to a RIF. Fix this by adding a reference count to the VLAN entries on the port, in a similar fashion to the reference counting used by the corresponding 'vlan_vid_info' structure in the 8021q driver. Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN") Reported-by: Tal Bar Signed-off-by: Ido Schimmel Tested-by: Tal Bar Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 +++++++- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c364a1a..c7e941a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1459,6 +1459,7 @@ mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) } mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port; + mlxsw_sp_port_vlan->ref_count = 1; mlxsw_sp_port_vlan->vid = vid; list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list); @@ -1486,8 +1487,10 @@ mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); - if (mlxsw_sp_port_vlan) + if (mlxsw_sp_port_vlan) { + mlxsw_sp_port_vlan->ref_count++; return mlxsw_sp_port_vlan; + } return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid); } @@ -1496,6 +1499,9 @@ void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + if (--mlxsw_sp_port_vlan->ref_count != 0) + return; + if (mlxsw_sp_port_vlan->bridge_port) mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); else if (fid) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bdd8f94a..4ec1ca3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -211,6 +211,7 @@ struct mlxsw_sp_port_vlan { struct list_head list; struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp_fid *fid; + unsigned int ref_count; u16 vid; struct mlxsw_sp_bridge_port *bridge_port; struct list_head bridge_vlan_node; -- cgit v1.1 From 701eda01cbd212bae2f7d29cf14322bd49b94657 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 21 Feb 2018 15:10:02 -0800 Subject: ARCv2: boot log: fix HS48 release number Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index ec12fe1..b2cae79 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -51,7 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = { { 0x51, "R2.0" }, { 0x52, "R2.1" }, { 0x53, "R3.0" }, - { 0x54, "R4.0" }, + { 0x54, "R3.10a" }, #endif { 0x00, NULL } }; -- cgit v1.1 From 07423d00a2b2a71a97e4287d9262cb83c4c4c89f Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 23 Feb 2018 19:41:52 +0300 Subject: ARC: mcip: halt GFRC counter when ARC cores halt In SMP systems, GFRC is used for clocksource. However by default the counter keeps running even when core is halted (say when debugging via a JTAG debugger). This confuses Linux timekeeping and triggers flase RCU stall splat such as below: | [ARCLinux]# while true; do ./shm_open_23-1.run-test ; done | Running with 1000 processes for 1000 objects | hrtimer: interrupt took 485060 ns | | create_cnt: 1000 | Running with 1000 processes for 1000 objects | [ARCLinux]# INFO: rcu_preempt self-detected stall on CPU | 2-...: (1 GPs behind) idle=a01/1/0 softirq=135770/135773 fqs=0 | INFO: rcu_preempt detected stalls on CPUs/tasks: | 0-...: (1 GPs behind) idle=71e/0/0 softirq=135264/135264 fqs=0 | 2-...: (1 GPs behind) idle=a01/1/0 softirq=135770/135773 fqs=0 | 3-...: (1 GPs behind) idle=4e0/0/0 softirq=134304/134304 fqs=0 | (detected by 1, t=13648 jiffies, g=31493, c=31492, q=1) Starting from ARC HS v3.0 it's possible to tie GFRC to state of up-to 4 ARC cores with help of GFRC's CORE register where we set a mask for cores which state we need to rely on. We update cpu mask every time new cpu came online instead of using hardcoded one or using mask generated from "possible_cpus" as we want it set correctly even if we run kernel on HW which has fewer cores than expected (or we launch kernel via debugger and kick fever cores than HW has) Note that GFRC halts when all cores have halted and thus relies on programming of Inter-Core-dEbug register to halt all cores when one halts. Signed-off-by: Alexey Brodkin Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: rewrote changelog] --- arch/arc/kernel/mcip.c | 37 +++++++++++++++++++++++++++++++++++++ include/soc/arc/mcip.h | 3 +++ 2 files changed, 40 insertions(+) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index f61a52b..1119029 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -22,10 +22,47 @@ static DEFINE_RAW_SPINLOCK(mcip_lock); static char smp_cpuinfo_buf[128]; +/* + * Set mask to halt GFRC if any online core in SMP cluster is halted. + * Only works for ARC HS v3.0+, on earlier versions has no effect. + */ +static void mcip_update_gfrc_halt_mask(int cpu) +{ + struct bcr_generic gfrc; + unsigned long flags; + u32 gfrc_halt_mask; + + READ_BCR(ARC_REG_GFRC_BUILD, gfrc); + + /* + * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in + * GFRC 0x3 version. + */ + if (gfrc.ver < 0x3) + return; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + __mcip_cmd(CMD_GFRC_READ_CORE, 0); + gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + gfrc_halt_mask |= BIT(cpu); + __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + static void mcip_setup_per_cpu(int cpu) { + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + smp_ipi_irq_setup(cpu, IPI_IRQ); smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); + + /* Update GFRC halt mask as new CPU came online */ + if (mp.gfrc) + mcip_update_gfrc_halt_mask(cpu); } static void mcip_ipi_send(int cpu) diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index c2d1b15..1138da5 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -15,6 +15,7 @@ #define ARC_REG_MCIP_BCR 0x0d0 #define ARC_REG_MCIP_IDU_BCR 0x0D5 +#define ARC_REG_GFRC_BUILD 0x0D6 #define ARC_REG_MCIP_CMD 0x600 #define ARC_REG_MCIP_WDATA 0x601 #define ARC_REG_MCIP_READBACK 0x602 @@ -40,6 +41,8 @@ struct mcip_cmd { #define CMD_GFRC_READ_LO 0x42 #define CMD_GFRC_READ_HI 0x43 +#define CMD_GFRC_SET_CORE 0x47 +#define CMD_GFRC_READ_CORE 0x48 #define CMD_IDU_ENABLE 0x71 #define CMD_IDU_DISABLE 0x72 -- cgit v1.1 From f3205de98db2fc8083796dd5ad81b191e436fab8 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 23 Feb 2018 19:41:53 +0300 Subject: ARC: mcip: update MCIP debug mask when the new cpu came online As of today we use hardcoded MCIP debug mask, so if we launch kernel via debugger and kick fever cores than HW has all cpus hang at the momemt of setup MCIP debug mask. So update MCIP debug mask when the new cpu came online, instead of use hardcoded MCIP debug mask. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 37 ++++++++++++++++++++++++++++++++----- include/soc/arc/mcip.h | 2 ++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 1119029..5fe84e4 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -51,6 +51,34 @@ static void mcip_update_gfrc_halt_mask(int cpu) raw_spin_unlock_irqrestore(&mcip_lock, flags); } +static void mcip_update_debug_halt_mask(int cpu) +{ + u32 mcip_mask = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* + * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK + * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK + * and CMD_DEBUG_READ_SELECT. + */ + __mcip_cmd(CMD_DEBUG_READ_SELECT, 0); + mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + + mcip_mask |= BIT(cpu); + + __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask); + /* + * Parameter specified halt cause: + * STATUS32[H]/actionpoint/breakpoint/self-halt + * We choose all of them (0xF). + */ + __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + static void mcip_setup_per_cpu(int cpu) { struct mcip_bcr mp; @@ -63,6 +91,10 @@ static void mcip_setup_per_cpu(int cpu) /* Update GFRC halt mask as new CPU came online */ if (mp.gfrc) mcip_update_gfrc_halt_mask(cpu); + + /* Update MCIP debug mask as new CPU came online */ + if (mp.dbg) + mcip_update_debug_halt_mask(cpu); } static void mcip_ipi_send(int cpu) @@ -138,11 +170,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.gfrc, "GFRC")); cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - - if (mp.dbg) { - __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); - __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); - } } struct plat_smp_ops plat_smp_ops = { diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index 1138da5..a91f251 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -37,7 +37,9 @@ struct mcip_cmd { #define CMD_SEMA_RELEASE 0x12 #define CMD_DEBUG_SET_MASK 0x34 +#define CMD_DEBUG_READ_MASK 0x35 #define CMD_DEBUG_SET_SELECT 0x36 +#define CMD_DEBUG_READ_SELECT 0x37 #define CMD_GFRC_READ_LO 0x42 #define CMD_GFRC_READ_HI 0x43 -- cgit v1.1 From a29a25275452c97fe35815f1eb9564f2a07a1965 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 23 Feb 2018 19:41:54 +0300 Subject: ARC: setup cpu possible mask according to possible-cpus dts property As we have option in u-boot to set CPU mask for running linux, we want to pass information to kernel about CPU cores should be brought up. So we patch kernel dtb in u-boot to set possible-cpus property. This also allows us to have correctly setuped MCIP debug mask. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index efe8b42..21d86c3 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void) { } +static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask) +{ + unsigned long dt_root = of_get_flat_dt_root(); + const char *buf; + + buf = of_get_flat_dt_prop(dt_root, name, NULL); + if (!buf) + return -EINVAL; + + if (cpulist_parse(buf, cpumask)) + return -EINVAL; + + return 0; +} + +/* + * Read from DeviceTree and setup cpu possible mask. If there is no + * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist. + */ +static void __init arc_init_cpu_possible(void) +{ + struct cpumask cpumask; + + if (arc_get_cpu_map("possible-cpus", &cpumask)) { + pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n", + NR_CPUS); + + cpumask_setall(&cpumask); + } + + if (!cpumask_test_cpu(0, &cpumask)) + panic("Master cpu (cpu[0]) is missed in cpu possible mask!"); + + init_cpu_possible(&cpumask); +} + /* * Called from setup_arch() before calling setup_processor() * @@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void) */ void __init smp_init_cpus(void) { - unsigned int i; - - for (i = 0; i < NR_CPUS; i++) - set_cpu_possible(i, true); + arc_init_cpu_possible(); if (plat_smp_ops.init_early_smp) plat_smp_ops.init_early_smp(); @@ -70,16 +104,12 @@ void __init smp_init_cpus(void) /* called from init ( ) => process 1 */ void __init smp_prepare_cpus(unsigned int max_cpus) { - int i; - /* * if platform didn't set the present map already, do it now * boot cpu is set to present already by init/main.c */ - if (num_present_cpus() <= 1) { - for (i = 0; i < max_cpus; i++) - set_cpu_present(i, true); - } + if (num_present_cpus() <= 1) + init_cpu_present(cpu_possible_mask); } void __init smp_cpus_done(unsigned int max_cpus) -- cgit v1.1 From 8a949fff0302b50063f74bb345a66190015528d0 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 25 Feb 2018 22:29:18 +0200 Subject: ipvs: remove IPS_NAT_MASK check to fix passive FTP The IPS_NAT_MASK check in 4.12 replaced previous check for nfct_nat() which was needed to fix a crash in 2.6.36-rc, see commit 7bcbf81a2296 ("ipvs: avoid oops for passive FTP"). But as IPVS does not set the IPS_SRC_NAT and IPS_DST_NAT bits, checking for IPS_NAT_MASK prevents PASV response to be properly mangled and blocks the transfer. Remove the check as it is not needed after 3.12 commit 41d73ec053d2 ("netfilter: nf_conntrack: make sequence number adjustments usuable without NAT") which changes nfct_nat() with nfct_seqadj() and especially after 3.13 commit b25adce16064 ("ipvs: correct usage/allocation of seqadj ext in ipvs"). Thanks to Li Shuang and Florian Westphal for reporting the problem! Reported-by: Li Shuang Fixes: be7be6e161a2 ("netfilter: ipvs: fix incorrect conflict resolution") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ftp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 3e17d32..58d5d05 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -260,7 +260,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && (ct->status & IPS_NAT_MASK)) { + if (ct) { bool mangled; /* If mangling fails this function will return 0 -- cgit v1.1 From 7998a4ecc61fbef5547afd379b8953b526709dd2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 26 Feb 2018 16:25:12 +0100 Subject: dt-bindings/irqchip/renesas-irqc: Document R-Car M3-N support Document support for the Interrupt Controller for Externel Devices (INTC-EX) in the Renesas M3-N (r8a77965) SoC. No driver update is needed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Reviewed-by: Simon Horman Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Jason Cooper Cc: Marc Zyngier Cc: linux-renesas-soc@vger.kernel.org Cc: Rob Herring Link: https://lkml.kernel.org/r/1519658712-22910-1-git-send-email-geert%2Brenesas@glider.be --- Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt index 33c9a10..20f121d 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt @@ -14,6 +14,7 @@ Required properties: - "renesas,irqc-r8a7794" (R-Car E2) - "renesas,intc-ex-r8a7795" (R-Car H3) - "renesas,intc-ex-r8a7796" (R-Car M3-W) + - "renesas,intc-ex-r8a77965" (R-Car M3-N) - "renesas,intc-ex-r8a77970" (R-Car V3M) - "renesas,intc-ex-r8a77995" (R-Car D3) - #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in -- cgit v1.1 From 30009746168da0f1f648881f77083c40e226a8a0 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 27 Feb 2018 14:17:51 +0800 Subject: Documentation, x86, resctrl: Make text and sample command match The text says "Move the cpus 4-7 over to p1", but the sample command writes to p0/cpus. Signed-off-by: Li RongQing Signed-off-by: Thomas Gleixner Cc: fenghua.yu@intel.com Cc: linux-doc@vger.kernel.org Link: https://lkml.kernel.org/r/1519712271-8802-1-git-send-email-lirongqing@baidu.com --- Documentation/x86/intel_rdt_ui.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index 756fd76..71c3098 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -671,7 +671,7 @@ occupancy of the real time threads on these cores. # mkdir p1 Move the cpus 4-7 over to p1 -# echo f0 > p0/cpus +# echo f0 > p1/cpus View the llc occupancy snapshot -- cgit v1.1 From b1b13780ab06ef8c770dd9cbe31dac549a11630e Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 14 Feb 2018 11:18:25 -0800 Subject: drm/i915: Fix rsvd2 mask when out-fence is returned GENMASK_ULL wants the high bit of the mask first. The current value cancels the in-fence when an out-fence is returned. Fixes: fec0445caa273 ("drm/i915: Support explicit fencing for execbuf") Testcase: igt/gem_exec_fence/keep-in-fence* Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180214191827.8465-1-daniele.ceraolospurio@intel.com Cc: # v4.12+ (cherry picked from commit b6a88e4a804cf5a71159906e16df2c1fc7196f92) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4401068..36fca0e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -2410,7 +2410,7 @@ err_request: if (out_fence) { if (err == 0) { fd_install(out_fence_fd, out_fence->file); - args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */ args->rsvd2 |= (u64)out_fence_fd << 32; out_fence_fd = -1; } else { -- cgit v1.1 From da343b6d90e11132f1e917d865d88ee35d6e6d00 Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Sun, 25 Feb 2018 13:39:48 +0200 Subject: IB/mlx5: Fix incorrect size of klms in the memory region The value of mr->ndescs greater than mr->max_descs is set in the function mlx5_ib_sg_to_klms() if sg_nents is greater than mr->max_descs. This is an invalid value and it causes the following error when registering mr: mlx5_0:dump_cqe:276:(pid 193): dump error cqe 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 0f 00 78 06 25 00 00 8b 08 1e 8f d3 Cc: # 4.5 Fixes: b005d3164713 ("mlx5: Add arbitrary sg list support") Signed-off-by: Sergey Gorenko Tested-by: Laurence Oberman Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 556e015..1961c6a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1816,7 +1816,6 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; - mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { if (unlikely(i >= mr->max_descs)) @@ -1828,6 +1827,7 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, sg_offset = 0; } + mr->ndescs = i; if (sg_offset_p) *sg_offset_p = sg_offset; -- cgit v1.1 From e7b169f34403becd3c9fd3b6e46614ab788f2187 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sun, 25 Feb 2018 13:39:51 +0200 Subject: IB/mlx5: Avoid passing an invalid QP type to firmware During QP creation, the mlx5 driver translates the QP type to an internal value which is passed on to FW. There was no check to make sure that the translated value is valid, and -EINVAL was coerced into the mailbox command. Current firmware refuses this as an invalid QP type, but future/past firmware may do something else. Fixes: 09a7d9eca1a6c ('{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc') Reviewed-by: Ilya Lesokhin Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 39d24bf..e8d7eaf 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1584,6 +1584,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = MLX5_IB_DEFAULT_UIDX; struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; + int mlx5_st; void *qpc; u32 *in; int err; @@ -1592,6 +1593,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + mlx5_st = to_mlx5_st(init_attr->qp_type); + if (mlx5_st < 0) + return -EINVAL; + if (init_attr->rwq_ind_tbl) { if (!udata) return -ENOSYS; @@ -1753,7 +1758,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, st, mlx5_st); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) -- cgit v1.1 From aba462134634b502d720e15b23154f21cfa277e5 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Sun, 25 Feb 2018 13:39:53 +0200 Subject: {net, IB}/mlx5: Raise fatal IB event when sys error occurs All other mlx5_events report the port number as 1 based, which is how FW reports it in the port event EQE. Reporting 0 for this event causes mlx5_ib to not raise a fatal event notification to registered clients due to a seemingly invalid port. All switch cases in mlx5_ib_event that go through the port check are supposed to set the port now, so just do it once at variable declaration. Fixes: 89d44f0a6c73("net/mlx5_core: Add pci error handlers to mlx5_core driver") Reviewed-by: Majd Dibbiny Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 11 ++--------- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4236c80..bab38c6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3263,7 +3263,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = 0; + u8 port = (u8)work->param; if (mlx5_core_is_mp_slave(work->dev)) { ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); @@ -3283,8 +3283,6 @@ static void mlx5_ib_handle_event(struct work_struct *_work) case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: - port = (u8)work->param; - /* In RoCE, port up/down events are handled in * mlx5_netdev_event(). */ @@ -3298,24 +3296,19 @@ static void mlx5_ib_handle_event(struct work_struct *_work) case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; - port = (u8)work->param; break; case MLX5_DEV_EVENT_PKEY_CHANGE: ibev.event = IB_EVENT_PKEY_CHANGE; - port = (u8)work->param; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; case MLX5_DEV_EVENT_GUID_CHANGE: ibev.event = IB_EVENT_GID_CHANGE; - port = (u8)work->param; break; case MLX5_DEV_EVENT_CLIENT_REREG: ibev.event = IB_EVENT_CLIENT_REREGISTER; - port = (u8)work->param; break; case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: schedule_work(&ibdev->delay_drop.delay_drop_work); @@ -3327,7 +3320,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work) ibev.device = &ibdev->ib_dev; ibev.element.port_num = port; - if (port < 1 || port > ibdev->num_ports) { + if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 21d29f7..d39b0b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -124,7 +124,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) trigger_cmd_completions(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); + mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); mlx5_core_err(dev, "end\n"); unlock: -- cgit v1.1 From 65389322b28f81cc137b60a41044c2d958a7b950 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 25 Feb 2018 13:39:54 +0200 Subject: IB/mlx: Set slid to zero in Ethernet completion struct IB spec says that a lid should be ignored when link layer is Ethernet, for example when building or parsing a CM request message (CA17-34). However, since ib_lid_be16() and ib_lid_cpu16() validates the slid, not only when link layer is IB, we set the slid to zero to prevent false warnings in the kernel log. Fixes: 62ede7779904 ("Add OPA extended LID support") Reviewed-by: Majd Dibbiny Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/cq.c | 4 +++- drivers/infiniband/hw/mlx5/cq.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 9a566ee..82adc0d 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -601,6 +601,7 @@ static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->dlid_path_bits = 0; if (is_eth) { + wc->slid = 0; wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); @@ -851,7 +852,6 @@ repoll: } } - wc->slid = be16_to_cpu(cqe->rlid); g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; @@ -860,6 +860,7 @@ repoll: wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; if (is_eth) { + wc->slid = 0; wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_CVLAN_PRESENT_MASK) { @@ -871,6 +872,7 @@ repoll: memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); } else { + wc->slid = be16_to_cpu(cqe->rlid); wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; wc->vlan_id = 0xffff; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 5b974fb..b5cfdaa 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -226,7 +226,6 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); break; } - wc->slid = be16_to_cpu(cqe->slid); wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; wc->dlid_path_bits = cqe->ml_path; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; @@ -241,10 +240,12 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } if (ll != IB_LINK_LAYER_ETHERNET) { + wc->slid = be16_to_cpu(cqe->slid); wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; return; } + wc->slid = 0; vlan_present = cqe->l4_l3_hdr_type & 0x1; roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3; if (vlan_present) { -- cgit v1.1 From 2fb4f4eadd180a50112618dd9c5fef7fc50d4f08 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 25 Feb 2018 13:39:56 +0200 Subject: IB/core: Fix missing RDMA cgroups release in case of failure to register device During IB device registration process, if query_device() fails or if ib_core fails to registers sysfs entries, rdma cgroup cleanup is skipped. Cc: # v4.2+ Fixes: 4be3a4fa51f4 ("IB/core: Fix kernel crash during fail to initialize device") Reviewed-by: Daniel Jurgens Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e8010e7..bb065c9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -536,14 +536,14 @@ int ib_register_device(struct ib_device *device, ret = device->query_device(device, &device->attrs, &uhw); if (ret) { pr_warn("Couldn't query the device attributes\n"); - goto cache_cleanup; + goto cg_cleanup; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { pr_warn("Couldn't register device %s with driver model\n", device->name); - goto cache_cleanup; + goto cg_cleanup; } device->reg_state = IB_DEV_REGISTERED; @@ -559,6 +559,8 @@ int ib_register_device(struct ib_device *device, mutex_unlock(&device_mutex); return 0; +cg_cleanup: + ib_device_unregister_rdmacg(device); cache_cleanup: ib_cache_cleanup_one(device); ib_cache_release_one(device); -- cgit v1.1 From a45bc17b360d75fac9ced85e99fda14bf38b4dc3 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Mon, 26 Feb 2018 01:51:37 -0800 Subject: RDMA/bnxt_re: Unconditionly fence non wire memory operations HW requires an unconditonal fence for all non-wire memory operations through SQ. This guarantees the completions of these memory operations. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 643174d..755f1cc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2227,10 +2227,13 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey; + /* Need unconditional fence for local invalidate + * opcode to work as expected. + */ + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; + if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; - if (wr->send_flags & IB_SEND_FENCE) - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; if (wr->send_flags & IB_SEND_SOLICITED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT; @@ -2251,8 +2254,12 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, wqe->frmr.levels = qplib_frpl->hwq.level + 1; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; - if (wr->wr.send_flags & IB_SEND_FENCE) - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; + /* Need unconditional fence for reg_mr + * opcode to function as expected. + */ + + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; + if (wr->wr.send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; -- cgit v1.1 From c354dff00db8df80f271418d8392065e10ffffb6 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Mon, 26 Feb 2018 01:51:38 -0800 Subject: RDMA/bnxt_re: Fix incorrect DB offset calculation To support host systems with non 4K page size, l2_db_size shall be calculated with 4096 instead of PAGE_SIZE. Also, supply the host page size to FW during initialization. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 6 +++++- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 1 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 3 ++- drivers/infiniband/hw/bnxt_re/roce_hsi.h | 25 ++++++++++++++++++++++++- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 8329ec6..14d153d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -460,7 +460,11 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, int rc; RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags); - + /* Supply (log-base-2-of-host-page-size - base-page-shift) + * to bono to adjust the doorbell page sizes. + */ + req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT - + RCFW_DBR_BASE_PAGE_SHIFT); /* * VFs need not setup the HW context area, PF * shall setup this area for VF. Skipping the diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 6bee6e3..c7cce2e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -49,6 +49,7 @@ #define RCFW_COMM_SIZE 0x104 #define RCFW_DBR_PCI_BAR_REGION 2 +#define RCFW_DBR_BASE_PAGE_SHIFT 12 #define RCFW_CMD_PREP(req, CMD, cmd_flags) \ do { \ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 0305798..ee98e5e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -139,7 +139,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_pkey = le32_to_cpu(sb->max_pkeys); attr->max_inline_data = le32_to_cpu(sb->max_inline_data); - attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE; + attr->l2_db_size = (sb->l2_db_space_size + 1) * + (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = le32_to_cpu(sb->max_gid); bnxt_qplib_query_version(rcfw, attr->fw_ver); diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 2d7ea09..3e5a4f7 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1761,7 +1761,30 @@ struct cmdq_initialize_fw { #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4) #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4) #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4) - __le16 reserved16; + /* This value is (log-base-2-of-DBR-page-size - 12). + * 0 for 4KB. HW supported values are enumerated below. + */ + __le16 log2_dbr_pg_size; + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK 0xfUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT 0 + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K 0x0UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K 0x1UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K 0x2UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K 0x3UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K 0x4UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K 0x5UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K 0x6UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K 0x7UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M 0x8UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M 0x9UL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M 0xaUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M 0xbUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M 0xcUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M 0xdUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M 0xeUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M 0xfUL + #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST \ + CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M __le64 qpc_page_dir; __le64 mrw_page_dir; __le64 srq_page_dir; -- cgit v1.1 From 72a6d72c2cd03bba7b70117b63dea83d2de88057 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 14 Feb 2018 19:38:40 +0200 Subject: drm/i915/audio: fix check for av_enc_map overflow Turns out -1 >= ARRAY_SIZE() is always true. Move the bounds check where we know pipe >= 0 and next to the array indexing where it makes most sense. Fixes: 9965db26ac05 ("drm/i915: Check for fused or unused pipes") Fixes: 0b7029b7e43f ("drm/i915: Check for fused or unused pipes") Cc: # v4.10+ Cc: Mika Kahola Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Joonas Lahtinen Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Dhinakaran Pandiyan Reviewed-by: Mika Kahola Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180214173840.25360-1-jani.nikula@intel.com (cherry picked from commit cdb3db8542d854bd678d60cd28861b042e191672) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_audio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 522d54f..4a01f62 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -779,11 +779,11 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv, { struct intel_encoder *encoder; - if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map))) - return NULL; - /* MST */ if (pipe >= 0) { + if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map))) + return NULL; + encoder = dev_priv->av_enc_map[pipe]; /* * when bootup, audio driver may not know it is -- cgit v1.1 From 497158aa5f520db50452ef928c0f955cb42f2e77 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 26 Feb 2018 01:51:39 -0800 Subject: RDMA/bnxt_re: Fix the ib_reg failure cleanup Release the netdev references in the cleanup path. Invokes the cleanup routines if bnxt_re_ib_reg fails. Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 33a4480..604c805 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1416,9 +1416,12 @@ static void bnxt_re_task(struct work_struct *work) switch (re_work->event) { case NETDEV_REGISTER: rc = bnxt_re_ib_reg(rdev); - if (rc) + if (rc) { dev_err(rdev_to_dev(rdev), "Failed to register with IB: %#x", rc); + bnxt_re_remove_one(rdev); + bnxt_re_dev_unreg(rdev); + } break; case NETDEV_UP: bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, -- cgit v1.1 From 4cd482c12be473ae507eba232a8374c798233e42 Mon Sep 17 00:00:00 2001 From: Muneendra Kumar M Date: Tue, 27 Feb 2018 21:51:49 -0800 Subject: IB/core : Add null pointer check in addr_resolve dev_get_by_index is being called in addr_resolve function which returns NULL and NULL pointer access leads to kernel crash. Following call trace is observed while running rdma_lat test application [ 146.173149] BUG: unable to handle kernel NULL pointer dereference at 00000000000004a0 [ 146.173198] IP: addr_resolve+0x9e/0x3e0 [ib_core] [ 146.173221] PGD 0 P4D 0 [ 146.173869] Oops: 0000 [#1] SMP PTI [ 146.182859] CPU: 8 PID: 127 Comm: kworker/8:1 Tainted: G O 4.15.0-rc6+ #18 [ 146.183758] Hardware name: LENOVO System x3650 M5: -[8871AC1]-/01KN179, BIOS-[TCE132H-2.50]- 10/11/2017 [ 146.184691] Workqueue: ib_cm cm_work_handler [ib_cm] [ 146.185632] RIP: 0010:addr_resolve+0x9e/0x3e0 [ib_core] [ 146.186584] RSP: 0018:ffffc9000362faa0 EFLAGS: 00010246 [ 146.187521] RAX: 000000000000001b RBX: ffffc9000362fc08 RCX: 0000000000000006 [ 146.188472] RDX: 0000000000000000 RSI: 0000000000000096 RDI : ffff88087fc16990 [ 146.189427] RBP: ffffc9000362fb18 R08: 00000000ffffff9d R09: 00000000000004ac [ 146.190392] R10: 00000000000001e7 R11: 0000000000000001 R12: ffff88086af2e090 [ 146.191361] R13: 0000000000000000 R14: 0000000000000001 R15: 00000000ffffff9d [ 146.192327] FS: 0000000000000000(0000) GS:ffff88087fc00000(0000) knlGS:0000000000000000 [ 146.193301] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 146.194274] CR2: 00000000000004a0 CR3: 000000000220a002 CR4: 00000000003606e0 [ 146.195258] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 146.196256] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 146.197231] Call Trace: [ 146.198209] ? rdma_addr_register_client+0x30/0x30 [ib_core] [ 146.199199] rdma_resolve_ip+0x1af/0x280 [ib_core] [ 146.200196] rdma_addr_find_l2_eth_by_grh+0x154/0x2b0 [ib_core] The below patch adds the missing NULL pointer check returned by dev_get_by_index before accessing the netdev to avoid kernel crash. We observed the below crash when we try to do the below test. server client --------- --------- |1.1.1.1|<----rxe-channel--->|1.1.1.2| --------- --------- On server: rdma_lat -c -n 2 -s 1024 On client:rdma_lat 1.1.1.1 -c -n 2 -s 1024 Fixes: 200298326b27 ("IB/core: Validate route when we init ah") Signed-off-by: Muneendra Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/addr.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a5b4cf0..9183d148d 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -550,18 +550,13 @@ static int addr_resolve(struct sockaddr *src_in, dst_release(dst); } - if (ndev->flags & IFF_LOOPBACK) { - ret = rdma_translate_ip(dst_in, addr); - /* - * Put the loopback device and get the translated - * device instead. - */ + if (ndev) { + if (ndev->flags & IFF_LOOPBACK) + ret = rdma_translate_ip(dst_in, addr); + else + addr->bound_dev_if = ndev->ifindex; dev_put(ndev); - ndev = dev_get_by_index(addr->net, addr->bound_dev_if); - } else { - addr->bound_dev_if = ndev->ifindex; } - dev_put(ndev); return ret; } -- cgit v1.1 From 1b0008450f23632b029e9fde9a71be90f119ec35 Mon Sep 17 00:00:00 2001 From: Mahesh Kumar Date: Thu, 15 Feb 2018 15:26:41 +0530 Subject: drm/i915/cnl: Fix PORT_TX_DW5/7 register address Register Address for CNL_PORT_DW5_LN0_D is 0x162E54, but current code is defining it as 0x162ED4. Similarly for CNL_PORT_DW7_LN0_D register address is defined 0x162EDC instead of 0x162E5C, fix it. Signed-off-by: Mahesh Kumar Fixes: 04416108ccea ("drm/i915/cnl: Add registers related to voltage swing sequences.") Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180215095643.3844-2-mahesh1.kumar@intel.com (cherry picked from commit e103962611b2d464be6ab596d7b3495fe7b4c132) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a2108e3..33eb0c5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2027,7 +2027,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW5_LN0_AE 0x162454 #define _CNL_PORT_TX_DW5_LN0_B 0x162654 #define _CNL_PORT_TX_DW5_LN0_C 0x162C54 -#define _CNL_PORT_TX_DW5_LN0_D 0x162ED4 +#define _CNL_PORT_TX_DW5_LN0_D 0x162E54 #define _CNL_PORT_TX_DW5_LN0_F 0x162854 #define CNL_PORT_TX_DW5_GRP(port) _MMIO_PORT6(port, \ _CNL_PORT_TX_DW5_GRP_AE, \ @@ -2058,7 +2058,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW7_LN0_AE 0x16245C #define _CNL_PORT_TX_DW7_LN0_B 0x16265C #define _CNL_PORT_TX_DW7_LN0_C 0x162C5C -#define _CNL_PORT_TX_DW7_LN0_D 0x162EDC +#define _CNL_PORT_TX_DW7_LN0_D 0x162E5C #define _CNL_PORT_TX_DW7_LN0_F 0x16285C #define CNL_PORT_TX_DW7_GRP(port) _MMIO_PORT6(port, \ _CNL_PORT_TX_DW7_GRP_AE, \ -- cgit v1.1 From e659d14ed48096f87a678e7ebbdf286a817b4d0e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Feb 2018 14:01:44 +0000 Subject: drm/i915: Clear the in-use marker on execbuf failure If we fail to unbind the vma (due to a signal on an active buffer that needs to be moved for the next execbuf), then we need to clear the persistent tracking state we setup for this execbuf. Fixes: c7c6e46f913b ("drm/i915: Convert execbuf to use struct-of-array packing for critical fields") Testcase: igt/gem_fenced_exec_thrash/no-spare-fences-busy* Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: # v4.14+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180219140144.24004-1-chris@chris-wilson.co.uk (cherry picked from commit ed2f3532321083cf40e4da4e36234880e0136136) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 36fca0e..3ab1ace 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -505,6 +505,8 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma) list_add_tail(&vma->exec_link, &eb->unbound); if (drm_mm_node_allocated(&vma->node)) err = i915_vma_unbind(vma); + if (unlikely(err)) + vma->exec_flags = NULL; } return err; } -- cgit v1.1 From fa89782b4f9c40d40e3f7d9ad7ef14e0bb0c3ca0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 20 Feb 2018 10:47:42 +0000 Subject: drm/i915: Make global seqno known in i915_gem_request_execute tracepoint Commit fe49789fab97 ("drm/i915: Deconstruct execute fence") re-arranged the code and moved the i915_gem_request_execute tracepoint to before the global seqno is assigned to the request. We need to move the tracepoint a bit later so this information is once again available. Signed-off-by: Tvrtko Ursulin Fixes: fe49789fab97 ("drm/i915: Deconstruct execute fence") Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180220104742.565-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 158863fb50968c0ae85e87a401221425c941b9f0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e09d18d..a3e93d4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -476,8 +476,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); - trace_i915_gem_request_execute(request); - /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); @@ -501,6 +499,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); + trace_i915_gem_request_execute(request); + wake_up_all(&request->execute); } -- cgit v1.1 From 910f8befdf5bccf25287d9f1743e3e546bcb7ce0 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 28 Feb 2018 09:19:03 +0000 Subject: xen/pirq: fix error path cleanup when binding MSIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current cleanup in the error path of xen_bind_pirq_msi_to_irq is wrong. First of all there's an off-by-one in the cleanup loop, which can lead to unbinding wrong IRQs. Secondly IRQs not bound won't be freed, thus leaking IRQ numbers. Note that there's no need to differentiate between bound and unbound IRQs when freeing them, __unbind_from_irq will deal with both of them correctly. Fixes: 4892c9b4ada9f9 ("xen: add support for MSI message groups") Reported-by: Hooman Mirhadi Signed-off-by: Roger Pau Monné Reviewed-by: Amit Shah Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 1ab4bd1..762378f 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -755,8 +755,8 @@ out: mutex_unlock(&irq_mapping_update_lock); return irq; error_irq: - for (; i >= 0; i--) - __unbind_from_irq(irq + i); + while (nvec--) + __unbind_from_irq(irq + nvec); mutex_unlock(&irq_mapping_update_lock); return ret; } -- cgit v1.1 From c2d2e6738a209f0f9dffa2dc8e7292fc45360d61 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Wed, 28 Feb 2018 07:23:23 -0500 Subject: xen-netfront: Fix hang on device removal A toolstack may delete the vif frontend and backend xenstore entries while xen-netfront is in the removal code path. In that case, the checks for xenbus_read_driver_state would return XenbusStateUnknown, and xennet_remove would hang indefinitely. This hang prevents system shutdown. xennet_remove must be able to handle XenbusStateUnknown, and netback_changed must also wake up the wake_queue for that state as well. Fixes: 5b5971df3bc2 ("xen-netfront: remove warning when unloading module") Signed-off-by: Jason Andryuk Cc: Eduardo Otubo Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/net/xen-netfront.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8328d39..3127bc8 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2005,7 +2005,10 @@ static void netback_changed(struct xenbus_device *dev, case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: + break; + case XenbusStateUnknown: + wake_up_all(&module_unload_q); break; case XenbusStateInitWait: @@ -2136,7 +2139,9 @@ static int xennet_remove(struct xenbus_device *dev) xenbus_switch_state(dev, XenbusStateClosing); wait_event(module_unload_q, xenbus_read_driver_state(dev->otherend) == - XenbusStateClosing); + XenbusStateClosing || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); xenbus_switch_state(dev, XenbusStateClosed); wait_event(module_unload_q, -- cgit v1.1 From 47b02f4c621c5ae9fd27248dfa9a194bc1387ecb Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 27 Feb 2018 11:19:22 +0100 Subject: x86/xen: add tty0 and hvc0 as preferred consoles for dom0 Today the tty0 and hvc0 consoles are added as a preferred consoles for pv domUs only. As this requires a boot parameter for getting dom0 messages per default, add them for dom0, too. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c047f42..3c2c253 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (!xen_initial_domain()) { add_preferred_console("xenboot", 0, NULL); - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; } else { @@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_boot_params_init_edd(); } + + add_preferred_console("tty", 0, NULL); + add_preferred_console("hvc", 0, NULL); + #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ pci_probe &= ~PCI_PROBE_BIOS; -- cgit v1.1 From d6b6669762898dfc99e9273b8d8603bc47014aa9 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Wed, 21 Feb 2018 16:10:33 +0530 Subject: drm/amd/display: check for ipp before calling cursor operations Currently all cursor related functions are made to all pipes that are attached to a particular stream. This is not applicable to pipes that do not have cursor plane initialised like underlay. Hence this patch allows cursor related operations on a pipe only if ipp in available on that particular pipe. The check is added to set_cursor_position & set_cursor_attribute. Signed-off-by: Shirish S Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 261811e..539c3e0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -197,7 +197,8 @@ bool dc_stream_set_cursor_attributes( for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp)) + if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.xfm && + !pipe_ctx->plane_res.dpp) || !pipe_ctx->plane_res.ipp) continue; if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state) continue; @@ -273,7 +274,8 @@ bool dc_stream_set_cursor_position( if (pipe_ctx->stream != stream || (!pipe_ctx->plane_res.mi && !pipe_ctx->plane_res.hubp) || !pipe_ctx->plane_state || - (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp)) + (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) || + !pipe_ctx->plane_res.ipp) continue; if (pipe_ctx->plane_state->address.type -- cgit v1.1 From 9f51943c2a434d40f46776369b7a72e0ffb6ea59 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 19 Jan 2018 19:02:16 +0800 Subject: drm/amdgpu: only flush hotplug work without DC since hotplug_work is initialized under the case of no dc support Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 56bcd59..36483e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -257,7 +257,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; - flush_work(&adev->hotplug_work); + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); cancel_work_sync(&adev->reset_work); return r; } @@ -282,7 +283,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) adev->irq.installed = false; if (adev->irq.msi_enabled) pci_disable_msi(adev->pdev); - flush_work(&adev->hotplug_work); + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); cancel_work_sync(&adev->reset_work); } -- cgit v1.1 From a4ef6edc8e87bee656a5feaedb0bb167acd9d360 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 24 Jan 2018 12:20:32 +0800 Subject: drm/amdgpu: move WB_FREE to correct place WB_FREE should be put after all engines's hw_fini done, otherwise the invalid wptr/rptr_addr would still be used by engines which trigger abnormal bugs. This fixes couple DMAR reading error in host side for SRIOV after guest kmd is unloaded. Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 829dc2e..d9f3d54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1455,11 +1455,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - amdgpu_free_static_csa(adev); - amdgpu_device_wb_fini(adev); - amdgpu_device_vram_scratch_fini(adev); - } if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { @@ -1486,6 +1481,13 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + amdgpu_free_static_csa(adev); + amdgpu_device_wb_fini(adev); + amdgpu_device_vram_scratch_fini(adev); + } + r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); /* XXX handle errors */ if (r) { -- cgit v1.1 From 8014e2d3fd640c892ed334e7de7af918e141c8ff Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 27 Feb 2018 09:55:17 -0500 Subject: drm/amdgpu:Fixed wrong emit frame size for enc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emit frame size should match with corresponding function, uvd_v6_0_enc_ring_emit_vm_flush has 5 amdgpu_ring_write Signed-off-by: James Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index b2bfeda..9bab484 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1618,7 +1618,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { .set_wptr = uvd_v6_0_enc_ring_set_wptr, .emit_frame_size = 4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */ - 6 + /* uvd_v6_0_enc_ring_emit_vm_flush */ + 5 + /* uvd_v6_0_enc_ring_emit_vm_flush */ 5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */ 1, /* uvd_v6_0_enc_ring_insert_end */ .emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */ -- cgit v1.1 From a0aaa03062be252aacad60a776f3374dd53e3f98 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 26 Feb 2018 17:36:19 -0500 Subject: drm/amd/powerplay: fix power over limit on Fiji power containment disabled only on Fiji and compute power profile. It violates PCIe spec and may cause power supply failed. Enabling it will fix the issue, even the fix will drop performance of some compute tests. Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 45be313..08e8a79 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4537,13 +4537,6 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr, int tmp_result, result = 0; uint32_t sclk_mask = 0, mclk_mask = 0; - if (hwmgr->chip_id == CHIP_FIJI) { - if (request->type == AMD_PP_GFX_PROFILE) - smu7_enable_power_containment(hwmgr); - else if (request->type == AMD_PP_COMPUTE_PROFILE) - smu7_disable_power_containment(hwmgr); - } - if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_AUTO) return -EINVAL; -- cgit v1.1 From 8d333fe0ad9dcb9651b9b450424a960bac040f96 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Wed, 7 Feb 2018 16:17:16 +0800 Subject: drm/amdgpu: Correct sdma_v4 get_wptr(v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the original method will change the wptr value in wb. v2: furthur cleanup Signed-off-by: Emily Deng Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e92fb37..91cf95a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -238,31 +238,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { u32 lowbit, highbit; int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - wptr = &local_wptr; lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = highbit; + wptr = wptr << 32; + wptr |= lowbit; } - return *wptr; + return wptr >> 2; } /** -- cgit v1.1 From f812dec57d55719f5fe1f6fce193561015369363 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 29 Dec 2017 17:06:41 +0800 Subject: drm/amdgpu: fix&cleanups for wb_clear fix: should do right shift on wb before clearing cleanups: 1,should memset all wb buffer 2,set max wb number to 128 (total 4KB) is big enough Signed-off-by: Monk Liu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d5a2eef..74edba1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1156,7 +1156,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo *wb_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d9f3d54..af1b879 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -492,7 +492,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) memset(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t)); + memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); } return 0; @@ -530,8 +530,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + wb >>= 3; if (wb < adev->wb.num_wb) - __clear_bit(wb >> 3, adev->wb.used); + __clear_bit(wb, adev->wb.used); } /** -- cgit v1.1 From 82d0ece957bcd0e6d500759b205508dbda1bc265 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Mon, 26 Feb 2018 09:09:26 -0500 Subject: drm/amd/amdgpu: Correct VRAM width for APUs with GMC9 DDR4 has a 64-bit width not 128-bits. It was reporting twice the width. Tested with my Ryzen 2400G. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2719937..fd63709 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -682,7 +682,10 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); if (!adev->mc.vram_width) { /* hbm memory channel size */ - chansize = 128; + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; -- cgit v1.1 From fd430a702d37747d79bb5520590ce198df02aaa5 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 18 Jan 2018 16:58:04 +0800 Subject: drm/amdgpu: skip ECC for SRIOV in gmc late_init Signed-off-by: Monk Liu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index fd63709..3b7e7af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -634,7 +634,7 @@ static int gmc_v9_0_late_init(void *handle) for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) BUG_ON(vm_inv_eng[i] > 16); - if (adev->asic_type == CHIP_VEGA10) { + if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) { r = gmc_v9_0_ecc_available(adev); if (r == 1) { DRM_INFO("ECC is active.\n"); -- cgit v1.1 From 20f4ed3ae5c1a2dfedfafc993e8974b4855e2d13 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Tue, 27 Feb 2018 17:34:58 +0100 Subject: MAINTAINERS: update entries for ARM/STM32 Changes old git repository to the maintained one and adds more patterns. Signed-off-by: Alexandre Torgue Acked-by: Maxime Coquelin Signed-off-by: Arnd Bergmann --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f2e4d9d..bbd8a403 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1999,8 +1999,10 @@ M: Maxime Coquelin M: Alexandre Torgue L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mcoquelin/stm32.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32-next N: stm32 +F: arch/arm/boot/dts/stm32* +F: arch/arm/mach-stm32/ F: drivers/clocksource/armv7m_systick.c ARM/TANGO ARCHITECTURE -- cgit v1.1 From c52232a49e203a65a6e1a670cd5262f59e9364a0 Mon Sep 17 00:00:00 2001 From: Lingutla Chandrasekhar Date: Thu, 18 Jan 2018 17:20:22 +0530 Subject: timers: Forward timer base before migrating timers On CPU hotunplug the enqueued timers of the unplugged CPU are migrated to a live CPU. This happens from the control thread which initiated the unplug. If the CPU on which the control thread runs came out from a longer idle period then the base clock of that CPU might be stale because the control thread runs prior to any event which forwards the clock. In such a case the timers from the unplugged CPU are queued on the live CPU based on the stale clock which can cause large delays due to increased granularity of the outer timer wheels which are far away from base:;clock. But there is a worse problem than that. The following sequence of events illustrates it: - CPU0 timer1 is queued expires = 59969 and base->clk = 59131. The timer is queued at wheel level 2, with resulting expiry time = 60032 (due to level granularity). - CPU1 enters idle @60007, with next timer expiry @60020. - CPU0 is hotplugged at @60009 - CPU1 exits idle and runs the control thread which migrates the timers from CPU0 timer1 is now queued in level 0 for immediate handling in the next softirq because the requested expiry time 59969 is before CPU1 base->clk 60007 - CPU1 runs code which forwards the base clock which succeeds because the next expiring timer. which was collected at idle entry time is still set to 60020. So it forwards beyond 60007 and therefore misses to expire the migrated timer1. That timer gets expired when the wheel wraps around again, which takes between 63 and 630ms depending on the HZ setting. Address both problems by invoking forward_timer_base() for the control CPUs timer base. All other places, which might run into a similar problem (mod_timer()/add_timer_on()) already invoke forward_timer_base() to avoid that. [ tglx: Massaged comment and changelog ] Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") Co-developed-by: Neeraj Upadhyay Signed-off-by: Neeraj Upadhyay Signed-off-by: Lingutla Chandrasekhar Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: linux-arm-msm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180118115022.6368-1-clingutla@codeaurora.org --- kernel/time/timer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 48150ab..4a4fd56 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1894,6 +1894,12 @@ int timers_dead_cpu(unsigned int cpu) raw_spin_lock_irq(&new_base->lock); raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + /* + * The current CPUs base clock might be stale. Update it + * before moving the timers over. + */ + forward_timer_base(new_base); + BUG_ON(old_base->running_timer); for (i = 0; i < WHEEL_SIZE; i++) -- cgit v1.1 From d811bcee1f7a379cad893fdee4c8db5775963b7f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 28 Feb 2018 18:05:34 -0800 Subject: pvcalls-front: 64-bit align flags We are using test_and_* operations on the status and flag fields of struct sock_mapping. However, these functions require the operand to be 64-bit aligned on arm64. Currently, only status is 64-bit aligned. Make status and flags explicitly 64-bit aligned. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/pvcalls-front.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index aedbee3..2f11ca7 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -73,20 +73,25 @@ struct sock_mapping { wait_queue_head_t inflight_conn_req; } active; struct { - /* Socket status */ + /* + * Socket status, needs to be 64-bit aligned due to the + * test_and_* functions which have this requirement on arm64. + */ #define PVCALLS_STATUS_UNINITALIZED 0 #define PVCALLS_STATUS_BIND 1 #define PVCALLS_STATUS_LISTEN 2 - uint8_t status; + uint8_t status __attribute__((aligned(8))); /* * Internal state-machine flags. * Only one accept operation can be inflight for a socket. * Only one poll operation can be inflight for a given socket. + * flags needs to be 64-bit aligned due to the test_and_* + * functions which have this requirement on arm64. */ #define PVCALLS_FLAG_ACCEPT_INFLIGHT 0 #define PVCALLS_FLAG_POLL_INFLIGHT 1 #define PVCALLS_FLAG_POLL_RET 2 - uint8_t flags; + uint8_t flags __attribute__((aligned(8))); uint32_t inflight_req_id; struct sock_mapping *accept_map; wait_queue_head_t inflight_accept_req; -- cgit v1.1 From 945fd17ab6bab8a4d05da6c3170519fbcfe62ddb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Feb 2018 21:14:26 +0100 Subject: x86/cpu_entry_area: Sync cpu_entry_area to initial_page_table The separation of the cpu_entry_area from the fixmap missed the fact that on 32bit non-PAE kernels the cpu_entry_area mapping might not be covered in initial_page_table by the previous synchronizations. This results in suspend/resume failures because 32bit utilizes initial page table for resume. The absence of the cpu_entry_area mapping results in a triple fault, aka. insta reboot. With PAE enabled this works by chance because the PGD entry which covers the fixmap and other parts incindentally provides the cpu_entry_area mapping as well. Synchronize the initial page table after setting up the cpu entry area. Instead of adding yet another copy of the same code, move it to a function and invoke it from the various places. It needs to be investigated if the existing calls in setup_arch() and setup_per_cpu_areas() can be replaced by the later invocation from setup_cpu_entry_areas(), but that's beyond the scope of this fix. Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Reported-by: Woody Suwalski Signed-off-by: Thomas Gleixner Tested-by: Woody Suwalski Cc: William Grant Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1802282137290.1392@nanos.tec.linutronix.de --- arch/x86/include/asm/pgtable_32.h | 1 + arch/x86/include/asm/pgtable_64.h | 1 + arch/x86/kernel/setup.c | 17 +++++------------ arch/x86/kernel/setup_percpu.c | 17 ++++------------- arch/x86/mm/cpu_entry_area.c | 6 ++++++ arch/x86/mm/init_32.c | 15 +++++++++++++++ 6 files changed, 32 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e554667..b3ec519 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,6 +32,7 @@ extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +void sync_initial_page_table(void); /* * Define this if things work differently on an i386 and an i486: diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9..1149d21 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); +static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %p(%016lx)\n", \ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e9..4c616be 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1204,20 +1204,13 @@ void __init setup_arch(char **cmdline_p) kasan_init(); -#ifdef CONFIG_X86_32 - /* sync back kernel address range */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. + * Sync back kernel address range. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); tboot_probe(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 497aa76..ea554f8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); -#ifdef CONFIG_X86_32 /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in * the smpboot asm. We can't reliably pick up percpu mappings * using vmalloc_fault(), because exception dispatch needs * percpu data. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. - */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); } diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index b9283cc..476d810 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void) for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); + + /* + * This is the last essential update to swapper_pgdir which needs + * to be synchronized to initial_page_table on 32bit. + */ + sync_initial_page_table(); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 79cb066..396e1f0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} + void __init native_pagetable_init(void) { unsigned long pfn, va; -- cgit v1.1 From e64b6afa98f3629d0c0c46233bbdbe8acdb56f06 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Wed, 28 Feb 2018 17:46:53 +0100 Subject: drm/sun4i: Fix dclk_set_phase Phase value is not shifted before writing. Shift left of 28 bits to fit right bits Signed-off-by: Giulio Benetti Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/1519836413-35023-1-git-send-email-giulio.benetti@micronovasrl.com --- drivers/gpu/drm/sun4i/sun4i_dotclock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c index 023f39b..e36004f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c +++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c @@ -132,10 +132,13 @@ static int sun4i_dclk_get_phase(struct clk_hw *hw) static int sun4i_dclk_set_phase(struct clk_hw *hw, int degrees) { struct sun4i_dclk *dclk = hw_to_dclk(hw); + u32 val = degrees / 120; + + val <<= 28; regmap_update_bits(dclk->regmap, SUN4I_TCON0_IO_POL_REG, GENMASK(29, 28), - degrees / 120); + val); return 0; } -- cgit v1.1 From b758dbd57650b4157da98b2c734974b409849625 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Feb 2018 12:09:56 +0100 Subject: platform/x86: intel-vbtn: Reset wakeup capable flag on removal The intel-vbtn device will not be able to wake up the system any more after removing the notify handler provided by its driver, so make its sysfs attributes reflect that. Fixes: 91f9e850d465 (platform: x86: intel-vbtn: Wake up the system from suspend-to-idle) Signed-off-by: Rafael J. Wysocki Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel-vbtn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 8173307..c13780b8 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -166,6 +166,7 @@ static int intel_vbtn_remove(struct platform_device *device) { acpi_handle handle = ACPI_HANDLE(&device->dev); + device_init_wakeup(&device->dev, false); acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler); /* -- cgit v1.1 From 38c08aa54b50640302a6529f4b6eb441b3bfc5dd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Feb 2018 12:10:59 +0100 Subject: platform/x86: intel-hid: Reset wakeup capable flag on removal The intel-hid device will not be able to wake up the system any more after removing the notify handler provided by its driver, so make its sysfs attributes reflect that. Fixes: ef884112e55c (platform: x86: intel-hid: Wake up the system from suspend-to-idle) Signed-off-by: Rafael J. Wysocki Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel-hid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index d1a0131..5e3df19 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -376,6 +376,7 @@ static int intel_hid_remove(struct platform_device *device) { acpi_handle handle = ACPI_HANDLE(&device->dev); + device_init_wakeup(&device->dev, false); acpi_remove_notify_handler(handle, ACPI_DEVICE_NOTIFY, notify_handler); intel_hid_set_enable(&device->dev, false); intel_button_array_enable(&device->dev, false); -- cgit v1.1 From 09a0fb67536a49af19f2bfc632100e9de91fe526 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 28 Feb 2018 18:44:34 +0000 Subject: KVM: s390: provide io interrupt kvm_stat We already count io interrupts, but we forgot to print them. Signed-off-by: Christian Borntraeger Fixes: d8346b7d9b ("KVM: s390: Support for I/O interrupts.") Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 77d7818..df19f15 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, { "instruction_epsw", VCPU_STAT(instruction_epsw) }, { "instruction_gs", VCPU_STAT(instruction_gs) }, -- cgit v1.1 From a8fd1f71749387c9a1053a83ff1c16287499a4e7 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 15 Feb 2018 22:59:47 -0500 Subject: btrfs: use kvzalloc to allocate btrfs_fs_info The srcu_struct in btrfs_fs_info scales in size with NR_CPUS. On kernels built with NR_CPUS=8192, this can result in kmalloc failures that prevent mounting. There is work in progress to try to resolve this for every user of srcu_struct but using kvzalloc will work around the failures until that is complete. As an example with NR_CPUS=512 on x86_64: the overall size of subvol_srcu is 3460 bytes, fs_info is 6496. Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/super.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a462ab..0f521ba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2974,7 +2974,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); security_free_mnt_opts(&fs_info->security_opts); - kfree(fs_info); + kvfree(fs_info); } /* tree mod log functions from ctree.c */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6e71a2a..4b81794 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1545,7 +1545,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, * it for searching for existing supers, so this lets us do that and * then open_ctree will properly initialize everything later. */ - fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); + fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); if (!fs_info) { error = -ENOMEM; goto error_sec_opts; -- cgit v1.1 From ac01f26a27f10aace4bb89fd2c2c05a60c251832 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Jan 2018 10:59:43 +0200 Subject: btrfs: handle failure of add_pending_csums add_pending_csums was added as part of the new data=ordered implementation in e6dcd2dc9c48 ("Btrfs: New data=ordered implementation"). Even back then it called the btrfs_csum_file_blocks which can fail but it never bothered handling the failure. In ENOMEM situation this could lead to the filesystem failing to write the checksums for a particular extent and not detect this. On read this could lead to the filesystem erroring out due to crc mismatch. Fix it by propagating failure from add_pending_csums and handling them. Signed-off-by: Nikolay Borisov Reviewed-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 29b4913..8f7d41f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2042,12 +2042,15 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, struct list_head *list) { struct btrfs_ordered_sum *sum; + int ret; list_for_each_entry(sum, list, list) { trans->adding_csums = true; - btrfs_csum_file_blocks(trans, + ret = btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); trans->adding_csums = false; + if (ret) + return ret; } return 0; } @@ -3061,7 +3064,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - add_pending_csums(trans, inode, &ordered_extent->list); + ret = add_pending_csums(trans, inode, &ordered_extent->list); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out; + } btrfs_ordered_update_i_size(inode, 0, ordered_extent); ret = btrfs_update_inode_fallback(trans, root, inode); -- cgit v1.1 From 765f3cebff0023d05d724374db8b63c01e07c499 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 31 Jan 2018 17:14:02 +0200 Subject: btrfs: Handle btrfs_set_extent_delalloc failure in relocate_file_extent_cluster Essentially duplicate the error handling from the above block which handles the !PageUptodate(page) case and additionally clear EXTENT_BOUNDARY. Signed-off-by: Nikolay Borisov Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f0c3f00..cd2298d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3268,8 +3268,22 @@ static int relocate_file_extent_cluster(struct inode *inode, nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end, 0, NULL, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, + NULL, 0); + if (ret) { + unlock_page(page); + put_page(page); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + PAGE_SIZE); + btrfs_delalloc_release_extents(BTRFS_I(inode), + PAGE_SIZE); + + clear_extent_bits(&BTRFS_I(inode)->io_tree, + page_start, page_end, + EXTENT_LOCKED | EXTENT_BOUNDARY); + goto out; + + } set_page_dirty(page); unlock_extent(&BTRFS_I(inode)->io_tree, -- cgit v1.1 From 92e222df7b8f05c565009c7383321b593eca488b Mon Sep 17 00:00:00 2001 From: Hans van Kranenburg Date: Mon, 5 Feb 2018 17:45:11 +0100 Subject: btrfs: alloc_chunk: fix DUP stripe size handling In case of using DUP, we search for enough unallocated disk space on a device to hold two stripes. The devices_info[ndevs-1].max_avail that holds the amount of unallocated space found is directly assigned to stripe_size, while it's actually twice the stripe size. Later on in the code, an unconditional division of stripe_size by dev_stripes corrects the value, but in the meantime there's a check to see if the stripe_size does not exceed max_chunk_size. Since during this check stripe_size is twice the amount as intended, the check will reduce the stripe_size to max_chunk_size if the actual correct to be used stripe_size is more than half the amount of max_chunk_size. The unconditional division later tries to correct stripe_size, but will actually make sure we can't allocate more than half the max_chunk_size. Fix this by moving the division by dev_stripes before the max chunk size check, so it always contains the right value, instead of putting a duct tape division in further on to get it fixed again. Since in all other cases than DUP, dev_stripes is 1, this change only affects DUP. Other attempts in the past were made to fix this: * 37db63a400 "Btrfs: fix max chunk size check in chunk allocator" tried to fix the same problem, but still resulted in part of the code acting on a wrongly doubled stripe_size value. * 86db25785a "Btrfs: fix max chunk size on raid5/6" unintentionally broke this fix again. The real problem was already introduced with the rest of the code in 73c5de0051. The user visible result however will be that the max chunk size for DUP will suddenly double, while it's actually acting according to the limits in the code again like it was 5 years ago. Reported-by: Naohiro Aota Link: https://www.spinics.net/lists/linux-btrfs/msg69752.html Fixes: 73c5de0051 ("btrfs: quasi-round-robin for chunk allocation") Fixes: 86db25785a ("Btrfs: fix max chunk size on raid5/6") Signed-off-by: Hans van Kranenburg Reviewed-by: David Sterba [ update comment ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2ceb924..b2d05c6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4829,10 +4829,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ndevs = min(ndevs, devs_max); /* - * the primary goal is to maximize the number of stripes, so use as many - * devices as possible, even if the stripes are not maximum sized. + * The primary goal is to maximize the number of stripes, so use as + * many devices as possible, even if the stripes are not maximum sized. + * + * The DUP profile stores more than one stripe per device, the + * max_avail is the total size so we have to adjust. */ - stripe_size = devices_info[ndevs-1].max_avail; + stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes); num_stripes = ndevs * dev_stripes; /* @@ -4867,8 +4870,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = devices_info[ndevs-1].max_avail; } - stripe_size = div_u64(stripe_size, dev_stripes); - /* align to BTRFS_STRIPE_LEN */ stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN); -- cgit v1.1 From 3c181c12c431fe33b669410d663beb9cceefcd1b Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 22 Feb 2018 21:58:42 +0800 Subject: btrfs: use proper endianness accessors for super_copy The fs_info::super_copy is a byte copy of the on-disk structure and all members must use the accessor macros/functions to obtain the right value. This was missing in update_super_roots and in sysfs readers. Moving between opposite endianness hosts will report bogus numbers in sysfs, and mount may fail as the root will not be restored correctly. If the filesystem is always used on a same endian host, this will not be a problem. Fix this by using the btrfs_set_super...() functions to set fs_info::super_copy values, and for the sysfs, use the cached fs_info::nodesize/sectorsize values. CC: stable@vger.kernel.org Fixes: df93589a17378 ("btrfs: export more from FS_INFO to sysfs") Signed-off-by: Anand Jain Reviewed-by: Liu Bo Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 8 +++----- fs/btrfs/transaction.c | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index a8bafed..d11c70b 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -423,7 +423,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); + return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->nodesize); } BTRFS_ATTR(, nodesize, btrfs_nodesize_show); @@ -433,8 +433,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", - fs_info->super_copy->sectorsize); + return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize); } BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show); @@ -444,8 +443,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return snprintf(buf, PAGE_SIZE, "%u\n", - fs_info->super_copy->sectorsize); + return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->sectorsize); } BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 04f0714..9220f00 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1722,19 +1722,23 @@ static void update_super_roots(struct btrfs_fs_info *fs_info) super = fs_info->super_copy; + /* update latest btrfs_super_block::chunk_root refs */ root_item = &fs_info->chunk_root->root_item; - super->chunk_root = root_item->bytenr; - super->chunk_root_generation = root_item->generation; - super->chunk_root_level = root_item->level; + btrfs_set_super_chunk_root(super, root_item->bytenr); + btrfs_set_super_chunk_root_generation(super, root_item->generation); + btrfs_set_super_chunk_root_level(super, root_item->level); + /* update latest btrfs_super_block::root refs */ root_item = &fs_info->tree_root->root_item; - super->root = root_item->bytenr; - super->generation = root_item->generation; - super->root_level = root_item->level; + btrfs_set_super_root(super, root_item->bytenr); + btrfs_set_super_generation(super, root_item->generation); + btrfs_set_super_root_level(super, root_item->level); + if (btrfs_test_opt(fs_info, SPACE_CACHE)) - super->cache_generation = root_item->generation; + btrfs_set_super_cache_generation(super, root_item->generation); if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags)) - super->uuid_tree_generation = root_item->generation; + btrfs_set_super_uuid_tree_generation(super, + root_item->generation); } int btrfs_transaction_in_commit(struct btrfs_fs_info *info) -- cgit v1.1 From d4dfc0f4d39475ccbbac947880b5464a74c30b99 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 6 Feb 2018 20:39:20 +0000 Subject: Btrfs: send, fix issuing write op when processing hole in no data mode When doing an incremental send of a filesystem with the no-holes feature enabled, we end up issuing a write operation when using the no data mode send flag, instead of issuing an update extent operation. Fix this by issuing the update extent operation instead. Trivial reproducer: $ mkfs.btrfs -f -O no-holes /dev/sdc $ mkfs.btrfs -f /dev/sdd $ mount /dev/sdc /mnt/sdc $ mount /dev/sdd /mnt/sdd $ xfs_io -f -c "pwrite -S 0xab 0 32K" /mnt/sdc/foobar $ btrfs subvolume snapshot -r /mnt/sdc /mnt/sdc/snap1 $ xfs_io -c "fpunch 8K 8K" /mnt/sdc/foobar $ btrfs subvolume snapshot -r /mnt/sdc /mnt/sdc/snap2 $ btrfs send /mnt/sdc/snap1 | btrfs receive /mnt/sdd $ btrfs send --no-data -p /mnt/sdc/snap1 /mnt/sdc/snap2 \ | btrfs receive -vv /mnt/sdd Before this change the output of the second receive command is: receiving snapshot snap2 uuid=f6922049-8c22-e544-9ff9-fc6755918447... utimes write foobar, offset 8192, len 8192 utimes foobar BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=f6922049-8c22-e544-9ff9-... After this change it is: receiving snapshot snap2 uuid=564d36a3-ebc8-7343-aec9-bf6fda278e64... utimes update_extent foobar: offset=8192, len=8192 utimes foobar BTRFS_IOC_SET_RECEIVED_SUBVOL uuid=564d36a3-ebc8-7343-aec9-bf6fda278e64... Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f306c60..484e2af 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5005,6 +5005,9 @@ static int send_hole(struct send_ctx *sctx, u64 end) u64 len; int ret = 0; + if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) + return send_update_extent(sctx, offset, end - offset); + p = fs_path_alloc(); if (!p) return -ENOMEM; -- cgit v1.1 From 9a6509c4daa91400b52a5fd541a5521c649a8fea Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 28 Feb 2018 15:55:40 +0000 Subject: Btrfs: fix log replay failure after linking special file and fsync If in the same transaction we rename a special file (fifo, character/block device or symbolic link), create a hard link for it having its old name then sync the log, we will end up with a log that can not be replayed and at when attempting to replay it, an EEXIST error is returned and mounting the filesystem fails. Example scenario: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/testdir $ mkfifo /mnt/testdir/foo # Make sure everything done so far is durably persisted. $ sync # Create some unrelated file and fsync it, this is just to create a log # tree. The file must be in the same directory as our special file. $ touch /mnt/testdir/f1 $ xfs_io -c "fsync" /mnt/testdir/f1 # Rename our special file and then create a hard link with its old name. $ mv /mnt/testdir/foo /mnt/testdir/bar $ ln /mnt/testdir/bar /mnt/testdir/foo # Create some other unrelated file and fsync it, this is just to persist # the log tree which was modified by the previous rename and link # operations. Alternatively we could have modified file f1 and fsync it. $ touch /mnt/f2 $ xfs_io -c "fsync" /mnt/f2 $ mount /dev/sdc /mnt mount: mount /dev/sdc on /mnt failed: File exists This happens because when both the log tree and the subvolume's tree have an entry in the directory "testdir" with the same name, that is, there is one key (258 INODE_REF 257) in the subvolume tree and another one in the log tree (where 258 is the inode number of our special file and 257 is the inode for directory "testdir"). Only the data of those two keys differs, in the subvolume tree the index field for inode reference has a value of 3 while the log tree it has a value of 5. Because the same key exists in both trees, but have different index, the log replay fails with an -EEXIST error when attempting to replay the inode reference from the log tree. Fix this by setting the last_unlink_trans field of the inode (our special file) to the current transaction id when a hard link is created, as this forces logging the parent directory inode, solving the conflict at log replay time. A new generic test case for fstests was also submitted. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 61f20c3..f7a1875 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5851,7 +5851,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, * this will force the logging code to walk the dentry chain * up for the file */ - if (S_ISREG(inode->vfs_inode.i_mode)) + if (!S_ISDIR(inode->vfs_inode.i_mode)) inode->last_unlink_trans = trans->transid; /* -- cgit v1.1 From 1f250e929a9c9332fd6ea34da684afee73837cfe Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 28 Feb 2018 15:56:10 +0000 Subject: Btrfs: fix log replay failure after unlink and link combination If we have a file with 2 (or more) hard links in the same directory, remove one of the hard links, create a new file (or link an existing file) in the same directory with the name of the removed hard link, and then finally fsync the new file, we end up with a log that fails to replay, causing a mount failure. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/testdir $ touch /mnt/testdir/foo $ ln /mnt/testdir/foo /mnt/testdir/bar $ sync $ unlink /mnt/testdir/bar $ touch /mnt/testdir/bar $ xfs_io -c "fsync" /mnt/testdir/bar $ mount /dev/sdb /mnt mount: mount(2) failed: /mnt: No such file or directory When replaying the log, for that example, we also see the following in dmesg/syslog: [71813.671307] BTRFS info (device dm-0): failed to delete reference to bar, inode 258 parent 257 [71813.674204] ------------[ cut here ]------------ [71813.675694] BTRFS: Transaction aborted (error -2) [71813.677236] WARNING: CPU: 1 PID: 13231 at fs/btrfs/inode.c:4128 __btrfs_unlink_inode+0x17b/0x355 [btrfs] [71813.679669] Modules linked in: btrfs xfs f2fs dm_flakey dm_mod dax ghash_clmulni_intel ppdev pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper evdev psmouse i2c_piix4 parport_pc i2c_core pcspkr sg serio_raw parport button sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod ata_generic sd_mod virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel floppy virtio e1000 scsi_mod [last unloaded: btrfs] [71813.679669] CPU: 1 PID: 13231 Comm: mount Tainted: G W 4.15.0-rc9-btrfs-next-56+ #1 [71813.679669] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [71813.679669] RIP: 0010:__btrfs_unlink_inode+0x17b/0x355 [btrfs] [71813.679669] RSP: 0018:ffffc90001cef738 EFLAGS: 00010286 [71813.679669] RAX: 0000000000000025 RBX: ffff880217ce4708 RCX: 0000000000000001 [71813.679669] RDX: 0000000000000000 RSI: ffffffff81c14bae RDI: 00000000ffffffff [71813.679669] RBP: ffffc90001cef7c0 R08: 0000000000000001 R09: 0000000000000001 [71813.679669] R10: ffffc90001cef5e0 R11: ffffffff8343f007 R12: ffff880217d474c8 [71813.679669] R13: 00000000fffffffe R14: ffff88021ccf1548 R15: 0000000000000101 [71813.679669] FS: 00007f7cee84c480(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000 [71813.679669] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [71813.679669] CR2: 00007f7cedc1abf9 CR3: 00000002354b4003 CR4: 00000000001606e0 [71813.679669] Call Trace: [71813.679669] btrfs_unlink_inode+0x17/0x41 [btrfs] [71813.679669] drop_one_dir_item+0xfa/0x131 [btrfs] [71813.679669] add_inode_ref+0x71e/0x851 [btrfs] [71813.679669] ? __lock_is_held+0x39/0x71 [71813.679669] ? replay_one_buffer+0x53/0x53a [btrfs] [71813.679669] replay_one_buffer+0x4a4/0x53a [btrfs] [71813.679669] ? rcu_read_unlock+0x3a/0x57 [71813.679669] ? __lock_is_held+0x39/0x71 [71813.679669] walk_up_log_tree+0x101/0x1d2 [btrfs] [71813.679669] walk_log_tree+0xad/0x188 [btrfs] [71813.679669] btrfs_recover_log_trees+0x1fa/0x31e [btrfs] [71813.679669] ? replay_one_extent+0x544/0x544 [btrfs] [71813.679669] open_ctree+0x1cf6/0x2209 [btrfs] [71813.679669] btrfs_mount_root+0x368/0x482 [btrfs] [71813.679669] ? trace_hardirqs_on_caller+0x14c/0x1a6 [71813.679669] ? __lockdep_init_map+0x176/0x1c2 [71813.679669] ? mount_fs+0x64/0x10b [71813.679669] mount_fs+0x64/0x10b [71813.679669] vfs_kern_mount+0x68/0xce [71813.679669] btrfs_mount+0x13e/0x772 [btrfs] [71813.679669] ? trace_hardirqs_on_caller+0x14c/0x1a6 [71813.679669] ? __lockdep_init_map+0x176/0x1c2 [71813.679669] ? mount_fs+0x64/0x10b [71813.679669] mount_fs+0x64/0x10b [71813.679669] vfs_kern_mount+0x68/0xce [71813.679669] do_mount+0x6e5/0x973 [71813.679669] ? memdup_user+0x3e/0x5c [71813.679669] SyS_mount+0x72/0x98 [71813.679669] entry_SYSCALL_64_fastpath+0x1e/0x8b [71813.679669] RIP: 0033:0x7f7cedf150ba [71813.679669] RSP: 002b:00007ffca71da688 EFLAGS: 00000206 [71813.679669] Code: 7f a0 e8 51 0c fd ff 48 8b 43 50 f0 0f ba a8 30 2c 00 00 02 72 17 41 83 fd fb 74 11 44 89 ee 48 c7 c7 7d 11 7f a0 e8 38 f5 8d e0 <0f> ff 44 89 e9 ba 20 10 00 00 eb 4d 48 8b 4d b0 48 8b 75 88 4c [71813.679669] ---[ end trace 83bd473fc5b4663b ]--- [71813.854764] BTRFS: error (device dm-0) in __btrfs_unlink_inode:4128: errno=-2 No such entry [71813.886994] BTRFS: error (device dm-0) in btrfs_replay_log:2307: errno=-2 No such entry (Failed to recover log tree) [71813.903357] BTRFS error (device dm-0): cleaner transaction attach returned -30 [71814.128078] BTRFS error (device dm-0): open_ctree failed This happens because the log has inode reference items for both inode 258 (the first file we created) and inode 259 (the second file created), and when processing the reference item for inode 258, we replace the corresponding item in the subvolume tree (which has two names, "foo" and "bar") witht he one in the log (which only has one name, "foo") without removing the corresponding dir index keys from the parent directory. Later, when processing the inode reference item for inode 259, which has a name of "bar" associated to it, we notice that dir index entries exist for that name and for a different inode, so we attempt to unlink that name, which fails because the inode reference item for inode 258 no longer has the name "bar" associated to it, making a call to btrfs_unlink_inode() fail with a -ENOENT error. Fix this by unlinking all the names in an inode reference item from a subvolume tree that are not present in the inode reference item found in the log tree, before overwriting it with the item from the log tree. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ++- fs/btrfs/inode-item.c | 44 ++++++++++++-------- fs/btrfs/tree-log.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 139 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0f521ba..da30877 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3095,7 +3095,10 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, u64 inode_objectid, u64 ref_objectid, int ins_len, int cow); -int btrfs_find_name_in_ext_backref(struct btrfs_path *path, +int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot, + const char *name, + int name_len, struct btrfs_inode_ref **ref_ret); +int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot, u64 ref_objectid, const char *name, int name_len, struct btrfs_inode_extref **extref_ret); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 39c968f..65e1a76 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -22,10 +22,10 @@ #include "transaction.h" #include "print-tree.h" -static int find_name_in_backref(struct btrfs_path *path, const char *name, - int name_len, struct btrfs_inode_ref **ref_ret) +int btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot, + const char *name, + int name_len, struct btrfs_inode_ref **ref_ret) { - struct extent_buffer *leaf; struct btrfs_inode_ref *ref; unsigned long ptr; unsigned long name_ptr; @@ -33,9 +33,8 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, u32 cur_offset = 0; int len; - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + item_size = btrfs_item_size_nr(leaf, slot); + ptr = btrfs_item_ptr_offset(leaf, slot); while (cur_offset < item_size) { ref = (struct btrfs_inode_ref *)(ptr + cur_offset); len = btrfs_inode_ref_name_len(leaf, ref); @@ -44,18 +43,19 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, if (len != name_len) continue; if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) { - *ref_ret = ref; + if (ref_ret) + *ref_ret = ref; return 1; } } return 0; } -int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid, +int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot, + u64 ref_objectid, const char *name, int name_len, struct btrfs_inode_extref **extref_ret) { - struct extent_buffer *leaf; struct btrfs_inode_extref *extref; unsigned long ptr; unsigned long name_ptr; @@ -63,9 +63,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid, u32 cur_offset = 0; int ref_name_len; - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + item_size = btrfs_item_size_nr(leaf, slot); + ptr = btrfs_item_ptr_offset(leaf, slot); /* * Search all extended backrefs in this item. We're only @@ -113,7 +112,9 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, return ERR_PTR(ret); if (ret > 0) return NULL; - if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref)) + if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], + ref_objectid, name, name_len, + &extref)) return NULL; return extref; } @@ -155,7 +156,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, * This should always succeed so error here will make the FS * readonly. */ - if (!btrfs_find_name_in_ext_backref(path, ref_objectid, + if (!btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], + ref_objectid, name, name_len, &extref)) { btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL); ret = -EROFS; @@ -225,7 +227,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, } else if (ret < 0) { goto out; } - if (!find_name_in_backref(path, name, name_len, &ref)) { + if (!btrfs_find_name_in_backref(path->nodes[0], path->slots[0], + name, name_len, &ref)) { ret = -ENOENT; search_ext_refs = 1; goto out; @@ -293,7 +296,9 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &key, ins_len); if (ret == -EEXIST) { - if (btrfs_find_name_in_ext_backref(path, ref_objectid, + if (btrfs_find_name_in_ext_backref(path->nodes[0], + path->slots[0], + ref_objectid, name, name_len, NULL)) goto out; @@ -351,7 +356,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, if (ret == -EEXIST) { u32 old_size; - if (find_name_in_backref(path, name, name_len, &ref)) + if (btrfs_find_name_in_backref(path->nodes[0], path->slots[0], + name, name_len, &ref)) goto out; old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); @@ -365,7 +371,9 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, ret = 0; } else if (ret < 0) { if (ret == -EOVERFLOW) { - if (find_name_in_backref(path, name, name_len, &ref)) + if (btrfs_find_name_in_backref(path->nodes[0], + path->slots[0], + name, name_len, &ref)) ret = -EEXIST; else ret = -EMLINK; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f7a1875..4c50f82 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -966,7 +966,9 @@ static noinline int backref_in_log(struct btrfs_root *log, ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); if (key->type == BTRFS_INODE_EXTREF_KEY) { - if (btrfs_find_name_in_ext_backref(path, ref_objectid, + if (btrfs_find_name_in_ext_backref(path->nodes[0], + path->slots[0], + ref_objectid, name, namelen, NULL)) match = 1; @@ -1190,7 +1192,8 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, read_extent_buffer(eb, *name, (unsigned long)&extref->name, *namelen); - *index = btrfs_inode_extref_index(eb, extref); + if (index) + *index = btrfs_inode_extref_index(eb, extref); if (parent_objectid) *parent_objectid = btrfs_inode_extref_parent(eb, extref); @@ -1211,12 +1214,102 @@ static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); - *index = btrfs_inode_ref_index(eb, ref); + if (index) + *index = btrfs_inode_ref_index(eb, ref); return 0; } /* + * Take an inode reference item from the log tree and iterate all names from the + * inode reference item in the subvolume tree with the same key (if it exists). + * For any name that is not in the inode reference item from the log tree, do a + * proper unlink of that name (that is, remove its entry from the inode + * reference item and both dir index keys). + */ +static int unlink_old_inode_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_inode *inode, + struct extent_buffer *log_eb, + int log_slot, + struct btrfs_key *key) +{ + int ret; + unsigned long ref_ptr; + unsigned long ref_end; + struct extent_buffer *eb; + +again: + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret > 0) { + ret = 0; + goto out; + } + if (ret < 0) + goto out; + + eb = path->nodes[0]; + ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]); + ref_end = ref_ptr + btrfs_item_size_nr(eb, path->slots[0]); + while (ref_ptr < ref_end) { + char *name = NULL; + int namelen; + u64 parent_id; + + if (key->type == BTRFS_INODE_EXTREF_KEY) { + ret = extref_get_fields(eb, ref_ptr, &namelen, &name, + NULL, &parent_id); + } else { + parent_id = key->offset; + ret = ref_get_fields(eb, ref_ptr, &namelen, &name, + NULL); + } + if (ret) + goto out; + + if (key->type == BTRFS_INODE_EXTREF_KEY) + ret = btrfs_find_name_in_ext_backref(log_eb, log_slot, + parent_id, name, + namelen, NULL); + else + ret = btrfs_find_name_in_backref(log_eb, log_slot, name, + namelen, NULL); + + if (!ret) { + struct inode *dir; + + btrfs_release_path(path); + dir = read_one_inode(root, parent_id); + if (!dir) { + ret = -ENOENT; + kfree(name); + goto out; + } + ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), + inode, name, namelen); + kfree(name); + iput(dir); + if (ret) + goto out; + goto again; + } + + kfree(name); + ref_ptr += namelen; + if (key->type == BTRFS_INODE_EXTREF_KEY) + ref_ptr += sizeof(struct btrfs_inode_extref); + else + ref_ptr += sizeof(struct btrfs_inode_ref); + } + ret = 0; + out: + btrfs_release_path(path); + return ret; +} + +/* * replay one inode back reference item found in the log tree. * eb, slot and key refer to the buffer and key found in the log tree. * root is the destination we are replaying into, and path is for temp @@ -1344,6 +1437,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } } + /* + * Before we overwrite the inode reference item in the subvolume tree + * with the item from the log tree, we must unlink all names from the + * parent directory that are in the subvolume's tree inode reference + * item, otherwise we end up with an inconsistent subvolume tree where + * dir index entries exist for a name but there is no inode reference + * item with the same name. + */ + ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot, + key); + if (ret) + goto out; + /* finally write the back reference in the inode */ ret = overwrite_item(trans, root, path, eb, slot, key); out: -- cgit v1.1 From 61277981dd535ee4b5947b6069badb16f3a21ace Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Tue, 13 Feb 2018 08:58:20 +0100 Subject: Makefile: Fix lying comment re. silentoldconfig The comment above the silentoldconfig invocation is outdated. 'make oldconfig' updates just .config and doesn't touch the include/config/ tree. This came up in https://lkml.org/lkml/2018/2/12/415. While fixing the comment, make it more informative by explaining the purpose of the unfortunately named silentoldconfig. I can't make sense of the comment re. auto.conf.cmd and a cleaned tree. include/config/auto.conf and include/config/auto.conf.cmd are both created simultaneously by silentoldconfig (in scripts/kconfig/confdata.c, by conf_write_autoconf()), and nothing seems to remove auto.conf.cmd that wouldn't remove auto.conf. Remove that part of the comment rather than blindly copying it. It might be a leftover from an older way of doing things. The include/config/auto.conf.cmd prerequisite might be there to ensure that silentoldconfig gets rerun if conf_write_autoconf() fails between writing out auto.conf.cmd and auto.conf (a comment in the function indicates that auto.conf is deliberately written out last to mark completion of the operation). It seems the Makefile dependency between include/config/auto.conf and .config would already take care of that though, since include/config/auto.conf would still be out of date re. .config if the operation fails. Cop out and leave the prerequisite in for now. Signed-off-by: Ulf Magnusson Signed-off-by: Masahiro Yamada --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 659a778..ae07c70 100644 --- a/Makefile +++ b/Makefile @@ -579,10 +579,9 @@ ifeq ($(KBUILD_EXTMOD),) # To avoid any implicit rule to kick in, define an empty command $(KCONFIG_CONFIG) include/config/auto.conf.cmd: ; -# If .config is newer than include/config/auto.conf, someone tinkered -# with it and forgot to run make oldconfig. -# if auto.conf.cmd is missing then we are probably in a cleaned tree so -# we execute the config step to be sure to catch updated Kconfig files +# The actual configuration files used during the build are stored in +# include/generated/ and include/config/. Update them if .config is newer than +# include/config/auto.conf (which mirrors .config). include/config/%.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig else -- cgit v1.1 From 6c49f359ca14f973970324afc9b20208fa0bbad5 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 15 Feb 2018 22:07:50 +0100 Subject: kbuild: disable sparse warnings about unknown attributes Currently, sparse issues warnings on code using an attribute it doesn't know about. One of the problem with this is that these warnings have no value for the developer, it's just noise for him. At best these warnings tell something about some deficiencies of sparse itself but not about a potential problem with code analyzed. A second problem with this is that sparse release are, alas, less frequent than new attributes are added to GCC. So, avoid the noise by asking sparse to not warn about attributes it doesn't know about. Reference: https://marc.info/?l=linux-sparse&m=151871600016790 Reference: https://marc.info/?l=linux-sparse&m=151871725417322 Signed-off-by: Luc Van Oostenryck Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ae07c70..7b30804 100644 --- a/Makefile +++ b/Makefile @@ -388,7 +388,7 @@ PYTHON = python CHECK = sparse CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ - -Wbitwise -Wno-return-void $(CF) + -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS = CFLAGS_MODULE = AFLAGS_MODULE = -- cgit v1.1 From cd81fc82b93fa408c30e08f59e5ef8caaa91d1d2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 17 Feb 2018 03:38:31 +0900 Subject: kconfig: add xstrdup() helper We already have xmalloc(), xcalloc(), and xrealloc((). Add xstrdup() as well to save tedious error handling. Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 2 +- scripts/kconfig/kxgettext.c | 2 +- scripts/kconfig/lkc.h | 1 + scripts/kconfig/symbol.c | 4 ++-- scripts/kconfig/util.c | 11 +++++++++++ scripts/kconfig/zconf.y | 2 +- 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 5c12dc9..df26c7b 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -178,7 +178,7 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p) case S_HEX: done: if (sym_string_valid(sym, p)) { - sym->def[def].val = strdup(p); + sym->def[def].val = xstrdup(p); sym->flags |= def_flags; } else { if (def != S_DEF_AUTO) diff --git a/scripts/kconfig/kxgettext.c b/scripts/kconfig/kxgettext.c index 2858738..240880a 100644 --- a/scripts/kconfig/kxgettext.c +++ b/scripts/kconfig/kxgettext.c @@ -101,7 +101,7 @@ static struct message *message__new(const char *msg, char *option, if (self->files == NULL) goto out_fail; - self->msg = strdup(msg); + self->msg = xstrdup(msg); if (self->msg == NULL) goto out_fail_msg; diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index 4e23feb..2d5ec2d 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -115,6 +115,7 @@ int file_write_dep(const char *name); void *xmalloc(size_t size); void *xcalloc(size_t nmemb, size_t size); void *xrealloc(void *p, size_t size); +char *xstrdup(const char *s); struct gstr { size_t len; diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index cca9663..2220bc4 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -183,7 +183,7 @@ static void sym_validate_range(struct symbol *sym) sprintf(str, "%lld", val2); else sprintf(str, "0x%llx", val2); - sym->curr.val = strdup(str); + sym->curr.val = xstrdup(str); } static void sym_set_changed(struct symbol *sym) @@ -849,7 +849,7 @@ struct symbol *sym_lookup(const char *name, int flags) : !(symbol->flags & (SYMBOL_CONST|SYMBOL_CHOICE)))) return symbol; } - new_name = strdup(name); + new_name = xstrdup(name); } else { new_name = NULL; hash = 0; diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c index b98a79e..c6f6e21 100644 --- a/scripts/kconfig/util.c +++ b/scripts/kconfig/util.c @@ -154,3 +154,14 @@ void *xrealloc(void *p, size_t size) fprintf(stderr, "Out of memory.\n"); exit(1); } + +char *xstrdup(const char *s) +{ + char *p; + + p = strdup(s); + if (p) + return p; + fprintf(stderr, "Out of memory.\n"); + exit(1); +} diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 4be9805..f5cb55f 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -127,7 +127,7 @@ no_mainmenu_stmt: /* empty */ * later regardless of whether it comes from the 'prompt' in * mainmenu_stmt or here */ - menu_add_prompt(P_MENU, strdup("Linux Kernel Configuration"), NULL); + menu_add_prompt(P_MENU, xstrdup("Linux Kernel Configuration"), NULL); }; -- cgit v1.1 From f3bc78d2d4b489590540ab2788d5376583e28173 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 28 Feb 2018 09:35:29 -0800 Subject: mq-deadline: Make sure to always unlock zones In case of a failed write request (all retries failed) and when using libata, the SCSI error handler calls scsi_finish_command(). In the case of blk-mq this means that scsi_mq_done() does not get called, that blk_mq_complete_request() does not get called and also that the mq-deadline .completed_request() method is not called. This results in the target zone of the failed write request being left in a locked state, preventing that any new write requests are issued to the same zone. Fix this by replacing the .completed_request() method with the .finish_request() method as this method is always called whether or not a request completes successfully. Since the .finish_request() method is only called by the blk-mq core if a .prepare_request() method exists, add a dummy .prepare_request() method. Fixes: 5700f69178e9 ("mq-deadline: Introduce zone locking support") Cc: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Damien Le Moal [ bvanassche: edited patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/mq-deadline.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index c56f211..8ec0ba9 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -536,12 +536,21 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, } /* + * Nothing to do here. This is defined only to ensure that .finish_request + * method is called upon request completion. + */ +static void dd_prepare_request(struct request *rq, struct bio *bio) +{ +} + +/* * For zoned block devices, write unlock the target zone of * completed write requests. Do this while holding the zone lock * spinlock so that the zone is never unlocked while deadline_fifo_request() - * while deadline_next_request() are executing. + * or deadline_next_request() are executing. This function is called for + * all requests, whether or not these requests complete successfully. */ -static void dd_completed_request(struct request *rq) +static void dd_finish_request(struct request *rq) { struct request_queue *q = rq->q; @@ -756,7 +765,8 @@ static struct elevator_type mq_deadline = { .ops.mq = { .insert_requests = dd_insert_requests, .dispatch_request = dd_dispatch_request, - .completed_request = dd_completed_request, + .prepare_request = dd_prepare_request, + .finish_request = dd_finish_request, .next_request = elv_rb_latter_request, .former_request = elv_rb_former_request, .bio_merge = dd_bio_merge, -- cgit v1.1 From 1c789249578895bb14ab62b4327306439b754857 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 1 Mar 2018 14:24:51 +0800 Subject: ceph: fix potential memory leak in init_caches() There is lack of cache destroy operation for ceph_file_cachep when failing from fscache register. Signed-off-by: Chengguang Xu Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1c470b4..fb2bc9c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -713,14 +713,17 @@ static int __init init_caches(void) goto bad_dentry; ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); - if (!ceph_file_cachep) goto bad_file; - if ((error = ceph_fscache_register())) - goto bad_file; + error = ceph_fscache_register(); + if (error) + goto bad_fscache; return 0; + +bad_fscache: + kmem_cache_destroy(ceph_file_cachep); bad_file: kmem_cache_destroy(ceph_dentry_cachep); bad_dentry: -- cgit v1.1 From 7c5a0dcf557c6511a61e092ba887de28882fe857 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Tue, 27 Feb 2018 20:10:03 +0800 Subject: block: fix the count of PGPGOUT for WRITE_SAME The vm counters is counted in sectors, so we should do the conversation in submit_bio. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Cc: stable@vger.kernel.org Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2d1a7bb..6d82c4f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2434,7 +2434,7 @@ blk_qc_t submit_bio(struct bio *bio) unsigned int count; if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) - count = queue_logical_block_size(bio->bi_disk->queue); + count = queue_logical_block_size(bio->bi_disk->queue) >> 9; else count = bio_sectors(bio); -- cgit v1.1 From 9c0fb1e313aaf4e8edec22433c8b22dd308e466c Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Tue, 27 Feb 2018 20:10:18 +0800 Subject: block: display the correct diskname for bio bio_devname use __bdevname to display the device name, and can only show the major and minor of the part0, Fix this by using disk_name to display the correct name. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe --- block/partition-generic.c | 6 ++++++ include/linux/bio.h | 4 +--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/block/partition-generic.c b/block/partition-generic.c index 91622db..08dabcd 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -51,6 +51,12 @@ const char *bdevname(struct block_device *bdev, char *buf) EXPORT_SYMBOL(bdevname); +const char *bio_devname(struct bio *bio, char *buf) +{ + return disk_name(bio->bi_disk, bio->bi_partno, buf); +} +EXPORT_SYMBOL(bio_devname); + /* * There's very little reason to use this, you should really * have a struct block_device just about everywhere and use diff --git a/include/linux/bio.h b/include/linux/bio.h index d0eb659..ce547a2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -511,6 +511,7 @@ void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); +extern const char *bio_devname(struct bio *bio, char *buffer); #define bio_set_dev(bio, bdev) \ do { \ @@ -529,9 +530,6 @@ do { \ #define bio_dev(bio) \ disk_devt((bio)->bi_disk) -#define bio_devname(bio, buf) \ - __bdevname(bio_dev(bio), (buf)) - #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); void bio_disassociate_task(struct bio *bio); -- cgit v1.1 From 158e61865a31ef7abf39629c37285810504d60b5 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Tue, 27 Feb 2018 20:10:22 +0800 Subject: block: fix a typo Fix a typo in pkt_start_recovery. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Reviewed-by: Christoph Hellwig Signed-off-by: Jiufei Xue Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 531a091..c61d20c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt) pkt->sector = new_sector; bio_reset(pkt->bio); - bio_set_set(pkt->bio, pd->bdev); + bio_set_dev(pkt->bio, pd->bdev); bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0); pkt->bio->bi_iter.bi_sector = new_sector; pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE; -- cgit v1.1 From 651438bb0af5213f1f70d66e75bf11d08cb5537a Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Thu, 15 Feb 2018 14:05:10 -0600 Subject: nvme-pci: Fix EEH failure on ppc Triggering PPC EEH detection and handling requires a memory mapped read failure. The NVMe driver removed the periodic health check MMIO, so there's no early detection mechanism to trigger the recovery. Instead, the detection now happens when the nvme driver handles an IO timeout event. This takes the pci channel offline, so we do not want the driver to proceed with escalating its own recovery efforts that may conflict with the EEH handler. This patch ensures the driver will observe the channel was set to offline after a failed MMIO read and resets the IO timer so the EEH handler has a chance to recover the device. Signed-off-by: Wen Xiong [updated change log] Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5933a5c..e5ce07f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) if (!(csts & NVME_CSTS_CFS) && !nssro) return false; - /* If PCI error recovery process is happening, we cannot reset or - * the recovery mechanism will surely fail. - */ - if (pci_channel_offline(to_pci_dev(dev->dev))) - return false; - return true; } @@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; u32 csts = readl(dev->bar + NVME_REG_CSTS); + /* If PCI error recovery process is happening, we cannot reset or + * the recovery mechanism will surely fail. + */ + mb(); + if (pci_channel_offline(to_pci_dev(dev->dev))) + return BLK_EH_RESET_TIMER; + /* * Reset immediately if the controller is failed */ -- cgit v1.1 From cb57469c9573f6018cd1302953dd45d6e05aba7b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 16 Feb 2018 11:02:01 -0800 Subject: staging: android: ashmem: Fix lockdep issue during llseek ashmem_mutex create a chain of dependencies like so: (1) mmap syscall -> mmap_sem -> (acquired) ashmem_mmap ashmem_mutex (try to acquire) (block) (2) llseek syscall -> ashmem_llseek -> ashmem_mutex -> (acquired) inode_lock -> inode->i_rwsem (try to acquire) (block) (3) getdents -> iterate_dir -> inode_lock -> inode->i_rwsem (acquired) copy_to_user -> mmap_sem (try to acquire) There is a lock ordering created between mmap_sem and inode->i_rwsem causing a lockdep splat [2] during a syzcaller test, this patch fixes the issue by unlocking the mutex earlier. Functionally that's Ok since we don't need to protect vfs_llseek. [1] https://patchwork.kernel.org/patch/10185031/ [2] https://lkml.org/lkml/2018/1/10/48 Acked-by: Todd Kjos Cc: Arve Hjonnevag Cc: stable@vger.kernel.org Reported-by: syzbot+8ec30bb7bf1a981a2012@syzkaller.appspotmail.com Signed-off-by: Joel Fernandes Acked-by: Greg Hackmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 6dbba5a..d545036 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -326,24 +326,23 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) mutex_lock(&ashmem_mutex); if (asma->size == 0) { - ret = -EINVAL; - goto out; + mutex_unlock(&ashmem_mutex); + return -EINVAL; } if (!asma->file) { - ret = -EBADF; - goto out; + mutex_unlock(&ashmem_mutex); + return -EBADF; } + mutex_unlock(&ashmem_mutex); + ret = vfs_llseek(asma->file, offset, origin); if (ret < 0) - goto out; + return ret; /** Copy f_pos from backing file, since f_ops->llseek() sets it */ file->f_pos = asma->file->f_pos; - -out: - mutex_unlock(&ashmem_mutex); return ret; } -- cgit v1.1 From 16ccfff2897613007b5eda9e29d65303c6280026 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Feb 2018 20:17:42 +0800 Subject: nvme: pci: pass max vectors as num_possible_cpus() to pci_alloc_irq_vectors 84676c1f21 ("genirq/affinity: assign vectors to all possible CPUs") has switched to do irq vectors spread among all possible CPUs, so pass num_possible_cpus() as max vecotrs to be assigned. For example, in a 8 cores system, 0~3 online, 4~8 offline/not present, see 'lscpu': [ming@box]$lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 4 On-line CPU(s) list: 0-3 Thread(s) per core: 1 Core(s) per socket: 2 Socket(s): 2 NUMA node(s): 2 ... NUMA node0 CPU(s): 0-3 NUMA node1 CPU(s): ... 1) before this patch, follows the allocated vectors and their affinity: irq 47, cpu list 0,4 irq 48, cpu list 1,6 irq 49, cpu list 2,5 irq 50, cpu list 3,7 2) after this patch, follows the allocated vectors and their affinity: irq 43, cpu list 0 irq 44, cpu list 1 irq 45, cpu list 2 irq 46, cpu list 3 irq 47, cpu list 4 irq 48, cpu list 6 irq 49, cpu list 5 irq 50, cpu list 7 Cc: Keith Busch Cc: Sagi Grimberg Cc: Thomas Gleixner Cc: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e5ce07f..b6f43b7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1914,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; - nr_io_queues = num_present_cpus(); + nr_io_queues = num_possible_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; -- cgit v1.1 From e82df670235138575b37ff0ec24412a471efd97f Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Fri, 23 Feb 2018 19:41:30 +0800 Subject: virtio_ring: fix num_free handling in error case The vq->vq.num_free hasn't been changed when error happens, so it shouldn't be changed when handling the error. Fixes: 780bc7903a32 ("virtio_ring: Support DMA APIs") Cc: Andy Lutomirski Cc: Michael S. Tsirkin Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index eb30f3e..71458f4 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -428,8 +428,6 @@ unmap_release: i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next); } - vq->vq.num_free += total_sg; - if (indirect) kfree(desc); -- cgit v1.1 From 801e459a6f3a63af9d447e6249088c76ae16efc4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 21 Feb 2018 13:39:51 -0600 Subject: KVM: x86: Add a framework for supporting MSR-based features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a new KVM capability that allows bits within MSRs to be recognized as features. Two new ioctls are added to the /dev/kvm ioctl routine to retrieve the list of these MSRs and then retrieve their values. A kvm_x86_ops callback is used to determine support for the listed MSR-based features. Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini [Tweaked documentation. - Radim] Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 40 ++++++++++++++------- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 6 ++++ arch/x86/kvm/vmx.c | 6 ++++ arch/x86/kvm/x86.c | 75 ++++++++++++++++++++++++++++++++++++--- include/uapi/linux/kvm.h | 2 ++ 6 files changed, 114 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 792fa87..d6b3ff5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -123,14 +123,15 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the flag KVM_VM_MIPS_VZ. -4.3 KVM_GET_MSR_INDEX_LIST +4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST -Capability: basic +Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST Architectures: x86 -Type: system +Type: system ioctl Parameters: struct kvm_msr_list (in/out) Returns: 0 on success; -1 on error Errors: + EFAULT: the msr index list cannot be read from or written to E2BIG: the msr index list is to be to fit in the array specified by the user. @@ -139,16 +140,23 @@ struct kvm_msr_list { __u32 indices[0]; }; -This ioctl returns the guest msrs that are supported. The list varies -by kvm version and host processor, but does not change otherwise. The -user fills in the size of the indices array in nmsrs, and in return -kvm adjusts nmsrs to reflect the actual number of msrs and fills in -the indices array with their numbers. +The user fills in the size of the indices array in nmsrs, and in return +kvm adjusts nmsrs to reflect the actual number of msrs and fills in the +indices array with their numbers. + +KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported. The list +varies by kvm version and host processor, but does not change otherwise. Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are not returned in the MSR list, as different vcpus can have a different number of banks, as set via the KVM_X86_SETUP_MCE ioctl. +KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed +to the KVM_GET_MSRS system ioctl. This lets userspace probe host capabilities +and processor features that are exposed via MSRs (e.g., VMX capabilities). +This list also varies by kvm version and host processor, but does not change +otherwise. + 4.4 KVM_CHECK_EXTENSION @@ -475,14 +483,22 @@ Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead. 4.18 KVM_GET_MSRS -Capability: basic +Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system) Architectures: x86 -Type: vcpu ioctl +Type: system ioctl, vcpu ioctl Parameters: struct kvm_msrs (in/out) -Returns: 0 on success, -1 on error +Returns: number of msrs successfully returned; + -1 on error + +When used as a system ioctl: +Reads the values of MSR-based features that are available for the VM. This +is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values. +The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST +in a system ioctl. +When used as a vcpu ioctl: Reads model-specific registers from the vcpu. Supported msr indices can -be obtained using KVM_GET_MSR_INDEX_LIST. +be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl. struct kvm_msrs { __u32 nmsrs; /* number of msrs in entries */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0a9e330..bab0694 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1095,6 +1095,8 @@ struct kvm_x86_ops { int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); + + int (*get_msr_feature)(struct kvm_msr_entry *entry); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3d8377f..d8db947 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3869,6 +3869,11 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -6832,6 +6837,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, + .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ec14f23..fafc1f6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3226,6 +3226,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, return !(val & ~valid_bits); } +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ + return 1; +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -12296,6 +12301,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .vcpu_put = vmx_vcpu_put, .update_bp_intercept = update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 96edda8..239fc1f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1049,6 +1049,28 @@ static u32 emulated_msrs[] = { static unsigned num_emulated_msrs; +/* + * List of msr numbers which are used to expose MSR-based features that + * can be used by a hypervisor to validate requested CPU features. + */ +static u32 msr_based_features[] = { +}; + +static unsigned int num_msr_based_features; + +static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + struct kvm_msr_entry msr; + + msr.index = index; + if (kvm_x86_ops->get_msr_feature(&msr)) + return 1; + + *data = msr.data; + + return 0; +} + bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) @@ -2680,13 +2702,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - int i, idx; + int i; - idx = srcu_read_lock(&vcpu->kvm->srcu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; - srcu_read_unlock(&vcpu->kvm->srcu, idx); return i; } @@ -2785,6 +2805,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_GET_MSR_FEATURES: r = 1; break; case KVM_CAP_ADJUST_CLOCK: @@ -2899,6 +2920,31 @@ long kvm_arch_dev_ioctl(struct file *filp, goto out; r = 0; break; + case KVM_GET_MSR_FEATURE_INDEX_LIST: { + struct kvm_msr_list __user *user_msr_list = argp; + struct kvm_msr_list msr_list; + unsigned int n; + + r = -EFAULT; + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) + goto out; + n = msr_list.nmsrs; + msr_list.nmsrs = num_msr_based_features; + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) + goto out; + r = -E2BIG; + if (n < msr_list.nmsrs) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msr_based_features, + num_msr_based_features * sizeof(u32))) + goto out; + r = 0; + break; + } + case KVM_GET_MSRS: + r = msr_io(NULL, argp, do_get_msr_feature, 1); + break; } default: r = -EINVAL; @@ -3636,12 +3682,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_MSRS: + case KVM_GET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_get_msr, 1); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; - case KVM_SET_MSRS: + } + case KVM_SET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_set_msr, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; + } case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -4464,6 +4516,19 @@ static void kvm_init_msr_list(void) j++; } num_emulated_msrs = j; + + for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + struct kvm_msr_entry msr; + + msr.index = msr_based_features[i]; + if (kvm_x86_ops->get_msr_feature(&msr)) + continue; + + if (j < i) + msr_based_features[j] = msr_based_features[i]; + j++; + } + num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0fb5ef9..7b26d4b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -761,6 +761,7 @@ struct kvm_ppc_resize_hpt { #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) +#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) /* * Extension capability list. @@ -934,6 +935,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_AIS_MIGRATION 150 #define KVM_CAP_PPC_GET_CPU_CHAR 151 #define KVM_CAP_S390_BPB 152 +#define KVM_CAP_GET_MSR_FEATURES 153 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.1 From d1d93fa90f1afa926cb060b7f78ab01a65705b4d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 24 Feb 2018 00:18:20 +0100 Subject: KVM: SVM: Add MSR-based feature support for serializing LFENCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to determine if LFENCE is a serializing instruction on AMD processors, MSR 0xc0011029 (MSR_F10H_DECFG) must be read and the state of bit 1 checked. This patch will add support to allow a guest to properly make this determination. Add the MSR feature callback operation to svm.c and add MSR 0xc0011029 to the list of MSR-based features. If LFENCE is serializing, then the feature is supported, allowing the hypervisor to set the value of the MSR that guest will see. Support is also added to write (hypervisor only) and read the MSR value for the guest. A write by the guest will result in a #GP. A read by the guest will return the value as set by the host. In this way, the support to expose the feature to the guest is controlled by the hypervisor. Reviewed-by: Paolo Bonzini Signed-off-by: Tom Lendacky Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 36 +++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d8db947..f874798 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -178,6 +178,8 @@ struct vcpu_svm { uint64_t sysenter_eip; uint64_t tsc_aux; + u64 msr_decfg; + u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -3871,7 +3873,18 @@ static int cr8_write_interception(struct vcpu_svm *svm) static int svm_get_msr_feature(struct kvm_msr_entry *msr) { - return 1; + msr->data = 0; + + switch (msr->index) { + case MSR_F10H_DECFG: + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + break; + default: + return 1; + } + + return 0; } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3969,6 +3982,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; + case MSR_F10H_DECFG: + msr_info->data = svm->msr_decfg; + break; default: return kvm_get_msr_common(vcpu, msr_info); } @@ -4147,6 +4163,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_F10H_DECFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; + if (svm_get_msr_feature(&msr_entry)) + return 1; + + /* Check the supported bits */ + if (data & ~msr_entry.data) + return 1; + + /* Don't allow the guest to change a bit, #GP */ + if (!msr->host_initiated && (data ^ msr_entry.data)) + return 1; + + svm->msr_decfg = data; + break; + } case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 239fc1f..54b4ed5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1054,6 +1054,7 @@ static unsigned num_emulated_msrs; * can be used by a hypervisor to validate requested CPU features. */ static u32 msr_based_features[] = { + MSR_F10H_DECFG, }; static unsigned int num_msr_based_features; -- cgit v1.1 From 1cedc6385d5f7310af0a08831c6c4303486ba850 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 1 Mar 2018 08:08:23 -0800 Subject: platform/x86: wmi: Fix misuse of vsprintf extension %pULL %pULL doesn't officially exist but %pUL does. Miscellanea: o Add missing newlines to a couple logging messages Signed-off-by: Joe Perches Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/wmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index c0c8945..8796211 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -945,7 +945,7 @@ static int wmi_dev_probe(struct device *dev) wblock->char_dev.mode = 0444; ret = misc_register(&wblock->char_dev); if (ret) { - dev_warn(dev, "failed to register char dev: %d", ret); + dev_warn(dev, "failed to register char dev: %d\n", ret); ret = -ENOMEM; goto probe_misc_failure; } @@ -1048,7 +1048,7 @@ static int wmi_create_device(struct device *wmi_bus_dev, if (result) { dev_warn(wmi_bus_dev, - "%s data block query control method not found", + "%s data block query control method not found\n", method); return result; } @@ -1198,7 +1198,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device) retval = device_add(&wblock->dev.dev); if (retval) { - dev_err(wmi_bus_dev, "failed to register %pULL\n", + dev_err(wmi_bus_dev, "failed to register %pUL\n", wblock->gblock.guid); if (debug_event) wmi_method_enable(wblock, 0); -- cgit v1.1 From 773daa3caf5d3f87fdb1ab43e9c1b367a38fa394 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Feb 2018 14:32:48 +0100 Subject: net: ipv4: avoid unused variable warning for sysctl The newly introudced ip_min_valid_pmtu variable is only used when CONFIG_SYSCTL is set: net/ipv4/route.c:135:12: error: 'ip_min_valid_pmtu' defined but not used [-Werror=unused-variable] This moves it to the other variables like it, to avoid the harmless warning. Fixes: c7272c2f1229 ("net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68") Signed-off-by: Arnd Bergmann Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9548437..465196e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,8 +133,6 @@ static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; -static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; - /* * Interface to generic destination cache. */ @@ -2821,6 +2819,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; +static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, -- cgit v1.1 From 66421c1ec340096b291af763ed5721314cdd9c5c Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Feb 2018 14:03:30 +0800 Subject: KVM: X86: Introduce kvm_get_msr_feature() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce kvm_get_msr_feature() to handle the msrs which are supported by different vendors and sharing the same emulation logic. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Cc: Nadav Amit Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54b4ed5..d97620e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1059,13 +1059,25 @@ static u32 msr_based_features[] = { static unsigned int num_msr_based_features; +static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + default: + if (kvm_x86_ops->get_msr_feature(msr)) + return 1; + } + return 0; +} + static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { struct kvm_msr_entry msr; + int r; msr.index = index; - if (kvm_x86_ops->get_msr_feature(&msr)) - return 1; + r = kvm_get_msr_feature(&msr); + if (r) + return r; *data = msr.data; @@ -4522,7 +4534,7 @@ static void kvm_init_msr_list(void) struct kvm_msr_entry msr; msr.index = msr_based_features[i]; - if (kvm_x86_ops->get_msr_feature(&msr)) + if (kvm_get_msr_feature(&msr)) continue; if (j < i) -- cgit v1.1 From 518e7b94817abed94becfe6a44f1ece0d4745afe Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Feb 2018 14:03:31 +0800 Subject: KVM: X86: Allow userspace to define the microcode version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux (among the others) has checks to make sure that certain features aren't enabled on a certain family/model/stepping if the microcode version isn't greater than or equal to a known good version. By exposing the real microcode version, we're preventing buggy guests that don't check that they are running virtualized (i.e., they should trust the hypervisor) from disabling features that are effectively not buggy. Suggested-by: Filippo Sironi Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Liran Alon Cc: Nadav Amit Cc: Borislav Petkov Cc: Tom Lendacky Signed-off-by: Wanpeng Li Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 4 +--- arch/x86/kvm/vmx.c | 1 + arch/x86/kvm/x86.c | 11 +++++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bab0694..b605a5b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -507,6 +507,7 @@ struct kvm_vcpu_arch { u64 smi_count; bool tpr_access_reporting; u64 ia32_xss; + u64 microcode_version; /* * Paging state of the vcpu diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f874798..312f33f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1907,6 +1907,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; if (!init_event) { @@ -3962,9 +3963,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; case MSR_F15H_IC_CFG: { int family, model; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fafc1f6..5912148 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5771,6 +5771,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d97620e..11649d2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1055,6 +1055,7 @@ static unsigned num_emulated_msrs; */ static u32 msr_based_features[] = { MSR_F10H_DECFG, + MSR_IA32_UCODE_REV, }; static unsigned int num_msr_based_features; @@ -1062,6 +1063,9 @@ static unsigned int num_msr_based_features; static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { + case MSR_IA32_UCODE_REV: + rdmsrl(msr->index, msr->data); + break; default: if (kvm_x86_ops->get_msr_feature(msr)) return 1; @@ -2257,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_AMD64_NB_CFG: - case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case MSR_VM_HSAVE_PA: case MSR_AMD64_PATCH_LOADER: @@ -2265,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: break; + case MSR_IA32_UCODE_REV: + if (msr_info->host_initiated) + vcpu->arch.microcode_version = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2560,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0; break; case MSR_IA32_UCODE_REV: - msr_info->data = 0x100000000ULL; + msr_info->data = vcpu->arch.microcode_version; break; case MSR_MTRRcap: case 0x200 ... 0x2ff: -- cgit v1.1 From b7e31be385584afe7f073130e8e570d53c95f7fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 1 Mar 2018 15:24:25 +0100 Subject: KVM: x86: fix vcpu initialization with userspace lapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving the code around broke this rare configuration. Use this opportunity to finally call lapic reset from vcpu reset. Reported-by: syzbot+fb7a33a4b6c35007a72b@syzkaller.appspotmail.com Suggested-by: Paolo Bonzini Fixes: 0b2e9904c159 ("KVM: x86: move LAPIC initialization after VMCS creation") Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 10 ++++------ arch/x86/kvm/x86.c | 3 ++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cc5fe7a..391dda8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { - struct kvm_lapic *apic; + struct kvm_lapic *apic = vcpu->arch.apic; int i; - apic_debug("%s\n", __func__); + if (!apic) + return; - ASSERT(vcpu); - apic = vcpu->arch.apic; - ASSERT(apic != NULL); + apic_debug("%s\n", __func__); /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2568,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { - kvm_lapic_reset(vcpu, true); kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 11649d2..18b5ca7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8060,7 +8060,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_vcpu_mtrr_init(vcpu); vcpu_load(vcpu); kvm_vcpu_reset(vcpu, false); - kvm_lapic_reset(vcpu, false); kvm_mmu_setup(vcpu); vcpu_put(vcpu); return 0; @@ -8103,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { + kvm_lapic_reset(vcpu, init_event); + vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; -- cgit v1.1 From 172ed391f6e40f799273e005405041b57c343cf7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Mar 2018 14:10:31 -0800 Subject: xfs: don't allocate COW blocks for zeroing holes or unwritten extents The iomap zeroing interface is smart enough to skip zeroing holes or unwritten extents. Don't subvert this logic for reflink files. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 66e1edb..4e771e0 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -955,6 +955,13 @@ static inline bool imap_needs_alloc(struct inode *inode, (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN); } +static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps) +{ + return nimaps && + imap->br_startblock != HOLESTARTBLOCK && + imap->br_state != XFS_EXT_UNWRITTEN; +} + static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) { /* @@ -1024,7 +1031,9 @@ xfs_file_iomap_begin( goto out_unlock; } - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + if (xfs_is_reflink_inode(ip) && + ((flags & IOMAP_WRITE) || + ((flags & IOMAP_ZERO) && needs_cow_for_zeroing(&imap, nimaps)))) { if (flags & IOMAP_DIRECT) { /* * A reflinked inode will result in CoW alloc. -- cgit v1.1 From af5b5afe9ac68406892fa343fafba4ea988c3c69 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Mar 2018 14:12:12 -0800 Subject: xfs: don't start out with the exclusive ilock for direct I/O There is no reason to take the ilock exclusively at the start of xfs_file_iomap_begin for direct I/O, given that it will be demoted just before calling xfs_iomap_write_direct anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 4e771e0..ee01859 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -965,13 +965,11 @@ static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps) static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) { /* - * COW writes will allocate delalloc space, so we need to make sure - * to take the lock exclusively here. + * COW writes may allocate delalloc space or convert unwritten COW + * extents, so we need to make sure to take the lock exclusively here. */ if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) return true; - if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE)) - return true; return false; } -- cgit v1.1 From ff3d8b9c4cb95180ae6ef9eed28409840525b9fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Mar 2018 14:12:45 -0800 Subject: xfs: don't block on the ilock for RWF_NOWAIT Fix xfs_file_iomap_begin to trylock the ilock if IOMAP_NOWAIT is passed, so that we don't block io_submit callers. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ee01859..046469f 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -970,6 +970,15 @@ static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) */ if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) return true; + + /* + * Extents not yet cached requires exclusive access, don't block. + * This is an opencoded xfs_ilock_data_map_shared() to cater for the + * non-blocking behaviour. + */ + if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && + !(ip->i_df.if_flags & XFS_IFEXTENTS)) + return true; return false; } @@ -998,16 +1007,18 @@ xfs_file_iomap_begin( return xfs_file_iomap_begin_delay(inode, offset, length, iomap); } - if (need_excl_ilock(ip, flags)) { + if (need_excl_ilock(ip, flags)) lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, XFS_ILOCK_EXCL); - } else { - lockmode = xfs_ilock_data_map_shared(ip); - } + else + lockmode = XFS_ILOCK_SHARED; - if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) { - error = -EAGAIN; - goto out_unlock; + if (flags & IOMAP_NOWAIT) { + if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) + return -EAGAIN; + if (!xfs_ilock_nowait(ip, lockmode)) + return -EAGAIN; + } else { + xfs_ilock(ip, lockmode); } ASSERT(offset <= mp->m_super->s_maxbytes); -- cgit v1.1 From f4bc1eefc1608e9a7d40f5fdfc3acd560ba6f477 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 17 Feb 2018 03:38:32 +0900 Subject: kconfig: set SYMBOL_AUTO to the symbol marked with defconfig_list The 'defconfig_list' is a weird attribute. If the '.config' is missing, conf_read_simple() iterates over all visible defaults, then it uses the first one for which fopen() succeeds. config DEFCONFIG_LIST string depends on !UML option defconfig_list default "/lib/modules/$UNAME_RELEASE/.config" default "/etc/kernel-config" default "/boot/config-$UNAME_RELEASE" default "$ARCH_DEFCONFIG" default "arch/$ARCH/defconfig" However, like other symbols, the first visible default is always written out to the .config file. This might be different from what has been actually used. For example, on my machine, the third one "/boot/config-$UNAME_RELEASE" is opened, like follows: $ rm .config $ make oldconfig 2>/dev/null scripts/kconfig/conf --oldconfig Kconfig # # using defaults found in /boot/config-4.4.0-112-generic # * * Restart config... * * * IRQ subsystem * Expose irq internals in debugfs (GENERIC_IRQ_DEBUGFS) [N/y/?] (NEW) However, the resulted .config file contains the first one since it is visible: $ grep CONFIG_DEFCONFIG_LIST .config CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" In order to stop confusing people, prevent this CONFIG option from being written to the .config file. Signed-off-by: Masahiro Yamada Reviewed-by: Ulf Magnusson --- scripts/kconfig/menu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 9922285..36cd3e1 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -212,6 +212,7 @@ void menu_add_option(int token, char *arg) sym_defconfig_list = current_entry->sym; else if (sym_defconfig_list != current_entry->sym) zconf_error("trying to redefine defconfig symbol"); + sym_defconfig_list->flags |= SYMBOL_AUTO; break; case T_OPT_ENV: prop_add_env(arg); -- cgit v1.1 From 1b1e4ee86e0064ea2a8b8e5ead13734b1e813a92 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Feb 2018 02:09:42 +0900 Subject: sh: fix build error for empty CONFIG_BUILTIN_DTB_SOURCE If CONFIG_USE_BUILTIN_DTB is enabled, but CONFIG_BUILTIN_DTB_SOURCE is empty (for example, allmodconfig), it fails to build, like this: make[2]: *** No rule to make target 'arch/sh/boot/dts/.dtb.o', needed by 'arch/sh/boot/dts/built-in.o'. Stop. Surround obj-y with ifneq ... endif. I replaced $(CONFIG_USE_BUILTIN_DTB) with 'y' since this is always the case from the following code from arch/sh/Makefile: core-$(CONFIG_USE_BUILTIN_DTB) += arch/sh/boot/dts/ Signed-off-by: Masahiro Yamada --- arch/sh/boot/dts/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile index 715def0..01d0f7f 100644 --- a/arch/sh/boot/dts/Makefile +++ b/arch/sh/boot/dts/Makefile @@ -1 +1,3 @@ -obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +ifneq ($(CONFIG_BUILTIN_DTB_SOURCE),"") +obj-y += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o +endif -- cgit v1.1 From bf0bbdcf1003220b7ca9a6aa00a84e27e94287e8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Feb 2018 20:40:29 +0900 Subject: kconfig: Don't leak choice names during parsing The named choice is not used in the kernel tree, but if it were used, it would not be freed. The intention of the named choice can be seen in the log of commit 5a1aa8a1aff6 ("kconfig: add named choice group"). Signed-off-by: Masahiro Yamada Reviewed-by: Ulf Magnusson --- scripts/kconfig/zconf.y | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index f5cb55f..ad6305b 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -276,6 +276,7 @@ choice: T_CHOICE word_opt T_EOL sym->flags |= SYMBOL_AUTO; menu_add_entry(sym); menu_add_expr(P_CHOICE, NULL, NULL); + free($2); printd(DEBUG_PARSE, "%s:%d:choice\n", zconf_curname(), zconf_lineno()); }; -- cgit v1.1 From a7b151fffbf5236dd24b60aa0a83dc1c53c9f6c0 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Wed, 21 Feb 2018 12:25:07 +0800 Subject: kbuild: drop superfluous GCC_PLUGINS_CFLAGS assignment GCC_PLUGINS_CFLAGS is already in the environment, so it is superfluous to add it in commandline of final build of init/. Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- scripts/link-vmlinux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index c0d129d..be56a11 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -246,7 +246,7 @@ else fi; # final build of init/ -${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init GCC_PLUGINS_CFLAGS="${GCC_PLUGINS_CFLAGS}" +${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init archive_builtin -- cgit v1.1 From 0da4fabdf40a36e22b50f9a4143be18a0d98963f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 23 Feb 2018 13:56:52 +0900 Subject: kbuild: test --build-id linker flag by ld-option instead of cc-ldoption '--build-id' is passed to $(LD), so it should be tested by 'ld-option'. This seems a kind of misconversion when ld-option was renamed to cc-ldoption. Commit f86fd3066052 ("kbuild: rename ld-option to cc-ldoption") renamed all instances of 'ld-option' to 'cc-ldoption'. Then, commit 691ef3e7fdc1 ("kbuild: introduce ld-option") re-added 'ld-option' as a new implementation. Signed-off-by: Masahiro Yamada --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7b30804..20c01bd 100644 --- a/Makefile +++ b/Makefile @@ -856,8 +856,7 @@ KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS) KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS) # Use --build-id when available. -LDFLAGS_BUILD_ID := $(patsubst -Wl$(comma)%,%,\ - $(call cc-ldoption, -Wl$(comma)--build-id,)) +LDFLAGS_BUILD_ID := $(call ld-option, --build-id) KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID) LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID) -- cgit v1.1 From cbf7a90e304c4f2e1a867b2245edd408a7a0ed8b Mon Sep 17 00:00:00 2001 From: Cao jin Date: Tue, 27 Feb 2018 16:16:19 +0800 Subject: kbuild/kallsyms: trivial typo fix Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- scripts/kallsyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 9ee9bf7..6579265 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -595,7 +595,7 @@ static void optimize_result(void) * original char code */ if (!best_table_len[i]) { - /* find the token with the breates profit value */ + /* find the token with the best profit value */ best = find_best_token(); if (token_profit[best] == 0) break; -- cgit v1.1 From 1a90ce36c6eff6fe989eabf0516beb12fc59e067 Mon Sep 17 00:00:00 2001 From: Arvind Prasanna Date: Wed, 28 Feb 2018 16:32:19 -0500 Subject: kconfig: Update ncurses package names for menuconfig The package name is ncurses-devel for Redhat based distros and libncurses-dev for Debian based distros. Signed-off-by: Arvind Prasanna Acked-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/check-lxdialog.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh index a10bd9d..6c0bcd9 100755 --- a/scripts/kconfig/lxdialog/check-lxdialog.sh +++ b/scripts/kconfig/lxdialog/check-lxdialog.sh @@ -55,7 +55,8 @@ EOF echo " *** required header files." 1>&2 echo " *** 'make menuconfig' requires the ncurses libraries." 1>&2 echo " *** " 1>&2 - echo " *** Install ncurses (ncurses-devel) and try again." 1>&2 + echo " *** Install ncurses (ncurses-devel or libncurses-dev " 1>&2 + echo " *** depending on your distribution) and try again." 1>&2 echo " *** " 1>&2 exit 1 fi -- cgit v1.1 From cd4a6f3ab4d80cb919d15897eb3cbc85c2009d4b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 26 Feb 2018 15:22:22 +1100 Subject: selftests/powerpc: Skip the subpage_prot tests if the syscall is unavailable The subpage_prot syscall is only functional when the system is using the Hash MMU. Since commit 5b2b80714796 ("powerpc/mm: Invalidate subpage_prot() system call on radix platforms") it returns ENOENT when the Radix MMU is active. Currently this just makes the test fail. Additionally the syscall is not available if the kernel is built with 4K pages, or if CONFIG_PPC_SUBPAGE_PROT=n, in which case it returns ENOSYS because the syscall is missing entirely. So check explicitly for ENOENT and ENOSYS and skip if we see either of those. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/subpage_prot.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 35ade74..3ae77ba 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size) return 0; } +static int syscall_available(void) +{ + int rc; + + errno = 0; + rc = syscall(__NR_subpage_prot, 0, 0, 0); + + return rc == 0 || (errno != ENOENT && errno != ENOSYS); +} + int test_anon(void) { unsigned long align; @@ -145,6 +155,8 @@ int test_anon(void) void *mallocblock; unsigned long mallocsize; + SKIP_IF(!syscall_available()); + if (getpagesize() != 0x10000) { fprintf(stderr, "Kernel page size must be 64K!\n"); return 1; @@ -180,6 +192,8 @@ int test_file(void) off_t filesize; int fd; + SKIP_IF(!syscall_available()); + fd = open(file_name, O_RDWR); if (fd == -1) { perror("failed to open file"); -- cgit v1.1 From 1514839b366417934e2f1328edb50ed1e8a719f5 Mon Sep 17 00:00:00 2001 From: "himanshu.madhani@cavium.com" Date: Mon, 12 Feb 2018 10:28:14 -0800 Subject: scsi: qla2xxx: Fix NULL pointer crash due to active timer for ABTS This patch fixes NULL pointer crash due to active timer running for abort IOCB. From crash dump analysis it was discoverd that get_next_timer_interrupt() encountered a corrupted entry on the timer list. #9 [ffff95e1f6f0fd40] page_fault at ffffffff914fe8f8 [exception RIP: get_next_timer_interrupt+440] RIP: ffffffff90ea3088 RSP: ffff95e1f6f0fdf0 RFLAGS: 00010013 RAX: ffff95e1f6451028 RBX: 000218e2389e5f40 RCX: 00000001232ad600 RDX: 0000000000000001 RSI: ffff95e1f6f0fdf0 RDI: 0000000001232ad6 RBP: ffff95e1f6f0fe40 R8: ffff95e1f6451188 R9: 0000000000000001 R10: 0000000000000016 R11: 0000000000000016 R12: 00000001232ad5f6 R13: ffff95e1f6450000 R14: ffff95e1f6f0fdf8 R15: ffff95e1f6f0fe10 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 Looking at the assembly of get_next_timer_interrupt(), address came from %r8 (ffff95e1f6451188) which is pointing to list_head with single entry at ffff95e5ff621178. 0xffffffff90ea307a : mov (%r8),%rdx 0xffffffff90ea307d : cmp %r8,%rdx 0xffffffff90ea3080 : je 0xffffffff90ea30a7 0xffffffff90ea3082 : nopw 0x0(%rax,%rax,1) 0xffffffff90ea3088 : testb $0x1,0x18(%rdx) crash> rd ffff95e1f6451188 10 ffff95e1f6451188: ffff95e5ff621178 ffff95e5ff621178 x.b.....x.b..... ffff95e1f6451198: ffff95e1f6451198 ffff95e1f6451198 ..E.......E..... ffff95e1f64511a8: ffff95e1f64511a8 ffff95e1f64511a8 ..E.......E..... ffff95e1f64511b8: ffff95e77cf509a0 ffff95e77cf509a0 ...|.......|.... ffff95e1f64511c8: ffff95e1f64511c8 ffff95e1f64511c8 ..E.......E..... crash> rd ffff95e5ff621178 10 ffff95e5ff621178: 0000000000000001 ffff95e15936aa00 ..........6Y.... ffff95e5ff621188: 0000000000000000 00000000ffffffff ................ ffff95e5ff621198: 00000000000000a0 0000000000000010 ................ ffff95e5ff6211a8: ffff95e5ff621198 000000000000000c ..b............. ffff95e5ff6211b8: 00000f5800000000 ffff95e751f8d720 ....X... ..Q.... ffff95e5ff621178 belongs to freed mempool object at ffff95e5ff621080. CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE ffff95dc7fd74d00 mnt_cache 384 19785 24948 594 16k SLAB MEMORY NODE TOTAL ALLOCATED FREE ffffdc5dabfd8800 ffff95e5ff620000 1 42 29 13 FREE / [ALLOCATED] ffff95e5ff621080 (cpu 6 cache) Examining the contents of that memory reveals a pointer to a constant string in the driver, "abort\0", which is set by qla24xx_async_abort_cmd(). crash> rd ffffffffc059277c 20 ffffffffc059277c: 6e490074726f6261 0074707572726574 abort.Interrupt. ffffffffc059278c: 00676e696c6c6f50 6920726576697244 Polling.Driver i ffffffffc059279c: 646f6d207325206e 6974736554000a65 n %s mode..Testi ffffffffc05927ac: 636976656420676e 786c252074612065 ng device at %lx ffffffffc05927bc: 6b63656843000a2e 646f727020676e69 ...Checking prod ffffffffc05927cc: 6f20444920746375 0a2e706968632066 uct ID of chip.. ffffffffc05927dc: 5120646e756f4600 204130303232414c .Found QLA2200A ffffffffc05927ec: 43000a2e70696843 20676e696b636568 Chip...Checking ffffffffc05927fc: 65786f626c69616d 6c636e69000a2e73 mailboxes...incl ffffffffc059280c: 756e696c2f656475 616d2d616d642f78 ude/linux/dma-ma crash> struct -ox srb_iocb struct srb_iocb { union { struct {...} logio; struct {...} els_logo; struct {...} tmf; struct {...} fxiocb; struct {...} abt; struct ct_arg ctarg; struct {...} mbx; struct {...} nack; [0x0 ] } u; [0xb8] struct timer_list timer; [0x108] void (*timeout)(void *); } SIZE: 0x110 crash> ! bc ibase=16 obase=10 B8+40 F8 The object is a srb_t, and at offset 0xf8 within that structure (i.e. ffff95e5ff621080 + f8 -> ffff95e5ff621178) is a struct timer_list. Cc: #4.4+ Fixes: 4440e46d5db7 ("[SCSI] qla2xxx: Add IOCB Abort command asynchronous handling.") Signed-off-by: Himanshu Madhani Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 2dea112..0487062 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1527,6 +1527,7 @@ qla24xx_abort_sp_done(void *ptr, int res) srb_t *sp = ptr; struct srb_iocb *abt = &sp->u.iocb_cmd; + del_timer(&sp->u.iocb_cmd.timer); complete(&abt->u.abt.comp); } -- cgit v1.1 From 1c6cacf4ea6c04a58a0e3057f5ed60c24a4ffeff Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 22 Feb 2018 09:49:35 +0100 Subject: scsi: qla2xxx: Fixup locking for session deletion Commit d8630bb95f46 ('Serialize session deletion by using work_lock') tries to fixup a deadlock when deleting sessions, but fails to take into account the locking rules. This patch resolves the situation by introducing a separate lock for processing the GNLIST response, and ensures that sess_lock is released before calling qlt_schedule_sess_delete(). Cc: Himanshu Madhani Cc: Quinn Tran Fixes: d8630bb95f46 ("scsi: qla2xxx: Serialize session deletion by using work_lock") Signed-off-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 4 ++-- drivers/scsi/qla2xxx/qla_init.c | 24 +++++++++++++++--------- drivers/scsi/qla2xxx/qla_os.c | 7 ++++++- drivers/scsi/qla2xxx/qla_target.c | 17 ++++++----------- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index be7d682..3ca4b6a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -261,9 +261,9 @@ struct name_list_extended { struct get_name_list_extended *l; dma_addr_t ldma; - struct list_head fcports; /* protect by sess_list */ + struct list_head fcports; + spinlock_t fcports_lock; u32 size; - u8 sent; }; /* * Timeout timer counts in seconds diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 0487062..cacf2cc 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -643,8 +643,7 @@ qla24xx_async_gnl_sp_done(void *s, int res) (loop_id & 0x7fff)); } - spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); - vha->gnl.sent = 0; + spin_lock_irqsave(&vha->gnl.fcports_lock, flags); INIT_LIST_HEAD(&h); fcport = tf = NULL; @@ -653,12 +652,16 @@ qla24xx_async_gnl_sp_done(void *s, int res) list_for_each_entry_safe(fcport, tf, &h, gnl_entry) { list_del_init(&fcport->gnl_entry); + spin_lock(&vha->hw->tgt.sess_lock); fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); + spin_unlock(&vha->hw->tgt.sess_lock); ea.fcport = fcport; qla2x00_fcport_event_handler(vha, &ea); } + spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags); + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); /* create new fcport if fw has knowledge of new sessions */ for (i = 0; i < n; i++) { port_id_t id; @@ -710,18 +713,21 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) ql_dbg(ql_dbg_disc, vha, 0x20d9, "Async-gnlist WWPN %8phC \n", fcport->port_name); - spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + spin_lock_irqsave(&vha->gnl.fcports_lock, flags); + if (!list_empty(&fcport->gnl_entry)) { + spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags); + rval = QLA_SUCCESS; + goto done; + } + + spin_lock(&vha->hw->tgt.sess_lock); fcport->disc_state = DSC_GNL; fcport->last_rscn_gen = fcport->rscn_gen; fcport->last_login_gen = fcport->login_gen; + spin_unlock(&vha->hw->tgt.sess_lock); list_add_tail(&fcport->gnl_entry, &vha->gnl.fcports); - if (vha->gnl.sent) { - spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - return QLA_SUCCESS; - } - vha->gnl.sent = 1; - spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + spin_unlock_irqrestore(&vha->gnl.fcports_lock, flags); sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index afcb5567..585f371 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4577,6 +4577,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); + spin_lock_init(&vha->gnl.fcports_lock); init_waitqueue_head(&vha->fcport_waitQ); init_waitqueue_head(&vha->vref_waitq); @@ -4877,6 +4878,8 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) } qlt_plogi_ack_unref(vha, pla); } else { + fc_port_t *dfcp = NULL; + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); tfcp = qla2x00_find_fcport_by_nportid(vha, &e->u.new_sess.id, 1); @@ -4899,11 +4902,13 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) default: fcport->login_pause = 1; tfcp->conflict = fcport; - qlt_schedule_sess_for_deletion(tfcp); + dfcp = tfcp; break; } } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + if (dfcp) + qlt_schedule_sess_for_deletion(tfcp); wwn = wwn_to_u64(fcport->node_name); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 896b2d8..b49ac85 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1224,10 +1224,10 @@ static void qla24xx_chk_fcp_state(struct fc_port *sess) } } -/* ha->tgt.sess_lock supposed to be held on entry */ void qlt_schedule_sess_for_deletion(struct fc_port *sess) { struct qla_tgt *tgt = sess->tgt; + struct qla_hw_data *ha = sess->vha->hw; unsigned long flags; if (sess->disc_state == DSC_DELETE_PEND) @@ -1244,16 +1244,16 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) return; } + spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (sess->deleted == QLA_SESS_DELETED) sess->logout_on_delete = 0; - spin_lock_irqsave(&sess->vha->work_lock, flags); if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - spin_unlock_irqrestore(&sess->vha->work_lock, flags); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; } sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - spin_unlock_irqrestore(&sess->vha->work_lock, flags); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); sess->disc_state = DSC_DELETE_PEND; @@ -1262,13 +1262,10 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, "Scheduling sess %p for deletion\n", sess); - /* use cancel to push work element through before re-queue */ - cancel_work_sync(&sess->del_work); INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn); - queue_work(sess->vha->hw->wq, &sess->del_work); + WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work)); } -/* ha->tgt.sess_lock supposed to be held on entry */ static void qlt_clear_tgt_db(struct qla_tgt *tgt) { struct fc_port *sess; @@ -1451,8 +1448,8 @@ qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) ql_dbg(ql_dbg_tgt_mgt, vha, 0xf008, "qla_tgt_fc_port_deleted %p", sess); sess->local = 1; - qlt_schedule_sess_for_deletion(sess); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + qlt_schedule_sess_for_deletion(sess); } static inline int test_tgt_sess_count(struct qla_tgt *tgt) @@ -1512,10 +1509,8 @@ int qlt_stop_phase1(struct qla_tgt *tgt) * Lock is needed, because we still can get an incoming packet. */ mutex_lock(&vha->vha_tgt.tgt_mutex); - spin_lock_irqsave(&ha->tgt.sess_lock, flags); tgt->tgt_stop = 1; qlt_clear_tgt_db(tgt); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); mutex_unlock(&vha->vha_tgt.tgt_mutex); mutex_unlock(&qla_tgt_mutex); -- cgit v1.1 From 07ea4b6026ee8b8dfaf9bbe83a09b3ba905d20fd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 22 Feb 2018 09:49:36 +0100 Subject: scsi: qla2xxx: do not check login_state if no loop id is assigned When no loop id is assigned in qla24xx_fcport_handle_login() the login state needs to be ignored; it will get set later on in qla_chk_n2n_b4_login(). Cc: Quinn Tran Cc: Himanshu Madhani Fixes: 040036bb0bc1 ("scsi: qla2xxx: Delay loop id allocation at login") Signed-off-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index cacf2cc..4efc257 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1157,8 +1157,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) if (fcport->scan_state != QLA_FCPORT_FOUND) return 0; - if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + if ((fcport->loop_id != FC_NO_LOOP_ID) && + ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND))) return 0; if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { -- cgit v1.1 From fa83e65885b9147e2f2b89fdd4ecf7b4ff91571d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 22 Feb 2018 09:49:37 +0100 Subject: scsi: qla2xxx: ensure async flags are reset correctly The fcport flags FCF_ASYNC_ACTIVE and FCF_ASYNC_SENT are used to throttle the state machine, so we need to ensure to always set and unset them correctly. Not doing so will lead to the state machine getting confused and no login attempt into remote ports. Cc: Quinn Tran Cc: Himanshu Madhani Fixes: 3dbec59bdf63 ("scsi: qla2xxx: Prevent multiple active discovery commands per session") Signed-off-by: Hannes Reinecke Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_gs.c | 2 ++ drivers/scsi/qla2xxx/qla_init.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 5bf9a59..21eff2d 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3179,6 +3179,7 @@ done_free_sp: sp->free(sp); fcport->flags &= ~FCF_ASYNC_SENT; done: + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } @@ -3370,6 +3371,7 @@ done_free_sp: sp->free(sp); fcport->flags &= ~FCF_ASYNC_SENT; done: + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 4efc257..d5a45c4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -213,6 +213,7 @@ done_free_sp: sp->free(sp); fcport->flags &= ~FCF_ASYNC_SENT; done: + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } @@ -263,7 +264,7 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) done_free_sp: sp->free(sp); done: - fcport->flags &= ~FCF_ASYNC_SENT; + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); return rval; } @@ -271,6 +272,7 @@ void qla2x00_async_prlo_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { + fcport->flags &= ~FCF_ASYNC_ACTIVE; /* Don't re-login in target mode */ if (!fcport->tgt_session) qla2x00_mark_device_lost(vha, fcport, 1, 0); @@ -284,6 +286,7 @@ qla2x00_async_prlo_sp_done(void *s, int res) struct srb_iocb *lio = &sp->u.iocb_cmd; struct scsi_qla_host *vha = sp->vha; + sp->fcport->flags &= ~FCF_ASYNC_ACTIVE; if (!test_bit(UNLOADING, &vha->dpc_flags)) qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport, lio->u.logio.data); @@ -322,6 +325,7 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) done_free_sp: sp->free(sp); done: + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } @@ -375,6 +379,8 @@ qla2x00_async_adisc_sp_done(void *ptr, int res) "Async done-%s res %x %8phC\n", sp->name, res, sp->fcport->port_name); + sp->fcport->flags &= ~FCF_ASYNC_SENT; + memset(&ea, 0, sizeof(ea)); ea.event = FCME_ADISC_DONE; ea.rc = res; @@ -425,7 +431,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, done_free_sp: sp->free(sp); done: - fcport->flags &= ~FCF_ASYNC_SENT; + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); qla2x00_post_async_adisc_work(vha, fcport, data); return rval; } @@ -1799,6 +1805,7 @@ qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport, qla2x00_mark_device_lost(vha, fcport, 1, 0); qlt_logo_completion_handler(fcport, data[0]); fcport->login_gen++; + fcport->flags &= ~FCF_ASYNC_ACTIVE; return; } @@ -1806,6 +1813,7 @@ void qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { + fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); if (data[0] == MBS_COMMAND_COMPLETE) { qla2x00_update_fcport(vha, fcport); @@ -1813,7 +1821,6 @@ qla2x00_async_adisc_done(struct scsi_qla_host *vha, fc_port_t *fcport, } /* Retry login. */ - fcport->flags &= ~FCF_ASYNC_SENT; if (data[1] & QLA_LOGIO_LOGIN_RETRIED) set_bit(RELOGIN_NEEDED, &vha->dpc_flags); else -- cgit v1.1 From 3be8828fc507cdafe7040a3dcf361a2bcd8e305b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Feb 2018 11:30:20 -0800 Subject: scsi: core: Avoid that ATA error handling can trigger a kernel hang or oops Avoid that the recently introduced call_rcu() call in the SCSI core triggers a double call_rcu() call. Reported-by: Natanael Copa Reported-by: Damien Le Moal References: https://bugzilla.kernel.org/show_bug.cgi?id=198861 Fixes: 3bd6f43f5cb3 ("scsi: core: Ensure that the SCSI error handler gets woken up") Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Cc: Natanael Copa Cc: Damien Le Moal Cc: Alexandre Oliva Cc: Pavel Tikhomirov Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 3 --- drivers/scsi/scsi_error.c | 5 +++-- drivers/scsi/scsi_lib.c | 2 ++ include/scsi/scsi_cmnd.h | 3 +++ include/scsi/scsi_host.h | 2 -- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 57bf43e..dd94649 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -328,8 +328,6 @@ static void scsi_host_dev_release(struct device *dev) if (shost->work_q) destroy_workqueue(shost->work_q); - destroy_rcu_head(&shost->rcu); - if (shost->shost_state == SHOST_CREATED) { /* * Free the shost_dev device name here if scsi_host_alloc() @@ -404,7 +402,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) INIT_LIST_HEAD(&shost->starved_list); init_waitqueue_head(&shost->host_wait); mutex_init(&shost->scan_mutex); - init_rcu_head(&shost->rcu); index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL); if (index < 0) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index d042915..ca53a5f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -223,7 +223,8 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd) static void scsi_eh_inc_host_failed(struct rcu_head *head) { - struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu); + struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); + struct Scsi_Host *shost = scmd->device->host; unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); @@ -259,7 +260,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) * Ensure that all tasks observe the host state change before the * host_failed change. */ - call_rcu(&shost->rcu, scsi_eh_inc_host_failed); + call_rcu(&scmd->rcu, scsi_eh_inc_host_failed); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5cbc69b..4af1682 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -670,6 +670,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (!blk_rq_is_scsi(req)) { WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED)); cmd->flags &= ~SCMD_INITIALIZED; + destroy_rcu_head(&cmd->rcu); } if (req->mq_ctx) { @@ -1150,6 +1151,7 @@ static void scsi_initialize_rq(struct request *rq) struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); scsi_req_init(&cmd->req); + init_rcu_head(&cmd->rcu); cmd->jiffies_at_alloc = jiffies; cmd->retries = 0; } diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 949a016..0382cea 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -69,6 +69,9 @@ struct scsi_cmnd { struct list_head list; /* scsi_cmnd participates in queue lists */ struct list_head eh_entry; /* entry for the host eh_cmd_q */ struct delayed_work abort_work; + + struct rcu_head rcu; + int eh_eflags; /* Used by error handlr */ /* diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 1a1df0d..a8b7bf8 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -571,8 +571,6 @@ struct Scsi_Host { struct blk_mq_tag_set tag_set; }; - struct rcu_head rcu; - atomic_t host_busy; /* commands actually active on low-level */ atomic_t host_blocked; -- cgit v1.1 From e39a97353e5378eb46bf01679799c5704d397f32 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 26 Feb 2018 08:39:59 +0100 Subject: scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte() When converting __scsi_error_from_host_byte() to BLK_STS error codes the case DID_OK was forgotten, resulting in it always returning an error. Fixes: 2a842acab109 ("block: introduce new block status code type") Cc: Doug Gilbert Signed-off-by: Hannes Reinecke Reviewed-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4af1682..c984404 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -720,6 +720,8 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) { switch (host_byte(result)) { + case DID_OK: + return BLK_STS_OK; case DID_TRANSPORT_FAILFAST: return BLK_STS_TRANSPORT; case DID_TARGET_FAILURE: -- cgit v1.1 From 2b5b96473efceb755d7700d47982370d49e8815f Mon Sep 17 00:00:00 2001 From: Darren Trapp Date: Tue, 27 Feb 2018 16:31:12 -0800 Subject: scsi: qla2xxx: Fix FC-NVMe LUN discovery commit a4239945b8ad ("scsi: qla2xxx: Add switch command to simplify fabric discovery") introduced regression when it did not consider FC-NVMe code path which broke NVMe LUN discovery. Fixes: a4239945b8ad ("scsi: qla2xxx: Add switch command to simplify fabric discovery") Signed-off-by: Darren Trapp Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_gs.c | 3 +++ drivers/scsi/qla2xxx/qla_init.c | 8 +++++++- drivers/scsi/qla2xxx/qla_os.c | 7 +++++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 3ca4b6a..c9689f9 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2217,6 +2217,7 @@ typedef struct { /* FCP-4 types */ #define FC4_TYPE_FCP_SCSI 0x08 +#define FC4_TYPE_NVME 0x28 #define FC4_TYPE_OTHER 0x0 #define FC4_TYPE_UNKNOWN 0xff diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 21eff2d..403fa09 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3973,6 +3973,9 @@ out: spin_lock_irqsave(&vha->work_lock, flags); vha->scan.scan_flags &= ~SF_SCANNING; spin_unlock_irqrestore(&vha->work_lock, flags); + + if ((fc4type == FC4_TYPE_FCP_SCSI) && vha->flags.nvme_enabled) + qla24xx_async_gpnft(vha, FC4_TYPE_NVME); } static void qla2x00_async_gpnft_gnnft_sp_done(void *s, int res) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index d5a45c4..00329dd 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1061,6 +1061,7 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea) fc_port_t *fcport = ea->fcport; struct port_database_24xx *pd; struct srb *sp = ea->sp; + uint8_t ls; pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in; @@ -1073,7 +1074,12 @@ void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea) if (fcport->disc_state == DSC_DELETE_PEND) return; - switch (pd->current_login_state) { + if (fcport->fc4f_nvme) + ls = pd->current_login_state >> 4; + else + ls = pd->current_login_state & 0xf; + + switch (ls) { case PDS_PRLI_COMPLETE: __qla24xx_parse_gpdb(vha, fcport, pd); break; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 585f371..285911e 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4807,9 +4807,12 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) fcport->d_id = e->u.new_sess.id; fcport->flags |= FCF_FABRIC_DEVICE; fcport->fw_login_state = DSC_LS_PLOGI_PEND; - if (e->u.new_sess.fc4_type == FC4_TYPE_FCP_SCSI) + if (e->u.new_sess.fc4_type == FC4_TYPE_FCP_SCSI) { fcport->fc4_type = FC4_TYPE_FCP_SCSI; - + } else if (e->u.new_sess.fc4_type == FC4_TYPE_NVME) { + fcport->fc4_type = FC4_TYPE_OTHER; + fcport->fc4f_nvme = FC4_TYPE_NVME; + } memcpy(fcport->port_name, e->u.new_sess.port_name, WWN_SIZE); } else { -- cgit v1.1 From 967823d6c3980a30e214b92bfe6a101e7b46d025 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Mon, 26 Feb 2018 01:01:17 -0800 Subject: scsi: qedi: Fix kernel crash during port toggle BUG: unable to handle kernel NULL pointer dereference at 0000000000000100 [ 985.596918] IP: _raw_spin_lock_bh+0x17/0x30 [ 985.601581] PGD 0 P4D 0 [ 985.604405] Oops: 0002 [#1] SMP : [ 985.704533] CPU: 16 PID: 1156 Comm: qedi_thread/16 Not tainted 4.16.0-rc2 #1 [ 985.712397] Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 2.4.3 01/17/2017 [ 985.720747] RIP: 0010:_raw_spin_lock_bh+0x17/0x30 [ 985.725996] RSP: 0018:ffffa4b1c43d3e10 EFLAGS: 00010246 [ 985.731823] RAX: 0000000000000000 RBX: ffff94a31bd03000 RCX: 0000000000000000 [ 985.739783] RDX: 0000000000000001 RSI: ffff94a32fa16938 RDI: 0000000000000100 [ 985.747744] RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000a33 [ 985.755703] R10: 0000000000000000 R11: ffffa4b1c43d3af0 R12: 0000000000000000 [ 985.763662] R13: ffff94a301f40818 R14: 0000000000000000 R15: 000000000000000c [ 985.771622] FS: 0000000000000000(0000) GS:ffff94a32fa00000(0000) knlGS:0000000000000000 [ 985.780649] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 985.787057] CR2: 0000000000000100 CR3: 000000067a009006 CR4: 00000000001606e0 [ 985.795017] Call Trace: [ 985.797747] qedi_fp_process_cqes+0x258/0x980 [qedi] [ 985.803294] qedi_percpu_io_thread+0x10f/0x1b0 [qedi] [ 985.808931] kthread+0xf5/0x130 [ 985.812434] ? qedi_free_uio+0xd0/0xd0 [qedi] [ 985.817298] ? kthread_bind+0x10/0x10 [ 985.821372] ? do_syscall_64+0x6e/0x1a0 Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 20a9259..03c772c 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -761,6 +761,11 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, iscsi_cid = cqe->conn_id; qedi_conn = qedi->cid_que.conn_cid_tbl[iscsi_cid]; + if (!qedi_conn) { + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "icid not found 0x%x\n", cqe->conn_id); + return; + } /* Based on this itt get the corresponding qedi_cmd */ spin_lock_bh(&qedi_conn->tmf_work_lock); -- cgit v1.1 From 07c5ccd70ad702e561fcda8e4df494f098a42742 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 22 Feb 2018 15:17:38 +1100 Subject: ocxl: Add get_metadata IOCTL to share OCXL information to userspace Some required information is not exposed to userspace currently (eg. the PASID), pass this information back, along with other information which is currently communicated via sysfs, which saves some parsing effort in userspace. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/file.c | 27 +++++++++++++++++++++++++++ include/uapi/misc/ocxl.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 337462e..038509e 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -102,10 +102,32 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, return rc; } +static long afu_ioctl_get_metadata(struct ocxl_context *ctx, + struct ocxl_ioctl_metadata __user *uarg) +{ + struct ocxl_ioctl_metadata arg; + + memset(&arg, 0, sizeof(arg)); + + arg.version = 0; + + arg.afu_version_major = ctx->afu->config.version_major; + arg.afu_version_minor = ctx->afu->config.version_minor; + arg.pasid = ctx->pasid; + arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride; + arg.global_mmio_size = ctx->afu->config.global_mmio_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \ + x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" : \ "UNKNOWN") static long afu_ioctl(struct file *file, unsigned int cmd, @@ -159,6 +181,11 @@ static long afu_ioctl(struct file *file, unsigned int cmd, irq_fd.eventfd); break; + case OCXL_IOCTL_GET_METADATA: + rc = afu_ioctl_get_metadata(ctx, + (struct ocxl_ioctl_metadata __user *) args); + break; + default: rc = -EINVAL; } diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h index 4b0b0b7..0af83d8 100644 --- a/include/uapi/misc/ocxl.h +++ b/include/uapi/misc/ocxl.h @@ -32,6 +32,22 @@ struct ocxl_ioctl_attach { __u64 reserved3; }; +struct ocxl_ioctl_metadata { + __u16 version; // struct version, always backwards compatible + + // Version 0 fields + __u8 afu_version_major; + __u8 afu_version_minor; + __u32 pasid; // PASID assigned to the current context + + __u64 pp_mmio_size; // Per PASID MMIO size + __u64 global_mmio_size; + + // End version 0 fields + + __u64 reserved[13]; // Total of 16*u64 +}; + struct ocxl_ioctl_irq_fd { __u64 irq_offset; __s32 eventfd; @@ -45,5 +61,6 @@ struct ocxl_ioctl_irq_fd { #define OCXL_IOCTL_IRQ_ALLOC _IOR(OCXL_MAGIC, 0x11, __u64) #define OCXL_IOCTL_IRQ_FREE _IOW(OCXL_MAGIC, 0x12, __u64) #define OCXL_IOCTL_IRQ_SET_FD _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd) +#define OCXL_IOCTL_GET_METADATA _IOR(OCXL_MAGIC, 0x14, struct ocxl_ioctl_metadata) #endif /* _UAPI_MISC_OCXL_H */ -- cgit v1.1 From e7666d046ac0eda535282a5fd3b188f31d0f4afd Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 22 Feb 2018 15:17:39 +1100 Subject: ocxl: Document the OCXL_IOCTL_GET_METADATA IOCTL Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- Documentation/accelerators/ocxl.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst index 4f7af84..ddcc58d 100644 --- a/Documentation/accelerators/ocxl.rst +++ b/Documentation/accelerators/ocxl.rst @@ -152,6 +152,11 @@ OCXL_IOCTL_IRQ_SET_FD: Associate an event fd to an AFU interrupt so that the user process can be notified when the AFU sends an interrupt. +OCXL_IOCTL_GET_METADATA: + + Obtains configuration information from the card, such at the size of + MMIO areas, the AFU version, and the PASID for the current context. + mmap ---- -- cgit v1.1 From 50d629e7a843d1635ecb1658335279503c4ec9a8 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 26 Feb 2018 23:49:30 +0000 Subject: net: allow interface to be set into VRF if VLAN interface in same VRF Setting an interface into a VRF fails with 'RTNETLINK answers: File exists' if one of its VLAN interfaces is already in the same VRF. As the VRF is an upper device of the VLAN interface, it is also showing up as an upper device of the interface itself. The solution is to restrict this check to devices other than master. As only one master device can be linked to a device, the check in this case is that the upper device (VRF) being linked to is not the same as the master device instead of it not being any one of the upper devices. The following example shows an interface ens12 (with a VLAN interface ens12.10) being set into VRF green, which behaves as expected: # ip link add link ens12 ens12.10 type vlan id 10 # ip link set dev ens12 master vrfgreen # ip link show dev ens12 3: ens12: mtu 1500 qdisc fq_codel master vrfgreen state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:4c:a0:45 brd ff:ff:ff:ff:ff:ff But if the VLAN interface has previously been set into the same VRF, then setting the interface into the VRF fails: # ip link set dev ens12 nomaster # ip link set dev ens12.10 master vrfgreen # ip link show dev ens12.10 39: ens12.10@ens12: mtu 1500 qdisc noqueue master vrfgreen state UP mode DEFAULT group default qlen 1000 link/ether 52:54:00:4c:a0:45 brd ff:ff:ff:ff:ff:ff # ip link set dev ens12 master vrfgreen RTNETLINK answers: File exists The workaround is to move the VLAN interface back into the default VRF beforehand, but it has to be shut first so as to avoid the risk of traffic leaking from the VRF. This fix avoids needing this workaround. Signed-off-by: Mike Manning Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index d4362be..2cedf52 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6396,6 +6396,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, .linking = true, .upper_info = upper_info, }; + struct net_device *master_dev; int ret = 0; ASSERT_RTNL(); @@ -6407,11 +6408,14 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (netdev_has_upper_dev(dev, upper_dev)) - return -EEXIST; - - if (master && netdev_master_upper_dev_get(dev)) - return -EBUSY; + if (!master) { + if (netdev_has_upper_dev(dev, upper_dev)) + return -EEXIST; + } else { + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + return master_dev == upper_dev ? -EEXIST : -EBUSY; + } ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); -- cgit v1.1 From e2c0dc1f1d8e31eabed412b6f154ad549986bc28 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Wed, 28 Feb 2018 12:20:44 -0500 Subject: vrf: check forwarding on the original netdevice when generating ICMP dest unreachable When ip_error() is called the device is the l3mdev master instead of the original device. So the forwarding check should be on the original one. Changes from v2: - Handle the original device disappearing (per David Ahern) - Minimize the change in code order Changes from v1: - Only need to reset the device on which __in_dev_get_rcu() is done (per David Ahern). Signed-off-by: Stephen Suryaputra Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 465196e..860b3fd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -931,14 +931,23 @@ out_put_peer: static int ip_error(struct sk_buff *skb) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); struct rtable *rt = skb_rtable(skb); + struct net_device *dev = skb->dev; + struct in_device *in_dev; struct inet_peer *peer; unsigned long now; struct net *net; bool send; int code; + if (netif_is_l3_master(skb->dev)) { + dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); + if (!dev) + goto out; + } + + in_dev = __in_dev_get_rcu(dev); + /* IP on this device is disabled. */ if (!in_dev) goto out; -- cgit v1.1 From a6d50512b4d86ecd9f5952525e454583be1c3b14 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 28 Feb 2018 19:15:58 +0000 Subject: net: ethtool: don't ignore return from driver get_fecparam method If ethtool_ops->get_fecparam returns an error, pass that error on to the user, rather than ignoring it. Fixes: 1a5f3da20bd9 ("net: ethtool: add support for forward error correction modes") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- net/core/ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 494e6a5..3f89c76 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2520,11 +2520,14 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr) static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) { struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM }; + int rc; if (!dev->ethtool_ops->get_fecparam) return -EOPNOTSUPP; - dev->ethtool_ops->get_fecparam(dev, &fecparam); + rc = dev->ethtool_ops->get_fecparam(dev, &fecparam); + if (rc) + return rc; if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) return -EFAULT; -- cgit v1.1 From c3856aeb29402e94ad9b3879030165cc6a4fdc56 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 23 Feb 2018 21:21:12 +1100 Subject: KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler This fixes several bugs in the radix page fault handler relating to the way large pages in the memory backing the guest were handled. First, the check for large pages only checked for explicit huge pages and missed transparent huge pages. Then the check that the addresses (host virtual vs. guest physical) had appropriate alignment was wrong, meaning that the code never put a large page in the partition scoped radix tree; it was always demoted to a small page. Fixing this exposed bugs in kvmppc_create_pte(). We were never invalidating a 2MB PTE, which meant that if a page was initially faulted in without write permission and the guest then attempted to store to it, we would never update the PTE to have write permission. If we find a valid 2MB PTE in the PMD, we need to clear it and do a TLB invalidation before installing either the new 2MB PTE or a pointer to a page table page. This also corrects an assumption that get_user_pages_fast would set the _PAGE_DIRTY bit if we are writing, which is not true. Instead we mark the page dirty explicitly with set_page_dirty_lock(). This also means we don't need the dirty bit set on the host PTE when providing write access on a read fault. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 69 +++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0c85481..5cb4e46 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep) kmem_cache_free(kvm_pte_cache, ptep); } +/* Like pmd_huge() and pmd_large(), but works regardless of config options */ +static inline int pmd_is_leaf(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, else new_pmd = pmd_alloc_one(kvm->mm, gpa); - if (level == 0 && !(pmd && pmd_present(*pmd))) + if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) new_ptep = kvmppc_pte_alloc(); /* Check if we might have been invalidated; let the guest retry if so */ @@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, new_pmd = NULL; } pmd = pmd_offset(pud, gpa); - if (pmd_large(*pmd)) { - /* Someone else has instantiated a large page here; retry */ - ret = -EAGAIN; - goto out_unlock; - } - if (level == 1 && !pmd_none(*pmd)) { + if (pmd_is_leaf(*pmd)) { + unsigned long lgpa = gpa & PMD_MASK; + + /* + * If we raced with another CPU which has just put + * a 2MB pte in after we saw a pte page, try again. + */ + if (level == 0 && !new_ptep) { + ret = -EAGAIN; + goto out_unlock; + } + /* Valid 2MB page here already, remove it */ + old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), + ~0UL, 0, lgpa, PMD_SHIFT); + kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT); + if (old & _PAGE_DIRTY) { + unsigned long gfn = lgpa >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + memslot = gfn_to_memslot(kvm, gfn); + if (memslot && memslot->dirty_bitmap) + kvmppc_update_dirty_map(memslot, + gfn, PMD_SIZE); + } + } else if (level == 1 && !pmd_none(*pmd)) { /* * There's a page table page here, but we wanted * to install a large page. Tell the caller and let @@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } else { page = pages[0]; pfn = page_to_pfn(page); - if (PageHuge(page)) { - page = compound_head(page); - pte_size <<= compound_order(page); + if (PageCompound(page)) { + pte_size <<= compound_order(compound_head(page)); /* See if we can insert a 2MB large-page PTE here */ if (pte_size >= PMD_SIZE && - (gpa & PMD_MASK & PAGE_MASK) == - (hva & PMD_MASK & PAGE_MASK)) { + (gpa & (PMD_SIZE - PAGE_SIZE)) == + (hva & (PMD_SIZE - PAGE_SIZE))) { level = 1; pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); } } /* See if we can provide write access */ if (writing) { - /* - * We assume gup_fast has set dirty on the host PTE. - */ pgflags |= _PAGE_WRITE; } else { local_irq_save(flags); ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); - if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) + if (ptep && pte_write(*ptep)) pgflags |= _PAGE_WRITE; local_irq_restore(flags); } @@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte = pfn_pte(pfn, __pgprot(pgflags)); ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); } - if (ret == 0 || ret == -EAGAIN) - ret = RESUME_GUEST; if (page) { - /* - * We drop pages[0] here, not page because page might - * have been set to the head page of a compound, but - * we have to drop the reference on the correct tail - * page to match the get inside gup() - */ - put_page(pages[0]); + if (!ret && (pgflags & _PAGE_WRITE)) + set_page_dirty_lock(page); + put_page(page); } + + if (ret == 0 || ret == -EAGAIN) + ret = RESUME_GUEST; return ret; } @@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm) continue; pmd = pmd_offset(pud, 0); for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { pmd_clear(pmd); continue; } -- cgit v1.1 From debd574f4195e205ba505b25e19b2b797f4bcd94 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 2 Mar 2018 15:38:04 +1100 Subject: KVM: PPC: Book3S HV: Fix VRMA initialization with 2MB or 1GB memory backing The current code for initializing the VRMA (virtual real memory area) for HPT guests requires the page size of the backing memory to be one of 4kB, 64kB or 16MB. With a radix host we have the possibility that the backing memory page size can be 2MB or 1GB. In these cases, if the guest switches to HPT mode, KVM will not initialize the VRMA and the guest will fail to run. In fact it is not necessary that the VRMA page size is the same as the backing memory page size; any VRMA page size less than or equal to the backing memory page size is acceptable. Therefore we now choose the largest page size out of the set {4k, 64k, 16M} which is not larger than the backing memory page size. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8970735..b4a538b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3656,15 +3656,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) goto up_out; psize = vma_kernel_pagesize(vma); - porder = __ilog2(psize); up_read(¤t->mm->mmap_sem); /* We can handle 4k, 64k or 16M pages in the VRMA */ - err = -EINVAL; - if (!(psize == 0x1000 || psize == 0x10000 || - psize == 0x1000000)) - goto out_srcu; + if (psize >= 0x1000000) + psize = 0x1000000; + else if (psize >= 0x10000) + psize = 0x10000; + else + psize = 0x1000; + porder = __ilog2(psize); senc = slb_pgsize_encoding(psize); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | -- cgit v1.1 From f3e5feeb92a163c935659b7222a32965276c1c23 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 1 Mar 2018 22:34:32 +0100 Subject: drm/sun4i: Release exclusive clock lock when disabling TCON Currently exclusive TCON clock lock is never released, which, for example, prevents changing resolution on HDMI. In order to fix that, release clock when disabling TCON. TCON is always disabled first before new mode is set. Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180301213442.16677-7-jernej.skrabec@siol.net --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index b396011..ade197b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -101,10 +101,12 @@ static void sun4i_tcon_channel_set_status(struct sun4i_tcon *tcon, int channel, return; } - if (enabled) + if (enabled) { clk_prepare_enable(clk); - else + } else { + clk_rate_exclusive_put(clk); clk_disable_unprepare(clk); + } } static void sun4i_tcon_lvds_set_status(struct sun4i_tcon *tcon, -- cgit v1.1 From d5078193e56bb24f4593f00102a3b5e07bb84ee0 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 2 Mar 2018 13:05:36 +0800 Subject: ALSA: hda - Fix a wrong FIXUP for alc289 on Dell machines With the alc289, the Pin 0x1b is Headphone-Mic, so we should assign ALC269_FIXUP_DELL4_MIC_NO_PRESENCE rather than ALC225_FIXUP_DELL1_MIC_NO_PRESENCE to it. And this change is suggested by Kailang of Realtek and is verified on the machine. Fixes: 3f2f7c553d07 ("ALSA: hda - Fix headset mic detection problem for two Dell machines") Cc: Kailang Yang Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b9c93fa..7a9a867 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6872,7 +6872,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60120}, {0x14, 0x90170110}, {0x21, 0x0321101f}), - SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x12, 0xb7a60130}, {0x14, 0x90170110}, {0x21, 0x04211020}), -- cgit v1.1 From 0adb24e03a124b79130c9499731936b11ce2677d Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 27 Feb 2018 08:16:07 -0500 Subject: parisc: Fix ordering of cache and TLB flushes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The change to flush_kernel_vmap_range() wasn't sufficient to avoid the SMP stalls.  The problem is some drivers call these routines with interrupts disabled.  Interrupts need to be enabled for flush_tlb_all() and flush_cache_all() to work.  This version adds checks to ensure interrupts are not disabled before calling routines that need IPI interrupts.  When interrupts are disabled, we now drop into slower code. The attached change fixes the ordering of cache and TLB flushes in several cases.  When we flush the cache using the existing PTE/TLB entries, we need to flush the TLB after doing the cache flush.  We don't need to do this when we flush the entire instruction and data caches as these flushes don't use the existing TLB entries.  The same is true for tmpalias region flushes. The flush_kernel_vmap_range() and invalidate_kernel_vmap_range() routines have been updated. Secondly, we added a new purge_kernel_dcache_range_asm() routine to pacache.S and use it in invalidate_kernel_vmap_range().  Nominally, purges are faster than flushes as the cache lines don't have to be written back to memory. Hopefully, this is sufficient to resolve the remaining problems due to cache speculation.  So far, testing indicates that this is the case.  I did work up a patch using tmpalias flushes, but there is a performance hit because we need the physical address for each page, and we also need to sequence access to the tmpalias flush code.  This increases the probability of stalls. Signed-off-by: John David Anglin  Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 1 + arch/parisc/kernel/cache.c | 57 ++++++++++++++++++++---------------- arch/parisc/kernel/pacache.S | 22 ++++++++++++++ 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 3742508..bd5ce31 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -26,6 +26,7 @@ void flush_user_icache_range_asm(unsigned long, unsigned long); void flush_kernel_icache_range_asm(unsigned long, unsigned long); void flush_user_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_range_asm(unsigned long, unsigned long); +void purge_kernel_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_page_asm(void *); void flush_kernel_icache_page(void *); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 19c0c14..7908977 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -465,10 +465,10 @@ EXPORT_SYMBOL(copy_user_page); int __flush_tlb_range(unsigned long sid, unsigned long start, unsigned long end) { - unsigned long flags, size; + unsigned long flags; - size = (end - start); - if (size >= parisc_tlb_flush_threshold) { + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + end - start >= parisc_tlb_flush_threshold) { flush_tlb_all(); return 1; } @@ -539,13 +539,11 @@ void flush_cache_mm(struct mm_struct *mm) struct vm_area_struct *vma; pgd_t *pgd; - /* Flush the TLB to avoid speculation if coherency is required. */ - if (parisc_requires_coherency()) - flush_tlb_all(); - /* Flushing the whole cache on each cpu takes forever on rp3440, etc. So, avoid it if the mm isn't too big. */ - if (mm_total_size(mm) >= parisc_cache_flush_threshold) { + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + mm_total_size(mm) >= parisc_cache_flush_threshold) { + flush_tlb_all(); flush_cache_all(); return; } @@ -553,9 +551,9 @@ void flush_cache_mm(struct mm_struct *mm) if (mm->context == mfsp(3)) { for (vma = mm->mmap; vma; vma = vma->vm_next) { flush_user_dcache_range_asm(vma->vm_start, vma->vm_end); - if ((vma->vm_flags & VM_EXEC) == 0) - continue; - flush_user_icache_range_asm(vma->vm_start, vma->vm_end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(vma->vm_start, vma->vm_end); + flush_tlb_range(vma, vma->vm_start, vma->vm_end); } return; } @@ -581,14 +579,9 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - BUG_ON(!vma->vm_mm->context); - - /* Flush the TLB to avoid speculation if coherency is required. */ - if (parisc_requires_coherency()) + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + end - start >= parisc_cache_flush_threshold) { flush_tlb_range(vma, start, end); - - if ((end - start) >= parisc_cache_flush_threshold - || vma->vm_mm->context != mfsp(3)) { flush_cache_all(); return; } @@ -596,6 +589,7 @@ void flush_cache_range(struct vm_area_struct *vma, flush_user_dcache_range_asm(start, end); if (vma->vm_flags & VM_EXEC) flush_user_icache_range_asm(start, end); + flush_tlb_range(vma, start, end); } void @@ -604,8 +598,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long BUG_ON(!vma->vm_mm->context); if (pfn_valid(pfn)) { - if (parisc_requires_coherency()) - flush_tlb_page(vma, vmaddr); + flush_tlb_page(vma, vmaddr); __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } @@ -613,21 +606,33 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long void flush_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; - if ((unsigned long)size > parisc_cache_flush_threshold) + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + (unsigned long)size >= parisc_cache_flush_threshold) { + flush_tlb_kernel_range(start, end); flush_data_cache(); - else - flush_kernel_dcache_range_asm(start, start + size); + return; + } + + flush_kernel_dcache_range_asm(start, end); + flush_tlb_kernel_range(start, end); } EXPORT_SYMBOL(flush_kernel_vmap_range); void invalidate_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; - if ((unsigned long)size > parisc_cache_flush_threshold) + if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && + (unsigned long)size >= parisc_cache_flush_threshold) { + flush_tlb_kernel_range(start, end); flush_data_cache(); - else - flush_kernel_dcache_range_asm(start, start + size); + return; + } + + purge_kernel_dcache_range_asm(start, end); + flush_tlb_kernel_range(start, end); } EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 2d40c4f..67b0f75 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -1110,6 +1110,28 @@ ENTRY_CFI(flush_kernel_dcache_range_asm) .procend ENDPROC_CFI(flush_kernel_dcache_range_asm) +ENTRY_CFI(purge_kernel_dcache_range_asm) + .proc + .callinfo NO_CALLS + .entry + + ldil L%dcache_stride, %r1 + ldw R%dcache_stride(%r1), %r23 + ldo -1(%r23), %r21 + ANDCM %r26, %r21, %r26 + +1: cmpb,COND(<<),n %r26, %r25,1b + pdc,m %r23(%r26) + + sync + syncdma + bv %r0(%r2) + nop + .exit + + .procend +ENDPROC_CFI(purge_kernel_dcache_range_asm) + ENTRY_CFI(flush_user_icache_range_asm) .proc .callinfo NO_CALLS -- cgit v1.1 From fd8d0ca2563151204f3fe555dc8ca4bcfe8677a3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Jan 2018 22:57:15 +0100 Subject: parisc: Hide virtual kernel memory layout For security reasons do not expose the virtual kernel memory layout to userspace. Signed-off-by: Helge Deller Suggested-by: Kees Cook Cc: stable@vger.kernel.org # 4.15 Reviewed-by: Kees Cook --- arch/parisc/mm/init.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 48f4139..cab32ee 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -629,7 +629,12 @@ void __init mem_init(void) #endif mem_init_print_info(NULL); -#ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ + +#if 0 + /* + * Do not expose the virtual kernel memory layout to userspace. + * But keep code for debugging purposes. + */ printk("virtual kernel memory layout:\n" " vmalloc : 0x%px - 0x%px (%4ld MB)\n" " memory : 0x%px - 0x%px (%4ld MB)\n" -- cgit v1.1 From 0ed1fe4ad394e54783bbffa84102faf435661a2e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Jan 2018 22:51:22 +0100 Subject: parisc: Check if secondary CPUs want own PDC calls The architecture specification says (for 64-bit systems): PDC is a per processor resource, and operating system software must be prepared to manage separate pointers to PDCE_PROC for each processor. The address of PDCE_PROC for the monarch processor is stored in the Page Zero location MEM_PDC. The address of PDCE_PROC for each non-monarch processor is passed in gr26 when PDCE_RESET invokes OS_RENDEZ. Currently we still use one PDC for all CPUs, but in case we face a machine which is following the specification let's warn about it. Signed-off-by: Helge Deller --- arch/parisc/kernel/head.S | 18 ++++++++++++------ arch/parisc/kernel/smp.c | 7 ++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index bbbe360..fbb4e43 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -138,6 +138,16 @@ $pgt_fill_loop: std %dp,0x18(%r10) #endif +#ifdef CONFIG_64BIT + /* Get PDCE_PROC for monarch CPU. */ +#define MEM_PDC_LO 0x388 +#define MEM_PDC_HI 0x35C + ldw MEM_PDC_LO(%r0),%r3 + ldw MEM_PDC_HI(%r0),%r10 + depd %r10, 31, 32, %r3 /* move to upper word */ +#endif + + #ifdef CONFIG_SMP /* Set the smp rendezvous address into page zero. ** It would be safer to do this in init_smp_config() but @@ -196,12 +206,6 @@ common_stext: ** Someday, palo might not do this for the Monarch either. */ 2: -#define MEM_PDC_LO 0x388 -#define MEM_PDC_HI 0x35C - ldw MEM_PDC_LO(%r0),%r3 - ldw MEM_PDC_HI(%r0),%r6 - depd %r6, 31, 32, %r3 /* move to upper word */ - mfctl %cr30,%r6 /* PCX-W2 firmware bug */ ldo PDC_PSW(%r0),%arg0 /* 21 */ @@ -268,6 +272,8 @@ $install_iva: aligned_rfi: pcxt_ssm_bug + copy %r3, %arg0 /* PDCE_PROC for smp_callin() */ + rsm PSW_SM_QUIET,%r0 /* off troublesome PSW bits */ /* Don't need NOPs, have 8 compliant insn before rfi */ diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 30c28ab..4065b5e 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -292,10 +292,15 @@ smp_cpu_init(int cpunum) * Slaves start using C here. Indirectly called from smp_slave_stext. * Do what start_kernel() and main() do for boot strap processor (aka monarch) */ -void __init smp_callin(void) +void __init smp_callin(unsigned long pdce_proc) { int slave_id = cpu_now_booting; +#ifdef CONFIG_64BIT + WARN_ON(((unsigned long)(PAGE0->mem_pdc_hi) << 32 + | PAGE0->mem_pdc) != pdce_proc); +#endif + smp_cpu_init(slave_id); preempt_disable(); -- cgit v1.1 From 5ffa8518851f1401817c15d2a7eecc0373c26ff9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 12 Jan 2018 22:44:00 +0100 Subject: parisc: Use cr16 interval timers unconditionally on qemu When running on qemu we know that the (emulated) cr16 cpu-internal clocks are syncronized. So let's use them unconditionally on qemu. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.14+ --- arch/parisc/include/asm/processor.h | 2 ++ arch/parisc/kernel/time.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 0e6ab6e..2dbe558 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -316,6 +316,8 @@ extern int _parisc_requires_coherency; #define parisc_requires_coherency() (0) #endif +extern int running_on_qemu; + #endif /* __ASSEMBLY__ */ #endif /* __ASM_PARISC_PROCESSOR_H */ diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 4b8fd6d..68e88e5 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -248,7 +248,7 @@ static int __init init_cr16_clocksource(void) * different sockets, so mark them unstable and lower rating on * multi-socket SMP systems. */ - if (num_online_cpus() > 1) { + if (num_online_cpus() > 1 && !running_on_qemu) { int cpu; unsigned long cpu0_loc; cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; -- cgit v1.1 From 636a415bcc7f4fd020ece8fd5fc648c4cef19c34 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 12 Feb 2018 21:43:55 +0100 Subject: parisc: Reduce irq overhead when run in qemu When run under QEMU, calling mfctl(16) creates some overhead because the qemu timer has to be scaled and moved into the register. This patch reduces the number of calls to mfctl(16) by moving the calls out of the loops. Additionally, increase the minimal time interval to 8000 cycles instead of 500 to compensate possible QEMU delays when delivering interrupts. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.14+ --- arch/parisc/kernel/time.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 68e88e5..f7e6845 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -76,10 +76,10 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) next_tick = cpuinfo->it_value; /* Calculate how many ticks have elapsed. */ + now = mfctl(16); do { ++ticks_elapsed; next_tick += cpt; - now = mfctl(16); } while (next_tick - now > cpt); /* Store (in CR16 cycles) up to when we are accounting right now. */ @@ -103,16 +103,17 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) * if one or the other wrapped. If "now" is "bigger" we'll end up * with a very large unsigned number. */ - while (next_tick - mfctl(16) > cpt) + now = mfctl(16); + while (next_tick - now > cpt) next_tick += cpt; /* Program the IT when to deliver the next interrupt. * Only bottom 32-bits of next_tick are writable in CR16! * Timer interrupt will be delivered at least a few hundred cycles - * after the IT fires, so if we are too close (<= 500 cycles) to the + * after the IT fires, so if we are too close (<= 8000 cycles) to the * next cycle, simply skip it. */ - if (next_tick - mfctl(16) <= 500) + if (next_tick - now <= 8000) next_tick += cpt; mtctl(next_tick, 16); -- cgit v1.1 From 7c4246797b84e55e2dfaaf8a18033de9df7c18c1 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Tue, 27 Feb 2018 16:42:13 +0100 Subject: i2c: octeon: Prevent error message on bus error The error message: [Fri Feb 16 13:42:13 2018] i2c-thunderx 0000:01:09.4: unhandled state: 0 is mis-leading as state 0 (bus error) is not an unknown state. Return -EIO as before but avoid printing the message. Also rename STAT_ERROR to STATE_BUS_ERROR. Signed-off-by: Jan Glauber Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-octeon-core.c | 1 + drivers/i2c/busses/i2c-octeon-core.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index 1d8775799..d960790 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -233,6 +233,7 @@ static int octeon_i2c_check_status(struct octeon_i2c *i2c, int final_read) return -EOPNOTSUPP; case STAT_TXDATA_NAK: + case STAT_BUS_ERROR: return -EIO; case STAT_TXADDR_NAK: case STAT_RXADDR_NAK: diff --git a/drivers/i2c/busses/i2c-octeon-core.h b/drivers/i2c/busses/i2c-octeon-core.h index a7ef198..9bb9f64 100644 --- a/drivers/i2c/busses/i2c-octeon-core.h +++ b/drivers/i2c/busses/i2c-octeon-core.h @@ -43,7 +43,7 @@ #define TWSI_CTL_AAK 0x04 /* Assert ACK */ /* Status values */ -#define STAT_ERROR 0x00 +#define STAT_BUS_ERROR 0x00 #define STAT_START 0x08 #define STAT_REP_START 0x10 #define STAT_TXADDR_ACK 0x18 -- cgit v1.1 From 61e18270f604c744ed9f8f1b740022516f9726f8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 1 Mar 2018 14:40:52 -0800 Subject: s390: Fix runtime warning about negative pgtables_bytes When running s390 images with 'compat' processes, the following BUG is seen repeatedly. BUG: non-zero pgtables_bytes on freeing mm: -16384 Bisect points to commit b4e98d9ac775 ("mm: account pud page tables"). Analysis shows that init_new_context() is called with mm->context.asce_limit set to _REGION3_SIZE. In this situation, pgtables_bytes remains set to 0 and is not increased. The message is displayed when the affected process dies and mm_dec_nr_puds() is called. Cc: Kirill A. Shutemov Cc: Heiko Carstens Fixes: b4e98d9ac775 ("mm: account pud page tables") Signed-off-by: Guenter Roeck Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 65154ea..6c8ce15 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -63,6 +63,7 @@ static inline int init_new_context(struct task_struct *tsk, _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; /* pgd_alloc() did not account this pmd */ mm_inc_nr_pmds(mm); + mm_inc_nr_puds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; -- cgit v1.1 From 84eef2b2187ed73c0e4520cbfeb874e964a0b56a Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Thu, 1 Mar 2018 21:07:18 -0800 Subject: rds: Incorrect reference counting in TCP socket creation Commit 0933a578cd55 ("rds: tcp: use sock_create_lite() to create the accept socket") has a reference counting issue in TCP socket creation when accepting a new connection. The code uses sock_create_lite() to create a kernel socket. But it does not do __module_get() on the socket owner. When the connection is shutdown and sock_release() is called to free the socket, the owner's reference count is decremented and becomes incorrect. Note that this bug only shows up when the socket owner is configured as a kernel module. v2: Update comments Fixes: 0933a578cd55 ("rds: tcp: use sock_create_lite() to create the accept socket") Signed-off-by: Ka-Cheong Poon Acked-by: Santosh Shilimkar Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp_listen.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index c061d6e..2257118 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -142,12 +142,20 @@ int rds_tcp_accept_one(struct socket *sock) if (ret) goto out; - new_sock->type = sock->type; - new_sock->ops = sock->ops; ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); if (ret < 0) goto out; + /* sock_create_lite() does not get a hold on the owner module so we + * need to do it here. Note that sock_release() uses sock->ops to + * determine if it needs to decrement the reference count. So set + * sock->ops after calling accept() in case that fails. And there's + * no need to do try_module_get() as the listener should have a hold + * already. + */ + new_sock->ops = sock->ops; + __module_get(new_sock->ops->owner); + ret = rds_tcp_keepalive(new_sock); if (ret < 0) goto out; -- cgit v1.1 From a11761c2dda64737bfe47e7c15545d4648f8573c Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 1 Mar 2018 10:57:21 +0200 Subject: Coccinelle: memdup: Fix typo in warning messages Replace 'kmemdep' with 'kmemdup' in warning messages. Signed-off-by: Dafna Hirschfeld Acked-by: Julia Lawall Acked-by: Nicolas Palix Signed-off-by: Masahiro Yamada --- scripts/coccinelle/api/memdup.cocci | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/coccinelle/api/memdup.cocci b/scripts/coccinelle/api/memdup.cocci index 1249b72..8fd6437 100644 --- a/scripts/coccinelle/api/memdup.cocci +++ b/scripts/coccinelle/api/memdup.cocci @@ -56,10 +56,10 @@ statement S; p << r.p; @@ -coccilib.org.print_todo(p[0], "WARNING opportunity for kmemdep") +coccilib.org.print_todo(p[0], "WARNING opportunity for kmemdup") @script:python depends on report@ p << r.p; @@ -coccilib.report.print_report(p[0], "WARNING opportunity for kmemdep") +coccilib.report.print_report(p[0], "WARNING opportunity for kmemdup") -- cgit v1.1 From 5ae6fcc4bb82bd05996cc685b8786c586637e56d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 2 Mar 2018 16:05:12 +0900 Subject: kconfig: fix line number in recursive inclusion error message When recursive inclusion is detected, the line number of the last 'included from:' is wrong. [Test Case] Kconfig: -------->8-------- source "Kconfig2" -------->8-------- Kconfig2: -------->8-------- source "Kconfig3" -------->8-------- Kconfig3: -------->8-------- source "Kconfig" -------->8-------- [Result] $ make allyesconfig scripts/kconfig/conf --allyesconfig Kconfig Kconfig:1: recursive inclusion detected. Inclusion path: current file : 'Kconfig' included from: 'Kconfig3:1' included from: 'Kconfig2:1' included from: 'Kconfig:3' scripts/kconfig/Makefile:89: recipe for target 'allyesconfig' failed make[1]: *** [allyesconfig] Error 1 Makefile:512: recipe for target 'allyesconfig' failed make: *** [allyesconfig] Error 2 where we expect current file : 'Kconfig' included from: 'Kconfig3:1' included from: 'Kconfig2:1' included from: 'Kconfig:1' The 'iter->lineno+1' in the second fpinrtf() should be 'iter->lineno-1'. I refactored the code to merge the two fprintf() calls. Signed-off-by: Masahiro Yamada Reviewed-by: Ulf Magnusson --- scripts/kconfig/zconf.l | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 02de6fe..88b650e 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -332,16 +332,12 @@ void zconf_nextfile(const char *name) "Inclusion path:\n current file : '%s'\n", zconf_curname(), zconf_lineno(), zconf_curname()); - iter = current_file->parent; - while (iter && \ - strcmp(iter->name,current_file->name)) { - fprintf(stderr, " included from: '%s:%d'\n", - iter->name, iter->lineno-1); + iter = current_file; + do { iter = iter->parent; - } - if (iter) fprintf(stderr, " included from: '%s:%d'\n", - iter->name, iter->lineno+1); + iter->name, iter->lineno - 1); + } while (strcmp(iter->name, current_file->name)); exit(1); } } -- cgit v1.1 From ba004a2955f759946d8c98ca1a9c8d09818b1223 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 1 Mar 2018 13:04:28 -0700 Subject: selftests: memory-hotplug: fix emit_tests regression Commit 16c513b13477 ("selftests: memory-hotplug: silence test command echo") introduced regression in emit_tests and results in the following failure when selftests are installed and run. Fix it. Running tests in memory-hotplug ======================================== ./run_kselftest.sh: line 121: @./mem-on-off-test.sh: No such file or directory selftests: memory-hotplug [FAIL] Fixes: 16c513b13477 (selftests: memory-hotplug: silence test command echo") Reported-by: Naresh Kamboju Tested-by: Anders Roxell Signed-off-by: Shuah Khan --- tools/testing/selftests/memory-hotplug/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 183b468..686da51 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -5,7 +5,8 @@ include ../lib.mk TEST_PROGS := mem-on-off-test.sh override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" -override EMIT_TESTS := echo "$(RUN_TESTS)" + +override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))" run_full_test: @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" -- cgit v1.1 From fde9fc766e96c494b82931b1d270a9a751be07c0 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 19 Feb 2018 16:55:06 +0000 Subject: signals: Move put_compat_sigset to compat.h to silence hardened usercopy Since commit afcc90f8621e ("usercopy: WARN() on slab cache usercopy region violations"), MIPS systems booting with a compat root filesystem emit a warning when copying compat siginfo to userspace: WARNING: CPU: 0 PID: 953 at mm/usercopy.c:81 usercopy_warn+0x98/0xe8 Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLAB object 'task_struct' (offset 1432, size 16)! Modules linked in: CPU: 0 PID: 953 Comm: S01logging Not tainted 4.16.0-rc2 #10 Stack : ffffffff808c0000 0000000000000000 0000000000000001 65ac85163f3bdc4a 65ac85163f3bdc4a 0000000000000000 90000000ff667ab8 ffffffff808c0000 00000000000003f8 ffffffff808d0000 00000000000000d1 0000000000000000 000000000000003c 0000000000000000 ffffffff808c8ca8 ffffffff808d0000 ffffffff808d0000 ffffffff80810000 fffffc0000000000 ffffffff80785c30 0000000000000009 0000000000000051 90000000ff667eb0 90000000ff667db0 000000007fe0d938 0000000000000018 ffffffff80449958 0000000020052798 ffffffff808c0000 90000000ff664000 90000000ff667ab0 00000000100c0000 ffffffff80698810 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ffffffff8010d02c 65ac85163f3bdc4a ... Call Trace: [] show_stack+0x9c/0x130 [] dump_stack+0x90/0xd0 [] __warn+0x100/0x118 [] warn_slowpath_fmt+0x4c/0x70 [] usercopy_warn+0x98/0xe8 [] __check_object_size+0xfc/0x250 [] put_compat_sigset+0x30/0x88 [] setup_rt_frame_n32+0xc4/0x160 [] do_signal+0x19c/0x230 [] do_notify_resume+0x60/0x78 [] work_notifysig+0x10/0x18 ---[ end trace 88fffbf69147f48a ]--- Commit 5905429ad856 ("fork: Provide usercopy whitelisting for task_struct") noted that: "While the blocked and saved_sigmask fields of task_struct are copied to userspace (via sigmask_to_save() and setup_rt_frame()), it is always copied with a static length (i.e. sizeof(sigset_t))." However, this is not true in the case of compat signals, whose sigset is copied by put_compat_sigset and receives size as an argument. At most call sites, put_compat_sigset is copying a sigset from the current task_struct. This triggers a warning when CONFIG_HARDENED_USERCOPY is active. However, by marking this function as static inline, the warning can be avoided because in all of these cases the size is constant at compile time, which is allowed. The only site where this is not the case is handling the rt_sigpending syscall, but there the copy is being made from a stack local variable so does not trigger the warning. Move put_compat_sigset to compat.h, and mark it static inline. This fixes the WARN on MIPS. Fixes: afcc90f8621e ("usercopy: WARN() on slab cache usercopy region violations") Signed-off-by: Matt Redfearn Acked-by: Kees Cook Cc: "Dmitry V . Levin" Cc: Al Viro Cc: kernel-hardening@lists.openwall.com Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/18639/ Signed-off-by: James Hogan --- include/linux/compat.h | 26 ++++++++++++++++++++++++-- kernel/compat.c | 19 ------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 8a96438..c4139c7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -17,6 +17,7 @@ #include #include #include /* for aio_context_t */ +#include #include #include @@ -550,8 +551,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); -extern int put_compat_sigset(compat_sigset_t __user *compat, - const sigset_t *set, unsigned int size); + +/* + * Defined inline such that size can be compile time constant, which avoids + * CONFIG_HARDENED_USERCOPY complaining about copies from task_struct + */ +static inline int +put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, + unsigned int size) +{ + /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */ +#ifdef __BIG_ENDIAN + compat_sigset_t v; + switch (_NSIG_WORDS) { + case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; + case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2]; + case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1]; + case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0]; + } + return copy_to_user(compat, &v, size) ? -EFAULT : 0; +#else + return copy_to_user(compat, set, size) ? -EFAULT : 0; +#endif +} asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, diff --git a/kernel/compat.c b/kernel/compat.c index 3247fe7..3f5fa89 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -488,25 +488,6 @@ get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat) } EXPORT_SYMBOL_GPL(get_compat_sigset); -int -put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, - unsigned int size) -{ - /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */ -#ifdef __BIG_ENDIAN - compat_sigset_t v; - switch (_NSIG_WORDS) { - case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; - case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2]; - case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1]; - case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0]; - } - return copy_to_user(compat, &v, size) ? -EFAULT : 0; -#else - return copy_to_user(compat, set, size) ? -EFAULT : 0; -#endif -} - #ifdef CONFIG_NUMA COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, compat_uptr_t __user *, pages32, -- cgit v1.1 From 94db151dc89262bfa82922c44e8320cea2334667 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Feb 2018 10:34:02 -0800 Subject: vfio: disable filesystem-dax page pinning Filesystem-DAX is incompatible with 'longterm' page pinning. Without page cache indirection a DAX mapping maps filesystem blocks directly. This means that the filesystem must not modify a file's block map while any page in a mapping is pinned. In order to prevent the situation of userspace holding of filesystem operations indefinitely, disallow 'longterm' Filesystem-DAX mappings. RDMA has the same conflict and the plan there is to add a 'with lease' mechanism to allow the kernel to notify userspace that the mapping is being torn down for block-map maintenance. Perhaps something similar can be put in place for vfio. Note that xfs and ext4 still report: "DAX enabled. Warning: EXPERIMENTAL, use at your own risk" ...at mount time, and resolving the dax-dma-vs-truncate problem is one of the last hurdles to remove that designation. Acked-by: Alex Williamson Cc: Michal Hocko Cc: kvm@vger.kernel.org Cc: Reported-by: Haozhong Zhang Tested-by: Haozhong Zhang Fixes: d475c6346a38 ("dax,ext2: replace XIP read and write with DAX I/O") Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/vfio/vfio_iommu_type1.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e30e29a..45657e2 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -338,11 +338,12 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, { struct page *page[1]; struct vm_area_struct *vma; + struct vm_area_struct *vmas[1]; int ret; if (mm == current->mm) { - ret = get_user_pages_fast(vaddr, 1, !!(prot & IOMMU_WRITE), - page); + ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE), + page, vmas); } else { unsigned int flags = 0; @@ -351,7 +352,18 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, down_read(&mm->mmap_sem); ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page, - NULL, NULL); + vmas, NULL); + /* + * The lifetime of a vaddr_get_pfn() page pin is + * userspace-controlled. In the fs-dax case this could + * lead to indefinite stalls in filesystem operations. + * Disallow attempts to pin fs-dax pages via this + * interface. + */ + if (ret > 0 && vma_is_fsdax(vmas[0])) { + ret = -EOPNOTSUPP; + put_page(page[0]); + } up_read(&mm->mmap_sem); } -- cgit v1.1 From 50186e121ea1adcc43d0f9f790ee45e0b0f1202f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 2 Mar 2018 22:04:59 +0900 Subject: MAINTAINERS: take over Kconfig maintainership I have recently picked up Kconfig patches to my tree without any declaration. Making it official now. Signed-off-by: Masahiro Yamada Acked-by: Linus Torvalds --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 93a12af..b708b6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7600,8 +7600,10 @@ F: mm/kasan/ F: scripts/Makefile.kasan KCONFIG +M: Masahiro Yamada +T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig L: linux-kbuild@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/kbuild/kconfig-language.txt F: scripts/kconfig/ -- cgit v1.1 From 5fdf8e5ba5666fe153bd61f851a40078a6347822 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 2 Mar 2018 19:31:40 -0800 Subject: libnvdimm: re-enable deep flush for pmem devices via fsync() Re-enable deep flush so that users always have a way to be sure that a write makes it all the way out to media. Writes from the PMEM driver always arrive at the NVDIMM since movnt is used to bypass the cache, and the driver relies on the ADR (Asynchronous DRAM Refresh) mechanism to flush write buffers on power failure. The Deep Flush mechanism is there to explicitly write buffers to protect against (rare) ADR failure. This change prevents a regression in deep flush behavior so that applications can continue to depend on fsync() as a mechanism to trigger deep flush in the filesystem-DAX case. Fixes: 06e8ccdab15f4 ("acpi: nfit: Add support for detect platform CPU cache...") Reviewed-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 10041ac..06f8dcc 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -335,8 +335,7 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } - wbc = nvdimm_has_cache(nd_region) && - !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags); + wbc = nvdimm_has_cache(nd_region); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { -- cgit v1.1 From 949b93250a566cc7a578b4f829cf76b70d19a62c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Feb 2018 19:34:11 -0800 Subject: memremap: fix softlockup reports at teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cond_resched() currently in the setup path needs to be duplicated in the teardown path. Rather than require each instance of for_each_device_pfn() to open code the same sequence, embed it in the helper. Link: https://github.com/intel/ixpdimm_sw/issues/11 Cc: "Jérôme Glisse" Cc: Michal Hocko Cc: Christoph Hellwig Cc: Fixes: 71389703839e ("mm, zone_device: Replace {get, put}_zone_device_page()...") Signed-off-by: Dan Williams --- kernel/memremap.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 4849be5..4dd4274 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -275,8 +275,15 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap) return (res->start + resource_size(res)) >> PAGE_SHIFT; } +static unsigned long pfn_next(unsigned long pfn) +{ + if (pfn % 1024 == 0) + cond_resched(); + return pfn + 1; +} + #define for_each_device_pfn(pfn, map) \ - for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++) + for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn)) static void devm_memremap_pages_release(void *data) { @@ -337,10 +344,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) resource_size_t align_start, align_size, align_end; struct vmem_altmap *altmap = pgmap->altmap_valid ? &pgmap->altmap : NULL; + struct resource *res = &pgmap->res; unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; - int error, nid, is_ram, i = 0; - struct resource *res = &pgmap->res; + int error, nid, is_ram; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -409,8 +416,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) list_del(&page->lru); page->pgmap = pgmap; percpu_ref_get(pgmap->ref); - if (!(++i % 1024)) - cond_resched(); } devm_add_action(dev, devm_memremap_pages_release, pgmap); -- cgit v1.1 From 61bd0f66ff92d5ce765ff9850fd3cbfec773c560 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 2 Mar 2018 11:51:56 +0100 Subject: KVM: PPC: Book3S HV: Fix guest time accounting with VIRT_CPU_ACCOUNTING_GEN Since commit 8b24e69fc47e ("KVM: PPC: Book3S HV: Close race with testing for signals on guest entry"), if CONFIG_VIRT_CPU_ACCOUNTING_GEN is set, the guest time is not accounted to guest time and user time, but instead to system time. This is because guest_enter()/guest_exit() are called while interrupts are disabled and the tick counter cannot be updated between them. To fix that, move guest_exit() after local_irq_enable(), and as guest_enter() is called with IRQ disabled, call guest_enter_irqoff() instead. Fixes: 8b24e69fc47e ("KVM: PPC: Book3S HV: Close race with testing for signals on guest entry") Signed-off-by: Laurent Vivier Reviewed-by: Paolo Bonzini Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b4a538b..9cb9448 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ trace_hardirqs_on(); - guest_enter(); + guest_enter_irqoff(); srcu_idx = srcu_read_lock(&vc->kvm->srcu); @@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_read_unlock(&vc->kvm->srcu, srcu_idx); - guest_exit(); - trace_hardirqs_off(); set_irq_happened(trap); @@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); local_irq_enable(); + guest_exit(); /* Let secondaries go back to the offline loop */ for (i = 0; i < controlled_threads; ++i) { -- cgit v1.1 From 7bd3e7b743956afbec30fb525bc3c5e22e3d475c Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 28 Feb 2018 00:59:12 -0800 Subject: watchdog: f71808e_wdt: Fix magic close handling Watchdog close is "expected" when any byte is 'V' not just the last one. Writing "V" to the device fails because the last byte is the end of string. $ echo V > /dev/watchdog f71808e_wdt: Unexpected close, not stopping watchdog! Signed-off-by: Igor Pylypiv Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/f71808e_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index e0678c1..3a33c53 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -566,7 +566,8 @@ static ssize_t watchdog_write(struct file *file, const char __user *buf, char c; if (get_user(c, buf + i)) return -EFAULT; - expect_close = (c == 'V'); + if (c == 'V') + expect_close = true; } /* Properly order writes across fork()ed processes */ -- cgit v1.1 From 93ac3deb7c220cbcec032a967220a1f109d58431 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Wed, 28 Feb 2018 02:52:20 -0800 Subject: watchdog: sbsa: use 32-bit read for WCV According to SBSA spec v3.1 section 5.3: All registers are 32 bits in size and should be accessed using 32-bit reads and writes. If an access size other than 32 bits is used then the results are IMPLEMENTATION DEFINED. [...] The Generic Watchdog is little-endian The current code uses readq to read the watchdog compare register which does a 64-bit access. This fails on ThunderX2 which does not implement 64-bit access to this register. Fix this by using lo_hi_readq() that does two 32-bit reads. Signed-off-by: Jayachandran C Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 316c2eb..e8bd988 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -50,6 +50,7 @@ */ #include +#include #include #include #include @@ -159,7 +160,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd) !(readl(gwdt->control_base + SBSA_GWDT_WCS) & SBSA_GWDT_WCS_WS0)) timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR); - timeleft += readq(gwdt->control_base + SBSA_GWDT_WCV) - + timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) - arch_counter_get_cntvct(); do_div(timeleft, gwdt->clk); -- cgit v1.1 From 2b3d89b402b085b08498e896c65267a145bed486 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Sun, 25 Feb 2018 20:22:20 -0700 Subject: watchdog: hpwdt: Remove legacy NMI sourcing. Gen8 and prior Proliant systems supported the "CRU" interface to firmware. This interfaces allows linux to "call back" into firmware to source the cause of an NMI. This feature isn't fully utilized as the actual source of the NMI isn't printed, the driver only indicates that the source couldn't be determined when the call fails. With the advent of Gen9, iCRU replaces the CRU. The call back feature is no longer available in firmware. To be compatible and not attempt to call back into firmware on system not supporting CRU, the SMBIOS table is consulted to determine if it is safe to make the call back or not. This results in about half of the driver code being devoted to either making CRU calls or determing if it is safe to make CRU calls. As noted, the driver isn't really using the results of the CRU calls. Furthermore, as a consequence of the Spectre security issue, the BIOS/EFI calls are being wrapped into Spectre-disabling section. Removing the call back in hpwdt_pretimeout assists in this effort. As the CRU sourcing of the NMI isn't required for handling the NMI and there are security concerns with making the call back, remove the legacy (pre Gen9) NMI sourcing and the DMI code to determine if the system had the CRU interface. Signed-off-by: Jerry Hoemann Acked-by: Ingo Molnar Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/hpwdt.c | 501 +---------------------------------------------- 1 file changed, 9 insertions(+), 492 deletions(-) diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index f1f00df..b0a1580 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -28,16 +28,7 @@ #include #include #include -#ifdef CONFIG_HPWDT_NMI_DECODING -#include -#include -#include -#include -#include -#include -#endif /* CONFIG_HPWDT_NMI_DECODING */ #include -#include #define HPWDT_VERSION "1.4.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) @@ -48,6 +39,9 @@ static unsigned int soft_margin = DEFAULT_MARGIN; /* in seconds */ static unsigned int reload; /* the computed soft_margin */ static bool nowayout = WATCHDOG_NOWAYOUT; +#ifdef CONFIG_HPWDT_NMI_DECODING +static unsigned int allow_kdump = 1; +#endif static char expect_release; static unsigned long hpwdt_is_open; @@ -63,373 +57,6 @@ static const struct pci_device_id hpwdt_devices[] = { }; MODULE_DEVICE_TABLE(pci, hpwdt_devices); -#ifdef CONFIG_HPWDT_NMI_DECODING -#define PCI_BIOS32_SD_VALUE 0x5F32335F /* "_32_" */ -#define CRU_BIOS_SIGNATURE_VALUE 0x55524324 -#define PCI_BIOS32_PARAGRAPH_LEN 16 -#define PCI_ROM_BASE1 0x000F0000 -#define ROM_SIZE 0x10000 - -struct bios32_service_dir { - u32 signature; - u32 entry_point; - u8 revision; - u8 length; - u8 checksum; - u8 reserved[5]; -}; - -/* type 212 */ -struct smbios_cru64_info { - u8 type; - u8 byte_length; - u16 handle; - u32 signature; - u64 physical_address; - u32 double_length; - u32 double_offset; -}; -#define SMBIOS_CRU64_INFORMATION 212 - -/* type 219 */ -struct smbios_proliant_info { - u8 type; - u8 byte_length; - u16 handle; - u32 power_features; - u32 omega_features; - u32 reserved; - u32 misc_features; -}; -#define SMBIOS_ICRU_INFORMATION 219 - - -struct cmn_registers { - union { - struct { - u8 ral; - u8 rah; - u16 rea2; - }; - u32 reax; - } u1; - union { - struct { - u8 rbl; - u8 rbh; - u8 reb2l; - u8 reb2h; - }; - u32 rebx; - } u2; - union { - struct { - u8 rcl; - u8 rch; - u16 rec2; - }; - u32 recx; - } u3; - union { - struct { - u8 rdl; - u8 rdh; - u16 red2; - }; - u32 redx; - } u4; - - u32 resi; - u32 redi; - u16 rds; - u16 res; - u32 reflags; -} __attribute__((packed)); - -static unsigned int hpwdt_nmi_decoding; -static unsigned int allow_kdump = 1; -static unsigned int is_icru; -static unsigned int is_uefi; -static DEFINE_SPINLOCK(rom_lock); -static void *cru_rom_addr; -static struct cmn_registers cmn_regs; - -extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs, - unsigned long *pRomEntry); - -#ifdef CONFIG_X86_32 -/* --32 Bit Bios------------------------------------------------------------ */ - -#define HPWDT_ARCH 32 - -asm(".text \n\t" - ".align 4 \n\t" - ".globl asminline_call \n" - "asminline_call: \n\t" - "pushl %ebp \n\t" - "movl %esp, %ebp \n\t" - "pusha \n\t" - "pushf \n\t" - "push %es \n\t" - "push %ds \n\t" - "pop %es \n\t" - "movl 8(%ebp),%eax \n\t" - "movl 4(%eax),%ebx \n\t" - "movl 8(%eax),%ecx \n\t" - "movl 12(%eax),%edx \n\t" - "movl 16(%eax),%esi \n\t" - "movl 20(%eax),%edi \n\t" - "movl (%eax),%eax \n\t" - "push %cs \n\t" - "call *12(%ebp) \n\t" - "pushf \n\t" - "pushl %eax \n\t" - "movl 8(%ebp),%eax \n\t" - "movl %ebx,4(%eax) \n\t" - "movl %ecx,8(%eax) \n\t" - "movl %edx,12(%eax) \n\t" - "movl %esi,16(%eax) \n\t" - "movl %edi,20(%eax) \n\t" - "movw %ds,24(%eax) \n\t" - "movw %es,26(%eax) \n\t" - "popl %ebx \n\t" - "movl %ebx,(%eax) \n\t" - "popl %ebx \n\t" - "movl %ebx,28(%eax) \n\t" - "pop %es \n\t" - "popf \n\t" - "popa \n\t" - "leave \n\t" - "ret \n\t" - ".previous"); - - -/* - * cru_detect - * - * Routine Description: - * This function uses the 32-bit BIOS Service Directory record to - * search for a $CRU record. - * - * Return Value: - * 0 : SUCCESS - * <0 : FAILURE - */ -static int cru_detect(unsigned long map_entry, - unsigned long map_offset) -{ - void *bios32_map; - unsigned long *bios32_entrypoint; - unsigned long cru_physical_address; - unsigned long cru_length; - unsigned long physical_bios_base = 0; - unsigned long physical_bios_offset = 0; - int retval = -ENODEV; - - bios32_map = ioremap(map_entry, (2 * PAGE_SIZE)); - - if (bios32_map == NULL) - return -ENODEV; - - bios32_entrypoint = bios32_map + map_offset; - - cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE; - - set_memory_x((unsigned long)bios32_map, 2); - asminline_call(&cmn_regs, bios32_entrypoint); - - if (cmn_regs.u1.ral != 0) { - pr_warn("Call succeeded but with an error: 0x%x\n", - cmn_regs.u1.ral); - } else { - physical_bios_base = cmn_regs.u2.rebx; - physical_bios_offset = cmn_regs.u4.redx; - cru_length = cmn_regs.u3.recx; - cru_physical_address = - physical_bios_base + physical_bios_offset; - - /* If the values look OK, then map it in. */ - if ((physical_bios_base + physical_bios_offset)) { - cru_rom_addr = - ioremap(cru_physical_address, cru_length); - if (cru_rom_addr) { - set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK, - (cru_length + PAGE_SIZE - 1) >> PAGE_SHIFT); - retval = 0; - } - } - - pr_debug("CRU Base Address: 0x%lx\n", physical_bios_base); - pr_debug("CRU Offset Address: 0x%lx\n", physical_bios_offset); - pr_debug("CRU Length: 0x%lx\n", cru_length); - pr_debug("CRU Mapped Address: %p\n", &cru_rom_addr); - } - iounmap(bios32_map); - return retval; -} - -/* - * bios_checksum - */ -static int bios_checksum(const char __iomem *ptr, int len) -{ - char sum = 0; - int i; - - /* - * calculate checksum of size bytes. This should add up - * to zero if we have a valid header. - */ - for (i = 0; i < len; i++) - sum += ptr[i]; - - return ((sum == 0) && (len > 0)); -} - -/* - * bios32_present - * - * Routine Description: - * This function finds the 32-bit BIOS Service Directory - * - * Return Value: - * 0 : SUCCESS - * <0 : FAILURE - */ -static int bios32_present(const char __iomem *p) -{ - struct bios32_service_dir *bios_32_ptr; - int length; - unsigned long map_entry, map_offset; - - bios_32_ptr = (struct bios32_service_dir *) p; - - /* - * Search for signature by checking equal to the swizzled value - * instead of calling another routine to perform a strcmp. - */ - if (bios_32_ptr->signature == PCI_BIOS32_SD_VALUE) { - length = bios_32_ptr->length * PCI_BIOS32_PARAGRAPH_LEN; - if (bios_checksum(p, length)) { - /* - * According to the spec, we're looking for the - * first 4KB-aligned address below the entrypoint - * listed in the header. The Service Directory code - * is guaranteed to occupy no more than 2 4KB pages. - */ - map_entry = bios_32_ptr->entry_point & ~(PAGE_SIZE - 1); - map_offset = bios_32_ptr->entry_point - map_entry; - - return cru_detect(map_entry, map_offset); - } - } - return -ENODEV; -} - -static int detect_cru_service(void) -{ - char __iomem *p, *q; - int rc = -1; - - /* - * Search from 0x0f0000 through 0x0fffff, inclusive. - */ - p = ioremap(PCI_ROM_BASE1, ROM_SIZE); - if (p == NULL) - return -ENOMEM; - - for (q = p; q < p + ROM_SIZE; q += 16) { - rc = bios32_present(q); - if (!rc) - break; - } - iounmap(p); - return rc; -} -/* ------------------------------------------------------------------------- */ -#endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_64 -/* --64 Bit Bios------------------------------------------------------------ */ - -#define HPWDT_ARCH 64 - -asm(".text \n\t" - ".align 4 \n\t" - ".globl asminline_call \n\t" - ".type asminline_call, @function \n\t" - "asminline_call: \n\t" - FRAME_BEGIN - "pushq %rax \n\t" - "pushq %rbx \n\t" - "pushq %rdx \n\t" - "pushq %r12 \n\t" - "pushq %r9 \n\t" - "movq %rsi, %r12 \n\t" - "movq %rdi, %r9 \n\t" - "movl 4(%r9),%ebx \n\t" - "movl 8(%r9),%ecx \n\t" - "movl 12(%r9),%edx \n\t" - "movl 16(%r9),%esi \n\t" - "movl 20(%r9),%edi \n\t" - "movl (%r9),%eax \n\t" - "call *%r12 \n\t" - "pushfq \n\t" - "popq %r12 \n\t" - "movl %eax, (%r9) \n\t" - "movl %ebx, 4(%r9) \n\t" - "movl %ecx, 8(%r9) \n\t" - "movl %edx, 12(%r9) \n\t" - "movl %esi, 16(%r9) \n\t" - "movl %edi, 20(%r9) \n\t" - "movq %r12, %rax \n\t" - "movl %eax, 28(%r9) \n\t" - "popq %r9 \n\t" - "popq %r12 \n\t" - "popq %rdx \n\t" - "popq %rbx \n\t" - "popq %rax \n\t" - FRAME_END - "ret \n\t" - ".previous"); - -/* - * dmi_find_cru - * - * Routine Description: - * This function checks whether or not a SMBIOS/DMI record is - * the 64bit CRU info or not - */ -static void dmi_find_cru(const struct dmi_header *dm, void *dummy) -{ - struct smbios_cru64_info *smbios_cru64_ptr; - unsigned long cru_physical_address; - - if (dm->type == SMBIOS_CRU64_INFORMATION) { - smbios_cru64_ptr = (struct smbios_cru64_info *) dm; - if (smbios_cru64_ptr->signature == CRU_BIOS_SIGNATURE_VALUE) { - cru_physical_address = - smbios_cru64_ptr->physical_address + - smbios_cru64_ptr->double_offset; - cru_rom_addr = ioremap(cru_physical_address, - smbios_cru64_ptr->double_length); - set_memory_x((unsigned long)cru_rom_addr & PAGE_MASK, - smbios_cru64_ptr->double_length >> PAGE_SHIFT); - } - } -} - -static int detect_cru_service(void) -{ - cru_rom_addr = NULL; - - dmi_walk(dmi_find_cru, NULL); - - /* if cru_rom_addr has been set then we found a CRU service */ - return ((cru_rom_addr != NULL) ? 0 : -ENODEV); -} -/* ------------------------------------------------------------------------- */ -#endif /* CONFIG_X86_64 */ -#endif /* CONFIG_HPWDT_NMI_DECODING */ /* * Watchdog operations @@ -486,30 +113,12 @@ static int hpwdt_my_nmi(void) */ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs) { - unsigned long rom_pl; - static int die_nmi_called; - - if (!hpwdt_nmi_decoding) - return NMI_DONE; - if ((ulReason == NMI_UNKNOWN) && !hpwdt_my_nmi()) return NMI_DONE; - spin_lock_irqsave(&rom_lock, rom_pl); - if (!die_nmi_called && !is_icru && !is_uefi) - asminline_call(&cmn_regs, cru_rom_addr); - die_nmi_called = 1; - spin_unlock_irqrestore(&rom_lock, rom_pl); - if (allow_kdump) hpwdt_stop(); - if (!is_icru && !is_uefi) { - if (cmn_regs.u1.ral == 0) { - nmi_panic(regs, "An NMI occurred, but unable to determine source.\n"); - return NMI_HANDLED; - } - } nmi_panic(regs, "An NMI occurred. Depending on your system the reason " "for the NMI is logged in any one of the following " "resources:\n" @@ -675,84 +284,11 @@ static struct miscdevice hpwdt_miscdev = { * Init & Exit */ -#ifdef CONFIG_HPWDT_NMI_DECODING -#ifdef CONFIG_X86_LOCAL_APIC -static void hpwdt_check_nmi_decoding(struct pci_dev *dev) -{ - /* - * If nmi_watchdog is turned off then we can turn on - * our nmi decoding capability. - */ - hpwdt_nmi_decoding = 1; -} -#else -static void hpwdt_check_nmi_decoding(struct pci_dev *dev) -{ - dev_warn(&dev->dev, "NMI decoding is disabled. " - "Your kernel does not support a NMI Watchdog.\n"); -} -#endif /* CONFIG_X86_LOCAL_APIC */ - -/* - * dmi_find_icru - * - * Routine Description: - * This function checks whether or not we are on an iCRU-based server. - * This check is independent of architecture and needs to be made for - * any ProLiant system. - */ -static void dmi_find_icru(const struct dmi_header *dm, void *dummy) -{ - struct smbios_proliant_info *smbios_proliant_ptr; - - if (dm->type == SMBIOS_ICRU_INFORMATION) { - smbios_proliant_ptr = (struct smbios_proliant_info *) dm; - if (smbios_proliant_ptr->misc_features & 0x01) - is_icru = 1; - if (smbios_proliant_ptr->misc_features & 0x1400) - is_uefi = 1; - } -} static int hpwdt_init_nmi_decoding(struct pci_dev *dev) { +#ifdef CONFIG_HPWDT_NMI_DECODING int retval; - - /* - * On typical CRU-based systems we need to map that service in - * the BIOS. For 32 bit Operating Systems we need to go through - * the 32 Bit BIOS Service Directory. For 64 bit Operating - * Systems we get that service through SMBIOS. - * - * On systems that support the new iCRU service all we need to - * do is call dmi_walk to get the supported flag value and skip - * the old cru detect code. - */ - dmi_walk(dmi_find_icru, NULL); - if (!is_icru && !is_uefi) { - - /* - * We need to map the ROM to get the CRU service. - * For 32 bit Operating Systems we need to go through the 32 Bit - * BIOS Service Directory - * For 64 bit Operating Systems we get that service through SMBIOS. - */ - retval = detect_cru_service(); - if (retval < 0) { - dev_warn(&dev->dev, - "Unable to detect the %d Bit CRU Service.\n", - HPWDT_ARCH); - return retval; - } - - /* - * We know this is the only CRU call we need to make so lets keep as - * few instructions as possible once the NMI comes in. - */ - cmn_regs.u1.rah = 0x0D; - cmn_regs.u1.ral = 0x02; - } - /* * Only one function can register for NMI_UNKNOWN */ @@ -780,45 +316,26 @@ error: dev_warn(&dev->dev, "Unable to register a die notifier (err=%d).\n", retval); - if (cru_rom_addr) - iounmap(cru_rom_addr); return retval; +#endif /* CONFIG_HPWDT_NMI_DECODING */ + return 0; } static void hpwdt_exit_nmi_decoding(void) { +#ifdef CONFIG_HPWDT_NMI_DECODING unregister_nmi_handler(NMI_UNKNOWN, "hpwdt"); unregister_nmi_handler(NMI_SERR, "hpwdt"); unregister_nmi_handler(NMI_IO_CHECK, "hpwdt"); - if (cru_rom_addr) - iounmap(cru_rom_addr); -} -#else /* !CONFIG_HPWDT_NMI_DECODING */ -static void hpwdt_check_nmi_decoding(struct pci_dev *dev) -{ -} - -static int hpwdt_init_nmi_decoding(struct pci_dev *dev) -{ - return 0; +#endif } -static void hpwdt_exit_nmi_decoding(void) -{ -} -#endif /* CONFIG_HPWDT_NMI_DECODING */ - static int hpwdt_init_one(struct pci_dev *dev, const struct pci_device_id *ent) { int retval; /* - * Check if we can do NMI decoding or not - */ - hpwdt_check_nmi_decoding(dev); - - /* * First let's find out if we are on an iLO2+ server. We will * not run on a legacy ASM box. * So we only support the G5 ProLiant servers and higher. @@ -922,6 +439,6 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" #ifdef CONFIG_HPWDT_NMI_DECODING module_param(allow_kdump, int, 0); MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs"); -#endif /* !CONFIG_HPWDT_NMI_DECODING */ +#endif /* CONFIG_HPWDT_NMI_DECODING */ module_pci_driver(hpwdt_driver); -- cgit v1.1 From 317660940fd9dddd3201c2f92e25c27902c753fa Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 2 Mar 2018 07:22:30 -0800 Subject: perf/x86/intel/uncore: Fix Skylake UPI event format There is no event extension (bit 21) for SKX UPI, so use 'event' instead of 'event_ext'. Reported-by: Stephane Eranian Signed-off-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1520004150-4855-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6d8044a..22ec65b 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3606,7 +3606,7 @@ static struct intel_uncore_type skx_uncore_imc = { }; static struct attribute *skx_upi_uncore_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_umask_ext.attr, &format_attr_edge.attr, &format_attr_inv.attr, -- cgit v1.1 From 779b7931b27bfa80bac46d0115d229259aef580b Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 1 Mar 2018 17:13:37 +1100 Subject: net: rename skb_gso_validate_mtu -> skb_gso_validate_network_len If you take a GSO skb, and split it into packets, will the network length (L3 headers + L4 headers + payload) of those packets be small enough to fit within a given MTU? skb_gso_validate_mtu gives you the answer to that question. However, we recently added to add a way to validate the MAC length of a split GSO skb (L2+L3+L4+payload), and the names get confusing, so rename skb_gso_validate_mtu to skb_gso_validate_network_len Signed-off-by: Daniel Axtens Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 11 ++++++----- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/nf_flow_table_ipv4.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/nf_flow_table_ipv6.c | 2 +- net/mpls/af_mpls.c | 2 +- net/xfrm/xfrm_device.c | 2 +- 9 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c1e66bd..a057dd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3286,7 +3286,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); -bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 09bd89c..b637670 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4955,19 +4955,20 @@ static inline bool skb_gso_size_check(const struct sk_buff *skb, } /** - * skb_gso_validate_mtu - Return in case such skb fits a given MTU + * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? * * @skb: GSO skb * @mtu: MTU to validate against * - * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU - * once split. + * skb_gso_validate_network_len validates if a given skb will fit a + * wanted MTU once split. It considers L3 headers, L4 headers, and the + * payload. */ -bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu) +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) { return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); } -EXPORT_SYMBOL_GPL(skb_gso_validate_mtu); +EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); /** * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 2dd21c3..b54b948 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -55,7 +55,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) if (skb->ignore_df) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e8e675b..66340ab 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -248,7 +248,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, /* common case: seglen is <= mtu */ - if (skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_network_len(skb, mtu)) return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length exceeds the egress MTU. diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index 282b9cc..0cd46bf 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -186,7 +186,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 997c7f1..a8a9195 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -412,7 +412,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->ignore_df) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index d346705..207cb35 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -178,7 +178,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) if (skb->len <= mtu) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index e545a3c..7a4de6d 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -122,7 +122,7 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) if (skb->len <= mtu) return false; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 8e70291..e87d6c4 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -217,7 +217,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) if (skb->len <= mtu) goto ok; - if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) goto ok; } -- cgit v1.1 From ee78bbef8d63202ca0f2485aecf30b8c2b0088cc Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 1 Mar 2018 17:13:38 +1100 Subject: net: sched: tbf: handle GSO_BY_FRAGS case in enqueue tbf_enqueue() checks the size of a packet before enqueuing it. However, the GSO size check does not consider the GSO_BY_FRAGS case, and so will drop GSO SCTP packets, causing a massive drop in throughput. Use skb_gso_validate_mac_len() instead, as it does consider that case. Signed-off-by: Daniel Axtens Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 229172d..03225a8 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -188,7 +188,8 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) + if (skb_is_gso(skb) && + skb_gso_validate_mac_len(skb, q->max_size)) return tbf_segment(skb, sch, to_free); return qdisc_drop(skb, sch, to_free); } -- cgit v1.1 From 80f5974d15ea96c7112604c7999a83a502d15b9f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 1 Mar 2018 17:13:39 +1100 Subject: net: xfrm: use skb_gso_validate_network_len() to check gso sizes Replace skb_gso_network_seglen() with skb_gso_validate_network_len(), as it considers the GSO_BY_FRAGS case. Signed-off-by: Daniel Axtens Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/xfrm4_output.c | 3 ++- net/ipv6/xfrm6_output.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 94b8702..be980c1 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -30,7 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) mtu = dst_mtu(skb_dst(skb)); if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) { + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { skb->protocol = htons(ETH_P_IP); if (skb->sk) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8ae87d4..5959ce9 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -82,7 +82,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if ((!skb_is_gso(skb) && skb->len > mtu) || (skb_is_gso(skb) && - skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) { + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); -- cgit v1.1 From a4a77718ee4053a44aa40fe67247c1afb5ce2f1e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 1 Mar 2018 17:13:40 +1100 Subject: net: make skb_gso_*_seglen functions private They're very hard to use properly as they do not consider the GSO_BY_FRAGS case. Code should use skb_gso_validate_network_len and skb_gso_validate_mac_len as they do consider this case. Make the seglen functions static, which stops people using them outside of skbuff.c Signed-off-by: Daniel Axtens Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/skbuff.h | 33 --------------------------------- net/core/skbuff.c | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a057dd1..ddf77cf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3285,7 +3285,6 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); -unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); @@ -4104,38 +4103,6 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb) return !skb->head_frag || skb_cloned(skb); } -/** - * skb_gso_network_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_network_seglen is used to determine the real size of the - * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). - * - * The MAC/L2 header is not accounted for. - */ -static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - - skb_network_header(skb); - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_mac_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_mac_seglen is used to determine the real size of the - * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 - * headers (TCP/UDP). - */ -static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - return hdr_len + skb_gso_transport_seglen(skb); -} - /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b637670..0bb0d88 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4891,7 +4891,7 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); * * The MAC/L2 or network (IP, IPv6) headers are not accounted for. */ -unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); unsigned int thlen = 0; @@ -4913,7 +4913,40 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) */ return thlen + shinfo->gso_size; } -EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); + +/** + * skb_gso_network_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_network_seglen is used to determine the real size of the + * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). + * + * The MAC/L2 header is not accounted for. + */ +static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - + skb_network_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). + */ +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} /** * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS -- cgit v1.1 From 661e50bc853209e41a5c14a290ca4decc43cbfd1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Mar 2018 14:54:11 -0800 Subject: Linux 4.16-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 541f0c5..c4322de 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.1 From 0a8a1bf17e3af34f1f8d2368916a6327f8b3bfd5 Mon Sep 17 00:00:00 2001 From: Shalom Toledo Date: Thu, 1 Mar 2018 11:37:05 +0100 Subject: mlxsw: spectrum_switchdev: Check success of FDB add operation Until now, we assumed that in case of error when adding FDB entries, the write operation will fail, but this is not the case. Instead, we need to check that the number of entries reported in the response is equal to the number of entries specified in the request. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Reported-by: Ido Schimmel Signed-off-by: Shalom Toledo Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 29 ++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 593ad31..161bcdc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1203,6 +1203,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool dynamic) { char *sfd_pl; + u8 num_rec; int err; sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); @@ -1212,9 +1213,16 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), mac, fid, action, local_port); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); - kfree(sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; +out: + kfree(sfd_pl); return err; } @@ -1239,6 +1247,7 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, bool adding, bool dynamic) { char *sfd_pl; + u8 num_rec; int err; sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); @@ -1249,9 +1258,16 @@ static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, lag_vid, lag_id); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); - kfree(sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; +out: + kfree(sfd_pl); return err; } @@ -1296,6 +1312,7 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, u16 fid, u16 mid_idx, bool adding) { char *sfd_pl; + u8 num_rec; int err; sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); @@ -1305,7 +1322,15 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid, MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: kfree(sfd_pl); return err; } -- cgit v1.1 From 79f3a8e662c1ae6e85737eca9ae7d6b52cf87815 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 2 Mar 2018 14:44:39 +0100 Subject: tc-testing: skbmod: fix match value of ethertype iproute2 print_skbmod() prints the configured ethertype using format 0x%X: therefore, test 9aa8 systematically fails, because it configures action #4 using ethertype 0x0031, and expects 0x0031 when it reads it back. Changing the expected value to 0x31 lets the test result 'not ok' become 'ok'. tested with: # ./tdc.py -e 9aa8 Test 9aa8: Get a single skbmod action from a list All test results: 1..1 ok 1 9aa8 Get a single skbmod action from a list Fixes: cf797ac49b94 ("tc-testing: Add test cases for police and skbmod") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json index e3407505..90bba48 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -315,7 +315,7 @@ "cmdUnderTest": "$TC actions ls action skbmod", "expExitCode": "0", "verifyCmd": "$TC actions get action skbmod index 4", - "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031", + "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31", "matchCount": "1", "teardown": [ "$TC actions flush action skbmod" -- cgit v1.1 From 77f840e3e5f09c6d7d727e85e6e08276dd813d11 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 2 Mar 2018 18:41:16 +0100 Subject: ppp: prevent unregistered channels from connecting to PPP units PPP units don't hold any reference on the channels connected to it. It is the channel's responsibility to ensure that it disconnects from its unit before being destroyed. In practice, this is ensured by ppp_unregister_channel() disconnecting the channel from the unit before dropping a reference on the channel. However, it is possible for an unregistered channel to connect to a PPP unit: register a channel with ppp_register_net_channel(), attach a /dev/ppp file to it with ioctl(PPPIOCATTCHAN), unregister the channel with ppp_unregister_channel() and finally connect the /dev/ppp file to a PPP unit with ioctl(PPPIOCCONNECT). Once in this situation, the channel is only held by the /dev/ppp file, which can be released at anytime and free the channel without letting the parent PPP unit know. Then the ppp structure ends up with dangling pointers in its ->channels list. Prevent this scenario by forbidding unregistered channels from connecting to PPP units. This maintains the code logic by keeping ppp_unregister_channel() responsible from disconnecting the channel if necessary and avoids modification on the reference counting mechanism. This issue seems to predate git history (successfully reproduced on Linux 2.6.26 and earlier PPP commits are unrelated). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 255a5de..fa2a9bd 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -3161,6 +3161,15 @@ ppp_connect_channel(struct channel *pch, int unit) goto outl; ppp_lock(ppp); + spin_lock_bh(&pch->downl); + if (!pch->chan) { + /* Don't connect unregistered channels */ + spin_unlock_bh(&pch->downl); + ppp_unlock(ppp); + ret = -ENOTCONN; + goto outl; + } + spin_unlock_bh(&pch->downl); if (pch->file.hdrlen > ppp->file.hdrlen) ppp->file.hdrlen = pch->file.hdrlen; hdrlen = pch->file.hdrlen + 2; /* for protocol bytes */ -- cgit v1.1 From 3cc81a9aac43829d86ebf775c388b42d770bc0ac Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 2 Mar 2018 17:29:14 +0800 Subject: virtio-net: re enable XDP_REDIRECT for mergeable buffer XDP_REDIRECT support for mergeable buffer was removed since commit 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable() case"). This is because we don't reserve enough tailroom for struct skb_shared_info which breaks XDP assumption. So this patch fixes this by reserving enough tailroom and using fixed size of rx buffer. Signed-off-by: Jason Wang Acked-by: Jesper Dangaard Brouer Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 2d54123..2337460 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, page_off += *len; while (--*num_buf) { + int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); unsigned int buflen; void *buf; int off; @@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU. */ - if ((page_off + buflen) > PAGE_SIZE) { + if ((page_off + buflen + tailroom) > PAGE_SIZE) { put_page(p); goto err_buf; } @@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); bool sent; + int err; head_skb = NULL; @@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, void *data; u32 act; - /* This happens when rx buffer size is underestimated */ + /* This happens when rx buffer size is underestimated + * or headroom is not enough because of the buffer + * was refilled before XDP is set. This should only + * happen for the first several packets, so we don't + * care much about its performance. + */ if (unlikely(num_buf > 1 || headroom < virtnet_get_headroom(vi))) { /* linearize data for XDP */ @@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, act = bpf_prog_run_xdp(xdp_prog, &xdp); - if (act != XDP_PASS) - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); - switch (act) { case XDP_PASS: /* recalculate offset to account for any header @@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_xdp; rcu_read_unlock(); goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (err) { + if (unlikely(xdp_page != page)) + put_page(xdp_page); + goto err_xdp; + } + *xdp_xmit = true; + if (unlikely(xdp_page != page)) + goto err_xdp; + rcu_read_unlock(); + goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, } static unsigned int get_mergeable_buf_len(struct receive_queue *rq, - struct ewma_pkt_len *avg_pkt_len) + struct ewma_pkt_len *avg_pkt_len, + unsigned int room) { const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); unsigned int len; - len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), + if (room) + return PAGE_SIZE - room; + + len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), rq->min_buf_len, PAGE_SIZE - hdr_len); + return ALIGN(len, L1_CACHE_BYTES); } @@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, { struct page_frag *alloc_frag = &rq->alloc_frag; unsigned int headroom = virtnet_get_headroom(vi); + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); char *buf; void *ctx; int err; unsigned int len, hole; - len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len); - if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp))) + /* Extra tailroom is needed to satisfy XDP's assumption. This + * means rx frags coalescing won't work, but consider we've + * disabled GSO for XDP, it won't be a big issue. + */ + len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); + if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) return -ENOMEM; buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; buf += headroom; /* advance address leaving hole at front of pkt */ get_page(alloc_frag->page); - alloc_frag->offset += len + headroom; + alloc_frag->offset += len + room; hole = alloc_frag->size - alloc_frag->offset; - if (hole < len + headroom) { + if (hole < len + room) { /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. @@ -2578,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, { struct virtnet_info *vi = netdev_priv(queue->dev); unsigned int queue_index = get_netdev_rx_queue_index(queue); + unsigned int headroom = virtnet_get_headroom(vi); + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; struct ewma_pkt_len *avg; BUG_ON(queue_index >= vi->max_queue_pairs); avg = &vi->rq[queue_index].mrg_avg_pkt_len; return sprintf(buf, "%u\n", - get_mergeable_buf_len(&vi->rq[queue_index], avg)); + get_mergeable_buf_len(&vi->rq[queue_index], avg, + SKB_DATA_ALIGN(headroom + tailroom))); } static struct rx_queue_attribute mergeable_rx_buffer_size_attribute = -- cgit v1.1 From 12f69661a49446840d742d8feb593ace022d9f66 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:01 -0800 Subject: hv_netvsc: avoid retry on send during shutdown Change the initialization order so that the device is ready to transmit (ie connect vsp is completed) before setting the internal reference to the device with RCU. This avoids any races on initialization and prevents retry issues on shutdown. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 17e529a..686900d 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -852,13 +852,6 @@ int netvsc_send(struct net_device *ndev, if (unlikely(!net_device || net_device->destroy)) return -ENODEV; - /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get - * here before the negotiation with the host is finished and - * send_section_map may not be allocated yet. - */ - if (unlikely(!net_device->send_section_map)) - return -EAGAIN; - nvchan = &net_device->chan_table[packet->q_idx]; packet->send_buf_index = NETVSC_INVALID_INDEX; packet->cp_partial = false; @@ -866,10 +859,8 @@ int netvsc_send(struct net_device *ndev, /* Send control message directly without accessing msd (Multi-Send * Data) field which may be changed during data packet processing. */ - if (!skb) { - cur_send = packet; - goto send_now; - } + if (!skb) + return netvsc_send_pkt(device, packet, net_device, pb, skb); /* batch packets in send buffer if possible */ msdp = &nvchan->msd; @@ -953,7 +944,6 @@ int netvsc_send(struct net_device *ndev, } } -send_now: if (cur_send) ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb); @@ -1306,11 +1296,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, napi_enable(&net_device->chan_table[0].napi); - /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is - * populated. - */ - rcu_assign_pointer(net_device_ctx->nvdev, net_device); - /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device, net_device, device_info); if (ret != 0) { @@ -1319,6 +1304,11 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, goto close; } + /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is + * populated. + */ + rcu_assign_pointer(net_device_ctx->nvdev, net_device); + return net_device; close: -- cgit v1.1 From f4950e4586dfc957e0a28226eeb992ddc049b5a2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:02 -0800 Subject: hv_netvsc: only wake transmit queue if link is up Don't wake transmit queues if link is not up yet. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c5584c2..fa6cf18 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -91,12 +91,11 @@ static int netvsc_open(struct net_device *net) return ret; } - netif_tx_wake_all_queues(net); - rdev = nvdev->extension; - - if (!rdev->link_state) + if (!rdev->link_state) { netif_carrier_on(net); + netif_tx_wake_all_queues(net); + } if (vf_netdev) { /* Setting synthetic device up transparently sets -- cgit v1.1 From fcfb4a00d1e514e8313277a01ef919de1113025b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:03 -0800 Subject: hv_netvsc: fix error unwind handling if vmbus_open fails Need to delete NAPI association if vmbus_open fails. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 686900d..ff97a85 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1286,7 +1286,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netvsc_channel_cb, net_device->chan_table); if (ret != 0) { - netif_napi_del(&net_device->chan_table[0].napi); netdev_err(ndev, "unable to open channel: %d\n", ret); goto cleanup; } @@ -1319,6 +1318,7 @@ close: vmbus_close(device->channel); cleanup: + netif_napi_del(&net_device->chan_table[0].napi); free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); -- cgit v1.1 From a7483ec0267c69b34e818738da60b392623da94b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:04 -0800 Subject: hv_netvsc: cancel subchannel setup before halting device Block setup of multiple channels earlier in the teardown process. This avoids possible races between halt and subchannel initialization. Suggested-by: Haiyang Zhang Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index c3ca191..1cba767 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1340,6 +1340,9 @@ void rndis_filter_device_remove(struct hv_device *dev, { struct rndis_device *rndis_dev = net_dev->extension; + /* Don't try and setup sub channels if about to halt */ + cancel_work_sync(&net_dev->subchan_work); + /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); -- cgit v1.1 From d64e38ae690e3337db0d38d9b149a193a1646c4b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:05 -0800 Subject: hv_netvsc: fix race in napi poll when rescheduling There is a race between napi_reschedule and re-enabling interrupts which could lead to missed host interrrupts. This occurs when interrupts are re-enabled (hv_end_read) and vmbus irq callback (netvsc_channel_cb) has already scheduled NAPI. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ff97a85..4237ced 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1207,9 +1207,10 @@ int netvsc_poll(struct napi_struct *napi, int budget) if (send_recv_completions(ndev, net_device, nvchan) == 0 && work_done < budget && napi_complete_done(napi, work_done) && - hv_end_read(&channel->inbound)) { + hv_end_read(&channel->inbound) && + napi_schedule_prep(napi)) { hv_begin_read(&channel->inbound); - napi_reschedule(napi); + __napi_schedule(napi); } /* Driver may overshoot since multiple packets per descriptor */ -- cgit v1.1 From 68633edaef655ce94e51088ecef5dd4e1d2f6f34 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:06 -0800 Subject: hv_netvsc: use napi_schedule_irqoff Since the netvsc_channel_cb is already called in interrupt context from vmbus, there is no need to do irqsave/restore. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4237ced..0265d70 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1233,7 +1233,7 @@ void netvsc_channel_cb(void *context) /* disable interupts from host */ hv_begin_read(rbi); - __napi_schedule(&nvchan->napi); + __napi_schedule_irqoff(&nvchan->napi); } } -- cgit v1.1 From b3bf5666a51068ad5ddd89a76ed877101ef3bc16 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:07 -0800 Subject: hv_netvsc: defer queue selection to VF When VF is used for accelerated networking it will likely have more queues (and different policy) than the synthetic NIC. This patch defers the queue policy to the VF so that all the queues can be used. This impacts workloads like local generate UDP. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index fa6cf18..5299cfb 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -298,8 +298,19 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, rcu_read_lock(); vf_netdev = rcu_dereference(ndc->vf_netdev); if (vf_netdev) { - txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; - qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; + const struct net_device_ops *vf_ops = vf_netdev->netdev_ops; + + if (vf_ops->ndo_select_queue) + txq = vf_ops->ndo_select_queue(vf_netdev, skb, + accel_priv, fallback); + else + txq = fallback(vf_netdev, skb); + + /* Record the queue selected by VF so that it can be + * used for common case where VF has more queues than + * the synthetic device. + */ + qdisc_skb_cb(skb)->slave_dev_queue_mapping = txq; } else { txq = netvsc_pick_tx(ndev, skb); } -- cgit v1.1 From 009f766ca2383d8788acd65c2c36c51bbfb19470 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:08 -0800 Subject: hv_netvsc: filter multicast/broadcast The netvsc driver was always enabling all multicast and broadcast even if netdevice flag had not enabled it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 1cba767..8927c48 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -854,15 +854,19 @@ static void rndis_set_multicast(struct work_struct *w) { struct rndis_device *rdev = container_of(w, struct rndis_device, mcast_work); + u32 filter = NDIS_PACKET_TYPE_DIRECTED; + unsigned int flags = rdev->ndev->flags; - if (rdev->ndev->flags & IFF_PROMISC) - rndis_filter_set_packet_filter(rdev, - NDIS_PACKET_TYPE_PROMISCUOUS); - else - rndis_filter_set_packet_filter(rdev, - NDIS_PACKET_TYPE_BROADCAST | - NDIS_PACKET_TYPE_ALL_MULTICAST | - NDIS_PACKET_TYPE_DIRECTED); + if (flags & IFF_PROMISC) { + filter = NDIS_PACKET_TYPE_PROMISCUOUS; + } else { + if (flags & IFF_ALLMULTI) + flags |= NDIS_PACKET_TYPE_ALL_MULTICAST; + if (flags & IFF_BROADCAST) + flags |= NDIS_PACKET_TYPE_BROADCAST; + } + + rndis_filter_set_packet_filter(rdev, filter); } void rndis_filter_update(struct netvsc_device *nvdev) -- cgit v1.1 From bee9d41b37ea6b1f860e5bc0989cf1cf1d7e6ab3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 2 Mar 2018 13:49:09 -0800 Subject: hv_netvsc: propagate rx filters to VF The netvsc device should propagate filters to the SR-IOV VF device (if present). The flags also need to be propagated to the VF device as well. This only really matters on local Hyper-V since Azure does not support multiple addresses. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5299cfb..cdb78ee 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -66,10 +66,36 @@ static int debug = -1; module_param(debug, int, S_IRUGO); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); -static void netvsc_set_multicast_list(struct net_device *net) +static void netvsc_change_rx_flags(struct net_device *net, int change) { - struct net_device_context *net_device_ctx = netdev_priv(net); - struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); + struct net_device_context *ndev_ctx = netdev_priv(net); + struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + int inc; + + if (!vf_netdev) + return; + + if (change & IFF_PROMISC) { + inc = (net->flags & IFF_PROMISC) ? 1 : -1; + dev_set_promiscuity(vf_netdev, inc); + } + + if (change & IFF_ALLMULTI) { + inc = (net->flags & IFF_ALLMULTI) ? 1 : -1; + dev_set_allmulti(vf_netdev, inc); + } +} + +static void netvsc_set_rx_mode(struct net_device *net) +{ + struct net_device_context *ndev_ctx = netdev_priv(net); + struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); + struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev); + + if (vf_netdev) { + dev_uc_sync(vf_netdev, net); + dev_mc_sync(vf_netdev, net); + } rndis_filter_update(nvdev); } @@ -1586,7 +1612,8 @@ static const struct net_device_ops device_ops = { .ndo_open = netvsc_open, .ndo_stop = netvsc_close, .ndo_start_xmit = netvsc_start_xmit, - .ndo_set_rx_mode = netvsc_set_multicast_list, + .ndo_change_rx_flags = netvsc_change_rx_flags, + .ndo_set_rx_mode = netvsc_set_rx_mode, .ndo_change_mtu = netvsc_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = netvsc_set_mac_addr, @@ -1817,6 +1844,11 @@ static void __netvsc_vf_setup(struct net_device *ndev, netdev_warn(vf_netdev, "unable to change mtu to %u\n", ndev->mtu); + /* set multicast etc flags on VF */ + dev_change_flags(vf_netdev, ndev->flags | IFF_SLAVE); + dev_uc_sync(vf_netdev, ndev); + dev_mc_sync(vf_netdev, ndev); + if (netif_running(ndev)) { ret = dev_open(vf_netdev); if (ret) -- cgit v1.1 From 9ac79ba9c77d8595157bbdc4327919f8ee062426 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 12 Feb 2018 14:55:13 +0100 Subject: gpio: rcar: Use wakeup_path i.s.o. explicit clock handling Since commit ab82fa7da4dce5c7 ("gpio: rcar: Prevent module clock disable when wake-up is enabled"), when a GPIO is used for wakeup, the GPIO block's module clock (if exists) is manually kept running during system suspend, to make sure the device stays active. However, this explicit clock handling is merely a workaround for a failure to properly communicate wakeup information to the device core. Instead, set the device's power.wakeup_path field, to indicate this device is part of the wakeup path. Depending on the PM Domain's active_wakeup configuration, the genpd core code will keep the device enabled (and the clock running) during system suspend when needed. This allows for the removal of all explicit clock handling code from the driver. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- drivers/gpio/gpio-rcar.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index e76de57..ebaea8b 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -14,7 +14,6 @@ * GNU General Public License for more details. */ -#include #include #include #include @@ -37,10 +36,9 @@ struct gpio_rcar_priv { struct platform_device *pdev; struct gpio_chip gpio_chip; struct irq_chip irq_chip; - struct clk *clk; unsigned int irq_parent; + atomic_t wakeup_path; bool has_both_edge_trigger; - bool needs_clk; }; #define IOINTSEL 0x00 /* General IO/Interrupt Switching Register */ @@ -186,13 +184,10 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on) } } - if (!p->clk) - return 0; - if (on) - clk_enable(p->clk); + atomic_inc(&p->wakeup_path); else - clk_disable(p->clk); + atomic_dec(&p->wakeup_path); return 0; } @@ -330,17 +325,14 @@ static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset, struct gpio_rcar_info { bool has_both_edge_trigger; - bool needs_clk; }; static const struct gpio_rcar_info gpio_rcar_info_gen1 = { .has_both_edge_trigger = false, - .needs_clk = false, }; static const struct gpio_rcar_info gpio_rcar_info_gen2 = { .has_both_edge_trigger = true, - .needs_clk = true, }; static const struct of_device_id gpio_rcar_of_table[] = { @@ -403,7 +395,6 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins) ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args); *npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK; p->has_both_edge_trigger = info->has_both_edge_trigger; - p->needs_clk = info->needs_clk; if (*npins == 0 || *npins > RCAR_MAX_GPIO_PER_BANK) { dev_warn(&p->pdev->dev, @@ -440,16 +431,6 @@ static int gpio_rcar_probe(struct platform_device *pdev) platform_set_drvdata(pdev, p); - p->clk = devm_clk_get(dev, NULL); - if (IS_ERR(p->clk)) { - if (p->needs_clk) { - dev_err(dev, "unable to get clock\n"); - ret = PTR_ERR(p->clk); - goto err0; - } - p->clk = NULL; - } - pm_runtime_enable(dev); irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); @@ -531,11 +512,24 @@ static int gpio_rcar_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused gpio_rcar_suspend(struct device *dev) +{ + struct gpio_rcar_priv *p = dev_get_drvdata(dev); + + if (atomic_read(&p->wakeup_path)) + device_set_wakeup_path(dev); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(gpio_rcar_pm_ops, gpio_rcar_suspend, NULL); + static struct platform_driver gpio_rcar_device_driver = { .probe = gpio_rcar_probe, .remove = gpio_rcar_remove, .driver = { .name = "gpio_rcar", + .pm = &gpio_rcar_pm_ops, .of_match_table = of_match_ptr(gpio_rcar_of_table), } }; -- cgit v1.1 From 1a087f032111a88e826877449dfb93ceb22b78b9 Mon Sep 17 00:00:00 2001 From: Xinyong Date: Fri, 2 Mar 2018 19:20:07 +0800 Subject: usb: gadget: f_fs: Fix use-after-free in ffs_fs_kill_sb() When I debug a kernel crash issue in funcitonfs, found ffs_data.ref overflowed, While functionfs is unmounting, ffs_data is put twice. Commit 43938613c6fd ("drivers, usb: convert ffs_data.ref from atomic_t to refcount_t") can avoid refcount overflow, but that is risk some situations. So no need put ffs data in ffs_fs_kill_sb, already put in ffs_data_closed. The issue can be reproduced in Mediatek mt6763 SoC, ffs for ADB device. KASAN enabled configuration reports use-after-free errro. BUG: KASAN: use-after-free in refcount_dec_and_test+0x14/0xe0 at addr ffffffc0579386a0 Read of size 4 by task umount/4650 ==================================================== BUG kmalloc-512 (Tainted: P W O ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in ffs_fs_mount+0x194/0x844 age=22856 cpu=2 pid=566 alloc_debug_processing+0x1ac/0x1e8 ___slab_alloc.constprop.63+0x640/0x648 __slab_alloc.isra.57.constprop.62+0x24/0x34 kmem_cache_alloc_trace+0x1a8/0x2bc ffs_fs_mount+0x194/0x844 mount_fs+0x6c/0x1d0 vfs_kern_mount+0x50/0x1b4 do_mount+0x258/0x1034 INFO: Freed in ffs_data_put+0x25c/0x320 age=0 cpu=3 pid=4650 free_debug_processing+0x22c/0x434 __slab_free+0x2d8/0x3a0 kfree+0x254/0x264 ffs_data_put+0x25c/0x320 ffs_data_closed+0x124/0x15c ffs_fs_kill_sb+0xb8/0x110 deactivate_locked_super+0x6c/0x98 deactivate_super+0xb0/0xbc INFO: Object 0xffffffc057938600 @offset=1536 fp=0x (null) ...... Call trace: [] dump_backtrace+0x0/0x250 [] show_stack+0x14/0x1c [] dump_stack+0xa0/0xc8 [] print_trailer+0x158/0x260 [] object_err+0x3c/0x40 [] kasan_report_error+0x2a8/0x754 [] kasan_report+0x5c/0x60 [] __asan_load4+0x70/0x88 [] refcount_dec_and_test+0x14/0xe0 [] ffs_data_put+0x80/0x320 [] ffs_fs_kill_sb+0xc8/0x110 [] deactivate_locked_super+0x6c/0x98 [] deactivate_super+0xb0/0xbc [] cleanup_mnt+0x64/0xec [] __cleanup_mnt+0x10/0x18 [] task_work_run+0xcc/0x124 [] do_notify_resume+0x60/0x70 [] work_pending+0x10/0x14 Cc: stable@vger.kernel.org Signed-off-by: Xinyong Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c2592d8..d2428a9 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1538,7 +1538,6 @@ ffs_fs_kill_sb(struct super_block *sb) if (sb->s_fs_info) { ffs_release_dev(sb->s_fs_info); ffs_data_closed(sb->s_fs_info); - ffs_data_put(sb->s_fs_info); } } -- cgit v1.1 From 4c437920fa216f66f6a5d469cae2a0360cc2d9c7 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Thu, 1 Mar 2018 11:05:34 +0100 Subject: dt-bindings: usb: fix the STM32F7 DWC2 OTG HS core binding This patch fixes binding documentation for DWC2 controller in HS mode found on STMicroelectronics STM32F7 SoC. The v2 former patch [1] had been acked by Rob Herring, but v1 was merged. [1] https://patchwork.kernel.org/patch/9925575/ Fixes: 000777dadc7e ("dt-bindings: usb: Document the STM32F7xx DWC2 ...") Signed-off-by: Amelie Delaunay Reviewed-by: Rob Herring Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/dwc2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt index e64d903..46da5f1 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.txt +++ b/Documentation/devicetree/bindings/usb/dwc2.txt @@ -19,7 +19,7 @@ Required properties: configured in FS mode; - "st,stm32f4x9-hsotg": The DWC2 USB HS controller instance in STM32F4x9 SoCs configured in HS mode; - - "st,stm32f7xx-hsotg": The DWC2 USB HS controller instance in STM32F7xx SoCs + - "st,stm32f7-hsotg": The DWC2 USB HS controller instance in STM32F7 SoCs configured in HS mode; - reg : Should contain 1 register range (address and length) - interrupts : Should contain 1 interrupt -- cgit v1.1 From 1a149e3554e0324a3d551dfb327bdb67b150a320 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Thu, 1 Mar 2018 11:05:35 +0100 Subject: usb: dwc2: fix STM32F7 USB OTG HS compatible This patch fixes compatible for STM32F7 USB OTG HS and consistently rename dw2_set_params function. The v2 former patch [1] had been acked by Paul Young, but v1 was merged. [1] https://patchwork.kernel.org/patch/9925573/ Fixes: d8fae8b93682 ("usb: dwc2: add support for STM32F7xx USB OTG HS") Signed-off-by: Amelie Delaunay Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/params.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 03fd20f..c4a4749 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -137,7 +137,7 @@ static void dwc2_set_stm32f4x9_fsotg_params(struct dwc2_hsotg *hsotg) p->activate_stm_fs_transceiver = true; } -static void dwc2_set_stm32f7xx_hsotg_params(struct dwc2_hsotg *hsotg) +static void dwc2_set_stm32f7_hsotg_params(struct dwc2_hsotg *hsotg) { struct dwc2_core_params *p = &hsotg->params; @@ -164,8 +164,8 @@ const struct of_device_id dwc2_of_match_table[] = { { .compatible = "st,stm32f4x9-fsotg", .data = dwc2_set_stm32f4x9_fsotg_params }, { .compatible = "st,stm32f4x9-hsotg" }, - { .compatible = "st,stm32f7xx-hsotg", - .data = dwc2_set_stm32f7xx_hsotg_params }, + { .compatible = "st,stm32f7-hsotg", + .data = dwc2_set_stm32f7_hsotg_params }, {}, }; MODULE_DEVICE_TABLE(of, dwc2_of_match_table); -- cgit v1.1 From 54f02945f703404cdf17c9618316b3d3387fa072 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 27 Feb 2018 17:16:02 +0900 Subject: usb: renesas_usbhs: add binding for r8a77965 This patch adds binding for r8a77965 (R-Car M3-N). Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/renesas_usbhs.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt index d060172..43960fa 100644 --- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt +++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt @@ -12,6 +12,7 @@ Required properties: - "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device - "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device - "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device + - "renesas,usbhs-r8a77965" for r8a77965 (R-Car M3-N) compatible device - "renesas,usbhs-r8a77995" for r8a77995 (R-Car D3) compatible device - "renesas,usbhs-r7s72100" for r7s72100 (RZ/A1) compatible device - "renesas,rcar-gen2-usbhs" for R-Car Gen2 or RZ/G1 compatible devices -- cgit v1.1 From c6ba5084ce0d00d4a005b0577d9e764d39b638e1 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 27 Feb 2018 17:16:03 +0900 Subject: usb: gadget: udc: renesas_usb3: add binging for r8a77965 This patch adds binding for r8a77965 (R-Car M3-N). Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/renesas_usb3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/usb/renesas_usb3.txt b/Documentation/devicetree/bindings/usb/renesas_usb3.txt index 87a45e2..2c071bb 100644 --- a/Documentation/devicetree/bindings/usb/renesas_usb3.txt +++ b/Documentation/devicetree/bindings/usb/renesas_usb3.txt @@ -4,6 +4,7 @@ Required properties: - compatible: Must contain one of the following: - "renesas,r8a7795-usb3-peri" - "renesas,r8a7796-usb3-peri" + - "renesas,r8a77965-usb3-peri" - "renesas,rcar-gen3-usb3-peri" for a generic R-Car Gen3 compatible device -- cgit v1.1 From 6cfc70c4321bde35cb132831cba4685821e65065 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 1 Mar 2018 10:37:41 +0800 Subject: MIPS: Loongson64: Select ARCH_MIGHT_HAVE_PC_PARPORT Commit a211a0820d3c ("MIPS: Push ARCH_MIGHT_HAVE_PC_PARPORT down to platform level") moves the global MIPS ARCH_MIGHT_HAVE_PC_PARPORT select down to various platforms, but doesn't add it to Loongson64 platforms which need it, so add the selects to these platforms too. Fixes: a211a0820d3c ("MIPS: Push ARCH_MIGHT_HAVE_PC_PARPORT down to platform level") Signed-off-by: Huacai Chen Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/18703/ Signed-off-by: James Hogan --- arch/mips/loongson64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index bc2fdbf..12812a8b 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -7,6 +7,7 @@ choice config LEMOTE_FULOONG2E bool "Lemote Fuloong(2e) mini-PC" select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT select CEVT_R4K select CSRC_R4K select SYS_HAS_CPU_LOONGSON2E @@ -33,6 +34,7 @@ config LEMOTE_FULOONG2E config LEMOTE_MACH2F bool "Lemote Loongson 2F family machines" select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT select BOARD_SCACHE select BOOT_ELF32 select CEVT_R4K if ! MIPS_EXTERNAL_TIMER @@ -62,6 +64,7 @@ config LEMOTE_MACH2F config LOONGSON_MACH3X bool "Generic Loongson 3 family machines" select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT select GENERIC_ISA_DMA_SUPPORT_BROKEN select BOOT_ELF32 select BOARD_SCACHE -- cgit v1.1 From ee2515d95f9a12e04a3863916ae45831438210ce Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 1 Mar 2018 10:37:42 +0800 Subject: MIPS: Loongson64: Select ARCH_MIGHT_HAVE_PC_SERIO Commit 7a407aa5e0d3 ("MIPS: Push ARCH_MIGHT_HAVE_PC_SERIO down to platform level") moves the global MIPS ARCH_MIGHT_HAVE_PC_SERIO select down to various platforms, but doesn't add it to Loongson64 platforms which need it, so add the selects to these platforms too. Fixes: 7a407aa5e0d3 ("MIPS: Push ARCH_MIGHT_HAVE_PC_SERIO down to platform level") Signed-off-by: Huacai Chen Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/18704/ Signed-off-by: James Hogan --- arch/mips/loongson64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 12812a8b..72af0c1 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -8,6 +8,7 @@ config LEMOTE_FULOONG2E bool "Lemote Fuloong(2e) mini-PC" select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select CEVT_R4K select CSRC_R4K select SYS_HAS_CPU_LOONGSON2E @@ -35,6 +36,7 @@ config LEMOTE_MACH2F bool "Lemote Loongson 2F family machines" select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select BOARD_SCACHE select BOOT_ELF32 select CEVT_R4K if ! MIPS_EXTERNAL_TIMER @@ -65,6 +67,7 @@ config LOONGSON_MACH3X bool "Generic Loongson 3 family machines" select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select GENERIC_ISA_DMA_SUPPORT_BROKEN select BOOT_ELF32 select BOARD_SCACHE -- cgit v1.1 From 14a596a7e6fd9c5baa6b2cfc57962e2c3bda6c69 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 28 Feb 2018 20:17:35 +0100 Subject: fixdep: remove stale references to uml-config.h uml-config.h hasn't existed in this decade (87e299e5c750 - x86, um: get rid of uml-config.h). The few remaining UML_CONFIG instances are defined directly in terms of their real CONFIG symbol in common-offsets.h, so unlike when the symbols got defined via a sed script, anything that uses UML_CONFIG_FOO now should also automatically pick up a dependency on CONFIG_FOO via the normal fixdep mechanism (since common-offsets.h should at least recursively be a dependency). Hence I believe we should actually be able to ignore the HELLO_CONFIG_BOOM cases. Cc: Al Viro Cc: Richard Weinberger Cc: user-mode-linux-devel@lists.sourceforge.net Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index fa3d39b6..d7fbe54 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -93,14 +93,6 @@ * (Note: it'd be easy to port over the complete mkdep state machine, * but I don't think the added complexity is worth it) */ -/* - * Note 2: if somebody writes HELLO_CONFIG_BOOM in a file, it will depend onto - * CONFIG_BOOM. This could seem a bug (not too hard to fix), but please do not - * fix it! Some UserModeLinux files (look at arch/um/) call CONFIG_BOOM as - * UML_CONFIG_BOOM, to avoid conflicts with /usr/include/linux/autoconf.h, - * through arch/um/include/uml-config.h; this fixdep "bug" makes sure that - * those files will have correct dependencies. - */ #include #include @@ -286,7 +278,6 @@ static int is_ignored_file(const char *s, int len) { return str_ends_with(s, len, "include/generated/autoconf.h") || str_ends_with(s, len, "include/generated/autoksyms.h") || - str_ends_with(s, len, "arch/um/include/uml-config.h") || str_ends_with(s, len, "include/linux/kconfig.h") || str_ends_with(s, len, ".ver"); } -- cgit v1.1 From 5b8ad96d1a4421ffe417e647a65064aad1e84fb4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 28 Feb 2018 20:17:36 +0100 Subject: fixdep: remove some false CONFIG_ matches The string CONFIG_ quite often appears after other alphanumerics, meaning that that instance cannot be referencing a Kconfig symbol. Omitting these means make has fewer files to stat() when deciding what needs to be rebuilt - for a defconfig build, this seems to remove about 2% of the (wildcard ...) lines from the .o.cmd files. Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index d7fbe54..1b21870 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -225,8 +225,13 @@ static int str_ends_with(const char *s, int slen, const char *sub) static void parse_config_file(const char *p) { const char *q, *r; + const char *start = p; while ((p = strstr(p, "CONFIG_"))) { + if (p > start && (isalnum(p[-1]) || p[-1] == '_')) { + p += 7; + continue; + } p += 7; q = p; while (*q && (isalnum(*q) || *q == '_')) -- cgit v1.1 From 638e69cf2230737655fcb5ee9879c2fab7679187 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 28 Feb 2018 20:17:37 +0100 Subject: fixdep: do not ignore kconfig.h kconfig.h was excluded from consideration by fixdep by 6a5be57f0f00 (fixdep: fix extraneous dependencies) to avoid some false positive hits (1) include/config/.h (2) include/config/h.h (3) include/config/foo.h (1) occurred because kconfig.h contains the string CONFIG_ in a comment. However, since dee81e988674 (fixdep: faster CONFIG_ search), we have a check that the part after CONFIG_ is non-empty, so this does not happen anymore (and CONFIG_ appears by itself elsewhere, so that check is worthwhile). (2) comes from the include guard, __LINUX_KCONFIG_H. But with the previous patch, we no longer match that either. That leaves (3), which amounts to one [1] false dependency (aka stat() call done by make), which I think we can live with: We've already had one case [2] where the lack of include/linux/kconfig.h in the .o.cmd file caused a missing rebuild, and while I originally thought we should just put kconfig.h in the dependency list without parsing it for the CONFIG_ pattern, we actually do have some real CONFIG_ symbols mentioned in it, and one can imagine some translation unit that just does '#ifdef __BIG_ENDIAN' but doesn't through some other header actually depend on CONFIG_CPU_BIG_ENDIAN - so changing the target endianness could end up rebuilding the world, minus that small TU. Quoting Linus, ... when missing dependencies cause a missed re-compile, the resulting bugs can be _really_ subtle. [1] well, two, we now also have CONFIG_BOOGER/booger.h - we could change that to FOO if we care [2] https://lkml.org/lkml/2018/2/22/838 Cc: Linus Torvalds Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 1b21870..449b68c 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -283,7 +283,6 @@ static int is_ignored_file(const char *s, int len) { return str_ends_with(s, len, "include/generated/autoconf.h") || str_ends_with(s, len, "include/generated/autoksyms.h") || - str_ends_with(s, len, "include/linux/kconfig.h") || str_ends_with(s, len, ".ver"); } -- cgit v1.1 From f6d3f35e006496c282ccbb67494d90b04f6cba10 Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Mon, 12 Feb 2018 04:37:44 +0900 Subject: perf kallsyms: Fix the usage on the man page First, all man pages highlight only perf and subcommands except 'perf kallsyms', which includes the full usage. Fix it for commands to monopolize underlines. Second, options can be ommited when executing 'perf kallsyms', so add square brackets between