From 747e1f60470b975363cbbfcde0c41a3166391be5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 13 Sep 2017 18:21:38 +0200 Subject: spi: armada-3700: Fix failing commands with quad-SPI A3700 SPI controller datasheet states that only the first line (IO0) is used to receive and send instructions, addresses and dummy bytes, unless for addresses during an RX operation in a quad SPI configuration (see p.821 of the Armada-3720-DB datasheet). Otherwise, some commands such as SPI NOR commands like READ_FROM_CACHE_DUAL_IO(0xeb) and READ_FROM_CACHE_DUAL_IO(0xbb) will fail because these commands must send address bytes through the four pins. Data transfer always use the four bytes with this setup. Thus, in quad SPI configuration, the A3700_SPI_ADDR_PIN bit must be set only in this case to inform the controller that it must use the number of pins indicated in the {A3700_SPI_DATA_PIN1,A3700_SPI_DATA_PIN0} field during the address cycles of an RX operation. Suggested-by: Ken Ma Signed-off-by: Miquel Raynal Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-armada-3700.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 6c7d7a4..a28702b 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -161,7 +161,7 @@ static void a3700_spi_deactivate_cs(struct a3700_spi *a3700_spi, } static int a3700_spi_pin_mode_set(struct a3700_spi *a3700_spi, - unsigned int pin_mode) + unsigned int pin_mode, bool receiving) { u32 val; @@ -177,6 +177,9 @@ static int a3700_spi_pin_mode_set(struct a3700_spi *a3700_spi, break; case SPI_NBITS_QUAD: val |= A3700_SPI_DATA_PIN1; + /* RX during address reception uses 4-pin */ + if (receiving) + val |= A3700_SPI_ADDR_PIN; break; default: dev_err(&a3700_spi->master->dev, "wrong pin mode %u", pin_mode); @@ -653,7 +656,7 @@ static int a3700_spi_transfer_one(struct spi_master *master, else if (xfer->rx_buf) nbits = xfer->rx_nbits; - a3700_spi_pin_mode_set(a3700_spi, nbits); + a3700_spi_pin_mode_set(a3700_spi, nbits, xfer->rx_buf ? true : false); if (xfer->rx_buf) { /* Set read data length */ -- cgit v1.1 From 6fd6fd68c9e2f3a206a098ef57b1d5548f9d00d1 Mon Sep 17 00:00:00 2001 From: Zachary Zhang Date: Wed, 13 Sep 2017 18:21:39 +0200 Subject: spi: armada-3700: Fix padding when sending not 4-byte aligned data In 4-byte transfer mode, extra padding/dummy bytes '0xff' would be sent in write operation if TX data is not 4-byte aligned since the SPI data register is always shifted out as whole 4 bytes. Fix this by using the header count feature that allows to transfer 0 to 4 bytes. Use it to actually send the first 1 to 3 bytes of data before the rest of the buffer that will hence be 4-byte aligned. Signed-off-by: Zachary Zhang Signed-off-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 135 +++++++++++++----------------------------- 1 file changed, 41 insertions(+), 94 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index a28702b..9172cb2 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -99,11 +99,6 @@ /* A3700_SPI_IF_TIME_REG */ #define A3700_SPI_CLK_CAPT_EDGE BIT(7) -/* Flags and macros for struct a3700_spi */ -#define A3700_INSTR_CNT 1 -#define A3700_ADDR_CNT 3 -#define A3700_DUMMY_CNT 1 - struct a3700_spi { struct spi_master *master; void __iomem *base; @@ -117,9 +112,6 @@ struct a3700_spi { u8 byte_len; u32 wait_mask; struct completion done; - u32 addr_cnt; - u32 instr_cnt; - size_t hdr_cnt; }; static u32 spireg_read(struct a3700_spi *a3700_spi, u32 offset) @@ -449,59 +441,43 @@ static void a3700_spi_set_cs(struct spi_device *spi, bool enable) static void a3700_spi_header_set(struct a3700_spi *a3700_spi) { - u32 instr_cnt = 0, addr_cnt = 0, dummy_cnt = 0; + unsigned int addr_cnt; u32 val = 0; /* Clear the header registers */ spireg_write(a3700_spi, A3700_SPI_IF_INST_REG, 0); spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, 0); spireg_write(a3700_spi, A3700_SPI_IF_RMODE_REG, 0); + spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, 0); /* Set header counters */ if (a3700_spi->tx_buf) { - if (a3700_spi->buf_len <= a3700_spi->instr_cnt) { - instr_cnt = a3700_spi->buf_len; - } else if (a3700_spi->buf_len <= (a3700_spi->instr_cnt + - a3700_spi->addr_cnt)) { - instr_cnt = a3700_spi->instr_cnt; - addr_cnt = a3700_spi->buf_len - instr_cnt; - } else if (a3700_spi->buf_len <= a3700_spi->hdr_cnt) { - instr_cnt = a3700_spi->instr_cnt; - addr_cnt = a3700_spi->addr_cnt; - /* Need to handle the normal write case with 1 byte - * data - */ - if (!a3700_spi->tx_buf[instr_cnt + addr_cnt]) - dummy_cnt = a3700_spi->buf_len - instr_cnt - - addr_cnt; + /* + * when tx data is not 4 bytes aligned, there will be unexpected + * bytes out of SPI output register, since it always shifts out + * as whole 4 bytes. This might cause incorrect transaction with + * some devices. To avoid that, use SPI header count feature to + * transfer up to 3 bytes of data first, and then make the rest + * of data 4-byte aligned. + */ + addr_cnt = a3700_spi->buf_len % 4; + if (addr_cnt) { + val = (addr_cnt & A3700_SPI_ADDR_CNT_MASK) + << A3700_SPI_ADDR_CNT_BIT; + spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, val); + + /* Update the buffer length to be transferred */ + a3700_spi->buf_len -= addr_cnt; + + /* transfer 1~3 bytes through address count */ + val = 0; + while (addr_cnt--) { + val = (val << 8) | a3700_spi->tx_buf[0]; + a3700_spi->tx_buf++; + } + spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, val); } - val |= ((instr_cnt & A3700_SPI_INSTR_CNT_MASK) - << A3700_SPI_INSTR_CNT_BIT); - val |= ((addr_cnt & A3700_SPI_ADDR_CNT_MASK) - << A3700_SPI_ADDR_CNT_BIT); - val |= ((dummy_cnt & A3700_SPI_DUMMY_CNT_MASK) - << A3700_SPI_DUMMY_CNT_BIT); } - spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, val); - - /* Update the buffer length to be transferred */ - a3700_spi->buf_len -= (instr_cnt + addr_cnt + dummy_cnt); - - /* Set Instruction */ - val = 0; - while (instr_cnt--) { - val = (val << 8) | a3700_spi->tx_buf[0]; - a3700_spi->tx_buf++; - } - spireg_write(a3700_spi, A3700_SPI_IF_INST_REG, val); - - /* Set Address */ - val = 0; - while (addr_cnt--) { - val = (val << 8) | a3700_spi->tx_buf[0]; - a3700_spi->tx_buf++; - } - spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, val); } static int a3700_is_wfifo_full(struct a3700_spi *a3700_spi) @@ -515,35 +491,12 @@ static int a3700_is_wfifo_full(struct a3700_spi *a3700_spi) static int a3700_spi_fifo_write(struct a3700_spi *a3700_spi) { u32 val; - int i = 0; while (!a3700_is_wfifo_full(a3700_spi) && a3700_spi->buf_len) { - val = 0; - if (a3700_spi->buf_len >= 4) { - val = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); - spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val); - - a3700_spi->buf_len -= 4; - a3700_spi->tx_buf += 4; - } else { - /* - * If the remained buffer length is less than 4-bytes, - * we should pad the write buffer with all ones. So that - * it avoids overwrite the unexpected bytes following - * the last one. - */ - val = GENMASK(31, 0); - while (a3700_spi->buf_len) { - val &= ~(0xff << (8 * i)); - val |= *a3700_spi->tx_buf++ << (8 * i); - i++; - a3700_spi->buf_len--; - - spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, - val); - } - break; - } + val = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); + spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val); + a3700_spi->buf_len -= 4; + a3700_spi->tx_buf += 4; } return 0; @@ -648,9 +601,6 @@ static int a3700_spi_transfer_one(struct spi_master *master, a3700_spi->rx_buf = xfer->rx_buf; a3700_spi->buf_len = xfer->len; - /* SPI transfer headers */ - a3700_spi_header_set(a3700_spi); - if (xfer->tx_buf) nbits = xfer->tx_nbits; else if (xfer->rx_buf) @@ -658,6 +608,12 @@ static int a3700_spi_transfer_one(struct spi_master *master, a3700_spi_pin_mode_set(a3700_spi, nbits, xfer->rx_buf ? true : false); + /* Flush the FIFOs */ + a3700_spi_fifo_flush(a3700_spi); + + /* Transfer first bytes of data when buffer is not 4-byte aligned */ + a3700_spi_header_set(a3700_spi); + if (xfer->rx_buf) { /* Set read data length */ spireg_write(a3700_spi, A3700_SPI_IF_DIN_CNT_REG, @@ -736,16 +692,11 @@ static int a3700_spi_transfer_one(struct spi_master *master, dev_err(&spi->dev, "wait wfifo empty timed out\n"); return -ETIMEDOUT; } - } else { - /* - * If the instruction in SPI_INSTR does not require data - * to be written to the SPI device, wait until SPI_RDY - * is 1 for the SPI interface to be in idle. - */ - if (!a3700_spi_transfer_wait(spi, A3700_SPI_XFER_RDY)) { - dev_err(&spi->dev, "wait xfer ready timed out\n"); - return -ETIMEDOUT; - } + } + + if (!a3700_spi_transfer_wait(spi, A3700_SPI_XFER_RDY)) { + dev_err(&spi->dev, "wait xfer ready timed out\n"); + return -ETIMEDOUT; } val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); @@ -837,10 +788,6 @@ static int a3700_spi_probe(struct platform_device *pdev) memset(spi, 0, sizeof(struct a3700_spi)); spi->master = master; - spi->instr_cnt = A3700_INSTR_CNT; - spi->addr_cnt = A3700_ADDR_CNT; - spi->hdr_cnt = A3700_INSTR_CNT + A3700_ADDR_CNT + - A3700_DUMMY_CNT; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); spi->base = devm_ioremap_resource(dev, res); -- cgit v1.1 From 8b5d729a3a8a07fe273af266e90bc52114dd69a6 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sun, 10 Sep 2017 14:55:29 +0100 Subject: spi: stm32: Fix logical error in stm32_spi_prepare_mbr() stm32_spi_prepare_mbr() is returning an error value when div is less than SPI_MBR_DIV_MIN *and* greater than SPI_MBR_DIV_MAX, which always evaluates to false. This should change to use *or*. Signed-off-by: Christos Gkekas Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 680cdf5..ba9743f 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -263,8 +263,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz) * no need to check it there. * However, we need to ensure the following calculations. */ - if ((div < SPI_MBR_DIV_MIN) && - (div > SPI_MBR_DIV_MAX)) + if (div < SPI_MBR_DIV_MIN || + div > SPI_MBR_DIV_MAX) return -EINVAL; /* Determine the first power of 2 greater than or equal to div */ -- cgit v1.1 From a2b4a79b88b24c49d98d45a06a014ffd22ada1a4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 10 Sep 2017 20:29:45 +0300 Subject: spi: uapi: spidev: add missing ioctl header The SPI_IOC_MESSAGE() macro references _IOC_SIZEBITS. Add linux/ioctl.h to make sure this macro is defined. This fixes the following build failure of lcdproc with the musl libc: In file included from .../sysroot/usr/include/sys/ioctl.h:7:0, from hd44780-spi.c:31: hd44780-spi.c: In function 'spi_transfer': hd44780-spi.c:89:24: error: '_IOC_SIZEBITS' undeclared (first use in this function) status = ioctl(p->fd, SPI_IOC_MESSAGE(1), &xfer); ^ Signed-off-by: Baruch Siach Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/uapi/linux/spi/spidev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index dd5f21e..856de39 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -23,6 +23,7 @@ #define SPIDEV_H #include +#include /* User space versions of kernel symbols for SPI clocking modes, * matching -- cgit v1.1 From eb35279dd7c7834d6320edf24e1b9786d31e4899 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Wed, 24 May 2017 22:52:29 -0700 Subject: iio: proximity: as3935: noise detection + threshold changes Most applications are too noisy to allow the default noise and watchdog settings, and thus need to be configurable via DT properties. Also default settings to POR defaults on a reset, and register distuber interrupts as noise since it prevents proper usage. Cc: devicetree@vger.kernel.org Signed-off-by: Matt Ranostay Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../ABI/testing/sysfs-bus-iio-proximity-as3935 | 8 ++++ .../devicetree/bindings/iio/proximity/as3935.txt | 5 +++ drivers/iio/proximity/as3935.c | 43 ++++++++++++++++++++-- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 index 33e96f7..147d4e8 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 +++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 @@ -14,3 +14,11 @@ Description: Show or set the gain boost of the amp, from 0-31 range. 18 = indoors (default) 14 = outdoors + +What /sys/bus/iio/devices/iio:deviceX/noise_level_tripped +Date: May 2017 +KernelVersion: 4.13 +Contact: Matt Ranostay +Description: + When 1 the noise level is over the trip level and not reporting + valid data diff --git a/Documentation/devicetree/bindings/iio/proximity/as3935.txt b/Documentation/devicetree/bindings/iio/proximity/as3935.txt index 38d7431..b6c1afa 100644 --- a/Documentation/devicetree/bindings/iio/proximity/as3935.txt +++ b/Documentation/devicetree/bindings/iio/proximity/as3935.txt @@ -16,6 +16,10 @@ Optional properties: - ams,tuning-capacitor-pf: Calibration tuning capacitor stepping value 0 - 120pF. This will require using the calibration data from the manufacturer. + - ams,nflwdth: Set the noise and watchdog threshold register on + startup. This will need to set according to the noise from the + MCU board, and possibly the local environment. Refer to the + datasheet for the threshold settings. Example: @@ -27,4 +31,5 @@ as3935@0 { interrupt-parent = <&gpio1>; interrupts = <16 1>; ams,tuning-capacitor-pf = <80>; + ams,nflwdth = <0x44>; }; diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 0eeff29..4a48b7b 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -39,8 +39,12 @@ #define AS3935_AFE_GAIN_MAX 0x1F #define AS3935_AFE_PWR_BIT BIT(0) +#define AS3935_NFLWDTH 0x01 +#define AS3935_NFLWDTH_MASK 0x7f + #define AS3935_INT 0x03 #define AS3935_INT_MASK 0x0f +#define AS3935_DISTURB_INT BIT(2) #define AS3935_EVENT_INT BIT(3) #define AS3935_NOISE_INT BIT(0) @@ -48,6 +52,7 @@ #define AS3935_DATA_MASK 0x3F #define AS3935_TUNE_CAP 0x08 +#define AS3935_DEFAULTS 0x3C #define AS3935_CALIBRATE 0x3D #define AS3935_READ_DATA BIT(14) @@ -62,7 +67,9 @@ struct as3935_state { struct mutex lock; struct delayed_work work; + unsigned long noise_tripped; u32 tune_cap; + u32 nflwdth_reg; u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */ u8 buf[2] ____cacheline_aligned; }; @@ -145,12 +152,29 @@ static ssize_t as3935_sensor_sensitivity_store(struct device *dev, return len; } +static ssize_t as3935_noise_level_tripped_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct as3935_state *st = iio_priv(dev_to_iio_dev(dev)); + int ret; + + mutex_lock(&st->lock); + ret = sprintf(buf, "%d\n", !time_after(jiffies, st->noise_tripped + HZ)); + mutex_unlock(&st->lock); + + return ret; +} + static IIO_DEVICE_ATTR(sensor_sensitivity, S_IRUGO | S_IWUSR, as3935_sensor_sensitivity_show, as3935_sensor_sensitivity_store, 0); +static IIO_DEVICE_ATTR(noise_level_tripped, S_IRUGO, + as3935_noise_level_tripped_show, NULL, 0); static struct attribute *as3935_attributes[] = { &iio_dev_attr_sensor_sensitivity.dev_attr.attr, + &iio_dev_attr_noise_level_tripped.dev_attr.attr, NULL, }; @@ -246,7 +270,11 @@ static void as3935_event_work(struct work_struct *work) case AS3935_EVENT_INT: iio_trigger_poll_chained(st->trig); break; + case AS3935_DISTURB_INT: case AS3935_NOISE_INT: + mutex_lock(&st->lock); + st->noise_tripped = jiffies; + mutex_unlock(&st->lock); dev_warn(&st->spi->dev, "noise level is too high\n"); break; } @@ -269,15 +297,14 @@ static irqreturn_t as3935_interrupt_handler(int irq, void *private) static void calibrate_as3935(struct as3935_state *st) { - /* mask disturber interrupt bit */ - as3935_write(st, AS3935_INT, BIT(5)); - + as3935_write(st, AS3935_DEFAULTS, 0x96); as3935_write(st, AS3935_CALIBRATE, 0x96); as3935_write(st, AS3935_TUNE_CAP, BIT(5) | (st->tune_cap / TUNE_CAP_DIV)); mdelay(2); as3935_write(st, AS3935_TUNE_CAP, (st->tune_cap / TUNE_CAP_DIV)); + as3935_write(st, AS3935_NFLWDTH, st->nflwdth_reg); } #ifdef CONFIG_PM_SLEEP @@ -370,6 +397,15 @@ static int as3935_probe(struct spi_device *spi) return -EINVAL; } + ret = of_property_read_u32(np, + "ams,nflwdth", &st->nflwdth_reg); + if (!ret && st->nflwdth_reg > AS3935_NFLWDTH_MASK) { + dev_err(&spi->dev, + "invalid nflwdth setting of %d\n", + st->nflwdth_reg); + return -EINVAL; + } + indio_dev->dev.parent = &spi->dev; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->channels = as3935_channels; @@ -384,6 +420,7 @@ static int as3935_probe(struct spi_device *spi) return -ENOMEM; st->trig = trig; + st->noise_tripped = jiffies - HZ; trig->dev.parent = indio_dev->dev.parent; iio_trigger_set_drvdata(trig, indio_dev); trig->ops = &iio_interrupt_trigger_ops; -- cgit v1.1 From f61dfff2f5b9fcb087bf5c444bc44b444709588f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Jul 2017 10:14:59 +0200 Subject: iio: pressure: zpa2326: Remove always-true check which confuses gcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc 4.1.2: drivers/iio/pressure/zpa2326.c: In function ‘zpa2326_wait_oneshot_completion’: drivers/iio/pressure/zpa2326.c:868: warning: ‘ret’ may be used uninitialized in this function When testing for "timeout < 0", timeout is already guaranteed to be strict negative, so the branch is always taken, and ret is thus always initialized. But (some version of) gcc is not smart enough to notice. Remove the check to fix this. As there is no other code in between assigning the error codes and returning them, the error codes can be returned immediately, and the intermediate variable can be dropped. Drop the "else" to please checkpatch. Fixes: e7215fe4d51e69c9 ("iio: pressure: zpa2326: report interrupted case as failure") Signed-off-by: Geert Uytterhoeven Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/zpa2326.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iio/pressure/zpa2326.c b/drivers/iio/pressure/zpa2326.c index ebfb1de..9143145 100644 --- a/drivers/iio/pressure/zpa2326.c +++ b/drivers/iio/pressure/zpa2326.c @@ -865,7 +865,6 @@ complete: static int zpa2326_wait_oneshot_completion(const struct iio_dev *indio_dev, struct zpa2326_private *private) { - int ret; unsigned int val; long timeout; @@ -887,14 +886,11 @@ static int zpa2326_wait_oneshot_completion(const struct iio_dev *indio_dev, /* Timed out. */ zpa2326_warn(indio_dev, "no one shot interrupt occurred (%ld)", timeout); - ret = -ETIME; - } else if (timeout < 0) { - zpa2326_warn(indio_dev, - "wait for one shot interrupt cancelled"); - ret = -ERESTARTSYS; + return -ETIME; } - return ret; + zpa2326_warn(indio_dev, "wait for one shot interrupt cancelled"); + return -ERESTARTSYS; } static int zpa2326_init_managed_irq(struct device *parent, -- cgit v1.1 From 1df79cb3bae754e4a42240f9851ed82549a44f1a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 20 Sep 2017 12:35:57 +0530 Subject: phy: tegra: Handle return value of kasprintf kasprintf() can fail and it's return value must be checked. Signed-off-by: Arvind Yadav Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/tegra/xusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 3cbcb25..4307bf0 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -454,6 +454,8 @@ tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, char *name; name = kasprintf(GFP_KERNEL, "%s-%u", type, index); + if (!name) + return ERR_PTR(-ENOMEM); np = of_find_node_by_name(np, name); kfree(name); } -- cgit v1.1 From 554a56fc83f679c73b4f851a330045d0ec7ec1a5 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 21 Sep 2017 18:31:48 +0800 Subject: phy: phy-mtk-tphy: fix NULL point of chip bank Chip bank of version-1 is initialized as NULL, but it's used by pcie_phy_instance_power_on/off(), so assign it a right address. Signed-off-by: Chunfeng Yun Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/mediatek/phy-mtk-tphy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index e3baad7..721a2a1 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -27,6 +27,7 @@ /* banks shared by multiple phys */ #define SSUSB_SIFSLV_V1_SPLLC 0x000 /* shared by u3 phys */ #define SSUSB_SIFSLV_V1_U2FREQ 0x100 /* shared by u2 phys */ +#define SSUSB_SIFSLV_V1_CHIP 0x300 /* shared by u3 phys */ /* u2 phy bank */ #define SSUSB_SIFSLV_V1_U2PHY_COM 0x000 /* u3/pcie/sata phy banks */ @@ -762,7 +763,7 @@ static void phy_v1_banks_init(struct mtk_tphy *tphy, case PHY_TYPE_USB3: case PHY_TYPE_PCIE: u3_banks->spllc = tphy->sif_base + SSUSB_SIFSLV_V1_SPLLC; - u3_banks->chip = NULL; + u3_banks->chip = tphy->sif_base + SSUSB_SIFSLV_V1_CHIP; u3_banks->phyd = instance->port_base + SSUSB_SIFSLV_V1_U3PHYD; u3_banks->phya = instance->port_base + SSUSB_SIFSLV_V1_U3PHYA; break; -- cgit v1.1 From caef3e0b657d091a540232e07e5e5b4648110a52 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 18 Sep 2017 10:04:20 +0200 Subject: phy: mvebu-cp110-comphy: fix mux error check The mux value is retrieved from the mvebu_comphy_get_mux() function which returns an int. In mvebu_comphy_power_on() this int is stored to a u32 and a check is made to ensure it's not negative. Which is wrong. This fixes it. Fixes: d0438bd6aa09 ("phy: add the mvebu cp110 comphy driver") Signed-off-by: Antoine Tenart Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index 73ebad6..514a1a4 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -468,8 +468,8 @@ static int mvebu_comphy_power_on(struct phy *phy) { struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); struct mvebu_comphy_priv *priv = lane->priv; - int ret; - u32 mux, val; + int ret, mux; + u32 val; mux = mvebu_comphy_get_mux(lane->id, lane->port, lane->mode); if (mux < 0) -- cgit v1.1 From 17fb745d4acfe52f9ebb1ba2b10f2fcd796fb5ce Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 18 Sep 2017 10:04:22 +0200 Subject: phy: mvebu-cp110-comphy: explicitly set the pipe selector The pipe selector is used to select some modes (such as USB or PCIe). Otherwise it must be set to 0 (or "unconnected"). This patch does this to ensure it is not set to an incompatible value when using the supported modes (SGMII, 10GKR). Signed-off-by: Antoine Tenart Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index 514a1a4..e8ed7e3 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -111,6 +111,8 @@ #define MVEBU_COMPHY_CONF6_40B BIT(18) #define MVEBU_COMPHY_SELECTOR 0x1140 #define MVEBU_COMPHY_SELECTOR_PHY(n) ((n) * 0x4) +#define MVEBU_COMPHY_PIPE_SELECTOR 0x1144 +#define MVEBU_COMPHY_PIPE_SELECTOR_PIPE(n) ((n) * 0x4) #define MVEBU_COMPHY_LANES 6 #define MVEBU_COMPHY_PORTS 3 @@ -475,6 +477,10 @@ static int mvebu_comphy_power_on(struct phy *phy) if (mux < 0) return -ENOTSUPP; + regmap_read(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, &val); + val &= ~(0xf << MVEBU_COMPHY_PIPE_SELECTOR_PIPE(lane->id)); + regmap_write(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, val); + regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val); val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id)); val |= mux << MVEBU_COMPHY_SELECTOR_PHY(lane->id); @@ -526,6 +532,10 @@ static int mvebu_comphy_power_off(struct phy *phy) val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id)); regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val); + regmap_read(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, &val); + val &= ~(0xf << MVEBU_COMPHY_PIPE_SELECTOR_PIPE(lane->id)); + regmap_write(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, val); + return 0; } -- cgit v1.1 From c1c7acac0998ffcc9cd81e016a7d1b58b1afbb51 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Sep 2017 13:31:37 +0300 Subject: phy: mvebu-cp110: checking for NULL instead of IS_ERR() devm_ioremap_resource() never returns NULL, it only returns error pointers so this test needs to be changed. Fixes: d0438bd6aa09 ("phy: add the mvebu cp110 comphy driver") Signed-off-by: Dan Carpenter Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index e8ed7e3..89c887e 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -586,8 +586,8 @@ static int mvebu_comphy_probe(struct platform_device *pdev) return PTR_ERR(priv->regmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->base = devm_ioremap_resource(&pdev->dev, res); - if (!priv->base) - return -ENOMEM; + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); for_each_available_child_of_node(pdev->dev.of_node, child) { struct mvebu_comphy_lane *lane; -- cgit v1.1 From f98b74387551f6d266c044d90e87e4919b25b970 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 22 Sep 2017 09:44:03 -0700 Subject: phy: rockchip-typec: Set the AUX channel flip state earlier On some DP monitors we found that setting the wrong flip state on the AUX channel could cause the monitor to stop asserting HotPlug Detect (HPD). Setting the right flip state caused these monitors to start asserting HotPlug Detect again. Here's what we believe was happening: * We'd plug in the monitor and we'd see HPD assert * We'd quickly see HPD deassert * The kernel would try to init the type C PHY but would init it in USB mode (because there was a peripheral there but no HPD) * Because the kernel never set the flip mode properly we'd never see the HPD come back. With this change, we'll still see HPD disappear (we don't think there's anything we can do about that), but then it will come back. Overall we can say that it's sane to set the AUX channel flip state even when HPD is not asserted. NOTE: to make this change possible, I needed to do a bit of cleanup to the tcphy_dp_aux_calibration() function so that it doesn't ever clobber the FLIP state. This made it very obvious that a line of code documented as "setting bit 12" also did a bunch of other magic, undocumented stuff. For now I'll just break out the bits and add a comment that this is black magic and we'll try to document tcphy_dp_aux_calibration() better in a future CL. ALSO NOTE: the old function used to write a bunch of hardcoded values in _some_ cases instead of doing a read-modify-write. One could possibly assert that these could have had (beneficial) side effects and thus with this new code (which always does read-modify-write) we could have a bug. We shouldn't need to worry, though, since in the old code tcphy_dp_aux_calibration() was always called following the de-assertion of "reset" the the type C PHY. ...so the type C PHY was always in default state. TX_ANA_CTRL_REG_1 is documented to be 0x0 after reset. This was also confirmed by printk. Suggested-by: Shawn Nematbakhsh Reviewed-by: Chris Zhong Signed-off-by: Douglas Anderson Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 62 +++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index 4d2c57f..342a777 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -443,14 +443,34 @@ static inline int property_enable(struct rockchip_typec_phy *tcphy, return regmap_write(tcphy->grf_regs, reg->offset, val | mask); } +static void tcphy_dp_aux_set_flip(struct rockchip_typec_phy *tcphy) +{ + u16 tx_ana_ctrl_reg_1; + + /* + * Select the polarity of the xcvr: + * 1, Reverses the polarity (If TYPEC, Pulls ups aux_p and pull + * down aux_m) + * 0, Normal polarity (if TYPEC, pulls up aux_m and pulls down + * aux_p) + */ + tx_ana_ctrl_reg_1 = readl(tcphy->base + TX_ANA_CTRL_REG_1); + if (!tcphy->flip) + tx_ana_ctrl_reg_1 |= BIT(12); + else + tx_ana_ctrl_reg_1 &= ~BIT(12); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); +} + static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) { + u16 tx_ana_ctrl_reg_1; u16 rdata, rdata2, val; /* disable txda_cal_latch_en for rewrite the calibration values */ - rdata = readl(tcphy->base + TX_ANA_CTRL_REG_1); - val = rdata & 0xdfff; - writel(val, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 = readl(tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 &= ~BIT(13); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); /* * read a resistor calibration code from CMN_TXPUCAL_CTRL[6:0] and @@ -472,9 +492,8 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) * Activate this signal for 1 clock cycle to sample new calibration * values. */ - rdata = readl(tcphy->base + TX_ANA_CTRL_REG_1); - val = rdata | 0x2000; - writel(val, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(13); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); usleep_range(150, 200); /* set TX Voltage Level and TX Deemphasis to 0 */ @@ -482,8 +501,10 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) /* re-enable decap */ writel(0x100, tcphy->base + TX_ANA_CTRL_REG_2); writel(0x300, tcphy->base + TX_ANA_CTRL_REG_2); - writel(0x2008, tcphy->base + TX_ANA_CTRL_REG_1); - writel(0x2018, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(3); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(4); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); writel(0, tcphy->base + TX_ANA_CTRL_REG_5); @@ -494,8 +515,10 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) writel(0x1001, tcphy->base + TX_ANA_CTRL_REG_4); /* re-enables Bandgap reference for LDO */ - writel(0x2098, tcphy->base + TX_ANA_CTRL_REG_1); - writel(0x2198, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(7); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(8); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); /* * re-enables the transmitter pre-driver, driver data selection MUX, @@ -505,17 +528,15 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) writel(0x303, tcphy->base + TX_ANA_CTRL_REG_2); /* - * BIT 12: Controls auxda_polarity, which selects the polarity of the - * xcvr: - * 1, Reverses the polarity (If TYPEC, Pulls ups aux_p and pull - * down aux_m) - * 0, Normal polarity (if TYPE_C, pulls up aux_m and pulls down - * aux_p) + * Do some magic undocumented stuff, some of which appears to + * undo the "re-enables Bandgap reference for LDO" above. */ - val = 0xa078; - if (!tcphy->flip) - val |= BIT(12); - writel(val, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(15); + tx_ana_ctrl_reg_1 &= ~BIT(8); + tx_ana_ctrl_reg_1 &= ~BIT(7); + tx_ana_ctrl_reg_1 |= BIT(6); + tx_ana_ctrl_reg_1 |= BIT(5); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); writel(0, tcphy->base + TX_ANA_CTRL_REG_3); writel(0, tcphy->base + TX_ANA_CTRL_REG_4); @@ -555,6 +576,7 @@ static int tcphy_phy_init(struct rockchip_typec_phy *tcphy, u8 mode) reset_control_deassert(tcphy->tcphy_rst); property_enable(tcphy, &cfg->typec_conn_dir, tcphy->flip); + tcphy_dp_aux_set_flip(tcphy); tcphy_cfg_24m(tcphy); -- cgit v1.1 From 26e03d803c8191e906360a0320c05b12d45a37ae Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 22 Sep 2017 09:44:04 -0700 Subject: phy: rockchip-typec: Don't set the aux voltage swing to 400 mV On rk3399-gru-kevin there are some cases where we're seeing AUX CH failures when trying to do DisplayPort over type C. Problems are intermittent and don't reproduce all the time. Problems are often bursty and failures persist for several seconds before going away. The failure case I focused on is: * A particular type C to HDMI adapter. * One orientation (flip mode) of that adapter. * Easier to see failures when something is plugged into the _other type C port at the same time. * Problems reproduce on both type C ports (left and right side). Ironically problems also stop reproducing when I solder wires onto the AUX CH signals on a port (even if no scope is connected to the signals). In this case, problems only stop reproducing on the port with the wires connected. From the above it appears that something about the signaling on the aux channel is marginal and any slight differences can bring us over the edge to failure. It turns out that we can fix our problems by just increasing the voltage swing of the AUX CH, giving us a bunch of extra margin. In DP up to version 1.2 the voltage swing on the aux channel was specced as .29 V to 1.38 V. In DP version 1.3 the aux channel voltage was tightened to be between .29 V and .40 V, but it clarifies that it really only needs the lower voltage when operating at the highest speed (HBR3 mode). So right now we are trying to use a voltage that technically should be valid for all versions of the spec (including version 1.3 when transmitting at HBR3). That would be great to do if it worked reliably. ...but it doesn't seem to. It turns out that if you continue to read through the DP part of the rk3399 TRM and other parts of the type C PHY spec you'll find out that while the rk3399 does support DP 1.3, it doesn't support HBR3. The docs specifically say "RBR, HBR and HBR2 data rates only". Thus there is actually no requirement to support an AUX CH swing of .4 V. Even if there is no actual requirement to support the tighter voltage swing, one could possibly argue that we should support it anyway. The DP spec clarifies that the lower voltage on the AUX CH will reduce cross talk in some cases and that seems like it could be beneficial even at the lower bit rates. At the moment, though, we are seeing problems with the AUX CH and not on the other lines. Also, checking another known working and similar laptop shows that the other laptop runs the AUX channel at a higher voltage. Other notes: * Looking at measurements done on the AUX CH we weren't actually compliant with the DP 1.3 spec anyway. AUX CH peek-to-peek voltage was measured on rk3399-gru-kevin as .466 V which is > .4 V. * With this new patch the AUX channel isn't actually 1.0 V, but it has been confirmed that the signal is better and has more margin. Eye diagram passes. * If someone were truly an expert in the Type C PHY and in DisplayPort signaling they might be able to make things work and keep the voltage at < .4 V. The Type C PHY seems to have a plethora of tuning knobs that could almost certainly improve the signal integrity. Some of these things (like enabling tx_fcm_full_margin) even seem to fix my problems. However, lacking expertise I can't say whether this is a better or worse solution. Tightening signals to give cleaner waveforms can often have adverse affects, like increasing EMI or adding noise to other signals. I'd rather not tune things like this without a healthy application of expertise that I don't have. Signed-off-by: Douglas Anderson Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index 342a777..b25c004 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -543,10 +543,11 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) writel(0, tcphy->base + TX_ANA_CTRL_REG_5); /* - * Controls low_power_swing_en, set the voltage swing of the driver - * to 400mv. The values below are peak to peak (differential) values. + * Controls low_power_swing_en, don't set the voltage swing of the + * driver to 400mv. The values below are peak to peak (differential) + * values. */ - writel(4, tcphy->base + TXDA_COEFF_CALC_CTRL); + writel(0, tcphy->base + TXDA_COEFF_CALC_CTRL); writel(0, tcphy->base + TXDA_CYA_AUXDA_CYA); /* Controls tx_high_z_tm_en */ -- cgit v1.1 From 13ffe9a26df4e156363579b25c904dd0b1e31bfb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 13 Sep 2017 18:02:02 +0100 Subject: staging: iio: ade7759: fix signed extension bug on shift of a u8 The current shift of st->rx[2] left shifts a u8 24 bits left, promotes the integer to a an int and then to a unsigned u64. If the top bit of st->rx[2] is set then we end up with all the upper bits being set to 1. Fix this by casting st->rx[2] to a u64 before the 24 bit left shift. Detected by CoverityScan CID#144940 ("Unintended sign extension") Fixes: 2919fa54ef64 ("staging: iio: meter: new driver for ADE7759 devices") Signed-off-by: Colin Ian King Signed-off-by: Jonathan Cameron --- drivers/staging/iio/meter/ade7759.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/meter/ade7759.c b/drivers/staging/iio/meter/ade7759.c index 1691760..02573c5 100644 --- a/drivers/staging/iio/meter/ade7759.c +++ b/drivers/staging/iio/meter/ade7759.c @@ -172,7 +172,7 @@ static int ade7759_spi_read_reg_40(struct device *dev, reg_address); goto error_ret; } - *val = ((u64)st->rx[1] << 32) | (st->rx[2] << 24) | + *val = ((u64)st->rx[1] << 32) | ((u64)st->rx[2] << 24) | (st->rx[3] << 16) | (st->rx[4] << 8) | st->rx[5]; error_ret: -- cgit v1.1 From be94a6f6d488b4767662e8949dc62361bd1d6311 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 29 Sep 2017 15:24:05 +0200 Subject: iio: dummy: events: Add missing break Add missing break in iio_simple_dummy_write_event_config() for the voltage threshold event enable attribute. Without this writing to the in_voltage0_thresh_rising_en always returns -EINVAL even though the change was correctly applied. Fixes: 3e34e650db197 ("iio: dummy: Demonstrate the usage of new channel types") Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dummy/iio_simple_dummy_events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/dummy/iio_simple_dummy_events.c b/drivers/iio/dummy/iio_simple_dummy_events.c index ed63ffd..7ec2a0b 100644 --- a/drivers/iio/dummy/iio_simple_dummy_events.c +++ b/drivers/iio/dummy/iio_simple_dummy_events.c @@ -72,6 +72,7 @@ int iio_simple_dummy_write_event_config(struct iio_dev *indio_dev, st->event_en = state; else return -EINVAL; + break; default: return -EINVAL; } -- cgit v1.1 From 2fb850092fd95198a0a4746f07b80077d5a3aa37 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 29 Sep 2017 16:58:46 -0700 Subject: phy: rockchip-typec: Check for errors from tcphy_phy_init() The function tcphy_phy_init() could return an error but the callers weren't checking the return value. They should. In at least one case while testing I saw the message "wait pma ready timeout" which indicates that tcphy_phy_init() really could return an error and we should account for it. Signed-off-by: Douglas Anderson Reviewed-by: Guenter Roeck Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index b25c004..a958c9b 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -708,8 +708,11 @@ static int rockchip_usb3_phy_power_on(struct phy *phy) if (tcphy->mode == new_mode) goto unlock_ret; - if (tcphy->mode == MODE_DISCONNECT) - tcphy_phy_init(tcphy, new_mode); + if (tcphy->mode == MODE_DISCONNECT) { + ret = tcphy_phy_init(tcphy, new_mode); + if (ret) + goto unlock_ret; + } /* wait TCPHY for pipe ready */ for (timeout = 0; timeout < 100; timeout++) { @@ -783,10 +786,12 @@ static int rockchip_dp_phy_power_on(struct phy *phy) */ if (new_mode == MODE_DFP_DP && tcphy->mode != MODE_DISCONNECT) { tcphy_phy_deinit(tcphy); - tcphy_phy_init(tcphy, new_mode); + ret = tcphy_phy_init(tcphy, new_mode); } else if (tcphy->mode == MODE_DISCONNECT) { - tcphy_phy_init(tcphy, new_mode); + ret = tcphy_phy_init(tcphy, new_mode); } + if (ret) + goto unlock_ret; ret = readx_poll_timeout(readl, tcphy->base + DP_MODE_CTL, val, val & DP_MODE_A2, 1000, -- cgit v1.1 From 986e7b7e4991a5d3abab26f97a671512e09e4417 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 29 Sep 2017 11:25:08 +0800 Subject: regulator: axp20x: Fix poly-phase bit offset for AXP803 DCDC5/6 The bit offset used to check if DCDC5 and DCDC6 are tied together in poly-phase output is wrong. It was checking against a reserved bit, which is always false. In reality, neither the reference design layout nor actually produced boards tie these two buck regulators together. But we should still fix it, just in case. Fixes: 1dbe0ccb0631 ("regulator: axp20x-regulator: add support for AXP803") Signed-off-by: Chen-Yu Tsai Tested-by: Maxime Ripard Acked-by: Maxime Ripard Signed-off-by: Mark Brown --- drivers/regulator/axp20x-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index f18b36d..376a99b 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -590,7 +590,7 @@ static bool axp20x_is_polyphase_slave(struct axp20x_dev *axp20x, int id) case AXP803_DCDC3: return !!(reg & BIT(6)); case AXP803_DCDC6: - return !!(reg & BIT(7)); + return !!(reg & BIT(5)); } break; -- cgit v1.1 From 028568d84da3cfca49f5f846eeeef01441d70451 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 2 Oct 2017 17:07:28 +0900 Subject: kbuild: revert $(realpath ...) to $(shell cd ... && /bin/pwd) I thought commit 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") was a safe conversion, but it changed the behavior. $(abspath ...) / $(realpath ...) does not expand shell special characters, such as '~'. Here is a simple Makefile example: ---------------->8---------------- $(info /bin/pwd: $(shell cd ~/; /bin/pwd)) $(info abspath: $(abspath ~/)) $(info realpath: $(realpath ~/)) all: @: ---------------->8---------------- $ make /bin/pwd: /home/masahiro abspath: /home/masahiro/workspace/~ realpath: This can be a real problem if 'make O=~/foo' is invoked from another Makefile or primitive shell like dash. This commit partially reverts 8e9b46679923. Fixes: 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") Reported-by: Julien Grall Signed-off-by: Masahiro Yamada Tested-by: Julien Grall --- Makefile | 4 ++-- tools/power/cpupower/Makefile | 2 +- tools/scripts/Makefile.include | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index cf007a3..1b48717 100644 --- a/Makefile +++ b/Makefile @@ -130,8 +130,8 @@ endif ifneq ($(KBUILD_OUTPUT),) # check that the output directory actually exists saved-output := $(KBUILD_OUTPUT) -$(shell [ -d $(KBUILD_OUTPUT) ] || mkdir -p $(KBUILD_OUTPUT)) -KBUILD_OUTPUT := $(realpath $(KBUILD_OUTPUT)) +KBUILD_OUTPUT := $(shell mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) \ + && /bin/pwd) $(if $(KBUILD_OUTPUT),, \ $(error failed to create output directory "$(saved-output)")) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 4c5a481..d6e1c02 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -26,7 +26,7 @@ endif ifneq ($(OUTPUT),) # check that the output directory actually exists -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 9dc8f07..1e8b611 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,7 +1,7 @@ ifneq ($(O),) ifeq ($(origin O), command line) - ABSOLUTE_O := $(realpath $(O)) - dummy := $(if $(ABSOLUTE_O),,$(error O=$(O) does not exist)) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) @@ -12,7 +12,7 @@ endif # check that the output directory actually exists ifneq ($(OUTPUT),) -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif -- cgit v1.1 From 990404188581cba45937ebad24a4b1910a4ec568 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Mon, 25 Sep 2017 20:25:10 +0800 Subject: kbuild: drop unused symverfile in Makefile.modpost Since commit 040fcc819a2e ("kbuild: improved modversioning support for external modules"), symverfile has been replaced with kernelsymfile and modulesymfile. Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 16923ba..756d14f 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -97,7 +97,6 @@ vmlinux.o: FORCE $(call cmd,kernel-mod) # Declare generated files as targets for modpost -$(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; -- cgit v1.1 From 2cc3ce24a9874451e6751a062cc2ae5040fd8bee Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Wed, 4 Oct 2017 01:53:26 +0200 Subject: kbuild: Fix optimization level choice default The choice containing the CC_OPTIMIZE_FOR_PERFORMANCE symbol accidentally added a "CONFIG_" prefix when trying to make it the default, selecting an undefined symbol as the default. The mistake is harmless here: Since the default symbol is not visible, the choice falls back on using the visible symbol as the default instead, which is CC_OPTIMIZE_FOR_PERFORMANCE, as intended. A patch that makes Kconfig print a warning in this case has been submitted separately: http://www.spinics.net/lists/linux-kbuild/msg15566.html Signed-off-by: Ulf Magnusson Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 78cb246..3c1faaa 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1033,7 +1033,7 @@ endif choice prompt "Compiler optimization level" - default CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE + default CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE bool "Optimize for performance" -- cgit v1.1 From bbfe63b60aad29bb0ed65a1968519765bd68368a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Oct 2017 17:17:52 -0700 Subject: Makefile: kselftest: fix grammar typo Correct typo in kselftest help text. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1b48717..d5ed3ce 100644 --- a/Makefile +++ b/Makefile @@ -1395,7 +1395,7 @@ help: @echo ' Build, install, and boot kernel before' @echo ' running kselftest on it' @echo ' kselftest-clean - Remove all generated kselftest files' - @echo ' kselftest-merge - Merge all the config dependencies of kselftest to existed' + @echo ' kselftest-merge - Merge all the config dependencies of kselftest to existing' @echo ' .config.' @echo '' @echo 'Userspace tools targets:' -- cgit v1.1 From 5151b4afb41dd7c5e13a130efcc95326a49da8c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 7 Oct 2017 16:53:20 -0700 Subject: iio: adc: dln2-adc: fix build error The dln2-adc driver uses interface(s) that are controlled by the IIO_TRIGGERED_BUFFER Kconfig symbol, so the driver needs to select that symbol to prevent the build error. drivers/iio/adc/dln2-adc.o: In function `dln2_adc_probe': dln2-adc.c:(.text+0x528): undefined reference to `devm_iio_triggered_buffer_setup' Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Jonathan Cameron Cc: linux-iio@vger.kernel.org Cc: Jack Andersen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 5762565..1d13bf0 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -243,6 +243,8 @@ config DA9150_GPADC config DLN2_ADC tristate "Diolan DLN-2 ADC driver support" depends on MFD_DLN2 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for Diolan DLN-2 ADC. -- cgit v1.1 From a528629dfd3b87f206ac0e5637f7e7f958143ab8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 6 Oct 2017 16:44:27 -0700 Subject: ARC: [plat-hsdk] select CONFIG_RESET_HSDK from Kconfig Signed-off-by: Vineet Gupta --- arch/arc/configs/hsdk_defconfig | 1 - arch/arc/plat-hsdk/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 15f0f6b..7b8f8fa 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -63,7 +63,6 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_RESET_HSDK=y CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index bd08de4..19ab3cf 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -8,3 +8,4 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" select CLK_HSDK + select RESET_HSDK -- cgit v1.1 From 5a866ec0014b2baa4ecbb1eaa19c835482829d08 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 10 Oct 2017 10:43:17 +0200 Subject: spi: a3700: Return correct value on timeout detection When waiting for transfer completion, a3700_spi_wait_completion returns a boolean indicating if a timeout occurred. The function was returning 'true' everytime, failing to detect any timeout. This patch makes it return 'false' when a timeout is reached. Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-armada-3700.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 9172cb2..568e1c6 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -387,7 +387,8 @@ static bool a3700_spi_wait_completion(struct spi_device *spi) spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0); - return true; + /* Timeout was reached */ + return false; } static bool a3700_spi_transfer_wait(struct spi_device *spi, -- cgit v1.1 From 692b48258dda7c302e777d7d5f4217244478f1f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Oct 2017 08:04:13 -0700 Subject: workqueue: replace pool->manager_arb mutex with a flag Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by lockdep: [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted [ 1270.473240] ----------------------------------------------------- [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1270.474239] (&(&lock->wait_lock)->rlock){+.+.}, at: [] __mutex_unlock_slowpath+0xa2/0x280 [ 1270.474994] [ 1270.474994] and this task is already holding: [ 1270.475440] (&pool->lock/1){-.-.}, at: [] worker_thread+0x366/0x3c0 [ 1270.476046] which would create a new lock dependency: [ 1270.476436] (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.} [ 1270.476949] [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock: [ 1270.477553] (&pool->lock/1){-.-.} ... [ 1270.488900] to a HARDIRQ-irq-unsafe lock: [ 1270.489327] (&(&lock->wait_lock)->rlock){+.+.} ... [ 1270.494735] Possible interrupt unsafe locking scenario: [ 1270.494735] [ 1270.495250] CPU0 CPU1 [ 1270.495600] ---- ---- [ 1270.495947] lock(&(&lock->wait_lock)->rlock); [ 1270.496295] local_irq_disable(); [ 1270.496753] lock(&pool->lock/1); [ 1270.497205] lock(&(&lock->wait_lock)->rlock); [ 1270.497744] [ 1270.497948] lock(&pool->lock/1); , which will cause a irq inversion deadlock if the above lock scenario happens. The root cause of this safe -> unsafe lock order is the mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock held. Unlocking mutex while holding an irq spinlock was never safe and this problem has been around forever but it never got noticed because the only time the mutex is usually trylocked while holding irqlock making actual failures very unlikely and lockdep annotation missed the condition until the recent b9c16a0e1f73 ("locking/mutex: Fix lockdep_assert_held() fail"). Using mutex for pool->manager_arb has always been a bit of stretch. It primarily is an mechanism to arbitrate managership between workers which can easily be done with a pool flag. The only reason it became a mutex is that pool destruction path wants to exclude parallel managing operations. This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE and make the destruction path wait for the current manager on a wait queue. v2: Drop unnecessary flag clearing before pool destruction as suggested by Boqun. Signed-off-by: Tejun Heo Reported-by: Josef Bacik Reviewed-by: Lai Jiangshan Cc: Peter Zijlstra Cc: Boqun Feng Cc: stable@vger.kernel.org --- kernel/workqueue.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 64d0edf..a2dccfe 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -68,6 +68,7 @@ enum { * attach_mutex to avoid changing binding state while * worker_attach_to_pool() is in progress. */ + POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ /* worker flags */ @@ -165,7 +166,6 @@ struct worker_pool { /* L: hash of busy workers */ /* see manage_workers() for details on the two manager mutexes */ - struct mutex manager_arb; /* manager arbitration */ struct worker *manager; /* L: purely informational */ struct mutex attach_mutex; /* attach/detach exclusion */ struct list_head workers; /* A: attached workers */ @@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ +static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? */ @@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool) /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { - bool managing = mutex_is_locked(&pool->manager_arb); + bool managing = pool->flags & POOL_MANAGER_ACTIVE; int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; @@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker) { struct worker_pool *pool = worker->pool; - /* - * Anyone who successfully grabs manager_arb wins the arbitration - * and becomes the manager. mutex_trylock() on pool->manager_arb - * failure while holding pool->lock reliably indicates that someone - * else is managing the pool and the worker which failed trylock - * can proceed to executing work items. This means that anyone - * grabbing manager_arb is responsible for actually performing - * manager duties. If manager_arb is grabbed and released without - * actual management, the pool may stall indefinitely. - */ - if (!mutex_trylock(&pool->manager_arb)) + if (pool->flags & POOL_MANAGER_ACTIVE) return false; + + pool->flags |= POOL_MANAGER_ACTIVE; pool->manager = worker; maybe_create_worker(pool); pool->manager = NULL; - mutex_unlock(&pool->manager_arb); + pool->flags &= ~POOL_MANAGER_ACTIVE; + wake_up(&wq_manager_wait); return true; } @@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool) setup_timer(&pool->mayday_timer, pool_mayday_timeout, (unsigned long)pool); - mutex_init(&pool->manager_arb); mutex_init(&pool->attach_mutex); INIT_LIST_HEAD(&pool->workers); @@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool) hash_del(&pool->hash_node); /* - * Become the manager and destroy all workers. Grabbing - * manager_arb prevents @pool's workers from blocking on - * attach_mutex. + * Become the manager and destroy all workers. This prevents + * @pool's workers from blocking on attach_mutex. We're the last + * manager and @pool gets freed with the flag set. */ - mutex_lock(&pool->manager_arb); - spin_lock_irq(&pool->lock); + wait_event_lock_irq(wq_manager_wait, + !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); + pool->flags |= POOL_MANAGER_ACTIVE; + while ((worker = first_idle_worker(pool))) destroy_worker(worker); WARN_ON(pool->nr_workers || pool->nr_idle); @@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool) if (pool->detach_completion) wait_for_completion(pool->detach_completion); - mutex_unlock(&pool->manager_arb); - /* shut down the timers */ del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); -- cgit v1.1 From 10a7ef33679073d13bf1dd05e3f1b7912f999543 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 10 Oct 2017 20:59:38 -0700 Subject: ipsec: Fix dst leak in xfrm_bundle_create(). If we cannot find a suitable inner_mode value, we will leak the currently allocated 'xdst'. The fix is to make sure it is linked into the chain before erroring out. Signed-off-by: David S. Miller Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f062539..2746b62 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1573,6 +1573,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (!dst_prev) + dst0 = dst1; + else + /* Ref count is taken during xfrm_alloc_dst() + * No need to do dst_clone() on dst1 + */ + dst_prev->child = dst1; + if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], xfrm_af2proto(family)); @@ -1584,14 +1592,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } else inner_mode = xfrm[i]->inner_mode; - if (!dst_prev) - dst0 = dst1; - else - /* Ref count is taken during xfrm_alloc_dst() - * No need to do dst_clone() on dst1 - */ - dst_prev->child = dst1; - xdst->route = dst; dst_copy_metrics(dst1, dst); -- cgit v1.1 From 753affba96d3608e058cefc4534007661efd8c96 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 11 Oct 2017 20:01:42 +0300 Subject: ARC: [plat-hsdk] Increase SDIO CIU frequency to 50000000Hz With current SDIO CIU clock frequency (12500000Hz) DW MMC controller fails to initialize some SD cards (which don't support slow mode). So increase SDIO CIU frequency from 12500000Hz to 50000000Hz by switching from the default divisor value (div-by-8) to the minimum possible value of the divisor (div-by-2) in HSDK platform code. Reported-by: Vineet Gupta Tested-by: Vineet Gupta Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 11 ++++++----- arch/arc/plat-hsdk/platform.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 8adde1b..8f627c2 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -137,14 +137,15 @@ /* * DW sdio controller has external ciu clock divider * controlled via register in SDIO IP. Due to its - * unexpected default value (it should devide by 1 - * but it devides by 8) SDIO IP uses wrong clock and + * unexpected default value (it should divide by 1 + * but it divides by 8) SDIO IP uses wrong clock and * works unstable (see STAR 9001204800) + * We switched to the minimum possible value of the + * divisor (div-by-2) in HSDK platform code. * So add temporary fix and change clock frequency - * from 100000000 to 12500000 Hz until we fix dw sdio - * driver itself. + * to 50000000 Hz until we fix dw sdio driver itself. */ - clock-frequency = <12500000>; + clock-frequency = <50000000>; #clock-cells = <0>; }; diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 744e62e..fd0ae5e 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -74,6 +74,10 @@ static void __init hsdk_set_cpu_freq_1ghz(void) pr_err("Failed to setup CPU frequency to 1GHz!"); } +#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) +#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) +#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) + static void __init hsdk_init_early(void) { /* @@ -90,6 +94,12 @@ static void __init hsdk_init_early(void) writel(1, (void __iomem *) CREG_PAE_UPDATE); /* + * Switch SDIO external ciu clock divider from default div-by-8 to + * minimum possible div-by-2. + */ + iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); + + /* * Setup CPU frequency to 1GHz. * TODO: remove it after smart hsdk pll driver will be introduced. */ -- cgit v1.1 From fdbed19697e1aa0f7cb719c11c67f2da26ea7f47 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Oct 2017 17:07:41 -0700 Subject: ARC: unbork module link errors with !CONFIG_ARC_HAS_LLSC | SYSMAP System.map | Building modules, stage 2. | MODPOST 18 modules |ERROR: "smp_atomic_ops_lock" [drivers/gpu/drm/drm_kms_helper.ko] undefined! |ERROR: "smp_bitops_lock" [drivers/gpu/drm/drm_kms_helper.ko] undefined! |ERROR: "smp_atomic_ops_lock" [drivers/gpu/drm/drm.ko] undefined! | ERROR: "smp_bitops_lock" [drivers/gpu/drm/drm.ko] undefined! |../scripts/Makefile.modpost:91: recipe for target '__modpost' failed Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f462671..6df9d94 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include #include @@ -30,6 +32,9 @@ #ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); +EXPORT_SYMBOL_GPL(smp_bitops_lock); #endif struct plat_smp_ops __weak plat_smp_ops; -- cgit v1.1 From c0368e4db4a3e8a3dce40f3f621c06e14c560d79 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 14:59:22 -0700 Subject: spi: bcm-qspi: Fix use after free in bcm_qspi_probe() in error path There was an inversion in how the error path in bcm_qspi_probe() is done which would make us trip over a KASAN use-after-free report. Turns out that qspi->dev_ids does not get allocated until later in the probe process. Fix this by introducing a new lable: qspi_resource_err which takes care of cleaning up the SPI master instance. Fixes: fa236a7ef240 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Florian Fainelli Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-bcm-qspi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 6ef6c44..a172ab2 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1250,7 +1250,7 @@ int bcm_qspi_probe(struct platform_device *pdev, goto qspi_probe_err; } } else { - goto qspi_probe_err; + goto qspi_resource_err; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi"); @@ -1272,7 +1272,7 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi->base[CHIP_SELECT] = devm_ioremap_resource(dev, res); if (IS_ERR(qspi->base[CHIP_SELECT])) { ret = PTR_ERR(qspi->base[CHIP_SELECT]); - goto qspi_probe_err; + goto qspi_resource_err; } } @@ -1280,7 +1280,7 @@ int bcm_qspi_probe(struct platform_device *pdev, GFP_KERNEL); if (!qspi->dev_ids) { ret = -ENOMEM; - goto qspi_probe_err; + goto qspi_resource_err; } for (val = 0; val < num_irqs; val++) { @@ -1369,8 +1369,9 @@ qspi_reg_err: bcm_qspi_hw_uninit(qspi); clk_disable_unprepare(qspi->clk); qspi_probe_err: - spi_master_put(master); kfree(qspi->dev_ids); +qspi_resource_err: + spi_master_put(master); return ret; } /* probe function to be called by SoC specific platform driver probe */ -- cgit v1.1 From 4a40aedec653bb9e22c01ef4fe0a66278b1a666f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 3 Oct 2017 15:20:27 +0100 Subject: DT: arm,gic-v3: Update the ITS size in the examples Currently, the examples are using 2MB for the ITS size. Per the specification (section 8.18 in ARM IHI 0069D), the ITS address map is 128KB. Update the examples to match the specification. Signed-off-by: Julien Grall Signed-off-by: Marc Zyngier --- .../devicetree/bindings/interrupt-controller/arm,gic-v3.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt index 4c29cda..5eb108e 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt @@ -99,7 +99,7 @@ Examples: compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c200000 0 0x200000>; + reg = <0x0 0x2c200000 0 0x20000>; }; }; @@ -124,14 +124,14 @@ Examples: compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c200000 0 0x200000>; + reg = <0x0 0x2c200000 0 0x20000>; }; gic-its@2c400000 { compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c400000 0 0x200000>; + reg = <0x0 0x2c400000 0 0x20000>; }; ppi-partitions { -- cgit v1.1 From c427a475b6bc9d3304cca04acdec53464f71f24c Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Sat, 23 Sep 2017 13:50:19 -0500 Subject: irqchip/gic-v3-its: Fix the incorrect BUG_ON in its_init_vpe_domain() The driver probe path hits 'BUG_ON(entries != vpe_proxy.dev->nr_ites)' on systems where it has VLPI capability, doesn't support direct LPI feature and boot with a single CPU. Relax the BUG_ON() condition to fix the issue. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e8d8934..e3a59f4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2851,7 +2851,7 @@ static int its_init_vpe_domain(void) return -ENOMEM; } - BUG_ON(entries != vpe_proxy.dev->nr_ites); + BUG_ON(entries > vpe_proxy.dev->nr_ites); raw_spin_lock_init(&vpe_proxy.lock); vpe_proxy.next_victim = 0; -- cgit v1.1 From 32bd44dc19de012e22f1fdebd2606b5fb86d54c5 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Sat, 7 Oct 2017 15:43:48 -0500 Subject: irqchip/gic-v3-its: Fix the incorrect parsing of VCPU table size The VCPU table consists of vPE entries, and its size provides the number of VPEs supported by GICv4 hardware. Unfortunately the maximum size of the VPE table is not discoverable like Device table. All VLPI commands limits the number of bits to 16 to hold VPEID, which is index into VCPU table. Don't apply DEVID bits for VCPU table instead assume maximum bits to 16. ITS log messages on QDF2400 without fix: allocated 524288 Devices (indirect, esz 8, psz 64K, shr 1) allocated 8192 Interrupt Collections (flat, esz 8, psz 64K, shr 1) Virtual CPUs Table too large, reduce ids 32->26 Virtual CPUs too large, reduce ITS pages 8192->256 allocated 2097152 Virtual CPUs (flat, esz 8, psz 64K, shr 1) ITS log messages on QDF2400 with fix: allocated 524288 Devices (indirect, esz 8, psz 64K, shr 1) allocated 8192 Interrupt Collections (flat, esz 8, psz 64K, shr 1) allocated 65536 Virtual CPUs (flat, esz 8, psz 64K, shr 1) Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e3a59f4..991cf33 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -107,6 +107,10 @@ struct its_node { #define ITS_ITT_ALIGN SZ_256 +/* The maximum number of VPEID bits supported by VLPI commands */ +#define ITS_MAX_VPEID_BITS (16) +#define ITS_MAX_VPEID (1 << (ITS_MAX_VPEID_BITS)) + /* Convert page order to size in bytes */ #define PAGE_ORDER_TO_SIZE(o) (PAGE_SIZE << (o)) @@ -1582,13 +1586,12 @@ retry_baser: static bool its_parse_indirect_baser(struct its_node *its, struct its_baser *baser, - u32 psz, u32 *order) + u32 psz, u32 *order, u32 ids) { u64 tmp = its_read_baser(its, baser); u64 type = GITS_BASER_TYPE(tmp); u64 esz = GITS_BASER_ENTRY_SIZE(tmp); u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb; - u32 ids = its->device_ids; u32 new_order = *order; bool indirect = false; @@ -1680,9 +1683,13 @@ static int its_alloc_tables(struct its_node *its) continue; case GITS_BASER_TYPE_DEVICE: + indirect = its_parse_indirect_baser(its, baser, + psz, &order, + its->device_ids); case GITS_BASER_TYPE_VCPU: indirect = its_parse_indirect_baser(its, baser, - psz, &order); + psz, &order, + ITS_MAX_VPEID_BITS); break; } @@ -2551,7 +2558,7 @@ static struct irq_chip its_vpe_irq_chip = { static int its_vpe_id_alloc(void) { - return ida_simple_get(&its_vpeid_ida, 0, 1 << 16, GFP_KERNEL); + return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL); } static void its_vpe_id_free(u16 id) -- cgit v1.1 From 30ae9610d275f8f03f5bf7612ce71d8af6fc400b Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 9 Oct 2017 11:46:55 -0500 Subject: irqchip/gic-v3-its: Add missing changes to support 52bit physical address The current ITS driver works fine as long as normal memory and GICR regions are located within the lower 48bit (>=0 && <2^48) physical address space. Some of the registers GICR_PEND/PROP, GICR_VPEND/VPROP and GITS_CBASER are handled properly but not all when configuring the hardware with 52bit physical address. This patch does the following changes to support 52bit PA. -Handle 52bit PA in GITS_BASERn. -Fix ITT_addr width to 52bits, bits[51:8]. -Fix RDbase width to 52bits, bits[51:16]. -Fix VPT_addr width to 52bits, bits[51:16]. Definition of the GITS_BASERn register when ITS PageSize is 64KB: -Bits[47:16] of the register provide bits[47:16] of the table PA. -Bits[15:12] of the register provide bits[51:48] of the table PA. -Bits[15:00] of the base physical address are 0. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 26 +++++++++++++++++++++----- include/linux/irqchip/arm-gic-v3.h | 2 ++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 991cf33..e883956 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -312,7 +312,7 @@ static void its_encode_size(struct its_cmd_block *cmd, u8 size) static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) { - its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 50, 8); + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); } static void its_encode_valid(struct its_cmd_block *cmd, int valid) @@ -322,7 +322,7 @@ static void its_encode_valid(struct its_cmd_block *cmd, int valid) static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) { - its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 50, 16); + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); } static void its_encode_collection(struct its_cmd_block *cmd, u16 col) @@ -362,7 +362,7 @@ static void its_encode_its_list(struct its_cmd_block *cmd, u16 its_list) static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa) { - its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 50, 16); + its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 51, 16); } static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size) @@ -1482,9 +1482,9 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, u64 val = its_read_baser(its, baser); u64 esz = GITS_BASER_ENTRY_SIZE(val); u64 type = GITS_BASER_TYPE(val); + u64 baser_phys, tmp; u32 alloc_pages; void *base; - u64 tmp; retry_alloc_baser: alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); @@ -1500,8 +1500,24 @@ retry_alloc_baser: if (!base) return -ENOMEM; + baser_phys = virt_to_phys(base); + + /* Check if the physical address of the memory is above 48bits */ + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) { + + /* 52bit PA is supported only when PageSize=64K */ + if (psz != SZ_64K) { + pr_err("ITS: no 52bit PA support when psz=%d\n", psz); + free_pages((unsigned long)base, order); + return -ENXIO; + } + + /* Convert 52bit PA to 48bit field */ + baser_phys = GITS_BASER_PHYS_52_to_48(baser_phys); + } + retry_baser: - val = (virt_to_phys(base) | + val = (baser_phys | (type << GITS_BASER_TYPE_SHIFT) | ((esz - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | ((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT) | diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1ea576c..14b74f2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -372,6 +372,8 @@ #define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -- cgit v1.1 From 20608924cc2e6bdeaf6f58ccbe9ddfe12dbfa082 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:26:26 +0200 Subject: genirq: generic chip: Add irq_gc_mask_disable_and_ack_set() The irq_gc_mask_disable_reg_and_ack() function name implies that it provides the combined functions of irq_gc_mask_disable_reg() and irq_gc_ack(). However, the implementation does not actually do that since it writes the mask instead of the disable register. It also does not maintain the mask cache which makes it inappropriate to use with other masking functions. In addition, commit 659fb32d1b67 ("genirq: replace irq_gc_ack() with {set,clr}_bit variants (fwd)") effectively renamed irq_gc_ack() to irq_gc_ack_set_bit() so this function probably should have also been renamed at that time. The generic chip code currently provides three functions for use with the irq_mask member of the irq_chip structure and two functions for use with the irq_ack member of the irq_chip structure. These functions could be combined into six functions for use with the irq_mask_ack member of the irq_chip structure. However, since only one of the combinations is currently used, only the function irq_gc_mask_disable_and_ack_set() is added by this commit. The '_reg' and '_bit' portions of the base function name were left out of the new combined function name in an attempt to keep the function name length manageable with the 80 character source code line length while still allowing the distinct aspects of each combination to be captured by the name. If other combinations are desired in the future please add them to the irq generic chip library at that time. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 + kernel/irq/generic-chip.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index d4728bf..494d328 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1010,6 +1010,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 5270a54..ec5fe9a 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -151,6 +151,31 @@ void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) } /** + * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt + * @d: irq_data + * + * This generic implementation of the irq_mask_ack method is for chips + * with separate enable/disable registers instead of a single mask + * register and where a pending interrupt is acknowledged by setting a + * bit. + * + * Note: This is the only permutation currently used. Similar generic + * functions should be added here if other permutations are required. + */ +void irq_gc_mask_disable_and_ack_set(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} + +/** * irq_gc_eoi - EOI interrupt * @d: irq_data */ -- cgit v1.1 From 16150904d8ba7b93b51d97bcfc671951b7f3dc02 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 4 Oct 2017 14:27:20 +0200 Subject: irqchip/tango: Use irq_gc_mask_disable_and_ack_set The only usage of the irq_gc_mask_disable_reg_and_ack() function is by the Tango irqchip driver. This usage is replaced by the irq_gc_mask_disable_and_ack_set() function since it provides the intended functionality. Fixes: 4bba66899ac6 ("irqchip/tango: Add support for Sigma Designs SMP86xx/SMP87xx interrupt controller") Acked-by: Mans Rullgard Acked-by: Marc Gonzalez Signed-off-by: Florian Fainelli Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-tango.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-tango.c b/drivers/irqchip/irq-tango.c index bdbb5c0..0c08530 100644 --- a/drivers/irqchip/irq-tango.c +++ b/drivers/irqchip/irq-tango.c @@ -141,7 +141,7 @@ static void __init tangox_irq_init_chip(struct irq_chip_generic *gc, for (i = 0; i < 2; i++) { ct[i].chip.irq_ack = irq_gc_ack_set_bit; ct[i].chip.irq_mask = irq_gc_mask_disable_reg; - ct[i].chip.irq_mask_ack = irq_gc_mask_disable_reg_and_ack; + ct[i].chip.irq_mask_ack = irq_gc_mask_disable_and_ack_set; ct[i].chip.irq_unmask = irq_gc_unmask_enable_reg; ct[i].chip.irq_set_type = tangox_irq_set_type; ct[i].chip.name = gc->domain->name; -- cgit v1.1 From 0d08af35f16a0cc418ad2afde3bc5f70ace82705 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:28:17 +0200 Subject: genirq: generic chip: remove irq_gc_mask_disable_reg_and_ack() Any usage of the irq_gc_mask_disable_reg_and_ack() function has been replaced with the desired functionality. The incorrect and ambiguously named function is removed here to prevent accidental misuse. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 - kernel/irq/generic-chip.c | 16 ---------------- 2 files changed, 17 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 494d328..5ad1094 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1009,7 +1009,6 @@ void irq_gc_mask_clr_bit(struct irq_data *d); void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index ec5fe9a..c26c5bb 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -135,22 +135,6 @@ void irq_gc_ack_clr_bit(struct irq_data *d) } /** - * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt - * @d: irq_data - */ -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct irq_chip_type *ct = irq_data_get_chip_type(d); - u32 mask = d->mask; - - irq_gc_lock(gc); - irq_reg_writel(gc, mask, ct->regs.mask); - irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); -} - -/** * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt * @d: irq_data * -- cgit v1.1 From 70b01dfd765dd2196d51f33a49df23954416f34a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:28 +0200 Subject: perf hists: Fix crash in perf_hpp__reset_output_field() Du Changbin reported crash [1] when calling perf_hpp__reset_output_field() after unregistering field via perf_hpp__column_unregister(). This ends up in calling following list_del* sequence on the same format: perf_hpp__column_unregister: list_del(&format->list); perf_hpp__reset_output_field: list_del_init(&fmt->list); where the later list_del_init might touch already freed formats. Fixing this by replacing list_del() with list_del_init() in perf_hpp__column_unregister(). [1] http://marc.info/?l=linux-kernel&m=149059595826019&w=2 Reported-by: Changbin Du Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index ddb2c6f..6ee6b36 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -532,7 +532,7 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { - list_del(&format->list); + list_del_init(&format->list); } void perf_hpp__cancel_cumulate(void) -- cgit v1.1 From d0e35234f647631ddfa5fa8c8ec66c9bc698f0ab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:29 +0200 Subject: perf hists: Add extra integrity checks to fmt_free() Make sure the struct perf_hpp_fmt is properly unhooked before we free it. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 6ee6b36..db79017 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -606,6 +606,13 @@ next: static void fmt_free(struct perf_hpp_fmt *fmt) { + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + if (fmt->free) fmt->free(fmt); } -- cgit v1.1 From 29479bfe83bafb8aa37f36ca132ee8349d11da0c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:35 +0200 Subject: perf tools: Check wether the eBPF file exists in event parsing Adding the check wether the eBPF file exists, to consider it as eBPF input file. This way we can differentiate eBPF events from events that end up with same suffix as eBPF file. Before: $ perf stat -e 'cpu/uops_executed.core/' true bpf: builtin compilation failed: -95, try external compiler WARNING: unable to get correct kernel building directory. Hint: Set correct kbuild directory using 'kbuild-dir' option in [llvm] section of ~/.perfconfig or set it to "" to suppress kbuild detection. event syntax error: 'cpu/uops_executed.core/' \___ Failed to load cpu/uops_executed.c from source: 'version' section incorrect or lost After: $ perf stat -e 'cpu/uops_executed.core/' true Performance counter stats for 'true': 181,533 cpu/uops_executed.core/:u 0.002795447 seconds time elapsed If user makes type in the eBPF file, we prioritize the event syntax and show following warning: $ perf stat -e 'krava.c//' true event syntax error: 'krava.c//' \___ Cannot find PMU `krava.c'. Missing kernel support? Reported-and-Tested-by: Andi Kleen Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c42edea..dcfdafd 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -8,6 +8,9 @@ %{ #include +#include +#include +#include #include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -53,9 +56,8 @@ static int str(yyscan_t scanner, int token) return token; } -static bool isbpf(yyscan_t scanner) +static bool isbpf_suffix(char *text) { - char *text = parse_events_get_text(scanner); int len = strlen(text); if (len < 2) @@ -68,6 +70,17 @@ static bool isbpf(yyscan_t scanner) return false; } +static bool isbpf(yyscan_t scanner) +{ + char *text = parse_events_get_text(scanner); + struct stat st; + + if (!isbpf_suffix(text)) + return false; + + return stat(text, &st) == 0; +} + /* * This function is called when the parser gets two kind of input: * -- cgit v1.1 From ac64115a66c18c01745bbd3c47a36b124e5fd8c0 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 14 Sep 2017 23:56:25 +0200 Subject: KVM: PPC: Fix oops when checking KVM_CAP_PPC_HTM The following program causes a kernel oops: #include #include #include #include #include main() { int fd = open("/dev/kvm", O_RDWR); ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_HTM); } This happens because when using the global KVM fd with KVM_CHECK_EXTENSION, kvm_vm_ioctl_check_extension() gets called with a NULL kvm argument, which gets dereferenced in is_kvmppc_hv_enabled(). Spotted while reading the code. Let's use the hv_enabled fallback variable, like everywhere else in this function. Fixes: 23528bb21ee2 ("KVM: PPC: Introduce KVM_CAP_PPC_HTM") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Greg Kurz Reviewed-by: David Gibson Reviewed-by: Thomas Huth Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3480faa..ee279c7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -644,8 +644,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif case KVM_CAP_PPC_HTM: - r = cpu_has_feature(CPU_FTR_TM_COMP) && - is_kvmppc_hv_enabled(kvm); + r = cpu_has_feature(CPU_FTR_TM_COMP) && hv_enabled; break; default: r = 0; -- cgit v1.1 From 2cde3716321ec64a1faeaf567bd94100c7b4160f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 10 Oct 2017 20:18:28 +1000 Subject: KVM: PPC: Book3S HV: POWER9 more doorbell fixes - Add another case where msgsync is required. - Required barrier sequence for global doorbells is msgsync ; lwsync When msgsnd is used for IPIs to other cores, msgsync must be executed by the target to order stores performed on the source before its msgsnd (provided the source executes the appropriate sync). Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ec69fa4..c700bed 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1310,6 +1310,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) bne 3f BEGIN_FTR_SECTION PPC_MSGSYNC + lwsync END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 @@ -2788,6 +2789,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) PPC_MSGCLR(6) /* see if it's a host IPI */ li r3, 1 +BEGIN_FTR_SECTION + PPC_MSGSYNC + lwsync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 bnelr -- cgit v1.1 From 8f6a9f0d0604817f7c8d4376fd51718f1bf192ee Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 11 Oct 2017 16:00:34 +1100 Subject: KVM: PPC: Book3S: Protect kvmppc_gpa_to_ua() with SRCU kvmppc_gpa_to_ua() accesses KVM memory slot array via srcu_dereference_check() and this produces warnings from RCU like below. This extends the existing srcu_read_lock/unlock to cover that kvmppc_gpa_to_ua() as well. We did not hit this before as this lock is not needed for the realmode handlers and hash guests would use the realmode path all the time; however the radix guests are always redirected to the virtual mode handlers and hence the warning. [ 68.253798] ./include/linux/kvm_host.h:575 suspicious rcu_dereference_check() usage! [ 68.253799] other info that might help us debug this: [ 68.253802] rcu_scheduler_active = 2, debug_locks = 1 [ 68.253804] 1 lock held by qemu-system-ppc/6413: [ 68.253806] #0: (&vcpu->mutex){+.+.}, at: [] vcpu_load+0x3c/0xc0 [kvm] [ 68.253826] stack backtrace: [ 68.253830] CPU: 92 PID: 6413 Comm: qemu-system-ppc Tainted: G W 4.14.0-rc3-00553-g432dcba58e9c-dirty #72 [ 68.253833] Call Trace: [ 68.253839] [c000000fd3d9f790] [c000000000b7fcc8] dump_stack+0xe8/0x160 (unreliable) [ 68.253845] [c000000fd3d9f7d0] [c0000000001924c0] lockdep_rcu_suspicious+0x110/0x180 [ 68.253851] [c000000fd3d9f850] [c0000000000e825c] kvmppc_gpa_to_ua+0x26c/0x2b0 [ 68.253858] [c000000fd3d9f8b0] [c00800000e3e1984] kvmppc_h_put_tce+0x12c/0x2a0 [kvm] Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8f2da8b..4dffa61 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -478,28 +478,30 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); + + idx = srcu_read_lock(&vcpu->kvm->srcu); + if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, - tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) - return H_PARAMETER; + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) { + ret = H_PARAMETER; + goto unlock_exit; + } entry = ioba >> stt->page_shift; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - if (dir == DMA_NONE) { + if (dir == DMA_NONE) ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stit->tbl, entry); - } else { - idx = srcu_read_lock(&vcpu->kvm->srcu); + else ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, entry, ua, dir); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - } if (ret == H_SUCCESS) continue; if (ret == H_TOO_HARD) - return ret; + goto unlock_exit; WARN_ON_ONCE(1); kvmppc_clear_tce(stit->tbl, entry); @@ -507,7 +509,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, kvmppc_tce_put(stt, entry, tce); - return H_SUCCESS; +unlock_exit: + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); -- cgit v1.1 From ca4c302398963c0cae29bc168e44cf91e40ff0d3 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Wed, 11 Oct 2017 14:21:14 +0300 Subject: iio: adc: at91-sama5d2_adc: fix probe error on missing trigger property This fix allows platforms to probe correctly even if the trigger edge property is missing. The hardware trigger will no longer be registered in the sybsystem Preserves backwards compatibility with the support that was in the driver before the hardware trigger. https://storage.kernelci.org/mainline/master/v4.14-rc2-255-g74d83ec2b734/arm/sama5_defconfig/lab-free-electrons/boot-at91-sama5d2_xplained.txt Signed-off-by: Eugen Hristev Fixes: 5e1a1da0f ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 45 ++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index bc5b38e..a70ef7f 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -225,6 +225,7 @@ struct at91_adc_trigger { char *name; unsigned int trgmod_value; unsigned int edge_type; + bool hw_trig; }; struct at91_adc_state { @@ -254,16 +255,25 @@ static const struct at91_adc_trigger at91_adc_trigger_list[] = { .name = "external_rising", .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_EXT_TRIG_RISE, .edge_type = IRQ_TYPE_EDGE_RISING, + .hw_trig = true, }, { .name = "external_falling", .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_EXT_TRIG_FALL, .edge_type = IRQ_TYPE_EDGE_FALLING, + .hw_trig = true, }, { .name = "external_any", .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_EXT_TRIG_ANY, .edge_type = IRQ_TYPE_EDGE_BOTH, + .hw_trig = true, + }, + { + .name = "software", + .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_NO_TRIGGER, + .edge_type = IRQ_TYPE_NONE, + .hw_trig = false, }, }; @@ -597,7 +607,7 @@ static int at91_adc_probe(struct platform_device *pdev) struct at91_adc_state *st; struct resource *res; int ret, i; - u32 edge_type; + u32 edge_type = IRQ_TYPE_NONE; indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*st)); if (!indio_dev) @@ -641,14 +651,14 @@ static int at91_adc_probe(struct platform_device *pdev) ret = of_property_read_u32(pdev->dev.of_node, "atmel,trigger-edge-type", &edge_type); if (ret) { - dev_err(&pdev->dev, - "invalid or missing value for atmel,trigger-edge-type\n"); - return ret; + dev_dbg(&pdev->dev, + "atmel,trigger-edge-type not specified, only software trigger available\n"); } st->selected_trig = NULL; - for (i = 0; i < AT91_SAMA5D2_HW_TRIG_CNT; i++) + /* find the right trigger, or no trigger at all */ + for (i = 0; i < AT91_SAMA5D2_HW_TRIG_CNT + 1; i++) if (at91_adc_trigger_list[i].edge_type == edge_type) { st->selected_trig = &at91_adc_trigger_list[i]; break; @@ -717,24 +727,27 @@ static int at91_adc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, indio_dev); - ret = at91_adc_buffer_init(indio_dev); - if (ret < 0) { - dev_err(&pdev->dev, "couldn't initialize the buffer.\n"); - goto per_clk_disable_unprepare; - } + if (st->selected_trig->hw_trig) { + ret = at91_adc_buffer_init(indio_dev); + if (ret < 0) { + dev_err(&pdev->dev, "couldn't initialize the buffer.\n"); + goto per_clk_disable_unprepare; + } - ret = at91_adc_trigger_init(indio_dev); - if (ret < 0) { - dev_err(&pdev->dev, "couldn't setup the triggers.\n"); - goto per_clk_disable_unprepare; + ret = at91_adc_trigger_init(indio_dev); + if (ret < 0) { + dev_err(&pdev->dev, "couldn't setup the triggers.\n"); + goto per_clk_disable_unprepare; + } } ret = iio_device_register(indio_dev); if (ret < 0) goto per_clk_disable_unprepare; - dev_info(&pdev->dev, "setting up trigger as %s\n", - st->selected_trig->name); + if (st->selected_trig->hw_trig) + dev_info(&pdev->dev, "setting up trigger as %s\n", + st->selected_trig->name); dev_info(&pdev->dev, "version: %x\n", readl_relaxed(st->base + AT91_SAMA5D2_VERSION)); -- cgit v1.1 From ad98dd1a75ac6a8b68cd2f7bf4676b65734f2a43 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 16 Oct 2017 08:37:54 +1100 Subject: KVM: PPC: Book3S HV: Add more barriers in XIVE load/unload code On POWER9 systems, we push the VCPU context onto the XIVE (eXternal Interrupt Virtualization Engine) hardware when entering a guest, and pull the context off the XIVE when exiting the guest. The push is done with cache-inhibited stores, and the pull with cache-inhibited loads. Testing has revealed that it is possible (though very rare) for the stores to get reordered with the loads so that we end up with the guest VCPU context still loaded on the XIVE after we have exited the guest. When that happens, it is possible for the same VCPU context to then get loaded on another CPU, which causes the machine to checkstop. To fix this, we add I/O barrier instructions (eieio) before and after the push and pull operations. As partial compensation for the potential slowdown caused by the extra barriers, we remove the eieio instructions between the two stores in the push operation, and between the two loads in the pull operation. (The architecture requires loads to cache-inhibited, guarded storage to be kept in order, and requires stores to cache-inhibited, guarded storage likewise to be kept in order, but allows such loads and stores to be reordered with respect to each other.) Reported-by: Carol L Soto Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c700bed..42639fb 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -989,13 +989,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) beq no_xive ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS - stdcix r11,r9,r10 eieio + stdcix r11,r9,r10 lwz r11, VCPU_XIVE_CAM_WORD(r4) li r9, TM_QW1_OS + TM_WORD2 stwcix r11,r9,r10 li r9, 1 stw r9, VCPU_XIVE_PUSHED(r4) + eieio no_xive: #endif /* CONFIG_KVM_XICS */ @@ -1401,8 +1402,8 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ cmpldi cr0, r10, 0 beq 1f /* First load to pull the context, we ignore the value */ - lwzx r11, r7, r10 eieio + lwzx r11, r7, r10 /* Second load to recover the context state (Words 0 and 1) */ ldx r11, r6, r10 b 3f @@ -1410,8 +1411,8 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ cmpldi cr0, r10, 0 beq 1f /* First load to pull the context, we ignore the value */ - lwzcix r11, r7, r10 eieio + lwzcix r11, r7, r10 /* Second load to recover the context state (Words 0 and 1) */ ldcix r11, r6, r10 3: std r11, VCPU_XIVE_SAVED_STATE(r9) @@ -1421,6 +1422,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ stw r10, VCPU_XIVE_PUSHED(r9) stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) + eieio 1: #endif /* CONFIG_KVM_XICS */ /* Save more register state */ -- cgit v1.1 From e6fc454b7794fc45c27364c7896b8f03094635ee Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 15 Oct 2017 17:02:03 +0100 Subject: x86/cpu/intel_cacheinfo: Remove redundant assignment to 'this_leaf' The 'this_leaf' variable is assigned a value that is never read and it is updated a little later with a newer value, hence we can remove the redundant assignment. Cleans up the following Clang warning: Value stored to 'this_leaf' is never read Signed-off-by: Colin Ian King Reviewed-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20171015160203.12332-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 24f74932..9990a71 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -831,7 +831,6 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; - this_leaf = this_cpu_ci->info_list + index; nshared = base->eax.split.num_threads_sharing + 1; apicid = cpu_data(cpu).apicid; first = apicid - (apicid % nshared); -- cgit v1.1 From 31dc3f819bac28a0990b36510197560258ab7421 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Oct 2017 14:50:46 +0200 Subject: USB: serial: metro-usb: add MS7820 device id Add device-id entry for (Honeywell) Metrologic MS7820 bar code scanner. The device has two interfaces (in this mode?); a vendor-specific interface with two interrupt endpoints and a second HID interface, which we do not bind to. Reported-by: Ladislav Dobrovsky Tested-by: Ladislav Dobrovsky Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/metro-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/metro-usb.c b/drivers/usb/serial/metro-usb.c index cc84da8..14511d6 100644 --- a/drivers/usb/serial/metro-usb.c +++ b/drivers/usb/serial/metro-usb.c @@ -45,6 +45,7 @@ struct metrousb_private { static const struct usb_device_id id_table[] = { { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_BI) }, { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_UNI) }, + { USB_DEVICE_INTERFACE_CLASS(0x0c2e, 0x0730, 0xff) }, /* MS7820 */ { }, /* Terminating entry. */ }; MODULE_DEVICE_TABLE(usb, id_table); -- cgit v1.1 From 4c625a974fb81724e60966b677e47fcba782c950 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Oct 2017 14:08:55 -0400 Subject: SUNRPC: fix a list corruption issue in xprt_release() We remove the request from the receive list before we call xprt_wait_on_pinned_rqst(), and so we need to use list_del_init(). Otherwise, we will see list corruption when xprt_complete_rqst() is called. Reported-by: Emre Celebi Fixes: ce7c252a8c741 ("SUNRPC: Add a separate spinlock to protect...") Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e741ec2..1a39ad1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1333,7 +1333,7 @@ void xprt_release(struct rpc_task *task) rpc_count_iostats(task, task->tk_client->cl_metrics); spin_lock(&xprt->recv_lock); if (!list_empty(&req->rq_list)) { - list_del(&req->rq_list); + list_del_init(&req->rq_list); xprt_wait_on_pinned_rqst(req); } spin_unlock(&xprt->recv_lock); -- cgit v1.1 From 8bc9481f43103da77d75a532d9eae55790b9eea6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Oct 2017 10:47:39 -0300 Subject: perf tools: Add long time reviewers to MAINTAINERS Jiri and Namhyung have long contributed a lot of code and time reviewing patches to tools/, so lets make that reflected in the MAINTAINERS file to encourage patch submitters to add them to the CC list, speeding up the process of tools/perf/ patch processing. Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-onicopw68bg6kn56lnybfpns@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index cc42c83..f205662 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10559,6 +10559,8 @@ M: Peter Zijlstra M: Ingo Molnar M: Arnaldo Carvalho de Melo R: Alexander Shishkin +R: Jiri Olsa +R: Namhyung Kim L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core S: Supported -- cgit v1.1 From 9c48c0965b97e14ddcf75490a754e84e05aaa062 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Oct 2017 12:12:16 +0200 Subject: x86/idt: Initialize early IDT before cr4_init_shadow() Moving the early IDT setup out of assembly code breaks the boot on first generation 486 systems. The reason is that the call of idt_setup_early_handler, which sets up the early handlers was added after the call to cr4_init_shadow(). cr4_init_shadow() tries to read CR4 which is not available on those systems. The accessor function uses a extable fixup to handle the resulting fault. As the IDT is not set up yet, the cr4 read exception causes an instantaneous reboot for obvious reasons. Call idt_setup_early_handler() before cr4_init_shadow() so IDT is set up before the first exception hits. Fixes: 87e81786b13b ("x86/idt: Move early IDT setup out of 32-bit asm") Reported-and-tested-by: Matthew Whitehead Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710161210290.1973@nanos --- arch/x86/kernel/head32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index cf2ce06..2902ca4 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -30,10 +30,11 @@ static void __init i386_default_early_setup(void) asmlinkage __visible void __init i386_start_kernel(void) { - cr4_init_shadow(); - + /* Make sure IDT is set up before any exception happens */ idt_setup_early_handler(); + cr4_init_shadow(); + sanitize_boot_params(&boot_params); x86_early_init_platform_quirks(); -- cgit v1.1 From 226584aedd94acd61ffa51fb69bcf6b3309a7b8f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 16 Oct 2017 12:27:58 +0200 Subject: spi: fix IDR collision on systems with both fixed and dynamic SPI bus numbers On systems where some controllers get a dynamic ID assigned and some have a fixed number from DT, the current implemention might run into an IDR collision if the dynamic controllers gets probed first and get an IDR number, which is later requested by the controller with the fixed numbering. When this happens the fixed controller will fail to register with the SPI core. Fix this by skipping all known alias numbers when assigning the dynamic IDs. Fixes: 9b61e302210e (spi: Pick spi bus number from Linux idr or spi alias) Signed-off-by: Lucas Stach Signed-off-by: Mark Brown --- drivers/spi/spi.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 6e65524..e8b5a5e 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -45,7 +45,6 @@ #define CREATE_TRACE_POINTS #include -#define SPI_DYN_FIRST_BUS_NUM 0 static DEFINE_IDR(spi_master_idr); @@ -2086,7 +2085,7 @@ int spi_register_controller(struct spi_controller *ctlr) struct device *dev = ctlr->dev.parent; struct boardinfo *bi; int status = -ENODEV; - int id; + int id, first_dynamic; if (!dev) return -ENODEV; @@ -2116,9 +2115,15 @@ int spi_register_controller(struct spi_controller *ctlr) } } if (ctlr->bus_num < 0) { + first_dynamic = of_alias_get_highest_id("spi"); + if (first_dynamic < 0) + first_dynamic = 0; + else + first_dynamic++; + mutex_lock(&board_lock); - id = idr_alloc(&spi_master_idr, ctlr, SPI_DYN_FIRST_BUS_NUM, 0, - GFP_KERNEL); + id = idr_alloc(&spi_master_idr, ctlr, first_dynamic, + 0, GFP_KERNEL); mutex_unlock(&board_lock); if (WARN(id < 0, "couldn't get idr")) return id; -- cgit v1.1 From ab31fd0ce65ec93828b617123792c1bb7c6dcc42 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 13 Oct 2017 15:40:07 +0200 Subject: scsi: zfcp: fix erp_action use-before-initialize in REC action trace v4.10 commit 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") extended accessing parent pointer fields of struct zfcp_erp_action for tracing. If an erp_action has never been enqueued before, these parent pointer fields are uninitialized and NULL. Examples are zfcp objects freshly added to the parent object's children list, before enqueueing their first recovery subsequently. In zfcp_erp_try_rport_unblock(), we iterate such list. Accessing erp_action fields can cause a NULL pointer dereference. Since the kernel can read from lowcore on s390, it does not immediately cause a kernel page fault. Instead it can cause hangs on trying to acquire the wrong erp_action->adapter->dbf->rec_lock in zfcp_dbf_rec_action_lvl() ^bogus^ while holding already other locks with IRQs disabled. Real life example from attaching lots of LUNs in parallel on many CPUs: crash> bt 17723 PID: 17723 TASK: ... CPU: 25 COMMAND: "zfcperp0.0.1800" LOWCORE INFO: -psw : 0x0404300180000000 0x000000000038e424 -function : _raw_spin_lock_wait_flags at 38e424 ... #0 [fdde8fc90] zfcp_dbf_rec_action_lvl at 3e0004e9862 [zfcp] #1 [fdde8fce8] zfcp_erp_try_rport_unblock at 3e0004dfddc [zfcp] #2 [fdde8fd38] zfcp_erp_strategy at 3e0004e0234 [zfcp] #3 [fdde8fda8] zfcp_erp_thread at 3e0004e0a12 [zfcp] #4 [fdde8fe60] kthread at 173550 #5 [fdde8feb8] kernel_thread_starter at 10add2 zfcp_adapter zfcp_port zfcp_unit
, 0x404040d600000000 scsi_device NULL, returning early! zfcp_scsi_dev.status = 0x40000000 0x40000000 ZFCP_STATUS_COMMON_RUNNING crash> zfcp_unit
struct zfcp_unit { erp_action = { adapter = 0x0, port = 0x0, unit = 0x0, }, } zfcp_erp_action is always fully embedded into its container object. Such container object is never moved in its object tree (only add or delete). Hence, erp_action parent pointers can never change. To fix the issue, initialize the erp_action parent pointers before adding the erp_action container to any list and thus before it becomes accessible from outside of its initializing function. In order to also close the time window between zfcp_erp_setup_act() memsetting the entire erp_action to zero and setting the parent pointers again, drop the memset and instead explicitly initialize individually all erp_action fields except for parent pointers. To be extra careful not to introduce any other unintended side effect, even keep zeroing the erp_action fields for list and timer. Also double-check with WARN_ON_ONCE that erp_action parent pointers never change, so we get to know when we would deviate from previous behavior. Signed-off-by: Steffen Maier Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") Cc: #2.6.32+ Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_aux.c | 5 +++++ drivers/s390/scsi/zfcp_erp.c | 18 +++++++++++------- drivers/s390/scsi/zfcp_scsi.c | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 82ac331..8475215 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -357,6 +357,8 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) adapter->next_port_scan = jiffies; + adapter->erp_action.adapter = adapter; + if (zfcp_qdio_setup(adapter)) goto failed; @@ -513,6 +515,9 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, port->dev.groups = zfcp_port_attr_groups; port->dev.release = zfcp_port_release; + port->erp_action.adapter = adapter; + port->erp_action.port = port; + if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) { kfree(port); goto err_out; diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 37408f5..ec2532e 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -193,9 +193,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &zfcp_sdev->status); erp_action = &zfcp_sdev->erp_action; - memset(erp_action, 0, sizeof(struct zfcp_erp_action)); - erp_action->port = port; - erp_action->sdev = sdev; + WARN_ON_ONCE(erp_action->port != port); + WARN_ON_ONCE(erp_action->sdev != sdev); if (!(atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; @@ -208,8 +207,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, zfcp_erp_action_dismiss_port(port); atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status); erp_action = &port->erp_action; - memset(erp_action, 0, sizeof(struct zfcp_erp_action)); - erp_action->port = port; + WARN_ON_ONCE(erp_action->port != port); + WARN_ON_ONCE(erp_action->sdev != NULL); if (!(atomic_read(&port->status) & ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; break; @@ -219,7 +218,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, zfcp_erp_action_dismiss_adapter(adapter); atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status); erp_action = &adapter->erp_action; - memset(erp_action, 0, sizeof(struct zfcp_erp_action)); + WARN_ON_ONCE(erp_action->port != NULL); + WARN_ON_ONCE(erp_action->sdev != NULL); if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; @@ -229,7 +229,11 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, return NULL; } - erp_action->adapter = adapter; + WARN_ON_ONCE(erp_action->adapter != adapter); + memset(&erp_action->list, 0, sizeof(erp_action->list)); + memset(&erp_action->timer, 0, sizeof(erp_action->timer)); + erp_action->step = ZFCP_ERP_STEP_UNINITIALIZED; + erp_action->fsf_req_id = 0; erp_action->action = need; erp_action->status = act_status; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index ec3ddd1..6cf8732 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -115,10 +115,15 @@ static int zfcp_scsi_slave_alloc(struct scsi_device *sdev) struct zfcp_unit *unit; int npiv = adapter->connection_features & FSF_FEATURE_NPIV_MODE; + zfcp_sdev->erp_action.adapter = adapter; + zfcp_sdev->erp_action.sdev = sdev; + port = zfcp_get_port_by_wwpn(adapter, rport->port_name); if (!port) return -ENXIO; + zfcp_sdev->erp_action.port = port; + unit = zfcp_unit_find(port, zfcp_scsi_dev_lun(sdev)); if (unit) put_device(&unit->dev); -- cgit v1.1 From 1010f21ecf8ac43be676d498742de18fa6c20987 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Mon, 16 Oct 2017 11:26:05 -0700 Subject: scsi: qla2xxx: Initialize Work element before requesting IRQs commit a9e170e28636 ("scsi: qla2xxx: Fix uninitialized work element") moved initializiation of work element earlier in the probe to fix call stack. However, it still leaves a window where interrupt can be generated before work element is initialized. Fix that window by initializing work element before we are requesting IRQs. [mkp: fixed typos] Fixes: a9e170e28636 ("scsi: qla2xxx: Fix uninitialized work element") Cc: # 4.13 Signed-off-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9372098..3bd956d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3061,6 +3061,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->max_cmd_len, host->max_channel, host->max_lun, host->transportt, sht->vendor_id); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + /* Set up the irqs */ ret = qla2x00_request_irqs(ha, rsp); if (ret) @@ -3175,8 +3177,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->can_queue, base_vha->req, base_vha->mgmt_svr_loop_id, host->sg_tablesize); - INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); - if (ha->mqenable) { bool mq = false; bool startit = false; -- cgit v1.1 From c99dfd20f295b2b8c46da5185c0889493ba1f291 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Mon, 16 Oct 2017 20:28:02 +0100 Subject: =?UTF-8?q?scsi:=20hpsa:=20Fix=20configured=5Flogical=5Fdrive=5Fco?= =?UTF-8?q?unt=C2=B7check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check whether configured_logical_drive_count is less than 255. Previous check was always evaluating to true as this variable is defined as u8. Signed-off-by: Christos Gkekas Acked-by: Don Brace --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 9abe810..4ed3d26 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -4091,7 +4091,7 @@ static int hpsa_set_local_logical_count(struct ctlr_info *h, memset(id_ctlr, 0, sizeof(*id_ctlr)); rc = hpsa_bmic_id_controller(h, id_ctlr, sizeof(*id_ctlr)); if (!rc) - if (id_ctlr->configured_logical_drive_count < 256) + if (id_ctlr->configured_logical_drive_count < 255) *nlocals = id_ctlr->configured_logical_drive_count; else *nlocals = le16_to_cpu( -- cgit v1.1 From 45348de2c8a7a1e64c5be27b22c9786b4152dd41 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Mon, 16 Oct 2017 17:22:31 -0700 Subject: scsi: aacraid: Fix controller initialization failure This is a fix to an issue where the driver sends its periodic WELLNESS command to the controller after the driver shut it down.This causes the controller to crash. The window where this can happen is small, but it can be hit at around 4 hours of constant resets. Cc: Fixes: fbd185986eba (aacraid: Fix AIF triggered IOP_RESET) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/comminit.c | 8 +++++--- drivers/scsi/aacraid/linit.c | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 97d269f..1bc623a 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -302,9 +302,11 @@ int aac_send_shutdown(struct aac_dev * dev) return -ENOMEM; aac_fib_init(fibctx); - mutex_lock(&dev->ioctl_mutex); - dev->adapter_shutdown = 1; - mutex_unlock(&dev->ioctl_mutex); + if (!dev->adapter_shutdown) { + mutex_lock(&dev->ioctl_mutex); + dev->adapter_shutdown = 1; + mutex_unlock(&dev->ioctl_mutex); + } cmd = (struct aac_close *) fib_data(fibctx); cmd->command = cpu_to_le32(VM_CloseAll); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 62beb25..c9252b1 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1551,8 +1551,9 @@ static void __aac_shutdown(struct aac_dev * aac) { int i; + mutex_lock(&aac->ioctl_mutex); aac->adapter_shutdown = 1; - aac_send_shutdown(aac); + mutex_unlock(&aac->ioctl_mutex); if (aac->aif_thread) { int i; @@ -1565,7 +1566,11 @@ static void __aac_shutdown(struct aac_dev * aac) } kthread_stop(aac->thread); } + + aac_send_shutdown(aac); + aac_adapter_disable_int(aac); + if (aac_is_src(aac)) { if (aac->max_msix > 1) { for (i = 0; i < aac->max_msix; i++) { -- cgit v1.1 From ea7d0d69426cab6747ed311c53f4142eb48b9454 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 6 Oct 2017 17:45:27 +0300 Subject: xhci: Identify USB 3.1 capable hosts by their port protocol capability Many USB 3.1 capable hosts never updated the Serial Bus Release Number (SBRN) register to USB 3.1 from USB 3.0 xhci driver identified USB 3.1 capable hosts based on this SBRN register, which according to specs "contains the release of the Universal Serial Bus Specification with which this Universal Serial Bus Host Controller module is compliant." but still in october 2017 gives USB 3.0 as the only possible option. Make an additional check for USB 3.1 support and enable it if the xHCI supported protocol capablity lists USB 3.1 capable ports. Cc: # v4.6+ Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index ee198ea..51535ba 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4805,7 +4805,8 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) */ hcd->has_tt = 1; } else { - if (xhci->sbrn == 0x31) { + /* Some 3.1 hosts return sbrn 0x30, can't rely on sbrn alone */ + if (xhci->sbrn == 0x31 || xhci->usb3_rhub.min_rev >= 1) { xhci_info(xhci, "Host supports USB 3.1 Enhanced SuperSpeed\n"); hcd->speed = HCD_USB31; hcd->self.root_hub->speed = USB_SPEED_SUPER_PLUS; -- cgit v1.1 From d1aad52cf8b3f95dfe9b5b64da66343306ddf73b Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 6 Oct 2017 17:45:28 +0300 Subject: xhci: Cleanup current_cmd in xhci_cleanup_command_queue() KASAN reported use-after-free bug when xhci host controller died: [ 176.952537] BUG: KASAN: use-after-free in xhci_handle_command_timeout+0x68/0x224 [ 176.960846] Write of size 4 at addr ffffffc0cbb01608 by task kworker/3:3/1680 ... [ 177.180644] Freed by task 0: [ 177.183882] kasan_slab_free+0x90/0x15c [ 177.188194] kfree+0x114/0x28c [ 177.191630] xhci_cleanup_command_queue+0xc8/0xf8 [ 177.196916] xhci_hc_died+0x84/0x358 Problem here is that when the cmd_timer fired, it would try to access current_cmd while the command queue is already freed by xhci_hc_died(). Cleanup current_cmd in xhci_cleanup_command_queue() to avoid that. Fixes: d9f11ba9f107 ("xhci: Rework how we handle unresponsive or hoptlug removed hosts") Cc: # v4.12+ Signed-off-by: Jeffy Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index a944365..48ae15af 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1309,6 +1309,7 @@ static void xhci_complete_del_and_free_cmd(struct xhci_command *cmd, u32 status) void xhci_cleanup_command_queue(struct xhci_hcd *xhci) { struct xhci_command *cur_cmd, *tmp_cmd; + xhci->current_cmd = NULL; list_for_each_entry_safe(cur_cmd, tmp_cmd, &xhci->cmd_list, cmd_list) xhci_complete_del_and_free_cmd(cur_cmd, COMP_COMMAND_ABORTED); } -- cgit v1.1 From 810a624bd1b64b13ddcc2eb5c1880526a750a870 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 6 Oct 2017 17:45:29 +0300 Subject: usb: xhci: Reset halted endpoint if trb is noop When a URB is cancled, xhci driver turns the untransferred trbs into no-ops. If an endpoint stalls on a no-op trb that belongs to the cancelled URB, the event handler won't reset the endpoint. Hence, it will stay halted. Link: http://marc.info/?l=linux-usb&m=149582598330127&w=2 Cc: Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 48ae15af..82c746e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2580,15 +2580,21 @@ static int handle_tx_event(struct xhci_hcd *xhci, (struct xhci_generic_trb *) ep_trb); /* - * No-op TRB should not trigger interrupts. - * If ep_trb is a no-op TRB, it means the - * corresponding TD has been cancelled. Just ignore - * the TD. + * No-op TRB could trigger interrupts in a case where + * a URB was killed and a STALL_ERROR happens right + * after the endpoint ring stopped. Reset the halted + * endpoint. Otherwise, the endpoint remains stalled + * indefinitely. */ if (trb_is_noop(ep_trb)) { - xhci_dbg(xhci, - "ep_trb is a no-op TRB. Skip it for slot %u ep %u\n", - slot_id, ep_index); + if (trb_comp_code == COMP_STALL_ERROR || + xhci_requires_manual_halt_cleanup(xhci, ep_ctx, + trb_comp_code)) + xhci_cleanup_halted_endpoint(xhci, slot_id, + ep_index, + ep_ring->stream_id, + td, ep_trb, + EP_HARD_RESET); goto cleanup; } -- cgit v1.1 From b3207c65dfafae27e7c492cb9188c0dc0eeaf3fd Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Fri, 6 Oct 2017 17:45:30 +0300 Subject: usb: xhci: Handle error condition in xhci_stop_device() xhci_stop_device() calls xhci_queue_stop_endpoint() multiple times without checking the return value. xhci_queue_stop_endpoint() can return error if the HC is already halted or unable to queue commands. This can cause a deadlock condition as xhci_stop_device() would end up waiting indefinitely for a completion for the command that didn't get queued. Fix this by checking the return value and bailing out of xhci_stop_device() in case of error. This patch happens to fix potential memory leaks of the allocated command structures as well. Fixes: c311e391a7ef ("xhci: rework command timeout and cancellation,") Cc: Signed-off-by: Mayank Rana Signed-off-by: Jack Pham Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index da9158f..a2336de 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -420,14 +420,25 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) GFP_NOWAIT); if (!command) { spin_unlock_irqrestore(&xhci->lock, flags); - xhci_free_command(xhci, cmd); - return -ENOMEM; + ret = -ENOMEM; + goto cmd_cleanup; + } + + ret = xhci_queue_stop_endpoint(xhci, command, slot_id, + i, suspend); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_free_command(xhci, command); + goto cmd_cleanup; } - xhci_queue_stop_endpoint(xhci, command, slot_id, i, - suspend); } } - xhci_queue_stop_endpoint(xhci, cmd, slot_id, 0, suspend); + ret = xhci_queue_stop_endpoint(xhci, cmd, slot_id, 0, suspend); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); + goto cmd_cleanup; + } + xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); @@ -439,6 +450,8 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n"); ret = -ETIME; } + +cmd_cleanup: xhci_free_command(xhci, cmd); return ret; } -- cgit v1.1 From 845d584f41eac3475c21e4a7d5e88d0f6e410cf7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Oct 2017 16:21:19 +0200 Subject: USB: devio: Revert "USB: devio: Don't corrupt user memory" Taking the uurb->buffer_length userspace passes in as a maximum for the actual urbs transfer_buffer_length causes 2 serious issues: 1) It breaks isochronous support for all userspace apps using libusb, as existing libusb versions pass in 0 for uurb->buffer_length, relying on the kernel using the lenghts of the usbdevfs_iso_packet_desc descriptors passed in added together as buffer length. This for example causes redirection of USB audio and Webcam's into virtual machines using qemu-kvm to no longer work. This is a userspace ABI break and as such must be reverted. Note that the original commit does not protect other users / the kernels memory, it only stops the userspace process making the call from shooting itself in the foot. 2) It may cause the kernel to program host controllers to DMA over random memory. Just as the devio code used to only look at the iso_packet_desc lenghts, the host drivers do the same, relying on the submitter of the urbs to make sure the entire buffer is large enough and not checking transfer_buffer_length. But the "USB: devio: Don't corrupt user memory" commit now takes the userspace provided uurb->buffer_length for the buffer-size while copying over the user-provided iso_packet_desc lengths 1:1, allowing the user to specify a small buffer size while programming the host controller to dma a lot more data. (Atleast the ohci, uhci, xhci and fhci drivers do not check transfer_buffer_length for isoc transfers.) This reverts commit fa1ed74eb1c2 ("USB: devio: Don't corrupt user memory") fixing both these issues. Cc: Dan Carpenter Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 4664e54..e9326f3 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1576,11 +1576,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb totlen += isopkt[u].length; } u *= sizeof(struct usb_iso_packet_descriptor); - if (totlen <= uurb->buffer_length) - uurb->buffer_length = totlen; - else - WARN_ONCE(1, "uurb->buffer_length is too short %d vs %d", - totlen, uurb->buffer_length); + uurb->buffer_length = totlen; break; default: -- cgit v1.1 From 765fb2f181cad669f2beb87842a05d8071f2be85 Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Wed, 11 Oct 2017 11:10:52 +0300 Subject: usb: cdc_acm: Add quirk for Elatec TWN3 Elatec TWN3 has the union descriptor on data interface. This results in failure to bind the device to the driver with the following log: usb 1-1.2: new full speed USB device using streamplug-ehci and address 4 usb 1-1.2: New USB device found, idVendor=09d8, idProduct=0320 usb 1-1.2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 usb 1-1.2: Product: RFID Device (COM) usb 1-1.2: Manufacturer: OEM cdc_acm 1-1.2:1.0: Zero length descriptor references cdc_acm: probe of 1-1.2:1.0 failed with error -22 Adding the NO_UNION_NORMAL quirk for the device fixes the issue. `lsusb -v` of the device: Bus 001 Device 003: ID 09d8:0320 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 2 Communications bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 32 idVendor 0x09d8 idProduct 0x0320 bcdDevice 3.00 iManufacturer 1 OEM iProduct 2 RFID Device (COM) iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 67 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 250mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 2 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 CDC Header: bcdCDC 1.10 CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC ACM: bmCapabilities 0x06 sends break line coding and serial state CDC Union: bMasterInterface 0 bSlaveInterface 1 Device Status: 0x0000 (Bus Powered) Signed-off-by: Maksim Salau Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5e056064..18c923a 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1832,6 +1832,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ + .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, -- cgit v1.1 From 4f190e0b9de89c4c917c3ffb3799e9d00fc534ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 9 Oct 2017 22:46:07 -0500 Subject: USB: musb: fix session-bit runtime-PM quirk The current session-bit quirk implementation does not prevent the retry counter from underflowing, something which could break runtime PM and keep the device active for a very long time (about 2^32 seconds) after a disconnect. This notably breaks the B-device timeout case, but could potentially cause problems also when the controller is operating as an A-device. Fixes: 2bff3916fda9 ("usb: musb: Fix PM for hub disconnect") Cc: stable # 4.9 Cc: Tony Lindgren Signed-off-by: Johan Hovold Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 0296920..07b8c71 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1861,22 +1861,22 @@ static void musb_pm_runtime_check_session(struct musb *musb) MUSB_DEVCTL_HR; switch (devctl & ~s) { case MUSB_QUIRK_B_INVALID_VBUS_91: - if (musb->quirk_retries--) { + if (musb->quirk_retries) { musb_dbg(musb, "Poll devctl on invalid vbus, assume no session"); schedule_delayed_work(&musb->irq_work, msecs_to_jiffies(1000)); - + musb->quirk_retries--; return; } /* fall through */ case MUSB_QUIRK_A_DISCONNECT_19: - if (musb->quirk_retries--) { + if (musb->quirk_retries) { musb_dbg(musb, "Poll devctl on possible host mode disconnect"); schedule_delayed_work(&musb->irq_work, msecs_to_jiffies(1000)); - + musb->quirk_retries--; return; } if (!musb->session) -- cgit v1.1 From 0c3aae9bd59978fb8c3557d7883380bef0f2cfa1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 9 Oct 2017 22:46:08 -0500 Subject: USB: musb: fix late external abort on suspend The musb delayed irq work was never flushed on suspend, something which since 4.9 can lead to an external abort if the work is scheduled after the grandparent's clock has been disabled: PM: Suspending system (mem) PM: suspend of devices complete after 125.224 msecs PM: suspend devices took 0.132 seconds PM: late suspend of devices complete after 7.423 msecs PM: noirq suspend of devices complete after 7.083 msecs suspend debug: Waiting for 5 second(s). Unhandled fault: external abort on non-linefetch (0x1008) at 0xd0262c60 ... [] (musb_default_readb) from [] (musb_irq_work+0x48/0x220) [] (musb_irq_work) from [] (process_one_work+0x1f4/0x758) [] (process_one_work) from [] (worker_thread+0x54/0x514) [] (worker_thread) from [] (kthread+0x128/0x158) [] (kthread) from [] (ret_from_fork+0x14/0x24) Commit 2bff3916fda9 ("usb: musb: Fix PM for hub disconnect") started scheduling musb_irq_work with a delay of up to a second and with retries thereby making this easy to trigger, for example, by suspending shortly after a disconnect. Note that we set a flag to prevent the irq work from rescheduling itself during suspend and instead process a disconnect immediately. This takes care of the case where we are disconnected shortly before suspending. However, when in host mode, a disconnect while suspended will still go unnoticed and thus prevent the controller from runtime suspending upon resume as the session bit is always set. This will need to be addressed separately. Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Fixes: 2bff3916fda9 ("usb: musb: Fix PM for hub disconnect") Cc: stable # 4.9 Cc: Felipe Balbi Cc: Tony Lindgren Signed-off-by: Johan Hovold Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 11 +++++++++-- drivers/usb/musb/musb_core.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 07b8c71..1ced3af 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1861,7 +1861,7 @@ static void musb_pm_runtime_check_session(struct musb *musb) MUSB_DEVCTL_HR; switch (devctl & ~s) { case MUSB_QUIRK_B_INVALID_VBUS_91: - if (musb->quirk_retries) { + if (musb->quirk_retries && !musb->flush_irq_work) { musb_dbg(musb, "Poll devctl on invalid vbus, assume no session"); schedule_delayed_work(&musb->irq_work, @@ -1871,7 +1871,7 @@ static void musb_pm_runtime_check_session(struct musb *musb) } /* fall through */ case MUSB_QUIRK_A_DISCONNECT_19: - if (musb->quirk_retries) { + if (musb->quirk_retries && !musb->flush_irq_work) { musb_dbg(musb, "Poll devctl on possible host mode disconnect"); schedule_delayed_work(&musb->irq_work, @@ -2681,8 +2681,15 @@ static int musb_suspend(struct device *dev) musb_platform_disable(musb); musb_disable_interrupts(musb); + + musb->flush_irq_work = true; + while (flush_delayed_work(&musb->irq_work)) + ; + musb->flush_irq_work = false; + if (!(musb->io.quirks & MUSB_PRESERVE_SESSION)) musb_writeb(musb->mregs, MUSB_DEVCTL, 0); + WARN_ON(!list_empty(&musb->pending_list)); spin_lock_irqsave(&musb->lock, flags); diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index c748f4a..20f4614 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -428,6 +428,8 @@ struct musb { unsigned test_mode:1; unsigned softconnect:1; + unsigned flush_irq_work:1; + u8 address; u8 test_mode_nr; u16 ackpend; /* ep0 */ -- cgit v1.1 From bfa53e0e366b98185fadb03f7916d1538cb90ebd Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Mon, 9 Oct 2017 22:46:09 -0500 Subject: usb: musb: musb_cppi41: Fix the address of teardown and autoreq registers The DA8xx and DSPS platforms don't use the same address for few registers. On Da8xx, this is causing some issues (e.g. teardown that doesn't work). Configure the address of the register during the init and use them instead of constants. Cc: stable@vger.kernel.org # v4.12+ Reported-by: nsekhar@ti.com Signed-off-by: Alexandre Bailon Tested-by: Sekhar Nori Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index ba25528..d66416a 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -26,6 +26,9 @@ #define MUSB_DMA_NUM_CHANNELS 15 +#define DA8XX_USB_AUTOREQ 0x14 +#define DA8XX_USB_TEARDOWN 0x1c + struct cppi41_dma_controller { struct dma_controller controller; struct cppi41_dma_channel rx_channel[MUSB_DMA_NUM_CHANNELS]; @@ -35,6 +38,9 @@ struct cppi41_dma_controller { u32 rx_mode; u32 tx_mode; u32 auto_req; + + u32 tdown_reg; + u32 autoreq_reg; }; static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) @@ -364,8 +370,8 @@ static void cppi41_set_autoreq_mode(struct cppi41_dma_channel *cppi41_channel, if (new_mode == old_mode) return; controller->auto_req = new_mode; - musb_writel(controller->controller.musb->ctrl_base, USB_CTRL_AUTOREQ, - new_mode); + musb_writel(controller->controller.musb->ctrl_base, + controller->autoreq_reg, new_mode); } static bool cppi41_configure_channel(struct dma_channel *channel, @@ -581,12 +587,13 @@ static int cppi41_dma_channel_abort(struct dma_channel *channel) do { if (is_tx) - musb_writel(musb->ctrl_base, USB_TDOWN, tdbit); + musb_writel(musb->ctrl_base, controller->tdown_reg, + tdbit); ret = dmaengine_terminate_all(cppi41_channel->dc); } while (ret == -EAGAIN); if (is_tx) { - musb_writel(musb->ctrl_base, USB_TDOWN, tdbit); + musb_writel(musb->ctrl_base, controller->tdown_reg, tdbit); csr = musb_readw(epio, MUSB_TXCSR); if (csr & MUSB_TXCSR_TXPKTRDY) { @@ -727,6 +734,14 @@ cppi41_dma_controller_create(struct musb *musb, void __iomem *base) controller->controller.is_compatible = cppi41_is_compatible; controller->controller.musb = musb; + if (musb->io.quirks & MUSB_DA8XX) { + controller->tdown_reg = DA8XX_USB_TEARDOWN; + controller->autoreq_reg = DA8XX_USB_AUTOREQ; + } else { + controller->tdown_reg = USB_TDOWN; + controller->autoreq_reg = USB_CTRL_AUTOREQ; + } + ret = cppi41_dma_controller_start(controller); if (ret) goto plat_get_fail; -- cgit v1.1 From e10c5b0c773efb8643ee89d387d310584ca30830 Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Mon, 9 Oct 2017 22:46:10 -0500 Subject: usb: musb: musb_cppi41: Fix cppi41_set_dma_mode() for DA8xx The way to configure the DMA mode on DA8xx is different from DSPS. Add a new function to configure DMA mode on DA8xx and use a callback to call the right function based on the platform. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexandre Bailon Tested-by: Sekhar Nori Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index d66416a..b2b1306 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -26,6 +26,7 @@ #define MUSB_DMA_NUM_CHANNELS 15 +#define DA8XX_USB_MODE 0x10 #define DA8XX_USB_AUTOREQ 0x14 #define DA8XX_USB_TEARDOWN 0x1c @@ -41,6 +42,9 @@ struct cppi41_dma_controller { u32 tdown_reg; u32 autoreq_reg; + + void (*set_dma_mode)(struct cppi41_dma_channel *cppi41_channel, + unsigned int mode); }; static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) @@ -355,6 +359,32 @@ static void cppi41_set_dma_mode(struct cppi41_dma_channel *cppi41_channel, } } +static void da8xx_set_dma_mode(struct cppi41_dma_channel *cppi41_channel, + unsigned int mode) +{ + struct cppi41_dma_controller *controller = cppi41_channel->controller; + struct musb *musb = controller->controller.musb; + unsigned int shift; + u32 port; + u32 new_mode; + u32 old_mode; + + old_mode = controller->tx_mode; + port = cppi41_channel->port_num; + + shift = (port - 1) * 4; + if (!cppi41_channel->is_tx) + shift += 16; + new_mode = old_mode & ~(3 << shift); + new_mode |= mode << shift; + + if (new_mode == old_mode) + return; + controller->tx_mode = new_mode; + musb_writel(musb->ctrl_base, DA8XX_USB_MODE, new_mode); +} + + static void cppi41_set_autoreq_mode(struct cppi41_dma_channel *cppi41_channel, unsigned mode) { @@ -379,6 +409,7 @@ static bool cppi41_configure_channel(struct dma_channel *channel, dma_addr_t dma_addr, u32 len) { struct cppi41_dma_channel *cppi41_channel = channel->private_data; + struct cppi41_dma_controller *controller = cppi41_channel->controller; struct dma_chan *dc = cppi41_channel->dc; struct dma_async_tx_descriptor *dma_desc; enum dma_transfer_direction direction; @@ -404,7 +435,7 @@ static bool cppi41_configure_channel(struct dma_channel *channel, musb_writel(musb->ctrl_base, RNDIS_REG(cppi41_channel->port_num), len); /* gen rndis */ - cppi41_set_dma_mode(cppi41_channel, + controller->set_dma_mode(cppi41_channel, EP_MODE_DMA_GEN_RNDIS); /* auto req */ @@ -413,14 +444,15 @@ static bool cppi41_configure_channel(struct dma_channel *channel, } else { musb_writel(musb->ctrl_base, RNDIS_REG(cppi41_channel->port_num), 0); - cppi41_set_dma_mode(cppi41_channel, + controller->set_dma_mode(cppi41_channel, EP_MODE_DMA_TRANSPARENT); cppi41_set_autoreq_mode(cppi41_channel, EP_MODE_AUTOREQ_NONE); } } else { /* fallback mode */ - cppi41_set_dma_mode(cppi41_channel, EP_MODE_DMA_TRANSPARENT); + controller->set_dma_mode(cppi41_channel, + EP_MODE_DMA_TRANSPARENT); cppi41_set_autoreq_mode(cppi41_channel, EP_MODE_AUTOREQ_NONE); len = min_t(u32, packet_sz, len); } @@ -737,9 +769,11 @@ cppi41_dma_controller_create(struct musb *musb, void __iomem *base) if (musb->io.quirks & MUSB_DA8XX) { controller->tdown_reg = DA8XX_USB_TEARDOWN; controller->autoreq_reg = DA8XX_USB_AUTOREQ; + controller->set_dma_mode = da8xx_set_dma_mode; } else { controller->tdown_reg = USB_TDOWN; controller->autoreq_reg = USB_CTRL_AUTOREQ; + controller->set_dma_mode = cppi41_set_dma_mode; } ret = cppi41_dma_controller_start(controller); -- cgit v1.1 From 297d7fe9e439473800ab1f2f853b4b5f8c888500 Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Mon, 9 Oct 2017 22:46:11 -0500 Subject: usb: musb: musb_cppi41: Configure the number of channels for DA8xx Currently, the number of channels is set to 15 but in the case of DA8xx, the number of channels is 4. Update the driver to configure the number of channels at runtime. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexandre Bailon Tested-by: Sekhar Nori Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index b2b1306..1ec0a49 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -30,10 +30,12 @@ #define DA8XX_USB_AUTOREQ 0x14 #define DA8XX_USB_TEARDOWN 0x1c +#define DA8XX_DMA_NUM_CHANNELS 4 + struct cppi41_dma_controller { struct dma_controller controller; - struct cppi41_dma_channel rx_channel[MUSB_DMA_NUM_CHANNELS]; - struct cppi41_dma_channel tx_channel[MUSB_DMA_NUM_CHANNELS]; + struct cppi41_dma_channel *rx_channel; + struct cppi41_dma_channel *tx_channel; struct hrtimer early_tx; struct list_head early_tx_list; u32 rx_mode; @@ -45,6 +47,7 @@ struct cppi41_dma_controller { void (*set_dma_mode)(struct cppi41_dma_channel *cppi41_channel, unsigned int mode); + u8 num_channels; }; static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) @@ -483,7 +486,7 @@ static struct dma_channel *cppi41_dma_channel_allocate(struct dma_controller *c, struct cppi41_dma_channel *cppi41_channel = NULL; u8 ch_num = hw_ep->epnum - 1; - if (ch_num >= MUSB_DMA_NUM_CHANNELS) + if (ch_num >= controller->num_channels) return NULL; if (is_tx) @@ -643,7 +646,7 @@ static void cppi41_release_all_dma_chans(struct cppi41_dma_controller *ctrl) struct dma_chan *dc; int i; - for (i = 0; i < MUSB_DMA_NUM_CHANNELS; i++) { + for (i = 0; i < ctrl->num_channels; i++) { dc = ctrl->tx_channel[i].dc; if (dc) dma_release_channel(dc); @@ -695,7 +698,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) goto err; ret = -EINVAL; - if (port > MUSB_DMA_NUM_CHANNELS || !port) + if (port > controller->num_channels || !port) goto err; if (is_tx) cppi41_channel = &controller->tx_channel[port - 1]; @@ -736,6 +739,8 @@ void cppi41_dma_controller_destroy(struct dma_controller *c) hrtimer_cancel(&controller->early_tx); cppi41_dma_controller_stop(controller); + kfree(controller->rx_channel); + kfree(controller->tx_channel); kfree(controller); } EXPORT_SYMBOL_GPL(cppi41_dma_controller_destroy); @@ -744,6 +749,7 @@ struct dma_controller * cppi41_dma_controller_create(struct musb *musb, void __iomem *base) { struct cppi41_dma_controller *controller; + int channel_size; int ret = 0; if (!musb->controller->parent->of_node) { @@ -770,18 +776,33 @@ cppi41_dma_controller_create(struct musb *musb, void __iomem *base) controller->tdown_reg = DA8XX_USB_TEARDOWN; controller->autoreq_reg = DA8XX_USB_AUTOREQ; controller->set_dma_mode = da8xx_set_dma_mode; + controller->num_channels = DA8XX_DMA_NUM_CHANNELS; } else { controller->tdown_reg = USB_TDOWN; controller->autoreq_reg = USB_CTRL_AUTOREQ; controller->set_dma_mode = cppi41_set_dma_mode; + controller->num_channels = MUSB_DMA_NUM_CHANNELS; } + channel_size = controller->num_channels * + sizeof(struct cppi41_dma_channel); + controller->rx_channel = kzalloc(channel_size, GFP_KERNEL); + if (!controller->rx_channel) + goto rx_channel_alloc_fail; + controller->tx_channel = kzalloc(channel_size, GFP_KERNEL); + if (!controller->tx_channel) + goto tx_channel_alloc_fail; + ret = cppi41_dma_controller_start(controller); if (ret) goto plat_get_fail; return &controller->controller; plat_get_fail: + kfree(controller->tx_channel); +tx_channel_alloc_fail: + kfree(controller->rx_channel); +rx_channel_alloc_fail: kfree(controller); kzalloc_fail: if (ret == -EPROBE_DEFER) -- cgit v1.1 From 445ef61543da3db5b699f87fb0aa4f227165f6ed Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 9 Oct 2017 22:46:12 -0500 Subject: usb: musb: Check for host-mode using is_host_active() on reset interrupt The sunxi musb has a bug where sometimes it will generate a babble error on device disconnect instead of a disconnect IRQ. When this happens the musb controller switches from host mode to device mode (it clears MUSB_DEVCTL_HM/MUSB_DEVCTL_SESSION and sets MUSB_DEVCTL_BDEVICE) and gets stuck in this state. The babble error is misdetected as a bus reset because MUSB_DEVCTL_HM was cleared. To fix this, use is_host_active() rather than (devctl & MUSB_DEVCTL_HM) to detect babble error so that sunxi musb babble recovery can handle it by restoring the mode. This information is provided by the driver logic and does not rely on register contents. Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Jonathan Liu Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 1ced3af..ff5a1a8 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -906,7 +906,7 @@ b_host: */ if (int_usb & MUSB_INTR_RESET) { handled = IRQ_HANDLED; - if (devctl & MUSB_DEVCTL_HM) { + if (is_host_active(musb)) { /* * When BABBLE happens what we can depends on which * platform MUSB is running, because some platforms @@ -916,9 +916,7 @@ b_host: * drop the session. */ dev_err(musb->controller, "Babble\n"); - - if (is_host_active(musb)) - musb_recover_from_babble(musb); + musb_recover_from_babble(musb); } else { musb_dbg(musb, "BUS RESET as %s", usb_otg_state_string(musb->xceiv->otg->state)); -- cgit v1.1 From 6ed05c68cbcae42cd52b8e53b66952bfa9c002ce Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 9 Oct 2017 22:46:13 -0500 Subject: usb: musb: sunxi: Explicitly release USB PHY on exit This fixes a kernel oops when unloading the driver due to usb_put_phy being called after usb_phy_generic_unregister when the device is detached. Calling usb_phy_generic_unregister causes x->dev->driver to be NULL in usb_put_phy and results in a NULL pointer dereference. Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Jonathan Liu Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index c9a09b5..dc353e2 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -297,6 +297,8 @@ static int sunxi_musb_exit(struct musb *musb) if (test_bit(SUNXI_MUSB_FL_HAS_SRAM, &glue->flags)) sunxi_sram_release(musb->controller->parent); + devm_usb_put_phy(glue->dev, glue->xceiv); + return 0; } -- cgit v1.1 From 2811501e6d8f5747d08f8e25b9ecf472d0dc4c7d Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 3 Oct 2017 11:16:43 +0300 Subject: usb: quirks: add quirk for WORLDE MINI MIDI keyboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This keyboard doesn't implement Get String descriptors properly even though string indexes are valid. What happens is that when requesting for the String descriptor, the device disconnects and reconnects. Without this quirk, this loop will continue forever. Cc: Alan Stern Reported-by: Владимир Мартьянов Cc: stable Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 82806e3..a6aaf2f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -221,6 +221,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, + /* MIDI keyboard WORLDE MINI */ + { USB_DEVICE(0x1c75, 0x0204), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Acer C120 LED Projector */ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, -- cgit v1.1 From 3f50f614d61f91ad30b1947c429d1f235493a7f9 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 14 Oct 2017 00:10:12 +0900 Subject: perf record: Fix documentation for a inexistent option '-l' 'perf record' had a '-l' option that meant "scale counter values" a very long time ago, but it currently belongs to 'perf stat' as '-c'. So remove it. I found this problem in the below case. $ perf record -e cycles -l sleep 3 Error: unknown switch `l Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1507907412-19813-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e397453..63526f4 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -8,8 +8,8 @@ perf-record - Run a command and record its profile into perf.data SYNOPSIS -------- [verse] -'perf record' [-e | --event=EVENT] [-l] [-a] -'perf record' [-e | --event=EVENT] [-l] [-a] -- [] +'perf record' [-e | --event=EVENT] [-a] +'perf record' [-e | --event=EVENT] [-a] -- [] DESCRIPTION ----------- -- cgit v1.1 From 7f0cd23615040b9e53bb4980c986b721cba08bbc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Oct 2017 22:29:00 +0900 Subject: perf buildid-list: Fix crash when processing PERF_RECORD_NAMESPACE Thomas reported that 'perf buildid-list' gets a SEGFAULT due to NULL pointer deref when he ran it on a data with namespace events. It was because the buildid_id__mark_dso_hit_ops lacks the namespace event handler and perf_too__fill_default() didn't set it. Program received signal SIGSEGV, Segmentation fault. 0x0000000000000000 in ?? () Missing separate debuginfos, use: dnf debuginfo-install audit-libs-2.7.7-1.fc25.s390x bzip2-libs-1.0.6-21.fc25.s390x elfutils-libelf-0.169-1.fc25.s390x +elfutils-libs-0.169-1.fc25.s390x libcap-ng-0.7.8-1.fc25.s390x numactl-libs-2.0.11-2.ibm.fc25.s390x openssl-libs-1.1.0e-1.1.ibm.fc25.s390x perl-libs-5.24.1-386.fc25.s390x +python-libs-2.7.13-2.fc25.s390x slang-2.3.0-7.fc25.s390x xz-libs-5.2.3-2.fc25.s390x zlib-1.2.8-10.fc25.s390x (gdb) where #0 0x0000000000000000 in ?? () #1 0x00000000010fad6a in machines__deliver_event (machines=, machines@entry=0x2c6fd18, evlist=, event=event@entry=0x3fffdf00470, sample=0x3ffffffe880, sample@entry=0x3ffffffe888, tool=tool@entry=0x1312968 , file_offset=1136) at util/session.c:1287 #2 0x00000000010fbf4e in perf_session__deliver_event (file_offset=1136, tool=0x1312968 , sample=0x3ffffffe888, event=0x3fffdf00470, session=0x2c6fc30) at util/session.c:1340 #3 perf_session__process_event (session=0x2c6fc30, session@entry=0x0, event=event@entry=0x3fffdf00470, file_offset=file_offset@entry=1136) at util/session.c:1522 #4 0x00000000010fddde in __perf_session__process_events (file_size=11880, data_size=, data_offset=, session=0x0) at util/session.c:1899 #5 perf_session__process_events (session=0x0, session@entry=0x2c6fc30) at util/session.c:1953 #6 0x000000000103b2ac in perf_session__list_build_ids (with_hits=, force=) at builtin-buildid-list.c:83 #7 cmd_buildid_list (argc=, argv=) at builtin-buildid-list.c:115 #8 0x00000000010a026c in run_builtin (p=0x1311f78 , argc=argc@entry=2, argv=argv@entry=0x3fffffff3c0) at perf.c:296 #9 0x000000000102bc00 in handle_internal_command (argv=, argc=2) at perf.c:348 #10 run_argv (argcp=, argv=) at perf.c:392 #11 main (argc=, argv=0x3fffffff3c0) at perf.c:536 (gdb) Fix it by adding a stub event handler for namespace event. Committer testing: Further clarifying, plain using 'perf buildid-list' will not end up in a SEGFAULT when processing a perf.data file with namespace info: # perf record -a --namespaces sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.024 MB perf.data (1058 samples) ] # perf buildid-list | wc -l 38 # perf buildid-list | head -5 e2a171c7b905826fc8494f0711ba76ab6abbd604 /lib/modules/4.14.0-rc3+/build/vmlinux 874840a02d8f8a31cedd605d0b8653145472ced3 /lib/modules/4.14.0-rc3+/kernel/arch/x86/kvm/kvm-intel.ko ea7223776730cd8a22f320040aae4d54312984bc /lib/modules/4.14.0-rc3+/kernel/drivers/gpu/drm/i915/i915.ko 5961535e6732a8edb7f22b3f148bb2fa2e0be4b9 /lib/modules/4.14.0-rc3+/kernel/drivers/gpu/drm/drm.ko f045f54aa78cf1931cc893f78b6cbc52c72a8cb1 /usr/lib64/libc-2.25.so # It is only when one asks for checking what of those entries actually had samples, i.e. when we use either -H or --with-hits, that we will process all the PERF_RECORD_ events, and since tools/perf/builtin-buildid-list.c neither explicitely set a perf_tool.namespaces() callback nor the default stub was set that we end up, when processing a PERF_RECORD_NAMESPACE record, causing a SEGFAULT: # perf buildid-list -H Segmentation fault (core dumped) ^C # Reported-and-Tested-by: Thomas-Mich Richter Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Hari Bathini Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas-Mich Richter Fixes: f3b3614a284d ("perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: http://lkml.kernel.org/r/20171017132900.11043-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7ebd9f..76ab070 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -374,6 +374,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->mmap2 = process_event_stub; if (tool->comm == NULL) tool->comm = process_event_stub; + if (tool->namespaces == NULL) + tool->namespaces = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) -- cgit v1.1 From 671632a019853744bc6c0b2ed8bcfdbcea371848 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 17 Oct 2017 08:22:25 -0500 Subject: MAINTAINERS: fix git tree url for musb module Commit 3b2435192fe91 ("MAINTAINERS: drop OMAP USB and MUSB maintainership") switched the maintainer for musb module, but didn't update the git tree location. Delete the git tree information, since the current maintainer doesn't have a public tree. Reported-by: Jonathan Liu Signed-off-by: Bin Liu Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a74227a..1f33ae3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9213,7 +9213,6 @@ F: include/linux/isicom.h MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER M: Bin Liu L: linux-usb@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained F: drivers/usb/musb/ -- cgit v1.1 From 587c3c9f286cee5c9cac38d28c8ae1875f4ec85b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 15 Oct 2017 18:16:33 +0100 Subject: scsi: sg: Re-fix off by one in sg_fill_request_table() Commit 109bade9c625 ("scsi: sg: use standard lists for sg_requests") introduced an off-by-one error in sg_ioctl(), which was fixed by commit bd46fc406b30 ("scsi: sg: off by one in sg_ioctl()"). Unfortunately commit 4759df905a47 ("scsi: sg: factor out sg_fill_request_table()") moved that code, and reintroduced the bug (perhaps due to a botched rebase). Fix it again. Fixes: 4759df905a47 ("scsi: sg: factor out sg_fill_request_table()") Cc: stable@vger.kernel.org Signed-off-by: Ben Hutchings Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0419c22..aa28874 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -837,7 +837,7 @@ sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { - if (val > SG_MAX_QUEUE) + if (val >= SG_MAX_QUEUE) break; rinfo[val].req_state = srp->done + 1; rinfo[val].problem = -- cgit v1.1 From 2bdd713b92a9cade239d3c7d15205a09f556624d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 17 Oct 2017 20:32:07 +0200 Subject: mac80211: use constant time comparison with keys Otherwise we risk leaking information via timing side channel. Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything") Signed-off-by: Jason A. Donenfeld Signed-off-by: Johannes Berg --- net/mac80211/key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ae995c8..035d16f 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -635,7 +636,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * new version of the key to avoid nonce reuse or replay issues. */ if (old_key && key->conf.keylen == old_key->conf.keylen && - !memcmp(key->conf.key, old_key->conf.key, key->conf.keylen)) { + !crypto_memneq(key->conf.key, old_key->conf.key, key->conf.keylen)) { ieee80211_key_free_unused(key); ret = 0; goto out; -- cgit v1.1 From 51e13359cd5ea34acc62c90627603352956380af Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Oct 2017 21:56:20 +0200 Subject: cfg80211: fix connect/disconnect edge cases If we try to connect while already connected/connecting, but this fails, we set ssid_len=0 but leave current_bss hanging, leading to errors. Check all of this better, first of all ensuring that we can't try to connect to a different SSID while connected/ing; ensure that prev_bssid is set for re-association attempts even in the case of the driver supporting the connect() method, and don't reset ssid_len in the failure cases. While at it, also reset ssid_len while disconnecting unless we were connected and expect a disconnected event, and warn on a successful connection without ssid_len being set. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/wireless/sme.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 0a49b88..b6533ec 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -522,11 +522,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, return -EOPNOTSUPP; if (wdev->current_bss) { - if (!prev_bssid) - return -EALREADY; - if (prev_bssid && - !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) - return -ENOTCONN; cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; @@ -1063,11 +1058,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (WARN_ON(wdev->connect_keys)) { - kzfree(wdev->connect_keys); - wdev->connect_keys = NULL; + /* + * If we have an ssid_len, we're trying to connect or are + * already connected, so reject a new SSID unless it's the + * same (which is the case for re-association.) + */ + if (wdev->ssid_len && + (wdev->ssid_len != connect->ssid_len || + memcmp(wdev->ssid, connect->ssid, wdev->ssid_len))) + return -EALREADY; + + /* + * If connected, reject (re-)association unless prev_bssid + * matches the current BSSID. + */ + if (wdev->current_bss) { + if (!prev_bssid) + return -EALREADY; + if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + return -ENOTCONN; } + /* + * Reject if we're in the process of connecting with WEP, + * this case isn't very interesting and trying to handle + * it would make the code much more complex. + */ + if (wdev->connect_keys) + return -EINPROGRESS; + cfg80211_oper_and_ht_capa(&connect->ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); @@ -1118,7 +1137,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, if (err) { wdev->connect_keys = NULL; - wdev->ssid_len = 0; + /* + * This could be reassoc getting refused, don't clear + * ssid_len in that case. + */ + if (!wdev->current_bss) + wdev->ssid_len = 0; return err; } @@ -1145,6 +1169,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, else if (wdev->ssid_len) err = rdev_disconnect(rdev, dev, reason); + /* + * Clear ssid_len unless we actually were fully connected, + * in which case cfg80211_disconnected() will take care of + * this later. + */ + if (!wdev->current_bss) + wdev->ssid_len = 0; + return err; } -- cgit v1.1 From e5f5ce37a7918ed7406c52987c7cc8b670ed5e14 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Oct 2017 09:36:51 +0200 Subject: mac80211: validate user rate mask before configuring driver Ben reported that when the user rate mask is rejected for not matching any basic rate, the driver had already been configured. This is clearly an oversight in my original change, fix this by doing the validation before calling the driver. Reported-by: Ben Greear Fixes: e8e4f5280ddd ("mac80211: reject/clear user rate mask if not usable") Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a354f19..fb15d3b9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { - ret = drv_set_bitrate_mask(local, sdata, mask); - if (ret) - return ret; - } - /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able @@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return -EINVAL; } + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { + ret = drv_set_bitrate_mask(local, sdata, mask); + if (ret) + return ret; + } + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; -- cgit v1.1 From 0bfe649fbb1337400065fa47679b381b2ac845f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 16 Oct 2017 17:05:57 +0200 Subject: fq_impl: Properly enforce memory limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fq structure would fail to properly enforce the memory limit in the case where the packet being enqueued was bigger than the packet being removed to bring the memory usage down. So keep dropping packets until the memory usage is back below the limit. Also, fix the statistics for memory limit violations. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq_impl.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 4e6131c..ac1a231 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -146,6 +146,7 @@ static void fq_tin_enqueue(struct fq *fq, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; + bool oom; lockdep_assert_held(&fq->lock); @@ -167,8 +168,8 @@ static void fq_tin_enqueue(struct fq *fq, } __skb_queue_tail(&flow->queue, skb); - - if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) { + oom = (fq->memory_usage > fq->memory_limit); + while (fq->backlog > fq->limit || oom) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); @@ -183,8 +184,10 @@ static void fq_tin_enqueue(struct fq *fq, flow->tin->overlimit++; fq->overlimit++; - if (fq->memory_usage > fq->memory_limit) + if (oom) { fq->overmemory++; + oom = (fq->memory_usage > fq->memory_limit); + } } } -- cgit v1.1 From d824c7a8e88a7162d14782e73a6a6c867a266500 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 13 Oct 2017 19:37:31 +0300 Subject: regulator: rn5t618: Do not index regulator_desc arrays by id The regulator_desc arrays in this driver are indexed by RN5T618_* constants and some elements can be missing. This causes probe failures on older models: rn5t618-regulator rn5t618-regulator: failed to register (null) regulator rn5t618-regulator: probe of rn5t618-regulator failed with error -22 Fix this by making the arrays flat. This also saves a little memory because the regulator_desc arrays become smaller. Signed-off-by: Leonard Crestez Fixes: 83b2a3c2ab24 ("regulator: rn5t618: add RC5T619 PMIC support") Signed-off-by: Mark Brown --- drivers/regulator/rn5t618-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rn5t618-regulator.c b/drivers/regulator/rn5t618-regulator.c index ef2be56..790a4a7 100644 --- a/drivers/regulator/rn5t618-regulator.c +++ b/drivers/regulator/rn5t618-regulator.c @@ -29,7 +29,7 @@ static const struct regulator_ops rn5t618_reg_ops = { }; #define REG(rid, ereg, emask, vreg, vmask, min, max, step) \ - [RN5T618_##rid] = { \ + { \ .name = #rid, \ .of_match = of_match_ptr(#rid), \ .regulators_node = of_match_ptr("regulators"), \ -- cgit v1.1 From 3d8bba9535ac6e79453c769dd0c8ea852a51ad60 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 18 Oct 2017 23:11:18 +0800 Subject: perf xyarray: Fix wrong processing when closing evsel fd In current xyarray code, xyarray__max_x() returns max_y, and xyarray__max_y() returns max_x. It's confusing and for code logic it looks not correct. Error happens when closing evsel fd. Let's see this scenario: 1. Allocate an fd (pseudo-code) perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); } xyarray__new(int xlen, int ylen, size_t entry_size) { size_t row_size = ylen * entry_size; struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size); xy->entry_size = entry_size; xy->row_size = row_size; xy->entries = xlen * ylen; xy->max_x = xlen; xy->max_y = ylen; ...... } So max_x is ncpus, max_y is nthreads and row_size = nthreads * 4. 2. Use perf syscall and get the fd int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = fd; } } static inline void *xyarray__entry(struct xyarray *xy, int x, int y) { return &xy->contents[x * xy->row_size + y * xy->entry_size]; } These codes don't have issues. The issue happens in the closing of fd. 3. Close fd. void perf_evsel__close_fd(struct perf_evsel *evsel) { int cpu, thread; for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } } Since xyarray__max_x() returns max_y (nthreads) and xyarry__max_y() returns max_x (ncpus), so above code is actually to be: for (cpu = 0; cpu < nthreads; cpu++) for (thread = 0; thread < ncpus; ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } It's not correct! This change is introduced by "475fb533fb7d" ("perf evsel: Fix buffer overflow while freeing events") This fix is to let xyarray__max_x() return max_x (ncpus) and let xyarry__max_y() return max_y (nthreads) Committer note: This was also fixed by Ravi Bangoria, who provided the same patch, noticing the problem with 'perf record': I see 'perf record -p ' crashes with following log: *** Error in `./perf': free(): invalid next size (normal): 0x000000000298b340 *** ======= Backtrace: ========= /lib/x86_64-linux-gnu/libc.so.6(+0x777e5)[0x7f7fd85c87e5] /lib/x86_64-linux-gnu/libc.so.6(+0x8037a)[0x7f7fd85d137a] /lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f7fd85d553c] ./perf(perf_evsel__close+0xb4)[0x4b7614] ./perf(perf_evlist__delete+0x100)[0x4ab180] ./perf(cmd_record+0x1d9)[0x43a5a9] ./perf[0x49aa2f] ./perf(main+0x631)[0x427841] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f7fd8571830] ./perf(_start+0x29)[0x427a59] Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Cc: Ravi Bangoria Fixes: d74be4767367 ("perf xyarray: Save max_x, max_y") Link: http://lkml.kernel.org/r/1508339478-26674-1-git-send-email-yao.jin@linux.intel.com Link: http://lkml.kernel.org/r/1508327446-15302-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/xyarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h index 4ba726c..54af6046 100644 --- a/tools/perf/util/xyarray.h +++ b/tools/perf/util/xyarray.h @@ -23,12 +23,12 @@ static inline void *xyarray__entry(struct xyarray *xy, int x, int y) static inline int xyarray__max_y(struct xyarray *xy) { - return xy->max_x; + return xy->max_y; } static inline int xyarray__max_x(struct xyarray *xy) { - return xy->max_y; + return xy->max_x; } #endif /* _PERF_XYARRAY_H_ */ -- cgit v1.1 From 74f8e22c153f4464060a0c2e4cfd1d6e51af2109 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 18 Oct 2017 16:34:09 +0800 Subject: perf test shell trace+probe_libc_inet_pton.sh: Be compatible with Debian/Ubuntu In debian/ubuntu, libc.so is located at a different place, /lib/x86_64-linux-gnu/libc-2.23.so, so it outputs like this when testing: PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.040 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.040/0.040/0.040/0.000 ms 0.000 probe_libc:inet_pton:(7f0e2db741c0)) __GI___inet_pton (/lib/x86_64-linux-gnu/libc-2.23.so) getaddrinfo (/lib/x86_64-linux-gnu/libc-2.23.so) [0xffffa9d40f34ff4d] (/bin/ping) Fix up the libc path to make sure this test works in more OSes. Committer testing: When this test fails one can use 'perf test -v', i.e. in verbose mode, where it'll show the expected backtrace, so, after applying this test: On Fedora 26: # perf test -v ping 62: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 23322 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms 0.000 probe_libc:inet_pton:(7fe344310d80)) __GI___inet_pton (/usr/lib64/libc-2.25.so) getaddrinfo (/usr/lib64/libc-2.25.so) _init (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok # Signed-off-by: Li Zhijian Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Kim Phillips Cc: Li Zhijian Cc: Peter Zijlstra Cc: Philip Li Link: http://lkml.kernel.org/r/1508315649-18836-1-git-send-email-lizhijian@cn.fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 462fc75..7a84d73 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -10,6 +10,9 @@ . $(dirname $0)/lib/probe.sh +ld=$(realpath /lib64/ld*.so.* | uniq) +libc=$(echo $ld | sed 's/ld/libc/g') + trace_libc_inet_pton_backtrace() { idx=0 expected[0]="PING.*bytes" @@ -18,8 +21,8 @@ trace_libc_inet_pton_backtrace() { expected[3]=".*packets transmitted.*" expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" - expected[6]=".*inet_pton[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" - expected[7]="getaddrinfo[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" + expected[6]=".*inet_pton[[:space:]]\($libc\)$" + expected[7]="getaddrinfo[[:space:]]\($libc\)$" expected[8]=".*\(.*/bin/ping.*\)$" perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do @@ -35,7 +38,7 @@ trace_libc_inet_pton_backtrace() { } skip_if_no_perf_probe && \ -perf probe -q /lib64/libc-*.so inet_pton && \ +perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace err=$? rm -f ${file} -- cgit v1.1 From c97d96b4e612c7dc1b37d7afc61b598a9a25994d Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Sun, 24 Sep 2017 15:20:49 +0100 Subject: staging: bcm2835-audio: Fix memory corruption The previous commit (0adbfd46) fixed a memory leak but also freed a block in the success case, causing a stale pointer to be used with potentially fatal results. Only free the vchi_instance block in the case that vchi_connect fails; once connected, the instance is retained for subsequent connections. Simplifying the code by removing a bunch of gotos and returning errors directly. Signed-off-by: Phil Elwell Fixes: 0adbfd4694c2 ("staging: bcm2835-audio: fix memory leak in bcm2835_audio_open_connection()") Cc: stable # 4.12+ Tested-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- .../vc04_services/bcm2835-audio/bcm2835-vchiq.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c index 5f3d8f2..4be864d 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c @@ -390,8 +390,7 @@ static int bcm2835_audio_open_connection(struct bcm2835_alsa_stream *alsa_stream __func__, instance); instance->alsa_stream = alsa_stream; alsa_stream->instance = instance; - ret = 0; // xxx todo -1; - goto err_free_mem; + return 0; } /* Initialize and create a VCHI connection */ @@ -401,16 +400,15 @@ static int bcm2835_audio_open_connection(struct bcm2835_alsa_stream *alsa_stream LOG_ERR("%s: failed to initialise VCHI instance (ret=%d)\n", __func__, ret); - ret = -EIO; - goto err_free_mem; + return -EIO; } ret = vchi_connect(NULL, 0, vchi_instance); if (ret) { LOG_ERR("%s: failed to connect VCHI instance (ret=%d)\n", __func__, ret); - ret = -EIO; - goto err_free_mem; + kfree(vchi_instance); + return -EIO; } initted = 1; } @@ -421,19 +419,16 @@ static int bcm2835_audio_open_connection(struct bcm2835_alsa_stream *alsa_stream if (IS_ERR(instance)) { LOG_ERR("%s: failed to initialize audio service\n", __func__); - ret = PTR_ERR(instance); - goto err_free_mem; + /* vchi_instance is retained for use the next time. */ + return PTR_ERR(instance); } instance->alsa_stream = alsa_stream; alsa_stream->instance = instance; LOG_DBG(" success !\n"); - ret = 0; -err_free_mem: - kfree(vchi_instance); - return ret; + return 0; } int bcm2835_audio_open(struct bcm2835_alsa_stream *alsa_stream) -- cgit v1.1 From 723f2828a98c8ca19842042f418fb30dd8cfc0f7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Oct 2017 13:12:25 +0200 Subject: x86/microcode/intel: Disable late loading on model 79 Blacklist Broadwell X model 79 for late loading due to an erratum. Signed-off-by: Borislav Petkov Acked-by: Tony Luck Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171018111225.25635-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/intel.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 8f7a9bb..7dbcb7a 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -918,6 +919,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } +static bool is_blacklisted(unsigned int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { + pr_err_once("late loading on model 79 is disabled.\n"); + return true; + } + + return false; +} + static enum ucode_state request_microcode_fw(int cpu, struct device *device, bool refresh_fw) { @@ -926,6 +939,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, const struct firmware *firmware; enum ucode_state ret; + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); @@ -950,6 +966,9 @@ static int get_ucode_user(void *to, const void *from, size_t n) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } -- cgit v1.1 From e8b9b0cc8269c85d8167aaee024bfcbb4976c031 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Oct 2017 09:59:49 -0700 Subject: x86/mm/64: Remove the last VM_BUG_ON() from the TLB code Let's avoid hard-to-diagnose crashes in the future. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f423bbc97864089fbdeb813f1ea126c6eaed844a.1508000261.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 658bf00..7db23f9 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -147,8 +147,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.is_lazy, false); if (real_prev == next) { - VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != + next->context.ctx_id); /* * We don't currently support having a real mm loaded without -- cgit v1.1 From 4e57b94664fef55aa71cac33b4632fdfdd52b695 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Oct 2017 09:59:50 -0700 Subject: x86/mm: Tidy up "x86/mm: Flush more aggressively in lazy TLB mode" Due to timezones, commit: b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode") was an outdated patch that well tested and fixed the bug but didn't address Borislav's review comments. Tidy it up: - The name "tlb_use_lazy_mode()" was highly confusing. Change it to "tlb_defer_switch_to_init_mm()", which describes what it actually means. - Move the static_branch crap into a helper. - Improve comments. Actually removing the debugfs option is in the next patch. Reported-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode") Link: http://lkml.kernel.org/r/154ef95428d4592596b6e98b0af1d2747d6cfbf8.1508000261.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 7 ++++++- arch/x86/mm/tlb.c | 30 ++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d362161..0d4a1bb 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -87,7 +87,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) * to init_mm when we switch to a kernel thread (e.g. the idle thread). If * it's false, then we immediately switch CR3 when entering a kernel thread. */ -DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode); +DECLARE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); + +static inline bool tlb_defer_switch_to_init_mm(void) +{ + return static_branch_unlikely(&__tlb_defer_switch_to_init_mm); +} /* * 6 because 6 should be plenty and struct tlb_state will fit in diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7db23f9..5ee3b59 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -30,7 +30,7 @@ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); -DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode); +DEFINE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) @@ -213,6 +213,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } /* + * Please ignore the name of this function. It should be called + * switch_to_kernel_thread(). + * * enter_lazy_tlb() is a hint from the scheduler that we are entering a * kernel thread or other context without an mm. Acceptable implementations * include doing nothing whatsoever, switching to init_mm, or various clever @@ -227,7 +230,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) return; - if (static_branch_unlikely(&tlb_use_lazy_mode)) { + if (tlb_defer_switch_to_init_mm()) { /* * There's a significant optimization that may be possible * here. We have accurate enough TLB flush tracking that we @@ -632,7 +635,8 @@ static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf, { char buf[2]; - buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0'; + buf[0] = static_branch_likely(&__tlb_defer_switch_to_init_mm) + ? '1' : '0'; buf[1] = '\n'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -647,9 +651,9 @@ static ssize_t tlblazy_write_file(struct file *file, return -EINVAL; if (val) - static_branch_enable(&tlb_use_lazy_mode); + static_branch_enable(&__tlb_defer_switch_to_init_mm); else - static_branch_disable(&tlb_use_lazy_mode); + static_branch_disable(&__tlb_defer_switch_to_init_mm); return count; } @@ -660,23 +664,25 @@ static const struct file_operations fops_tlblazy = { .llseek = default_llseek, }; -static int __init init_tlb_use_lazy_mode(void) +static int __init init_tlblazy(void) { if (boot_cpu_has(X86_FEATURE_PCID)) { /* - * Heuristic: with PCID on, switching to and from - * init_mm is reasonably fast, but remote flush IPIs - * as expensive as ever, so turn off lazy TLB mode. + * If we have PCID, then switching to init_mm is reasonably + * fast. If we don't have PCID, then switching to init_mm is + * quite slow, so we default to trying to defer it in the + * hopes that we can avoid it entirely. The latter approach + * runs the risk of receiving otherwise unnecessary IPIs. * * We can't do this in setup_pcid() because static keys * haven't been initialized yet, and it would blow up * badly. */ - static_branch_disable(&tlb_use_lazy_mode); + static_branch_disable(&__tlb_defer_switch_to_init_mm); } - debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR, + debugfs_create_file("tlb_defer_switch_to_init_mm", S_IRUSR | S_IWUSR, arch_debugfs_dir, NULL, &fops_tlblazy); return 0; } -late_initcall(init_tlb_use_lazy_mode); +late_initcall(init_tlblazy); -- cgit v1.1 From 7ac7f2c315ef76437f5119df354d334448534fb5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Oct 2017 09:59:51 -0700 Subject: x86/mm: Remove debug/x86/tlb_defer_switch_to_init_mm Borislav thinks that we don't need this knob in a released kernel. Get rid of it. Requested-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode") Link: http://lkml.kernel.org/r/1fa72431924e81e86c164ff7881bf9240d1f1a6c.1508000261.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 20 ++++++++------ arch/x86/mm/tlb.c | 58 ----------------------------------------- 2 files changed, 12 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0d4a1bb..c4aed0d 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -82,16 +82,20 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) #endif -/* - * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point - * to init_mm when we switch to a kernel thread (e.g. the idle thread). If - * it's false, then we immediately switch CR3 when entering a kernel thread. - */ -DECLARE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); - static inline bool tlb_defer_switch_to_init_mm(void) { - return static_branch_unlikely(&__tlb_defer_switch_to_init_mm); + /* + * If we have PCID, then switching to init_mm is reasonably + * fast. If we don't have PCID, then switching to init_mm is + * quite slow, so we try to defer it in the hopes that we can + * avoid it entirely. The latter approach runs the risk of + * receiving otherwise unnecessary IPIs. + * + * This choice is just a heuristic. The tlb code can handle this + * function returning true or false regardless of whether we have + * PCID. + */ + return !static_cpu_has(X86_FEATURE_PCID); } /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5ee3b59..0f3d0ce 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -30,7 +30,6 @@ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); -DEFINE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) @@ -629,60 +628,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); - -static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - char buf[2]; - - buf[0] = static_branch_likely(&__tlb_defer_switch_to_init_mm) - ? '1' : '0'; - buf[1] = '\n'; - - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t tlblazy_write_file(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - bool val; - - if (kstrtobool_from_user(user_buf, count, &val)) - return -EINVAL; - - if (val) - static_branch_enable(&__tlb_defer_switch_to_init_mm); - else - static_branch_disable(&__tlb_defer_switch_to_init_mm); - - return count; -} - -static const struct file_operations fops_tlblazy = { - .read = tlblazy_read_file, - .write = tlblazy_write_file, - .llseek = default_llseek, -}; - -static int __init init_tlblazy(void) -{ - if (boot_cpu_has(X86_FEATURE_PCID)) { - /* - * If we have PCID, then switching to init_mm is reasonably - * fast. If we don't have PCID, then switching to init_mm is - * quite slow, so we default to trying to defer it in the - * hopes that we can avoid it entirely. The latter approach - * runs the risk of receiving otherwise unnecessary IPIs. - * - * We can't do this in setup_pcid() because static keys - * haven't been initialized yet, and it would blow up - * badly. - */ - static_branch_disable(&__tlb_defer_switch_to_init_mm); - } - - debugfs_create_file("tlb_defer_switch_to_init_mm", S_IRUSR | S_IWUSR, - arch_debugfs_dir, NULL, &fops_tlblazy); - return 0; -} -late_initcall(init_tlblazy); -- cgit v1.1 From 06e2290844fa408d3295ac03a1647f0798518ebe Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 25 Sep 2017 20:11:58 -0500 Subject: Fix encryption labels and lengths for SMB3.1.1 SMB3.1.1 is most secure and recent dialect. Fixup labels and lengths for sMB3.1.1 signing and encryption. Signed-off-by: Steve French CC: Stable --- fs/cifs/cifsglob.h | 8 ++++++-- fs/cifs/smb2transport.c | 26 ++++++++++++++------------ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de5b2e1..e185b28 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -661,7 +661,9 @@ struct TCP_Server_Info { #endif unsigned int max_read; unsigned int max_write; - __u8 preauth_hash[512]; +#ifdef CONFIG_CIFS_SMB311 + __u8 preauth_sha_hash[64]; /* save initital negprot hash */ +#endif /* 3.1.1 */ struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; @@ -849,7 +851,9 @@ struct cifs_ses { __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; - __u8 preauth_hash[512]; +#ifdef CONFIG_CIFS_SMB311 + __u8 preauth_sha_hash[64]; +#endif /* 3.1.1 */ }; static inline bool diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 67367cf..9949394 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -390,6 +390,7 @@ generate_smb30signingkey(struct cifs_ses *ses) return generate_smb3signingkey(ses, &triplet); } +#ifdef CONFIG_CIFS_SMB311 int generate_smb311signingkey(struct cifs_ses *ses) @@ -398,25 +399,26 @@ generate_smb311signingkey(struct cifs_ses *ses) struct derivation *d; d = &triplet.signing; - d->label.iov_base = "SMB2AESCMAC"; - d->label.iov_len = 12; - d->context.iov_base = "SmbSign"; - d->context.iov_len = 8; + d->label.iov_base = "SMBSigningKey"; + d->label.iov_len = 14; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; d = &triplet.encryption; - d->label.iov_base = "SMB2AESCCM"; - d->label.iov_len = 11; - d->context.iov_base = "ServerIn "; - d->context.iov_len = 10; + d->label.iov_base = "SMBC2SCipherKey"; + d->label.iov_len = 16; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; d = &triplet.decryption; - d->label.iov_base = "SMB2AESCCM"; - d->label.iov_len = 11; - d->context.iov_base = "ServerOut"; - d->context.iov_len = 10; + d->label.iov_base = "SMBS2CCipherKey"; + d->label.iov_len = 16; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; return generate_smb3signingkey(ses, &triplet); } +#endif /* 311 */ int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) -- cgit v1.1 From 7cb3def44ce834e5ea462fd900505af4cd68b3d5 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 28 Sep 2017 09:39:58 +1000 Subject: cifs: handle large EA requests more gracefully in smb2+ Update reading the EA using increasingly larger buffer sizes until the response will fit in the buffer, or we exceed the (arbitrary) maximum set to 64kb. Without this change, a user is able to add more and more EAs using setfattr until the point where the total space of all EAs exceed 2kb at which point the user can no longer list the EAs at all and getfattr will abort with an error. The same issue still exists for EAs in SMB1. Signed-off-by: Ronnie Sahlberg Reported-by: Xiaoli Feng Signed-off-by: Steve French --- fs/cifs/smb2maperror.c | 2 +- fs/cifs/smb2ops.c | 31 +++++++++++++++++++++++++------ fs/cifs/smb2pdu.c | 6 +++--- fs/cifs/smb2pdu.h | 3 ++- fs/cifs/smb2proto.h | 1 + 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7ca9808..62c88df 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_DATATYPE_MISALIGNMENT, -EIO, "STATUS_DATATYPE_MISALIGNMENT"}, {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, - {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, + {STATUS_BUFFER_OVERFLOW, -E2BIG, "STATUS_BUFFER_OVERFLOW"}, {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0dafdba..bdb963d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -522,6 +522,7 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct smb2_file_full_ea_info *smb2_data; + int ea_buf_size = SMB2_MIN_EA_BUF; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) @@ -541,14 +542,32 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, return rc; } - smb2_data = kzalloc(SMB2_MAX_EA_BUF, GFP_KERNEL); - if (smb2_data == NULL) { - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return -ENOMEM; + while (1) { + smb2_data = kzalloc(ea_buf_size, GFP_KERNEL); + if (smb2_data == NULL) { + SMB2_close(xid, tcon, fid.persistent_fid, + fid.volatile_fid); + return -ENOMEM; + } + + rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, + fid.volatile_fid, + ea_buf_size, smb2_data); + + if (rc != -E2BIG) + break; + + kfree(smb2_data); + ea_buf_size <<= 1; + + if (ea_buf_size > SMB2_MAX_EA_BUF) { + cifs_dbg(VFS, "EA size is too large\n"); + SMB2_close(xid, tcon, fid.persistent_fid, + fid.volatile_fid); + return -ENOMEM; + } } - rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, fid.volatile_fid, - smb2_data); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6f0e634..ba3865b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2233,12 +2233,12 @@ qinf_exit: } int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb2_file_full_ea_info *data) + u64 persistent_fid, u64 volatile_fid, + int ea_buf_size, struct smb2_file_full_ea_info *data) { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, - SMB2_MAX_EA_BUF, + ea_buf_size, sizeof(struct smb2_file_full_ea_info), (void **)&data, NULL); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6c9653a..4c155b9 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1178,7 +1178,8 @@ struct smb2_file_link_info { /* encoding of request for level 11 */ char FileName[0]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ -#define SMB2_MAX_EA_BUF 2048 +#define SMB2_MIN_EA_BUF 2048 +#define SMB2_MAX_EA_BUF 65536 struct smb2_file_full_ea_info { /* encoding of response for level 15 */ __le32 next_entry_offset; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 0032170..e9ab522 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -134,6 +134,7 @@ extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, + int ea_buf_size, struct smb2_file_full_ea_info *data); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, -- cgit v1.1 From 4572f0539c731c588cdce9575cb7d60ef23a521d Mon Sep 17 00:00:00 2001 From: Long Li Date: Sun, 1 Oct 2017 19:30:24 -0700 Subject: CIFS: SMBD: Fix the definition for SMB2_CHANNEL_RDMA_V1_INVALIDATE The channel value for requesting server remote invalidating local memory registration should be 0x00000002 Signed-off-by: Long Li Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4c155b9..c2ec934 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -832,7 +832,7 @@ struct smb2_flush_rsp { /* Channel field for read and write: exactly one of following flags can be set*/ #define SMB2_CHANNEL_NONE 0x00000000 #define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ -#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */ +#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */ /* SMB2 read request without RFC1001 length at the beginning */ struct smb2_read_plain_req { -- cgit v1.1 From 1c0edc3633b56000e18d82fc241e3995ca18a69e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 18 Oct 2017 12:49:38 -0400 Subject: USB: core: fix out-of-bounds access bug in usb_get_bos_descriptor() Andrey used the syzkaller fuzzer to find an out-of-bounds memory access in usb_get_bos_descriptor(). The code wasn't checking that the next usb_dev_cap_header structure could fit into the remaining buffer space. This patch fixes the error and also reduces the bNumDeviceCaps field in the header to match the actual number of capabilities found, in cases where there are fewer than expected. Reported-by: Andrey Konovalov Signed-off-by: Alan Stern Tested-by: Andrey Konovalov CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 68b54bd..883549e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -960,10 +960,12 @@ int usb_get_bos_descriptor(struct usb_device *dev) for (i = 0; i < num; i++) { buffer += length; cap = (struct usb_dev_cap_header *)buffer; - length = cap->bLength; - if (total_len < length) + if (total_len < sizeof(*cap) || total_len < cap->bLength) { + dev->bos->desc->bNumDeviceCaps = i; break; + } + length = cap->bLength; total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { -- cgit v1.1 From 8a82dbf19129dde9e6fc9ab25a00dbc7569abe6a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 13:39:44 -0700 Subject: nvme-fc: fix iowait hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing iowait head initialization. Fix irqsave vs irq: wait_event_lock_irq() doesn't do irq save/restore Fixes: 36715cf4b366 ("nvme_fc: replace ioabort msleep loop with completion”) Cc: # 4.13 Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Tested-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index af075e9..8182b19 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2545,10 +2545,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) nvme_fc_abort_aen_ops(ctrl); /* wait for all io that had to be aborted */ - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); ctrl->flags &= ~FCCTRL_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -2760,6 +2760,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->rport = rport; ctrl->dev = lport->dev; ctrl->cnum = idx; + init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); kref_init(&ctrl->ref); -- cgit v1.1 From 17c4dc6eb7e1b2fb1ce6a52467e3be635224606e Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 16:39:22 -0700 Subject: nvme-fc: retry initial controller connections 3 times Currently, if a frame is lost of command fails as part of initial association create for a new controller, the new controller connection request will immediately fail. Add in an immediate 3 retry loop before giving up. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8182b19..be49d0f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2734,7 +2734,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + int ret, idx, retry; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -2826,9 +2826,37 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - ret = nvme_fc_create_association(ctrl); + /* + * It's possible that transactions used to create the association + * may fail. Examples: CreateAssociation LS or CreateIOConnection + * LS gets dropped/corrupted/fails; or a frame gets dropped or a + * command times out for one of the actions to init the controller + * (Connect, Get/Set_Property, Set_Features, etc). Many of these + * transport errors (frame drop, LS failure) inherently must kill + * the association. The transport is coded so that any command used + * to create the association (prior to a LIVE state transition + * while NEW or RECONNECTING) will fail if it completes in error or + * times out. + * + * As such: as the connect request was mostly likely due to a + * udev event that discovered the remote port, meaning there is + * not an admin or script there to restart if the connect + * request fails, retry the initial connection creation up to + * three times before giving up and declaring failure. + */ + for (retry = 0; retry < 3; retry++) { + ret = nvme_fc_create_association(ctrl); + if (!ret) + break; + } + if (ret) { + /* couldn't schedule retry - fail out */ + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); -- cgit v1.1 From f9cf2a64912d67c9cf49c316a0a0ada0ea7ed1da Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 18 Oct 2017 14:33:59 -0700 Subject: nvmet: synchronize sqhd update In testing target io in read write mix, we did indeed get into cases where sqhd didn't update properly and slowly missed enough updates to shutdown the queue. Protect the updating sqhd by using cmpxchg, and for that turn the sqhd field into a u32 so that cmpxchg works on it for all architectures. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 ++++++++++++--- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1b208be..645ba7e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -387,12 +387,21 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) static void __nvmet_req_complete(struct nvmet_req *req, u16 status) { + u32 old_sqhd, new_sqhd; + u16 sqhd; + if (status) nvmet_set_status(req, status); - if (req->sq->size) - req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size; - req->rsp->sq_head = cpu_to_le16(req->sq->sqhd); + if (req->sq->size) { + do { + old_sqhd = req->sq->sqhd; + new_sqhd = (old_sqhd + 1) % req->sq->size; + } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != + old_sqhd); + } + sqhd = req->sq->sqhd & 0x0000FFFF; + req->rsp->sq_head = cpu_to_le16(sqhd); req->rsp->sq_id = cpu_to_le16(req->sq->qid); req->rsp->command_id = req->cmd->common.command_id; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7b8e20a..87e429b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -74,7 +74,7 @@ struct nvmet_sq { struct percpu_ref ref; u16 qid; u16 size; - u16 sqhd; + u32 sqhd; struct completion free_done; struct completion confirm_done; }; -- cgit v1.1 From 1ac7db63333db1eeff901bfd6bbcd502b4634fa4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 17 Oct 2017 16:07:33 +0300 Subject: usb: hub: Allow reset retry for USB2 devices on connect bounce If the connect status change is set during reset signaling, but the status remains connected just retry port reset. This solves an issue with connecting a 90W HP Thunderbolt 3 dock with a Lenovo Carbon x1 (5th generation) which causes a 30min loop of a high speed device being re-discovererd before usb ports starts working. [...] [ 389.023845] usb 3-1: new high-speed USB device number 55 using xhci_hcd [ 389.491841] usb 3-1: new high-speed USB device number 56 using xhci_hcd [ 389.959928] usb 3-1: new high-speed USB device number 57 using xhci_hcd [...] This is caused by a high speed device that doesn't successfully go to the enabled state after the second port reset. Instead the connection bounces (connected, with connect status change), bailing out completely from enumeration just to restart from scratch. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1716332 Cc: Stable Signed-off-by: Mathias Nyman Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b5c7336..e9ce6bb 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2710,13 +2710,16 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (!(portstatus & USB_PORT_STAT_CONNECTION)) return -ENOTCONN; - /* bomb out completely if the connection bounced. A USB 3.0 - * connection may bounce if multiple warm resets were issued, + /* Retry if connect change is set but status is still connected. + * A USB 3.0 connection may bounce if multiple warm resets were issued, * but the device may have successfully re-connected. Ignore it. */ if (!hub_is_superspeed(hub->hdev) && - (portchange & USB_PORT_STAT_C_CONNECTION)) - return -ENOTCONN; + (portchange & USB_PORT_STAT_C_CONNECTION)) { + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_CONNECTION); + return -EAGAIN; + } if (!(portstatus & USB_PORT_STAT_ENABLE)) return -EBUSY; -- cgit v1.1 From 6afb10267c1692ada3a2903e31ea339917ad3ac0 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Mon, 2 Oct 2017 12:00:54 +0800 Subject: pinctrl/amd: fix masking of GPIO interrupts On Asus laptop models X505BA, X505BP, X542BA and X542BP, the i2c-hid touchpad (using a GPIO for interrupts) becomes unresponsive after a few minutes of usage, or after placing two fingers on the touchpad, which seems to have the effect of queuing up a large amount of input data to be transferred. When the touchpad is in unresponsive state, we observed that the GPIO level-triggered interrupt is still at it's active level, however the pinctrl-amd driver is not receiving/dispatching more interrupts at this point. After the initial interrupt arrives, amd_gpio_irq_mask() is called however we then see amd_gpio_irq_handler() being called repeatedly for the same irq; the interrupt mask is not taking effect because of the following sequence of events: - amd_gpio_irq_handler fires, reads and caches pin reg - amd_gpio_irq_handler calls generic_handle_irq() - During IRQ handling, amd_gpio_irq_mask() is called and modifies pin reg - amd_gpio_irq_handler clears interrupt by writing cached value The stale cached value written at the final stage undoes the masking. Fix this by re-reading the register before clearing the interrupt. I also spotted that the interrupt-clearing code can race against amd_gpio_irq_mask() / amd_gpio_irq_unmask(), so add locking there. Presumably this race was leading to the loss of interrupts. After these changes, the touchpad appears to be working fine. Signed-off-by: Daniel Drake Acked-by: Shah, Nehal-bakulchandra Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 3f6b34f..433af32 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -534,8 +534,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) continue; irq = irq_find_mapping(gc->irqdomain, irqnr + i); generic_handle_irq(irq); - /* Clear interrupt */ + + /* Clear interrupt. + * We must read the pin register again, in case the + * value was changed while executing + * generic_handle_irq() above. + */ + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + regval = readl(regs + i); writel(regval, regs + i); + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); ret = IRQ_HANDLED; } } -- cgit v1.1 From 59861701076b0cfac0b5eaa67a380fff6ab85b9e Mon Sep 17 00:00:00 2001 From: Dmitry Mastykin Date: Wed, 18 Oct 2017 17:21:02 +0300 Subject: pinctrl: mcp23s08: fix interrupt handling regression interrupt handling was broken with conversion to using regmap caching. cached_gpio value was updated by boolean status instead of gpio reading. Fixes: 8f38910ba4f6 ("pinctrl: mcp23s08: switch to regmap caching") Tested-by: Phil Reid Signed-off-by: Dmitry Mastykin Reviewed-by: Sebastian Reichel Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 3e40d42..9c950bbf 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -407,10 +407,10 @@ static int mcp23s08_get(struct gpio_chip *chip, unsigned offset) ret = mcp_read(mcp, MCP_GPIO, &status); if (ret < 0) status = 0; - else + else { + mcp->cached_gpio = status; status = !!(status & (1 << offset)); - - mcp->cached_gpio = status; + } mutex_unlock(&mcp->lock); return status; -- cgit v1.1 From b3885bd6edb41b91a0e3976469f72ae31bfb8d95 Mon Sep 17 00:00:00 2001 From: Hirofumi Nakagawa Date: Tue, 26 Sep 2017 03:09:53 +0900 Subject: ovl: add NULL check in ovl_alloc_inode This was detected by fault injection test Signed-off-by: Hirofumi Nakagawa Fixes: 13cf199d0088 ("ovl: allocate an ovl_inode struct") Cc: # v4.13 --- fs/overlayfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 092d150..f5738e9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -174,6 +174,9 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) { struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); + if (!oi) + return NULL; + oi->cache = NULL; oi->redirect = NULL; oi->version = 0; -- cgit v1.1 From 0ce5cdc9d79277e55c3d80bf7d2b1adea2752078 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Sep 2017 23:45:18 +0300 Subject: ovl: Return -ENOMEM if an allocation fails ovl_lookup() The error code is missing here so it means we return ERR_PTR(0) or NULL. The other error paths all return an error code so this probably should as well. Fixes: 02b69b284cd7 ("ovl: lookup redirects") Signed-off-by: Dan Carpenter Reviewed-by: Chandan Rajendra Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 654bea1..e081641 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -635,6 +635,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } if (d.redirect) { + err = -ENOMEM; upperredirect = kstrdup(d.redirect, GFP_KERNEL); if (!upperredirect) goto out_put_upper; -- cgit v1.1 From bd9f07590a17f3158b51fb869dca723f1f606bdc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 Oct 2017 16:00:30 +0300 Subject: nvme-rdma: Fix possible double free in reconnect flow The fact that we free the async event buffer in nvme_rdma_destroy_admin_queue can cause us to free it more than once because this happens in every reconnect attempt since commit 31fdf1840170. we rely on the queue state flags DELETING to avoid this for other resources. A more complete fix is to not destroy the admin/io queues unconditionally on every reconnect attempt, but its a bit more extensive and will go in the next release. Fixes: 31fdf1840170 ("nvme-rdma: reuse configure/destroy_admin_queue") Reported-by: Yi Zhang Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 92a03ff..4dbee89 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -571,6 +571,12 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; + if (nvme_rdma_queue_idx(queue) == 0) { + nvme_rdma_free_qe(queue->device->dev, + &queue->ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + } + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } @@ -739,8 +745,6 @@ out: static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); -- cgit v1.1 From f04b9cc87b5fc466b1b7231ba7b078e885956c5b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 Oct 2017 18:10:53 +0300 Subject: nvme-rdma: Fix error status return in tagset allocation failure We should make sure to escelate allocation failures to prevent a use-after-free in nvmf_create_ctrl. Fixes: b28a308ee777 ("nvme-rdma: move tagset allocation to a dedicated routine") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4dbee89..87bac27 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -769,8 +769,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); - if (IS_ERR(ctrl->ctrl.admin_tagset)) + if (IS_ERR(ctrl->ctrl.admin_tagset)) { + error = PTR_ERR(ctrl->ctrl.admin_tagset); goto out_free_queue; + } ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { @@ -850,8 +852,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) if (new) { ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false); - if (IS_ERR(ctrl->ctrl.tagset)) + if (IS_ERR(ctrl->ctrl.tagset)) { + ret = PTR_ERR(ctrl->ctrl.tagset); goto out_free_io_queues; + } ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); if (IS_ERR(ctrl->ctrl.connect_q)) { -- cgit v1.1 From 528fd3547bad0bdd31c8f987e5bd00c83df8af39 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Oct 2017 12:13:10 -0400 Subject: SUNRPC: Destroy transport from the system workqueue The transport may need to flush transport connect and receive tasks that are running on rpciod. In order to do so safely, we need to ensure that the caller of cancel_work_sync() etc is not itself running on rpciod. Do so by running the destroy task from the system workqueue. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1a39ad1..898485e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1445,6 +1445,23 @@ out: return xprt; } +static void xprt_destroy_cb(struct work_struct *work) +{ + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, task_cleanup); + + rpc_xprt_debugfs_unregister(xprt); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->backlog); + kfree(xprt->servername); + /* + * Tear down transport state and free the rpc_xprt + */ + xprt->ops->destroy(xprt); +} + /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy @@ -1454,22 +1471,19 @@ static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - /* Exclude transport connect/disconnect handlers */ + /* + * Exclude transport connect/disconnect handlers and autoclose + */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); del_timer_sync(&xprt->timer); - rpc_xprt_debugfs_unregister(xprt); - rpc_destroy_wait_queue(&xprt->binding); - rpc_destroy_wait_queue(&xprt->pending); - rpc_destroy_wait_queue(&xprt->sending); - rpc_destroy_wait_queue(&xprt->backlog); - cancel_work_sync(&xprt->task_cleanup); - kfree(xprt->servername); /* - * Tear down transport state and free the rpc_xprt + * Destroy sockets etc from the system workqueue so they can + * safely flush receive work running on rpciod. */ - xprt->ops->destroy(xprt); + INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb); + schedule_work(&xprt->task_cleanup); } static void xprt_destroy_kref(struct kref *kref) -- cgit v1.1 From b703798386fb7288d5a995bd2284a984a5e24f3c Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Thu, 19 Oct 2017 11:27:24 -0500 Subject: objtool: Fix memory leak in decode_instructions() When an error occurs before adding an allocated insn to the list, free it before returning. Signed-off-by: Kamalesh Babulal Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/336da800bf6070eae11f4e0a3b9ca64c27658114.1508430423.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a0c518e..c0e26ad 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -267,12 +267,13 @@ static int decode_instructions(struct objtool_file *file) &insn->immediate, &insn->stack_op); if (ret) - return ret; + goto err; if (!insn->type || insn->type > INSN_LAST) { WARN_FUNC("invalid instruction type %d", insn->sec, insn->offset, insn->type); - return -1; + ret = -1; + goto err; } hash_add(file->insn_hash, &insn->hash, insn->offset); @@ -296,6 +297,10 @@ static int decode_instructions(struct objtool_file *file) } return 0; + +err: + free(insn); + return ret; } /* -- cgit v1.1 From ce56a86e2ade45d052b3228cdfebe913a1ae7381 Mon Sep 17 00:00:00 2001 From: Craig Bergstrom Date: Thu, 19 Oct 2017 13:28:56 -0600 Subject: x86/mm: Limit mmap() of /dev/mem to valid physical addresses Currently, it is possible to mmap() any offset from /dev/mem. If a program mmaps() /dev/mem offsets outside of the addressable limits of a system, the page table can be corrupted by setting reserved bits. For example if you mmap() offset 0x0001000000000000 of /dev/mem on an x86_64 system with a 48-bit bus, the page fault handler will be called with error_code set to RSVD. The kernel then crashes with a page table corruption error. This change prevents this page table corruption on x86 by refusing to mmap offsets higher than the highest valid address in the system. Signed-off-by: Craig Bergstrom Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dsafonov@virtuozzo.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/20171019192856.39672-1-craigb@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 4 ++++ arch/x86/mm/mmap.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index c40a95c..322d25a 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -110,6 +110,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #endif +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + /** * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index a996798..320c623 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -174,3 +174,15 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } + +int valid_phys_addr_range(phys_addr_t addr, size_t count) +{ + return addr + count <= __pa(high_memory); +} + +int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) +{ + phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; + + return valid_phys_addr_range(addr, count); +} -- cgit v1.1 From 736f20a7060857ff569e9e9586ae6c1204a73e07 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 20 Oct 2017 15:06:34 +0800 Subject: ALSA: hda/realtek - Add support for ALC236/ALC3204 Add support for ALC236/ALC3204. Add headset mode support for ALC236/ALC3204. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ce7111..00fa802 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -327,6 +327,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0225: case 0x10ec0233: + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: case 0x10ec0282: @@ -911,6 +912,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0275, 0x1028, 0, "ALC3260" }, { 0x10ec0899, 0x1028, 0, "ALC3861" }, { 0x10ec0298, 0x1028, 0, "ALC3266" }, + { 0x10ec0236, 0x1028, 0, "ALC3204" }, { 0x10ec0256, 0x1028, 0, "ALC3246" }, { 0x10ec0225, 0x1028, 0, "ALC3253" }, { 0x10ec0295, 0x1028, 0, "ALC3254" }, @@ -3930,6 +3932,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) alc_process_coef_fw(codec, coef0255_1); alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, coef0256); alc_process_coef_fw(codec, coef0255); @@ -4028,6 +4031,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, }; switch (codec->core.vendor_id) { + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: alc_write_coef_idx(codec, 0x45, 0xc489); @@ -4160,6 +4164,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); break; + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4256,6 +4261,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0255: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, coef0256); break; @@ -4366,6 +4372,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0255: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, coef0256); break; @@ -4451,6 +4458,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) }; switch (codec->core.vendor_id) { + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4705,6 +4713,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) case 0x10ec0255: alc_process_coef_fw(codec, alc255fw); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, alc256fw); break; @@ -6806,6 +6815,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0255: spec->codec_variant = ALC269_TYPE_ALC255; break; + case 0x10ec0236: case 0x10ec0256: spec->codec_variant = ALC269_TYPE_ALC256; spec->shutup = alc256_shutup; @@ -7857,6 +7867,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269), HDA_CODEC_ENTRY(0x10ec0234, "ALC234", patch_alc269), HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0236, "ALC236", patch_alc269), HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269), HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269), HDA_CODEC_ENTRY(0x10ec0260, "ALC260", patch_alc260), -- cgit v1.1 From 66b83a4cdd3b73effdc285d1d66763c69ffe2ee8 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Mon, 9 Oct 2017 14:26:56 +0200 Subject: binder: call poll_wait() unconditionally. Because we're not guaranteed that subsequent calls to poll() will have a poll_table_struct parameter with _qproc set. When _qproc is not set, poll_wait() is a noop, and we won't be woken up correctly. Signed-off-by: Martijn Coenen Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0621a95..fddf76e 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3662,12 +3662,6 @@ static void binder_stat_br(struct binder_proc *proc, } } -static int binder_has_thread_work(struct binder_thread *thread) -{ - return !binder_worklist_empty(thread->proc, &thread->todo) || - thread->looper_need_return; -} - static int binder_put_node_cmd(struct binder_proc *proc, struct binder_thread *thread, void __user **ptrp, @@ -4297,12 +4291,9 @@ static unsigned int binder_poll(struct file *filp, binder_inner_proc_unlock(thread->proc); - if (binder_has_work(thread, wait_for_proc_work)) - return POLLIN; - poll_wait(filp, &thread->wait, wait); - if (binder_has_thread_work(thread)) + if (binder_has_work(thread, wait_for_proc_work)) return POLLIN; return 0; -- cgit v1.1 From eb39a7c0355393c5a8d930f342ad7a6231b552c4 Mon Sep 17 00:00:00 2001 From: David Kozub Date: Thu, 19 Oct 2017 22:57:02 +0200 Subject: clockevents/drivers/cs5535: Improve resilience to spurious interrupts The interrupt handler mfgpt_tick() is not robust versus spurious interrupts which happen before the clock event device is registered and fully initialized. The reason is that the safe guard against spurious interrupts solely checks for the clockevents shutdown state, but lacks a check for detached state. If the interrupt hits while the device is in detached state it passes the safe guard and dereferences the event handler call back which is NULL. Add the missing state check. Fixes: 8f9327cbb6e8 ("clockevents/drivers/cs5535: Migrate to new 'set-state' interface") Suggested-by: Thomas Gleixner Signed-off-by: David Kozub Signed-off-by: Thomas Gleixner Cc: Daniel Lezcano Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171020093103.3317F6004D@linux.fjfi.cvut.cz --- drivers/clocksource/cs5535-clockevt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index a1df588..1de8cac 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -117,7 +117,8 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) /* Turn off the clock (and clear the event) */ disable_timer(cs5535_event_clock); - if (clockevent_state_shutdown(&cs5535_clockevent)) + if (clockevent_state_detached(&cs5535_clockevent) || + clockevent_state_shutdown(&cs5535_clockevent)) return IRQ_HANDLED; /* Clear the counter */ -- cgit v1.1 From 9d35593b4f0b89ab0c194349c7d357b3b159e99a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 18 Oct 2017 02:08:40 -0700 Subject: vmbus: hvsock: add proper sync for vmbus_hvsock_device_unregister() Without the patch, vmbus_hvsock_device_unregister() can destroy the device prematurely when close() is called, and can cause NULl dereferencing or potential data loss (the last portion of the data stream may be dropped prematurely). Signed-off-by: Dexuan Cui Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 018d2e0..379b0df 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -937,7 +937,10 @@ void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) { BUG_ON(!is_hvsock_channel(channel)); - channel->rescind = true; + /* We always get a rescind msg when a connection is closed. */ + while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind)) + msleep(1); + vmbus_device_unregister(channel->device_obj); } EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); -- cgit v1.1 From a0c2baaf81bd53dc76fccdddc721ba7dbb62be21 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Fri, 20 Oct 2017 20:58:58 -0400 Subject: android: binder: Don't get mm from task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use binder_alloc struct's mm_struct rather than getting a reference to the mm struct through get_task_mm to avoid a potential deadlock between lru lock, task lock and dentry lock, since a thread can be holding the task lock and the dentry lock while trying to acquire the lru lock. Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 22 +++++++++------------- drivers/android/binder_alloc.h | 1 - 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 064f5e3..e12072b 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -215,17 +215,12 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } } - if (!vma && need_mm) - mm = get_task_mm(alloc->tsk); + if (!vma && need_mm && mmget_not_zero(alloc->vma_vm_mm)) + mm = alloc->vma_vm_mm; if (mm) { down_write(&mm->mmap_sem); vma = alloc->vma; - if (vma && mm != alloc->vma_vm_mm) { - pr_err("%d: vma mm and task mm mismatch\n", - alloc->pid); - vma = NULL; - } } if (!vma && need_mm) { @@ -720,6 +715,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, barrier(); alloc->vma = vma; alloc->vma_vm_mm = vma->vm_mm; + mmgrab(alloc->vma_vm_mm); return 0; @@ -795,6 +791,8 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) vfree(alloc->buffer); } mutex_unlock(&alloc->mutex); + if (alloc->vma_vm_mm) + mmdrop(alloc->vma_vm_mm); binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d buffers %d, pages %d\n", @@ -889,7 +887,6 @@ int binder_alloc_get_allocated_count(struct binder_alloc *alloc) void binder_alloc_vma_close(struct binder_alloc *alloc) { WRITE_ONCE(alloc->vma, NULL); - WRITE_ONCE(alloc->vma_vm_mm, NULL); } /** @@ -926,9 +923,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item, page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; vma = alloc->vma; if (vma) { - mm = get_task_mm(alloc->tsk); - if (!mm) - goto err_get_task_mm_failed; + if (!mmget_not_zero(alloc->vma_vm_mm)) + goto err_mmget; + mm = alloc->vma_vm_mm; if (!down_write_trylock(&mm->mmap_sem)) goto err_down_write_mmap_sem_failed; } @@ -963,7 +960,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, err_down_write_mmap_sem_failed: mmput_async(mm); -err_get_task_mm_failed: +err_mmget: err_page_already_freed: mutex_unlock(&alloc->mutex); err_get_alloc_mutex_failed: @@ -1002,7 +999,6 @@ struct shrinker binder_shrinker = { */ void binder_alloc_init(struct binder_alloc *alloc) { - alloc->tsk = current->group_leader; alloc->pid = current->group_leader->pid; mutex_init(&alloc->mutex); INIT_LIST_HEAD(&alloc->buffers); diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index a3a3602..2dd33b6 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -100,7 +100,6 @@ struct binder_lru_page { */ struct binder_alloc { struct mutex mutex; - struct task_struct *tsk; struct vm_area_struct *vma; struct mm_struct *vma_vm_mm; void *buffer; -- cgit v1.1 From ae65c8510f3319dfb2114cc48d476b81232e27b3 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Fri, 20 Oct 2017 20:58:59 -0400 Subject: android: binder: Fix null ptr dereference in debug msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't access next->data in kernel debug message when the next buffer is null. Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index e12072b..c2819a3 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -560,7 +560,7 @@ static void binder_delete_free_buffer(struct binder_alloc *alloc, binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: merge free, buffer %pK do not share page with %pK or %pK\n", alloc->pid, buffer->data, - prev->data, next->data); + prev->data, next ? next->data : NULL); binder_update_page_range(alloc, 0, buffer_start_page(buffer), buffer_start_page(buffer) + PAGE_SIZE, NULL); -- cgit v1.1 From 1f7c70d6b2bc5de301f30456621e1161fddf4242 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Oct 2017 16:06:52 +0200 Subject: cpu/hotplug: Reset node state after operation The recent rework of the cpu hotplug internals changed the usage of the per cpu state->node field, but missed to clean it up after usage. So subsequent hotplug operations use the stale pointer from a previous operation and hand it into the callback functions. The callbacks then dereference a pointer which either belongs to a different facility or points to freed and potentially reused memory. In either case data corruption and crashes are the obvious consequence. Reset the node and the last pointers in the per cpu state to NULL after the operation which set them has completed. Fixes: 96abb968549c ("smp/hotplug: Allow external multi-instance rollback") Reported-by: Tvrtko Ursulin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Boris Ostrovsky Cc: "Paul E. McKenney" Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710211606130.3213@nanos --- kernel/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index d851df2..04892a8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, __cpuhp_kick_ap(st); } + /* + * Clean up the leftovers so the next hotplug operation wont use stale + * data. + */ + st->node = st->last = NULL; return ret; } -- cgit v1.1 From b16918a5fd87983d421a7e2241a1314e877c4ea2 Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Thu, 19 Oct 2017 16:51:44 +0100 Subject: hwmon: (da9052) Increase sample rate when using TSI The TSI channel, which is usually used for touchscreen support, but can be used as 4 general purpose ADCs. When used as a touchscreen interface the touchscreen driver switches the device into 1ms sampling mode (rather than the default 10ms economy mode) as recommended by the manufacturer. When using the TSI channels as a general purpose ADC we are currently not doing this and testing suggests that this can result in ADC timeouts: [ 5827.198289] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5827.728293] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5993.808335] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5994.328441] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5994.848291] da9052 spi2.0: timeout waiting for ADC conversion interrupt Switching to the 1ms timing resolves this issue. Fixes: 4f16cab19a3d5 ("hwmon: da9052: Add support for TSI channel") Signed-off-by: Martyn Welch Acked-by: Steve Twiss Signed-off-by: Guenter Roeck --- drivers/hwmon/da9052-hwmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c index 97a62f5..a973eb6 100644 --- a/drivers/hwmon/da9052-hwmon.c +++ b/drivers/hwmon/da9052-hwmon.c @@ -477,6 +477,11 @@ static int da9052_hwmon_probe(struct platform_device *pdev) /* disable touchscreen features */ da9052_reg_write(hwmon->da9052, DA9052_TSI_CONT_A_REG, 0x00); + /* Sample every 1ms */ + da9052_reg_update(hwmon->da9052, DA9052_ADC_CONT_REG, + DA9052_ADCCONT_ADCMODE, + DA9052_ADCCONT_ADCMODE); + err = da9052_request_irq(hwmon->da9052, DA9052_IRQ_TSIREADY, "tsiready-irq", da9052_tsi_datardy_irq, hwmon); -- cgit v1.1 From 8b95f4f730cba02ef6febbdc4ca7e55ca045b00e Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 20 Oct 2017 15:07:41 +0800 Subject: drm/amd/powerplay: fix uninitialized variable refresh_rate was not initialized when program display gap. this patch can fix vce ring test failed when do S3 on Polaris10. bug: https://bugs.freedesktop.org/show_bug.cgi?id=103102 bug: https://bugzilla.kernel.org/show_bug.cgi?id=196615 Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c274323..b526f49 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -830,7 +830,7 @@ uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr) { uint32_t reference_clock, tmp; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; @@ -3948,10 +3948,9 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) uint32_t ref_clock; uint32_t refresh_rate = 0; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; - cgs_get_active_displays_info(hwmgr->device, &info); num_active_displays = info.display_count; @@ -3967,6 +3966,7 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); -- cgit v1.1 From bfc1168de949cd3e9ca18c3480b5085deff1ea7c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Oct 2017 12:47:31 +0200 Subject: x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't Some F14h machines have an erratum which, "under a highly specific and detailed set of internal timing conditions" can lead to skipping instructions and RIP corruption. Add the fix for those machines when their BIOS doesn't apply it or there simply isn't BIOS update for them. Tested-by: Signed-off-by: Borislav Petkov Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sherry Hurwitz Cc: Thomas Gleixner Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20171022104731.28249-1-bp@alien8.de Link: https://bugzilla.kernel.org/show_bug.cgi?id=197285 [ Added pr_info() that we activated the workaround. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_nb.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 458da85..6db28f1 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = { {} }; +#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704 + const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, @@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; @@ -402,11 +406,48 @@ void amd_flush_garts(void) } EXPORT_SYMBOL_GPL(amd_flush_garts); +static void __fix_erratum_688(void *info) +{ +#define MSR_AMD64_IC_CFG 0xC0011021 + + msr_set_bit(MSR_AMD64_IC_CFG, 3); + msr_set_bit(MSR_AMD64_IC_CFG, 14); +} + +/* Apply erratum 688 fix so machines without a BIOS fix work. */ +static __init void fix_erratum_688(void) +{ + struct pci_dev *F4; + u32 val; + + if (boot_cpu_data.x86 != 0x14) + return; + + if (!amd_northbridges.num) + return; + + F4 = node_to_amd_nb(0)->link; + if (!F4) + return; + + if (pci_read_config_dword(F4, 0x164, &val)) + return; + + if (val & BIT(2)) + return; + + on_each_cpu(__fix_erratum_688, NULL, 0); + + pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n"); +} + static __init int init_amd_nbs(void) { amd_cache_northbridges(); amd_cache_gart(); + fix_erratum_688(); + return 0; } -- cgit v1.1 From 09c3e01b255fe89eb1f1b7ded68b1b7d55e6d02b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 22 Oct 2017 11:42:29 -0700 Subject: Input: do not use property bits when generating module alias The commit 8724ecb07229 ("Input: allow matching device IDs on property bits") started using property bits when generating module aliases for input handlers, but did not adjust the generation of MODALIAS attribute on input device uevents, breaking automatic module loading. Given that no handler currently uses property bits in their module tables, let's revert this part of the commit for now. Reported-by: Damien Wyart Tested-by: Damien Wyart Fixes: 8724ecb07229 ("Input: allow matching device IDs on property bits") Signed-off-by: Dmitry Torokhov --- scripts/mod/devicetable-offsets.c | 1 - scripts/mod/file2alias.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index 812657a..e4d90e5 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -105,7 +105,6 @@ int main(void) DEVID_FIELD(input_device_id, sndbit); DEVID_FIELD(input_device_id, ffbit); DEVID_FIELD(input_device_id, swbit); - DEVID_FIELD(input_device_id, propbit); DEVID(eisa_device_id); DEVID_FIELD(eisa_device_id, sig); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index bc25898..29d6699 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -761,7 +761,7 @@ static void do_input(char *alias, sprintf(alias + strlen(alias), "%X,*", i); } -/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwXprX where X is comma-separated %02X. */ +/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */ static int do_input_entry(const char *filename, void *symval, char *alias) { @@ -779,7 +779,6 @@ static int do_input_entry(const char *filename, void *symval, DEF_FIELD_ADDR(symval, input_device_id, sndbit); DEF_FIELD_ADDR(symval, input_device_id, ffbit); DEF_FIELD_ADDR(symval, input_device_id, swbit); - DEF_FIELD_ADDR(symval, input_device_id, propbit); sprintf(alias, "input:"); @@ -817,9 +816,6 @@ static int do_input_entry(const char *filename, void *symval, sprintf(alias + strlen(alias), "w*"); if (flags & INPUT_DEVICE_ID_MATCH_SWBIT) do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); - sprintf(alias + strlen(alias), "pr*"); - if (flags & INPUT_DEVICE_ID_MATCH_PROPBIT) - do_input(alias, *propbit, 0, INPUT_DEVICE_ID_PROP_MAX); return 1; } ADD_TO_DEVTABLE("input", input_device_id, do_input_entry); -- cgit v1.1 From 3a91d29f20276fa7cd4d0c9c7f3e78b30708159d Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Sun, 22 Oct 2017 13:13:16 +0900 Subject: tcp: do tcp_mstamp_refresh before retransmits on TSQ handler When retransmission on TSQ handler was introduced in the commit f9616c35a0d7 ("tcp: implement TSQ for retransmits"), the retransmitted skbs' timestamps were updated on the actual transmission. In the later commit 385e20706fac ("tcp: use tp->tcp_mstamp in output path"), it stops being done so. In the commit, the comment says "We try to refresh tp->tcp_mstamp only when necessary", and at present tcp_tsq_handler and tcp_v4_mtu_reduced applies to this. About the latter, it's okay since it's rare enough. About the former, even though possible retransmissions on the tasklet comes just after the destructor run in NET_RX softirq handling, the time between them could be nonnegligibly large to the extent that tcp_rack_advance or rto rearming be affected if other (remaining) RX, BLOCK and (preceding) TASKLET sofirq handlings are unexpectedly heavy. So in the same way as tcp_write_timer_handler does, doing tcp_mstamp_refresh ensures the accuracy of algorithms relying on it. Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") Signed-off-by: Koichiro Den Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bc9e46..973befc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -739,8 +739,10 @@ static void tcp_tsq_handler(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->lost_out > tp->retrans_out && - tp->snd_cwnd > tcp_packets_in_flight(tp)) + tp->snd_cwnd > tcp_packets_in_flight(tp)) { + tcp_mstamp_refresh(tp); tcp_xmit_retransmit_queue(sk); + } tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); -- cgit v1.1 From a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Oct 2017 12:33:57 -0700 Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req() This patch fixes the following lockdep splat in inet_csk_route_req() lockdep_rcu_suspicious inet_csk_route_req tcp_v4_send_synack tcp_rtx_synack inet_rtx_syn_ack tcp_fastopen_synack_time tcp_retransmit_timer tcp_write_timer_handler tcp_write_timer call_timer_fn Thread running inet_csk_route_req() owns a reference on the request socket, so we have the guarantee ireq->ireq_opt wont be changed or freed. lockdep can enforce this invariant for us. Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5ec9136..18cd2ea 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -543,7 +543,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = rcu_dereference(ireq->ireq_opt); + opt = rcu_dereference_protected(ireq->ireq_opt, + refcount_read(&req->rsk_refcnt) > 0); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), -- cgit v1.1 From 1137b5e2529a8f5ca8ee709288ecba3e68044df2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 19 Oct 2017 20:51:10 +0800 Subject: ipsec: Fix aborted xfrm policy dump crash An independent security researcher, Mohamed Ghannam, has reported this vulnerability to Beyond Security's SecuriTeam Secure Disclosure program. The xfrm_dump_policy_done function expects xfrm_dump_policy to have been called at least once or it will crash. This can be triggered if a dump fails because the target socket's receive buffer is full. This patch fixes it by using the cb->start mechanism to ensure that the initialisation is always done regardless of the buffer situation. Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b997f13..e44a0fe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1693,32 +1693,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr static int xfrm_dump_policy_done(struct netlink_callback *cb) { - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct net *net = sock_net(cb->skb->sk); xfrm_policy_walk_done(walk, net); return 0; } +static int xfrm_dump_policy_start(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; + + BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args)); + + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + return 0; +} + static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct xfrm_dump_info info; - BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > - sizeof(cb->args) - sizeof(cb->args[0])); - info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - if (!cb->args[0]) { - cb->args[0] = 1; - xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); - } - (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; @@ -2474,6 +2476,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); const struct nla_policy *nla_pol; @@ -2487,6 +2490,7 @@ static const struct xfrm_link { [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, + .start = xfrm_dump_policy_start, .dump = xfrm_dump_policy, .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, @@ -2539,6 +2543,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, { struct netlink_dump_control c = { + .start = link->start, .dump = link->dump, .done = link->done, }; -- cgit v1.1 From 8fe8ffb12c81b36877984274db184953c337db73 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Oct 2017 11:46:45 -0700 Subject: scsi: Suppress a kernel warning in case the prep function returns BLKPREP_DEFER The legacy block layer handles requests as follows: - If the prep function returns BLKPREP_OK, let blk_peek_request() return the pointer to that request. - If the prep function returns BLKPREP_DEFER, keep the RQF_STARTED flag and retry calling the prep function later. - If the prep function returns BLKPREP_KILL or BLKPREP_INVALID, end the request. In none of these cases it is correct to clear the SCMD_INITIALIZED flag from inside scsi_prep_fn(). Since scsi_prep_fn() already guarantees that scsi_init_command() will be called once even if scsi_prep_fn() is called multiple times, remove the code that clears SCMD_INITIALIZED from scsi_prep_fn(). The scsi-mq code handles requests as follows: - If scsi_mq_prep_fn() returns BLKPREP_OK, set the RQF_DONTPREP flag and submit the request to the SCSI LLD. - If scsi_mq_prep_fn() returns BLKPREP_DEFER, call blk_mq_delay_run_hw_queue() and return BLK_STS_RESOURCE. - If the prep function returns BLKPREP_KILL or BLKPREP_INVALID, call scsi_mq_uninit_cmd() and let the blk-mq core end the request. In none of these cases scsi_mq_prep_fn() should clear the SCMD_INITIALIZED flag. Hence remove the code from scsi_mq_prep_fn() function that clears that flag. This patch avoids that the following warning is triggered when using the legacy block layer: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 4198 at drivers/scsi/scsi_lib.c:654 scsi_end_request+0x1de/0x220 CPU: 1 PID: 4198 Comm: mkfs.f2fs Not tainted 4.14.0-rc5+ #1 task: ffff91c147a4b800 task.stack: ffffb282c37b8000 RIP: 0010:scsi_end_request+0x1de/0x220 Call Trace: scsi_io_completion+0x204/0x5e0 scsi_finish_command+0xce/0xe0 scsi_softirq_done+0x126/0x130 blk_done_softirq+0x6e/0x80 __do_softirq+0xcf/0x2a8 irq_exit+0xab/0xb0 do_IRQ+0x7b/0xc0 common_interrupt+0x90/0x90 RIP: 0010:_raw_spin_unlock_irqrestore+0x9/0x10 __test_set_page_writeback+0xc7/0x2c0 __block_write_full_page+0x158/0x3b0 block_write_full_page+0xc4/0xd0 blkdev_writepage+0x13/0x20 __writepage+0x12/0x40 write_cache_pages+0x204/0x500 generic_writepages+0x48/0x70 blkdev_writepages+0x9/0x10 do_writepages+0x34/0xc0 __filemap_fdatawrite_range+0x6c/0x90 file_write_and_wait_range+0x31/0x90 blkdev_fsync+0x16/0x40 vfs_fsync_range+0x44/0xa0 do_fsync+0x38/0x60 SyS_fsync+0xb/0x10 entry_SYSCALL_64_fastpath+0x13/0x94 ---[ end trace 86e8ef85a4a6c1d1 ]--- Fixes: commit 64104f703212 ("scsi: Call scsi_initialize_rq() for filesystem requests") Signed-off-by: Bart Van Assche Cc: Damien Le Moal Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9cf6a80..ad3ea24 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1379,8 +1379,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) ret = scsi_setup_cmnd(sdev, req); out: - if (ret != BLKPREP_OK) - cmd->flags &= ~SCMD_INITIALIZED; return scsi_prep_return(q, req, ret); } @@ -1900,7 +1898,6 @@ static int scsi_mq_prep_fn(struct request *req) struct scsi_device *sdev = req->q->queuedata; struct Scsi_Host *shost = sdev->host; struct scatterlist *sg; - int ret; scsi_init_command(sdev, cmd); @@ -1934,10 +1931,7 @@ static int scsi_mq_prep_fn(struct request *req) blk_mq_start_request(req); - ret = scsi_setup_cmnd(sdev, req); - if (ret != BLK_STS_OK) - cmd->flags &= ~SCMD_INITIALIZED; - return ret; + return scsi_setup_cmnd(sdev, req); } static void scsi_mq_done(struct scsi_cmnd *cmd) -- cgit v1.1 From bb176f67090ca54869fc1262c913aa69d2ede070 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Oct 2017 06:49:47 -0400 Subject: Linux 4.14-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 46bfb0e..01875d6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.1 From 98990a33b77dda9babf91cb235654f6729e5702e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 20 Oct 2017 11:21:33 -0500 Subject: x86/entry: Fix idtentry unwind hint This fixes the following ORC warning in the 'int3' entry code: WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b The ORC metadata had the wrong stack offset for the iret registers. Their location on the stack is dependent on whether the exception has an error code. Reported-and-tested-by: Andrei Vagin Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations") Link: http://lkml.kernel.org/r/931d57f0551ed7979d5e7e05370d445c8e5137f8.1508516398.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4916725..f6cdb7a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -808,7 +808,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) - UNWIND_HINT_IRET_REGS offset=8 + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ .if \shift_ist != -1 && \paranoid == 0 -- cgit v1.1 From 58c3862b521ead4f69a24ef009a679cb3c519620 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 20 Oct 2017 11:21:34 -0500 Subject: x86/unwind: Show function name+offset in ORC error messages Improve the warning messages to show the relevant function name+offset. This makes it much easier to diagnose problems with the ORC metadata. Before: WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b After: WARNING: can't dereference iret registers at ffff880178f5ffe0 for ip int3+0x5b/0x60 Reported-by: Andrei Vagin Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Link: http://lkml.kernel.org/r/6bada6b9eac86017e16bd79e1e77877935cb50bb.1508516398.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 570b70d..b95007e 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip) idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; if (unlikely((idx >= lookup_num_blocks-1))) { - orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n", - idx, lookup_num_blocks, ip); + orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", + idx, lookup_num_blocks, (void *)ip); return NULL; } @@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip) if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || (__start_orc_unwind + stop > __stop_orc_unwind))) { - orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n", - idx, lookup_num_blocks, start, stop, ip); + orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", + idx, lookup_num_blocks, start, stop, (void *)ip); return NULL; } @@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_R10: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg R10 at ip %p\n", + orc_warn("missing regs for base reg R10 at ip %pB\n", (void *)state->ip); goto done; } @@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_R13: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg R13 at ip %p\n", + orc_warn("missing regs for base reg R13 at ip %pB\n", (void *)state->ip); goto done; } @@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_DI: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg DI at ip %p\n", + orc_warn("missing regs for base reg DI at ip %pB\n", (void *)state->ip); goto done; } @@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_DX: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg DX at ip %p\n", + orc_warn("missing regs for base reg DX at ip %pB\n", (void *)state->ip); goto done; } @@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown SP base reg %d for ip %p\n", + orc_warn("unknown SP base reg %d for ip %pB\n", orc->sp_reg, (void *)state->ip); goto done; } @@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { - orc_warn("can't dereference registers at %p for ip %p\n", + orc_warn("can't dereference registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; } @@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_TYPE_REGS_IRET: if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { - orc_warn("can't dereference iret registers at %p for ip %p\n", + orc_warn("can't dereference iret registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; } @@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown .orc_unwind entry type %d\n", orc->type); + orc_warn("unknown .orc_unwind entry type %d for ip %pB\n", + orc->type, (void *)orig_ip); break; } @@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown BP base reg %d for ip %p\n", + orc_warn("unknown BP base reg %d for ip %pB\n", orc->bp_reg, (void *)orig_ip); goto done; } @@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state) if (state->stack_info.type == prev_type && on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) && state->sp <= prev_sp) { - orc_warn("stack going in the wrong direction? ip=%p\n", + orc_warn("stack going in the wrong direction? ip=%pB\n", (void *)orig_ip); goto done; } -- cgit v1.1 From 88796e7e5c457cae72833196cb98e6895dd107e2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 20 Oct 2017 10:13:46 -0700 Subject: sched/swait: Document it clearly that the swait facilities are special and shouldn't be used We currently welcome using swait over wait whenever possible because it is a slimmer data structure. However, Linus has made it very clear that he does not want this used, unless under very specific RT scenarios (such as current users). Update the comments before kernel hipsters start thinking swait is the cool thing to do. Signed-off-by: Davidlohr Bueso Acked-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: wagi@monom.org Link: http://lkml.kernel.org/r/20171020171346.24445-1-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/swait.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/include/linux/swait.h b/include/linux/swait.h index 73e97a0..cf30f502 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -9,13 +9,16 @@ /* * Simple wait queues * - * While these are very similar to the other/complex wait queues (wait.h) the - * most important difference is that the simple waitqueue allows for - * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold - * times. + * While these are very similar to regular wait queues (wait.h) the most + * important difference is that the simple waitqueue allows for deterministic + * behaviour -- IOW it has strictly bounded IRQ and lock hold times. * - * In order to make this so, we had to drop a fair number of features of the - * other waitqueue code; notably: + * Mainly, this is accomplished by two things. Firstly not allowing swake_up_all + * from IRQ disabled, and dropping the lock upon every wakeup, giving a higher + * priority task a chance to run. + * + * Secondly, we had to drop a fair number of features of the other waitqueue + * code; notably: * * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right @@ -24,12 +27,14 @@ * - the exclusive mode; because this requires preserving the list order * and this is hard. * - * - custom wake functions; because you cannot give any guarantees about - * random code. - * - * As a side effect of this; the data structures are slimmer. + * - custom wake callback functions; because you cannot give any guarantees + * about random code. This also allows swait to be used in RT, such that + * raw spinlock can be used for the swait queue head. * - * One would recommend using this wait queue where possible. + * As a side effect of these; the data structures are slimmer albeit more ad-hoc. + * For all the above, note that simple wait queues should _only_ be used under + * very specific realtime constraints -- it is best to stick with the regular + * wait queues in most cases. */ struct task_struct; -- cgit v1.1 From 83beee5c88a6c71ded70e2eef5ca7406a02605cc Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 4 Sep 2017 22:37:21 -0700 Subject: platform/x86: intel_pmc_ipc: Use devm_* calls in driver probe function This patch cleans up unnecessary free/alloc calls in ipc_plat_probe(), ipc_pci_probe() and ipc_plat_get_res() functions by using devm_* calls. This patch also adds proper error handling for failure cases in ipc_pci_probe() function. Signed-off-by: Kuppuswamy Sathyanarayanan [andy: fixed style issues, missed devm_free_irq(), removed unnecessary log message] Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_pmc_ipc.c | 94 +++++++++++------------------------- 1 file changed, 28 insertions(+), 66 deletions(-) diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index bb792a5..751b121 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -480,52 +480,39 @@ static irqreturn_t ioc(int irq, void *dev_id) static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - resource_size_t pci_resource; + struct intel_pmc_ipc_dev *pmc = &ipcdev; int ret; - int len; - ipcdev.dev = &pci_dev_get(pdev)->dev; - ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; + /* Only one PMC is supported */ + if (pmc->dev) + return -EBUSY; - ret = pci_enable_device(pdev); + pmc->irq_mode = IPC_TRIGGER_MODE_IRQ; + + ret = pcim_enable_device(pdev); if (ret) return ret; - ret = pci_request_regions(pdev, "intel_pmc_ipc"); + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); if (ret) return ret; - pci_resource = pci_resource_start(pdev, 0); - len = pci_resource_len(pdev, 0); - if (!pci_resource || !len) { - dev_err(&pdev->dev, "Failed to get resource\n"); - return -ENOMEM; - } + init_completion(&pmc->cmd_complete); - init_completion(&ipcdev.cmd_complete); + pmc->ipc_base = pcim_iomap_table(pdev)[0]; - if (request_irq(pdev->irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) { + ret = devm_request_irq(&pdev->dev, pdev->irq, ioc, 0, "intel_pmc_ipc", + pmc); + if (ret) { dev_err(&pdev->dev, "Failed to request irq\n"); - return -EBUSY; + return ret; } - ipcdev.ipc_base = ioremap_nocache(pci_resource, len); - if (!ipcdev.ipc_base) { - dev_err(&pdev->dev, "Failed to ioremap ipc base\n"); - free_irq(pdev->irq, &ipcdev); - ret = -ENOMEM; - } + pmc->dev = &pdev->dev; - return ret; -} + pci_set_drvdata(pdev, pmc); -static void ipc_pci_remove(struct pci_dev *pdev) -{ - free_irq(pdev->irq, &ipcdev); - pci_release_regions(pdev); - pci_dev_put(pdev); - iounmap(ipcdev.ipc_base); - ipcdev.dev = NULL; + return 0; } static const struct pci_device_id ipc_pci_ids[] = { @@ -540,7 +527,6 @@ static struct pci_driver ipc_pci_driver = { .name = "intel_pmc_ipc", .id_table = ipc_pci_ids, .probe = ipc_pci_probe, - .remove = ipc_pci_remove, }; static ssize_t intel_pmc_ipc_simple_cmd_store(struct device *dev, @@ -850,17 +836,12 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = PLAT_RESOURCE_IPC_SIZE + PLAT_RESOURCE_GCR_SIZE; + res->end = res->start + size - 1; + + addr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(addr)) + return PTR_ERR(addr); - if (!request_mem_region(res->start, size, pdev->name)) { - dev_err(&pdev->dev, "Failed to request ipc resource\n"); - return -EBUSY; - } - addr = ioremap_nocache(res->start, size); - if (!addr) { - dev_err(&pdev->dev, "I/O memory remapping failed\n"); - release_mem_region(res->start, size); - return -ENOMEM; - } ipcdev.ipc_base = addr; ipcdev.gcr_mem_base = addr + PLAT_RESOURCE_GCR_OFFSET; @@ -917,7 +898,6 @@ MODULE_DEVICE_TABLE(acpi, ipc_acpi_ids); static int ipc_plat_probe(struct platform_device *pdev) { - struct resource *res; int ret; ipcdev.dev = &pdev->dev; @@ -939,11 +919,11 @@ static int ipc_plat_probe(struct platform_device *pdev) ret = ipc_create_pmc_devices(); if (ret) { dev_err(&pdev->dev, "Failed to create pmc devices\n"); - goto err_device; + return ret; } - if (request_irq(ipcdev.irq, ioc, IRQF_NO_SUSPEND, - "intel_pmc_ipc", &ipcdev)) { + if (devm_request_irq(&pdev->dev, ipcdev.irq, ioc, IRQF_NO_SUSPEND, + "intel_pmc_ipc", &ipcdev)) { dev_err(&pdev->dev, "Failed to request irq\n"); ret = -EBUSY; goto err_irq; @@ -960,40 +940,22 @@ static int ipc_plat_probe(struct platform_device *pdev) return 0; err_sys: - free_irq(ipcdev.irq, &ipcdev); + devm_free_irq(&pdev->dev, ipcdev.irq, &ipcdev); err_irq: platform_device_unregister(ipcdev.tco_dev); platform_device_unregister(ipcdev.punit_dev); platform_device_unregister(ipcdev.telemetry_dev); -err_device: - iounmap(ipcdev.ipc_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, - PLAT_RESOURCE_IPC_INDEX); - if (res) { - release_mem_region(res->start, - PLAT_RESOURCE_IPC_SIZE + - PLAT_RESOURCE_GCR_SIZE); - } + return ret; } static int ipc_plat_remove(struct platform_device *pdev) { - struct resource *res; - sysfs_remove_group(&pdev->dev.kobj, &intel_ipc_group); - free_irq(ipcdev.irq, &ipcdev); + devm_free_irq(&pdev->dev, ipcdev.irq, &ipcdev); platform_device_unregister(ipcdev.tco_dev); platform_device_unregister(ipcdev.punit_dev); platform_device_unregister(ipcdev.telemetry_dev); - iounmap(ipcdev.ipc_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, - PLAT_RESOURCE_IPC_INDEX); - if (res) { - release_mem_region(res->start, - PLAT_RESOURCE_IPC_SIZE + - PLAT_RESOURCE_GCR_SIZE); - } ipcdev.dev = NULL; return 0; } -- cgit v1.1 From 6687aeb9cd3d40904d1f9e884d2145603c23adfa Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Sat, 7 Oct 2017 15:19:51 -0700 Subject: platform/x86: intel_pmc_ipc: Use spin_lock to protect GCR updates Currently, update_no_reboot_bit() function implemented in this driver uses mutex_lock() to protect its register updates. But this function is called with in atomic context in iTCO_wdt_start() and iTCO_wdt_stop() functions in iTCO_wdt.c driver, which in turn causes "sleeping into atomic context" issue. This patch fixes this issue by replacing the mutex_lock() with spin_lock() to protect the GCR read/write/update APIs. Fixes: 9d855d4 ("platform/x86: intel_pmc_ipc: Fix iTCO_wdt GCS memory mapping failure") Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_pmc_ipc.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index 751b121..e03fa314 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -131,6 +132,7 @@ static struct intel_pmc_ipc_dev { /* gcr */ void __iomem *gcr_mem_base; bool has_gcr_regs; + spinlock_t gcr_lock; /* punit */ struct platform_device *punit_dev; @@ -225,17 +227,17 @@ int intel_pmc_gcr_read(u32 offset, u32 *data) { int ret; - mutex_lock(&ipclock); + spin_lock(&ipcdev.gcr_lock); ret = is_gcr_valid(offset); if (ret < 0) { - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return ret; } *data = readl(ipcdev.gcr_mem_base + offset); - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return 0; } @@ -255,17 +257,17 @@ int intel_pmc_gcr_write(u32 offset, u32 data) { int ret; - mutex_lock(&ipclock); + spin_lock(&ipcdev.gcr_lock); ret = is_gcr_valid(offset); if (ret < 0) { - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return ret; } writel(data, ipcdev.gcr_mem_base + offset); - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return 0; } @@ -287,7 +289,7 @@ int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) u32 new_val; int ret = 0; - mutex_lock(&ipclock); + spin_lock(&ipcdev.gcr_lock); ret = is_gcr_valid(offset); if (ret < 0) @@ -309,7 +311,7 @@ int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) } gcr_ipc_unlock: - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return ret; } EXPORT_SYMBOL_GPL(intel_pmc_gcr_update); @@ -489,6 +491,8 @@ static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pmc->irq_mode = IPC_TRIGGER_MODE_IRQ; + spin_lock_init(&ipcdev.gcr_lock); + ret = pcim_enable_device(pdev); if (ret) return ret; @@ -903,6 +907,7 @@ static int ipc_plat_probe(struct platform_device *pdev) ipcdev.dev = &pdev->dev; ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; init_completion(&ipcdev.cmd_complete); + spin_lock_init(&ipcdev.gcr_lock); ipcdev.irq = platform_get_irq(pdev, 0); if (ipcdev.irq < 0) { -- cgit v1.1 From d87e47e13a9b347801be08da81f16ae65f1eda0f Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 19 Oct 2017 11:17:05 +0800 Subject: kbuild doc: a bundle of fixes on makefiles.txt It does several fixes: 1. move the displaced ld example to its reasonable place. 2. add new example for command gzip. 3. fix 2 number errors. 4. fix format of chapter 7.x, make it looks the same as other chapters. Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.txt | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 329e740..f6f8038 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -1108,14 +1108,6 @@ When kbuild executes, the following steps are followed (roughly): ld Link target. Often, LDFLAGS_$@ is used to set specific options to ld. - objcopy - Copy binary. Uses OBJCOPYFLAGS usually specified in - arch/$(ARCH)/Makefile. - OBJCOPYFLAGS_$@ may be used to set additional options. - - gzip - Compress target. Use maximum compression to compress target. - Example: #arch/x86/boot/Makefile LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary @@ -1139,6 +1131,19 @@ When kbuild executes, the following steps are followed (roughly): resulting in the target file being recompiled for no obvious reason. + objcopy + Copy binary. Uses OBJCOPYFLAGS usually specified in + arch/$(ARCH)/Makefile. + OBJCOPYFLAGS_$@ may be used to set additional options. + + gzip + Compress target. Use maximum compression to compress target. + + Example: + #arch/x86/boot/compressed/Makefile + $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE + $(call if_changed,gzip) + dtc Create flattened device tree blob object suitable for linking into vmlinux. Device tree blobs linked into vmlinux are placed @@ -1219,7 +1224,7 @@ When kbuild executes, the following steps are followed (roughly): that may be shared between individual architectures. The recommended approach how to use a generic header file is to list the file in the Kbuild file. - See "7.3 generic-y" for further info on syntax etc. + See "7.2 generic-y" for further info on syntax etc. --- 6.11 Post-link pass @@ -1254,13 +1259,13 @@ A Kbuild file may be defined under arch//include/uapi/asm/ and arch//include/asm/ to list asm files coming from asm-generic. See subsequent chapter for the syntax of the Kbuild file. - --- 7.1 no-export-headers +--- 7.1 no-export-headers no-export-headers is essentially used by include/uapi/linux/Kbuild to avoid exporting specific headers (e.g. kvm.h) on architectures that do not support it. It should be avoided as much as possible. - --- 7.2 generic-y +--- 7.2 generic-y If an architecture uses a verbatim copy of a header from include/asm-generic then this is listed in the file @@ -1287,7 +1292,7 @@ See subsequent chapter for the syntax of the Kbuild file. Example: termios.h #include - --- 7.3 generated-y +--- 7.3 generated-y If an architecture generates other header files alongside generic-y wrappers, generated-y specifies them. @@ -1299,7 +1304,7 @@ See subsequent chapter for the syntax of the Kbuild file. #arch/x86/include/asm/Kbuild generated-y += syscalls_32.h - --- 7.5 mandatory-y +--- 7.4 mandatory-y mandatory-y is essentially used by include/uapi/asm-generic/Kbuild.asm to define the minimum set of headers that must be exported in -- cgit v1.1 From bb3f38c3c5b759163e09b9152629cc789731de47 Mon Sep 17 00:00:00 2001 From: David Lin Date: Fri, 20 Oct 2017 14:09:13 -0700 Subject: kbuild: clang: fix build failures with sparse check We should avoid using the space character when passing arguments to clang, because static code analysis check tool such as sparse may misinterpret the arguments followed by spaces as build targets hence cause the build to fail. Signed-off-by: David Lin Signed-off-by: Masahiro Yamada --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d5ed3ce..8593c40 100644 --- a/Makefile +++ b/Makefile @@ -697,11 +697,11 @@ KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) endif ifneq ($(GCC_TOOLCHAIN),) -CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) +CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -- cgit v1.1 From 942491c9e6d631c012f3c4ea8e7777b0b02edeab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Oct 2017 18:31:50 -0700 Subject: xfs: fix AIM7 regression Apparently our current rwsem code doesn't like doing the trylock, then lock for real scheme. So change our read/write methods to just do the trylock for the RWF_NOWAIT case. This fixes a ~25% regression in AIM7. Fixes: 91f9943e ("fs: support RWF_NOWAIT for buffered reads") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 56d0e52..6526ef0 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -237,11 +237,13 @@ xfs_file_dax_read( if (!count) return 0; /* skip atime */ - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) return -EAGAIN; + } else { xfs_ilock(ip, XFS_IOLOCK_SHARED); } + ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -259,9 +261,10 @@ xfs_file_buffered_aio_read( trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) return -EAGAIN; + } else { xfs_ilock(ip, XFS_IOLOCK_SHARED); } ret = generic_file_read_iter(iocb, to); @@ -552,9 +555,10 @@ xfs_file_dio_aio_write( iolock = XFS_IOLOCK_SHARED; } - if (!xfs_ilock_nowait(ip, iolock)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, iolock)) return -EAGAIN; + } else { xfs_ilock(ip, iolock); } @@ -606,9 +610,10 @@ xfs_file_dax_write( size_t count; loff_t pos; - if (!xfs_ilock_nowait(ip, iolock)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, iolock)) return -EAGAIN; + } else { xfs_ilock(ip, iolock); } -- cgit v1.1 From 9d11b06638f6aa30d099090e6b8a540c558295ac Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Mon, 23 Oct 2017 18:10:23 +0800 Subject: drivers/net/usb: add device id for TP-LINK UE300 USB 3.0 Ethernet This product is named 'TP-LINK USB 3.0 Gigabit Ethernet Network Adapter (Model No.is UE300)'. It uses chip RTL8153 and works with driver drivers/net/usb/r8152.c Signed-off-by: Ran Wang Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 8 ++++++++ drivers/net/usb/r8152.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 52ea80b..5529bd1 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -561,6 +561,7 @@ static const struct driver_info wwan_info = { #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e #define UBLOX_VENDOR_ID 0x1546 +#define TPLINK_VENDOR_ID 0x2357 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -813,6 +814,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, + /* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, 0x0601, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 941ece0..d51d9ab 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -615,6 +615,7 @@ enum rtl8152_flags { #define VENDOR_ID_LENOVO 0x17ef #define VENDOR_ID_LINKSYS 0x13b1 #define VENDOR_ID_NVIDIA 0x0955 +#define VENDOR_ID_TPLINK 0x2357 #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -5319,6 +5320,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, + {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, {} }; -- cgit v1.1 From 07f37efdaa3fa327ecbfd519110bc6bd0c2582cc Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Mon, 23 Oct 2017 17:16:41 +0200 Subject: cdc_ether: flag the Huawei ME906/ME909 as WWAN The Huawei ME906 (12d1:15c1) comes with a standard ECM interface that requires management via AT commands sent over one of the control TTYs (e.g. connected with AT^NDISDUP). Signed-off-by: Aleksander Morgado Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 5529bd1..3e7a3ac 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -872,6 +872,12 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (kernel_ulong_t)&wwan_info, }, { + /* Huawei ME906 and ME909 */ + USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x15c1, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { /* ZTE modules */ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, -- cgit v1.1 From 822eaf7cfb7c4783375bceadbc7651137346ac00 Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Mon, 23 Oct 2017 15:24:29 +0200 Subject: net: mvpp2: fix TSO headers allocation and management TSO headers are managed with txq index and therefore should be aligned with the txq size, not with the aggregated txq size. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Reported-by: Marc Zyngier Signed-off-by: Yan Markman Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 9c86cb7..72e43d8 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5609,7 +5609,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->tso_headers = dma_alloc_coherent(port->dev->dev.parent, - MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE, + txq_pcpu->size * TSO_HEADER_SIZE, &txq_pcpu->tso_headers_dma, GFP_KERNEL); if (!txq_pcpu->tso_headers) @@ -5623,7 +5623,7 @@ cleanup: kfree(txq_pcpu->buffs); dma_free_coherent(port->dev->dev.parent, - MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + txq_pcpu->size * TSO_HEADER_SIZE, txq_pcpu->tso_headers, txq_pcpu->tso_headers_dma); } @@ -5647,7 +5647,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, kfree(txq_pcpu->buffs); dma_free_coherent(port->dev->dev.parent, - MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + txq_pcpu->size * TSO_HEADER_SIZE, txq_pcpu->tso_headers, txq_pcpu->tso_headers_dma); } -- cgit v1.1 From 20920267885218fda08dc12c7d3814938ab15b54 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 23 Oct 2017 15:24:30 +0200 Subject: net: mvpp2: do not unmap TSO headers buffers The TSO header buffers are coming from a per cpu pool and should not be unmapped as they are reused. The PPv2 driver was unmapping all descriptors buffers unconditionally. This patch fixes this by checking the buffers dma addresses before unmapping them, and by not unmapping those who are located in the TSO header pool. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 72e43d8..3de05a8 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1167,6 +1167,11 @@ struct mvpp2_bm_pool { u32 port_map; }; +#define IS_TSO_HEADER(txq_pcpu, addr) \ + ((addr) >= (txq_pcpu)->tso_headers_dma && \ + (addr) < (txq_pcpu)->tso_headers_dma + \ + (txq_pcpu)->size * TSO_HEADER_SIZE) + /* Queue modes */ #define MVPP2_QDIST_SINGLE_MODE 0 #define MVPP2_QDIST_MULTI_MODE 1 @@ -5321,8 +5326,9 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, struct mvpp2_txq_pcpu_buf *tx_buf = txq_pcpu->buffs + txq_pcpu->txq_get_index; - dma_unmap_single(port->dev->dev.parent, tx_buf->dma, - tx_buf->size, DMA_TO_DEVICE); + if (!IS_TSO_HEADER(txq_pcpu, tx_buf->dma)) + dma_unmap_single(port->dev->dev.parent, tx_buf->dma, + tx_buf->size, DMA_TO_DEVICE); if (tx_buf->skb) dev_kfree_skb_any(tx_buf->skb); @@ -6212,12 +6218,15 @@ static inline void tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, struct mvpp2_tx_desc *desc) { + struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu); + dma_addr_t buf_dma_addr = mvpp2_txdesc_dma_addr_get(port, desc); size_t buf_sz = mvpp2_txdesc_size_get(port, desc); - dma_unmap_single(port->dev->dev.parent, buf_dma_addr, - buf_sz, DMA_TO_DEVICE); + if (!IS_TSO_HEADER(txq_pcpu, buf_dma_addr)) + dma_unmap_single(port->dev->dev.parent, buf_dma_addr, + buf_sz, DMA_TO_DEVICE); mvpp2_txq_desc_put(txq); } -- cgit v1.1 From 082297e61480c4d72ed75b31077e74aca0e7c799 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 23 Oct 2017 15:24:31 +0200 Subject: net: mvpp2: do not call txq_done from the Tx path when Tx irqs are used When Tx IRQs are used, txq_bufs_free() can be called from both the Tx path and from NAPI poll(). This led to CPU stalls as if these two tasks (Tx and Poll) are scheduled on two CPUs at the same time, DMA unmapping operations are done on the same txq buffers. This patch adds a check not to call txq_done() from the Tx path if Tx interrupts are used as it does not make sense to do so. Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 3de05a8..28c6e8a 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6499,7 +6499,7 @@ out: } /* Finalize TX processing */ - if (txq_pcpu->count >= txq->done_pkts_coal) + if (!port->has_tx_irqs && txq_pcpu->count >= txq->done_pkts_coal) mvpp2_txq_done(port, txq, txq_pcpu); /* Set the timer in case not all frags were processed */ -- cgit v1.1 From b71d21c274eff20a9db8158882b545b141b73ab8 Mon Sep 17 00:00:00 2001 From: Laszlo Toth Date: Mon, 23 Oct 2017 19:19:33 +0200 Subject: sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND Commit 9b9742022888 ("sctp: support ipv6 nonlocal bind") introduced support for the above options as v4 sctp did, so patched sctp_v6_available(). In the v4 implementation it's enough, because sctp_inet_bind_verify() just returns with sctp_v4_available(). However sctp_inet6_bind_verify() has an extra check before that for link-local scope_id, which won't respect the above options. Added the checks before calling ipv6_chk_addr(), but not before the validation of scope_id. before (w/ both options): ./v6test fe80::10 sctp bind failed, errno: 99 (Cannot assign requested address) ./v6test fe80::10 tcp bind success, errno: 0 (Success) after (w/ both options): ./v6test fe80::10 sctp bind success, errno: 0 (Success) Signed-off-by: Laszlo Toth Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 51c4887..7fe9e1d1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -882,8 +882,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) net = sock_net(&opt->inet.sk); rcu_read_lock(); dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); - if (!dev || - !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { + if (!dev || !(opt->inet.freebind || + net->ipv6.sysctl.ip_nonlocal_bind || + ipv6_chk_addr(net, &addr->v6.sin6_addr, + dev, 0))) { rcu_read_unlock(); return 0; } -- cgit v1.1 From 3a379f5b36ae039dfeb6f73316e47ab1af4945df Mon Sep 17 00:00:00 2001 From: Gerhard Bertelsmann Date: Thu, 17 Aug 2017 15:59:49 +0200 Subject: can: sun4i: fix loopback mode Fix loopback mode by setting the right flag and remove presume mode. Signed-off-by: Gerhard Bertelsmann Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 68ef0a4..b0c8085 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -342,7 +342,7 @@ static int sun4i_can_start(struct net_device *dev) /* enter the selected mode */ mod_reg_val = readl(priv->base + SUN4I_REG_MSEL_ADDR); - if (priv->can.ctrlmode & CAN_CTRLMODE_PRESUME_ACK) + if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) mod_reg_val |= SUN4I_MSEL_LOOPBACK_MODE; else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) mod_reg_val |= SUN4I_MSEL_LISTEN_ONLY_MODE; @@ -811,7 +811,6 @@ static int sun4ican_probe(struct platform_device *pdev) priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_LOOPBACK | - CAN_CTRLMODE_PRESUME_ACK | CAN_CTRLMODE_3_SAMPLES; priv->base = addr; priv->clk = clk; -- cgit v1.1 From 8f65a923e6b628e187d5e791cf49393dd5e8c2f9 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 24 Oct 2017 12:23:28 +0200 Subject: can: kvaser_usb: Correct return value in printout If the return value from kvaser_usb_send_simple_msg() was non-zero, the return value from kvaser_usb_flush_queue() was printed in the kernel warning. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 18cc529..861e90e 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1609,7 +1609,8 @@ static int kvaser_usb_close(struct net_device *netdev) if (err) netdev_warn(netdev, "Cannot flush queue, error %d\n", err); - if (kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, priv->channel)) + err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, priv->channel); + if (err) netdev_warn(netdev, "Cannot reset card, error %d\n", err); err = kvaser_usb_stop_chip(priv); -- cgit v1.1 From e1d2d1329a5722dbecc9c278303fcc4aa01f8790 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 24 Oct 2017 12:23:29 +0200 Subject: can: kvaser_usb: Ignore CMD_FLUSH_QUEUE_REPLY messages To avoid kernel warning "Unhandled message (68)", ignore the CMD_FLUSH_QUEUE_REPLY message for now. As of Leaf v2 firmware version v4.1.844 (2017-02-15), flush tx queue is synchronous. There is a capability bit indicating whether flushing tx queue is synchronous or asynchronous. A proper solution would be to query the device for capabilities. If the synchronous tx flush capability bit is set, we should wait for CMD_FLUSH_QUEUE_REPLY message, while flushing the tx queue. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 861e90e..9b18d96 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -137,6 +137,7 @@ static inline bool kvaser_is_usbcan(const struct usb_device_id *id) #define CMD_RESET_ERROR_COUNTER 49 #define CMD_TX_ACKNOWLEDGE 50 #define CMD_CAN_ERROR_EVENT 51 +#define CMD_FLUSH_QUEUE_REPLY 68 #define CMD_LEAF_USB_THROTTLE 77 #define CMD_LEAF_LOG_MESSAGE 106 @@ -1301,6 +1302,11 @@ static void kvaser_usb_handle_message(const struct kvaser_usb *dev, goto warn; break; + case CMD_FLUSH_QUEUE_REPLY: + if (dev->family != KVASER_LEAF) + goto warn; + break; + default: warn: dev_warn(dev->udev->dev.parent, "Unhandled message (%d)\n", msg->id); -- cgit v1.1 From 2eece390bf68ec8f733d7e4a3ba8a5ea350082ae Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 23 Oct 2017 15:35:33 +0300 Subject: perf/x86/intel/bts: Fix exclusive event reference leak Commit: d2878d642a4ed ("perf/x86/intel/bts: Disallow use by unprivileged users on paranoid systems") ... adds a privilege check in the exactly wrong place in the event init path: after the 'LBR exclusive' reference has been taken, and doesn't release it in the case of insufficient privileges. After this, nobody in the system gets to use PT or LBR afterwards. This patch moves the privilege check to where it should have been in the first place. Signed-off-by: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d2878d642a4ed ("perf/x86/intel/bts: Disallow use by unprivileged users on paranoid systems") Link: http://lkml.kernel.org/r/20171023123533.16973-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 16076eb..141e07b 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -546,9 +546,6 @@ static int bts_event_init(struct perf_event *event) if (event->attr.type != bts_pmu.type) return -ENOENT; - if (x86_add_exclusive(x86_lbr_exclusive_bts)) - return -EBUSY; - /* * BTS leaks kernel addresses even when CPL0 tracing is * disabled, so disallow intel_bts driver for unprivileged @@ -562,6 +559,9 @@ static int bts_event_init(struct perf_event *event) !capable(CAP_SYS_ADMIN)) return -EACCES; + if (x86_add_exclusive(x86_lbr_exclusive_bts)) + return -EBUSY; + ret = x86_reserve_hardware(); if (ret) { x86_del_exclusive(x86_lbr_exclusive_bts); -- cgit v1.1 From f265788c336979090ac80b9ae173aa817c4fe40d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 24 Oct 2017 16:53:34 +0800 Subject: ALSA: hda - fix headset mic problem for Dell machines with alc236 We have several Dell laptops which use the codec alc236, the headset mic can't work on these machines. Following the commit 736f20a70, we add the pin cfg table to make the headset mic work. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 00fa802..546d515 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6428,6 +6428,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC225_STANDARD_PINS, {0x12, 0xb7a60130}, {0x1b, 0x90170110}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60140}, + {0x14, 0x90170110}, + {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60140}, + {0x14, 0x90170150}, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), -- cgit v1.1 From d0725439354a58f2b13b9f5234420641b662b9c4 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 23 Oct 2017 17:36:03 -0700 Subject: hwmon: (tmp102) Fix first temperature reading Commit 3d8f7a89a197 ("hwmon: (tmp102) Improve handling of initial read delay") reduced the initial temperature read delay and made it dependent on the chip's shutdown mode. If the chip was not in shutdown mode at probe, the read delay no longer applies. This ignores the fact that the chip initialization changes the temperature sensor resolution, and that the temperature register values change when the resolution is changed. As a result, the reported temperature is twice as high as the real temperature until the first temperature conversion after the configuration change is complete. This can result in unexpected behavior and, worst case, in a system shutdown. To fix the problem, let's just always wait for a conversion to complete before reporting a temperature. Fixes: 3d8f7a89a197 ("hwmon: (tmp102) Improve handling of initial read delay") Link: https://bugzilla.kernel.org/show_bug.cgi?id=197167 Reported-by: Ralf Goebel Cc: Ralf Goebel Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck --- drivers/hwmon/tmp102.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 5eafbaa..dfc40c7 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -268,14 +268,11 @@ static int tmp102_probe(struct i2c_client *client, return err; } - tmp102->ready_time = jiffies; - if (tmp102->config_orig & TMP102_CONF_SD) { - /* - * Mark that we are not ready with data until the first - * conversion is complete - */ - tmp102->ready_time += msecs_to_jiffies(CONVERSION_TIME_MS); - } + /* + * Mark that we are not ready with data until the first + * conversion is complete + */ + tmp102->ready_time = jiffies + msecs_to_jiffies(CONVERSION_TIME_MS); hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, tmp102, -- cgit v1.1 From 0cc2b4e5a020fc7f4d1795741c116c983e9467d7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 24 Oct 2017 15:20:45 +0200 Subject: PM / QoS: Fix device resume latency PM QoS The special value of 0 for device resume latency PM QoS means "no restriction", but there are two problems with that. First, device resume latency PM QoS requests with 0 as the value are always put in front of requests with positive values in the priority lists used internally by the PM QoS framework, causing 0 to be chosen as an effective constraint value. However, that 0 is then interpreted as "no restriction" effectively overriding the other requests with specific restrictions which is incorrect. Second, the users of device resume latency PM QoS have no way to specify that *any* resume latency at all should be avoided, which is an artificial limitation in general. To address these issues, modify device resume latency PM QoS to use S32_MAX as the "no constraint" value and 0 as the "no latency at all" one and rework its users (the cpuidle menu governor, the genpd QoS governor and the runtime PM framework) to follow these changes. Also add a special "n/a" value to the corresponding user space I/F to allow user space to indicate that it cannot accept any resume latencies at all for the given device. Fixes: 85dc0b8a4019 (PM / QoS: Make it possible to expose PM QoS latency constraints) Link: https://bugzilla.kernel.org/show_bug.cgi?id=197323 Reported-by: Reinette Chatre Tested-by: Reinette Chatre Signed-off-by: Rafael J. Wysocki Acked-by: Alex Shi Cc: All applicable --- Documentation/ABI/testing/sysfs-devices-power | 4 +- drivers/base/cpu.c | 3 +- drivers/base/power/domain_governor.c | 53 +++++++++++++++------------ drivers/base/power/qos.c | 2 +- drivers/base/power/runtime.c | 2 +- drivers/base/power/sysfs.c | 25 +++++++++++-- drivers/cpuidle/governors/menu.c | 4 +- include/linux/pm_qos.h | 5 ++- 8 files changed, 63 insertions(+), 35 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 676fdf5..5cbb6f0 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -211,7 +211,9 @@ Description: device, after it has been suspended at run time, from a resume request to the moment the device will be ready to process I/O, in microseconds. If it is equal to 0, however, this means that - the PM QoS resume latency may be arbitrary. + the PM QoS resume latency may be arbitrary and the special value + "n/a" means that user space cannot accept any resume latency at + all for the given device. Not all drivers support this attribute. If it isn't supported, it is not present. diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 321cd7b..227bac5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -377,7 +377,8 @@ int register_cpu(struct cpu *cpu, int num) per_cpu(cpu_sys_devices, num) = &cpu->dev; register_cpu_under_node(num, cpu_to_node(num)); - dev_pm_qos_expose_latency_limit(&cpu->dev, 0); + dev_pm_qos_expose_latency_limit(&cpu->dev, + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); return 0; } diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 281f949..51751cc 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -14,23 +14,20 @@ static int dev_update_qos_constraint(struct device *dev, void *data) { s64 *constraint_ns_p = data; - s32 constraint_ns = -1; + s64 constraint_ns = -1; if (dev->power.subsys_data && dev->power.subsys_data->domain_data) constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; - if (constraint_ns < 0) { + if (constraint_ns < 0) constraint_ns = dev_pm_qos_read_value(dev); - constraint_ns *= NSEC_PER_USEC; - } - if (constraint_ns == 0) + + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) return 0; - /* - * constraint_ns cannot be negative here, because the device has been - * suspended. - */ - if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) + constraint_ns *= NSEC_PER_USEC; + + if (constraint_ns < *constraint_ns_p || *constraint_ns_p < 0) *constraint_ns_p = constraint_ns; return 0; @@ -63,10 +60,14 @@ static bool default_suspend_ok(struct device *dev) spin_unlock_irqrestore(&dev->power.lock, flags); - if (constraint_ns < 0) + if (constraint_ns == 0) return false; - constraint_ns *= NSEC_PER_USEC; + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + constraint_ns = -1; + else + constraint_ns *= NSEC_PER_USEC; + /* * We can walk the children without any additional locking, because * they all have been suspended at this point and their @@ -76,14 +77,19 @@ static bool default_suspend_ok(struct device *dev) device_for_each_child(dev, &constraint_ns, dev_update_qos_constraint); - if (constraint_ns > 0) { - constraint_ns -= td->suspend_latency_ns + - td->resume_latency_ns; - if (constraint_ns == 0) - return false; + if (constraint_ns < 0) { + /* The children have no constraints. */ + td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + td->cached_suspend_ok = true; + } else { + constraint_ns -= td->suspend_latency_ns + td->resume_latency_ns; + if (constraint_ns > 0) { + td->effective_constraint_ns = constraint_ns; + td->cached_suspend_ok = true; + } else { + td->effective_constraint_ns = 0; + } } - td->effective_constraint_ns = constraint_ns; - td->cached_suspend_ok = constraint_ns >= 0; /* * The children have been suspended already, so we don't need to take @@ -145,13 +151,14 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, td = &to_gpd_data(pdd)->td; constraint_ns = td->effective_constraint_ns; /* default_suspend_ok() need not be called before us. */ - if (constraint_ns < 0) { + if (constraint_ns < 0) constraint_ns = dev_pm_qos_read_value(pdd->dev); - constraint_ns *= NSEC_PER_USEC; - } - if (constraint_ns == 0) + + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) continue; + constraint_ns *= NSEC_PER_USEC; + /* * constraint_ns cannot be negative here, because the device has * been suspended. diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 277d43a..7d29286 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -189,7 +189,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) plist_head_init(&c->list); c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; - c->no_constraint_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; + c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; c->type = PM_QOS_MIN; c->notifiers = n; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 7bcf80f..13e0159 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -253,7 +253,7 @@ static int rpm_check_suspend_allowed(struct device *dev) || (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; - else if (__dev_pm_qos_read_value(dev) < 0) + else if (__dev_pm_qos_read_value(dev) == 0) retval = -EPERM; else if (dev->power.runtime_status == RPM_SUSPENDED) retval = 1; diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 156ab57..632077f 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -218,7 +218,14 @@ static ssize_t pm_qos_resume_latency_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_pm_qos_requested_resume_latency(dev)); + s32 value = dev_pm_qos_requested_resume_latency(dev); + + if (value == 0) + return sprintf(buf, "n/a\n"); + else if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + value = 0; + + return sprintf(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_store(struct device *dev, @@ -228,11 +235,21 @@ static ssize_t pm_qos_resume_latency_store(struct device *dev, s32 value; int ret; - if (kstrtos32(buf, 0, &value)) - return -EINVAL; + if (!kstrtos32(buf, 0, &value)) { + /* + * Prevent users from writing negative or "no constraint" values + * directly. + */ + if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + return -EINVAL; - if (value < 0) + if (value == 0) + value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + } else if (!strcmp(buf, "n/a") || !strcmp(buf, "n/a\n")) { + value = 0; + } else { return -EINVAL; + } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 48eaf28..aa39040 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -298,8 +298,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->needs_update = 0; } - /* resume_latency is 0 means no restriction */ - if (resume_latency && resume_latency < latency_req) + if (resume_latency < latency_req && + resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) latency_req = resume_latency; /* Special case when user has set very strict latency requirement */ diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 032b559..6737a8c 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -27,16 +27,17 @@ enum pm_qos_flags_status { PM_QOS_FLAGS_ALL, }; -#define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_DEFAULT_VALUE (-1) +#define PM_QOS_LATENCY_ANY S32_MAX #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) -#define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1)) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) #define PM_QOS_FLAG_REMOTE_WAKEUP (1 << 1) -- cgit v1.1 From 6eaf011144af10cad34c0d46f82e50d382c8e926 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2017 19:03:04 +0300 Subject: ovl: fix EIO from lookup of non-indexed upper Commit fbaf94ee3cd5 ("ovl: don't set origin on broken lower hardlink") attempt to avoid the condition of non-indexed upper inode with lower hardlink as origin. If this condition is found, lookup returns EIO. The protection of commit mentioned above does not cover the case of lower that is not a hardlink when it is copied up (with either index=off/on) and then lower is hardlinked while overlay is offline. Changes to lower layer while overlayfs is offline should not result in unexpected behavior, so a permanent EIO error after creating a link in lower layer should not be considered as correct behavior. This fix replaces EIO error with success in cases where upper has origin but no index is found, or index is found that does not match upper inode. In those cases, lookup will not fail and the returned overlay inode will be hashed by upper inode instead of by lower origin inode. Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 20 ++++++++++++++++---- fs/overlayfs/namei.c | 20 ++++++++------------ fs/overlayfs/overlayfs.h | 3 ++- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a619add..321511e 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -598,18 +598,30 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry) +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, + struct dentry *index) { struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; + /* Already indexed or could be indexed on copy up? */ + bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + + if (WARN_ON(upperdentry && indexed && !lowerdentry)) + return ERR_PTR(-EIO); if (!realinode) realinode = d_inode(lowerdentry); - if (!S_ISDIR(realinode->i_mode) && - (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) { - struct inode *key = d_inode(lowerdentry ?: upperdentry); + /* + * Copy up origin (lower) may exist for non-indexed upper, but we must + * not use lower as hash key in that case. + * Hash inodes that are or could be indexed by origin inode and + * non-indexed upper inodes that could be hard linked by upper inode. + */ + if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { + struct inode *key = d_inode(indexed ? lowerdentry : + upperdentry); unsigned int nlink; inode = iget5_locked(dentry->d_sb, (unsigned long) key, diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index e081641..05e9a0a 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -516,18 +516,9 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { - if (upper && d_inode(origin)->i_nlink > 1) { - pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", - d_inode(origin)->i_ino); - goto fail; - } - - dput(index); - index = NULL; + goto out_dput; } else if (upper && d_inode(upper) != inode) { - pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", - index, inode->i_ino, d_inode(upper)->i_ino); - goto fail; + goto out_dput; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -547,6 +538,11 @@ out: kfree(name.name); return index; +out_dput: + dput(index); + index = NULL; + goto out; + fail: dput(index); index = ERR_PTR(-EIO); @@ -710,7 +706,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperdentry = dget(index); if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry); + inode = ovl_get_inode(dentry, upperdentry, index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c706a6f..d9a0edd 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -286,7 +286,8 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry); +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, + struct dentry *index); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; -- cgit v1.1 From 7937a56fdf0b064c2ffa33025210f725a4ebc822 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 20 Oct 2017 17:19:06 +0300 Subject: ovl: handle ENOENT on index lookup Treat ENOENT from index entry lookup the same way as treating a returned negative dentry. Apparently, either could be returned if file is not found, depending on the underlying file system. Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 Signed-off-by: Amir Goldstein --- fs/overlayfs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 05e9a0a..0d9b8ce 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -507,6 +507,10 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); + if (err == -ENOENT) { + index = NULL; + goto out; + } pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, -- cgit v1.1 From fa0096e3bad69ed6f34843fd7ae1c45ca987012a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 24 Oct 2017 12:24:11 +0300 Subject: ovl: do not cleanup unsupported index entries With index=on, ovl_indexdir_cleanup() tries to cleanup invalid index entries (e.g. bad index name). This behavior could result in cleaning of entries created by newer kernels and is therefore undesirable. Instead, abort mount if such entries are encountered. We still cleanup 'stale' entries and 'orphan' entries, both those cases can be a result of offline changes to lower and upper dirs. When encoutering an index entry of type directory or whiteout, kernel was supposed to fallback to read-only mount, but the fill_super() operation returns EROFS in this case instead of returning success with read-only mount flag, so mount fails when encoutering directory or whiteout index entries. Bless this behavior by returning -EINVAL on directory and whiteout index entries as we do for all unsupported index entries. Fixes: 61b674710cd9 ("ovl: do not cleanup directory and whiteout index..") Cc: # v4.13 Signed-off-by: Amir Goldstein --- fs/overlayfs/namei.c | 7 +++---- fs/overlayfs/readdir.c | 11 +++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 0d9b8ce..a12dc10 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -405,14 +405,13 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, * be treated as stale (i.e. after unlink of the overlay inode). * We don't know the verification rules for directory and whiteout * index entries, because they have not been implemented yet, so return - * EROFS if those entries are found to avoid corrupting an index that - * was created by a newer kernel. + * EINVAL if those entries are found to abort the mount to avoid + * corrupting an index that was created by a newer kernel. */ - err = -EROFS; + err = -EINVAL; if (d_is_dir(index) || ovl_is_whiteout(index)) goto fail; - err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0f85ee9..698b74d 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1021,13 +1021,12 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, break; } err = ovl_verify_index(index, lowerstack, numlower); - if (err) { - if (err == -EROFS) - break; + /* Cleanup stale and orphan index entries */ + if (err && (err == -ESTALE || err == -ENOENT)) err = ovl_cleanup(dir, index); - if (err) - break; - } + if (err) + break; + dput(index); index = NULL; } -- cgit v1.1 From 57a95b41869b8f0d1949c24df2a9dac1ca7082ee Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 24 Oct 2017 11:08:18 -0700 Subject: Input: elan_i2c - add ELAN0611 to the ACPI table ELAN0611 touchpad uses elan_i2c as its driver. It can be found on Lenovo ideapad 320-15IKB. So add it to ACPI table to enable the touchpad. [Ido Adiv reports that the same ACPI ID is used for Elan touchpad in ideapad 520]. BugLink: https://bugs.launchpad.net/bugs/1723736 Signed-off-by: Kai-Heng Feng Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 0e761d0..6d6b092 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1258,6 +1258,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0605", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, + { "ELAN0611", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.1 From 32e67a3a06b88904155170560b7a63d372b320bd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 24 Oct 2017 15:57:18 -0400 Subject: nbd: handle interrupted sendmsg with a sndtimeo set If you do not set sk_sndtimeo you will get -ERESTARTSYS if there is a pending signal when you enter sendmsg, which we handle properly. However if you set a timeout for your commands we'll set sk_sndtimeo to that timeout, which means that sendmsg will start returning -EINTR instead of -ERESTARTSYS. Fix this by checking either cases and doing the correct thing. Cc: stable@vger.kernel.org Fixes: dc88e34d69d8 ("nbd: set sk->sk_sndtimeo for our sockets") Reported-and-tested-by: Daniel Xu Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index baebbdf..9adfb54 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -386,6 +386,15 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, return result; } +/* + * Different settings for sk->sk_sndtimeo can result in different return values + * if there is a signal pending when we enter sendmsg, because reasons? + */ +static inline int was_interrupted(int result) +{ + return result == -ERESTARTSYS || result == -EINTR; +} + /* always call with the tx_lock held */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { @@ -458,7 +467,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); if (result <= 0) { - if (result == -ERESTARTSYS) { + if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, * however if we have sent something we need to make * sure we only allow this req to be sent until we are @@ -502,7 +511,7 @@ send_pages: } result = sock_xmit(nbd, index, 1, &from, flags, &sent); if (result <= 0) { - if (result == -ERESTARTSYS) { + if (was_interrupted(result)) { /* We've already sent the header, we * have no choice but to set pending and * return BUSY. -- cgit v1.1 From 829385f08ae99740276cbd46c9db29764c519211 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 20 Oct 2017 16:40:43 -0700 Subject: strparser: Use delayed work instead of timer for msg timeout Sock lock may be taken in the message timer function which is a problem since timers run in BH. Instead of timers use delayed_work. Reported-by: Eric Dumazet Fixes: bbb03029a899 ("strparser: Generalize strparser") Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 3 +-- net/strparser/strparser.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/strparser.h b/include/net/strparser.h index 7dc131d..d96b59f 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -74,10 +74,9 @@ struct strparser { u32 unrecov_intr : 1; struct sk_buff **skb_nextp; - struct timer_list msg_timer; struct sk_buff *skb_head; unsigned int need_bytes; - struct delayed_work delayed_work; + struct delayed_work msg_timer_work; struct work_struct work; struct strp_stats stats; struct strp_callbacks cb; diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index d4ea46a..c5fda15 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -49,7 +49,7 @@ static void strp_abort_strp(struct strparser *strp, int err) { /* Unrecoverable error in receive */ - del_timer(&strp->msg_timer); + cancel_delayed_work(&strp->msg_timer_work); if (strp->stopped) return; @@ -68,7 +68,7 @@ static void strp_abort_strp(struct strparser *strp, int err) static void strp_start_timer(struct strparser *strp, long timeo) { if (timeo) - mod_timer(&strp->msg_timer, timeo); + mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); } /* Lower lock held */ @@ -319,7 +319,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, eaten += (cand_len - extra); /* Hurray, we have a new message! */ - del_timer(&strp->msg_timer); + cancel_delayed_work(&strp->msg_timer_work); strp->skb_head = NULL; STRP_STATS_INCR(strp->stats.msgs); @@ -450,9 +450,10 @@ static void strp_work(struct work_struct *w) do_strp_work(container_of(w, struct strparser, work)); } -static void strp_msg_timeout(unsigned long arg) +static void strp_msg_timeout(struct work_struct *w) { - struct strparser *strp = (struct strparser *)arg; + struct strparser *strp = container_of(w, struct strparser, + msg_timer_work.work); /* Message assembly timed out */ STRP_STATS_INCR(strp->stats.msg_timeouts); @@ -505,9 +506,7 @@ int strp_init(struct strparser *strp, struct sock *sk, strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; - setup_timer(&strp->msg_timer, strp_msg_timeout, - (unsigned long)strp); - + INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout); INIT_WORK(&strp->work, strp_work); return 0; @@ -532,7 +531,7 @@ void strp_done(struct strparser *strp) { WARN_ON(!strp->stopped); - del_timer_sync(&strp->msg_timer); + cancel_delayed_work_sync(&strp->msg_timer_work); cancel_work_sync(&strp->work); if (strp->skb_head) { -- cgit v1.1 From 3eb8feeb1708c7dbfd2e97df92a2a407c116606e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 24 Oct 2017 16:37:19 -0400 Subject: net: dsa: check master device before put In the case of pdata, the dsa_cpu_parse function calls dev_put() before making sure it isn't NULL. Fix this. Fixes: 71e0bbde0d88 ("net: dsa: Add support for platform data") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 873af01..045d8a1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -496,14 +496,15 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, if (!ethernet) return -EINVAL; ethernet_dev = of_find_net_device_by_node(ethernet); + if (!ethernet_dev) + return -EPROBE_DEFER; } else { ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); + if (!ethernet_dev) + return -EPROBE_DEFER; dev_put(ethernet_dev); } - if (!ethernet_dev) - return -EPROBE_DEFER; - if (!dst->cpu_dp) { dst->cpu_dp = port; dst->cpu_dp->netdev = ethernet_dev; -- cgit v1.1 From 0a5e2ec2647737907d267c09dc9a25fab1468865 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 5 Oct 2017 08:29:47 +0200 Subject: s390/kvm: fix detection of guest machine checks The new detection code for guest machine checks added a check based on %r11 to .Lcleanup_sie to distinguish between normal asynchronous interrupts and machine checks. But the funtion is called from the program check handler as well with an undefined value in %r11. The effect is that all program exceptions pointing to the SIE instruction will set the CIF_MCCK_GUEST bit. The bit stays set for the CPU until the next machine check comes in which will incorrectly be interpreted as a guest machine check. The simplest fix is to stop using .Lcleanup_sie in the program check handler and duplicate a few instructions. Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Cc: # v4.13+ Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 21900e1..d185aa3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -521,12 +521,15 @@ ENTRY(pgm_check_handler) tmhh %r8,0x0001 # test problem state bit jnz 2f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) - # cleanup critical section for sie64a + # cleanup critical section for program checks in sie64a lgr %r14,%r9 slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) jhe 0f - brasl %r14,.Lcleanup_sie + lg %r14,__SF_EMPTY(%r15) # get control block pointer + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit #endif 0: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 1f # -> enabled, can't be a double fault -- cgit v1.1 From 6c2838fbdedb9b72a81c931d49e56b229b6cdbca Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Oct 2017 08:52:58 -0400 Subject: ceph: unlock dangling spinlock in try_flush_caps() sparse warns: fs/ceph/caps.c:2042:9: warning: context imbalance in 'try_flush_caps' - wrong count at exit We need to exit this function with the lock unlocked, but a couple of cases leave it locked. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 157fe59..1978a8c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1991,6 +1991,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) retry: spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + spin_unlock(&ci->i_ceph_lock); dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); goto out; } @@ -2008,8 +2009,10 @@ retry: mutex_lock(&session->s_mutex); goto retry; } - if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) + if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) { + spin_unlock(&ci->i_ceph_lock); goto out; + } flushing = __mark_caps_flushing(inode, session, true, &flush_tid, &oldest_flush_tid); -- cgit v1.1 From cfbb0d90a7abb289edc91833d0905931f8805f12 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Oct 2017 21:12:13 +0200 Subject: mac80211: don't compare TKIP TX MIC key in reinstall prevention For the reinstall prevention, the code I had added compares the whole key. It turns out though that iwlwifi firmware doesn't provide the TKIP TX MIC key as it's not needed in client mode, and thus the comparison will always return false. For client mode, thus always zero out the TX MIC key part before doing the comparison in order to avoid accepting the reinstall of the key with identical encryption and RX MIC key, but not the same TX MIC key (since the supplicant provides the real one.) Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything") Signed-off-by: Johannes Berg --- net/mac80211/key.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 035d16f..9380493 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -610,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key) ieee80211_key_free_common(key); } +static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata, + struct ieee80211_key *old, + struct ieee80211_key *new) +{ + u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP]; + u8 *tk_old, *tk_new; + + if (!old || new->conf.keylen != old->conf.keylen) + return false; + + tk_old = old->conf.key; + tk_new = new->conf.key; + + /* + * In station mode, don't compare the TX MIC key, as it's never used + * and offloaded rekeying may not care to send it to the host. This + * is the case in iwlwifi, for example. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + new->conf.cipher == WLAN_CIPHER_SUITE_TKIP && + new->conf.keylen == WLAN_KEY_LEN_TKIP && + !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP); + memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP); + memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); + memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); + tk_old = tkip_old; + tk_new = tkip_new; + } + + return !crypto_memneq(tk_old, tk_new, new->conf.keylen); +} + int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) @@ -635,8 +668,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * Silently accept key re-installation without really installing the * new version of the key to avoid nonce reuse or replay issues. */ - if (old_key && key->conf.keylen == old_key->conf.keylen && - !crypto_memneq(key->conf.key, old_key->conf.key, key->conf.keylen)) { + if (ieee80211_key_identical(sdata, old_key, key)) { ieee80211_key_free_unused(key); ret = 0; goto out; -- cgit v1.1 From 092e72c9edab16d4d6ad10c683a95047d53b6db4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Oct 2017 11:04:47 +0100 Subject: efi/efi_test: Prevent an Oops in efi_runtime_query_capsulecaps() If "qcaps.capsule_count" is ULONG_MAX then "qcaps.capsule_count + 1" will overflow to zero and kcalloc() will return the ZERO_SIZE_PTR. We try to dereference it inside the loop and crash. Signed-off-by: Dan Carpenter Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Acked-by: Ivan Hu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: ff6301dabc3c ("efi: Add efi_test driver for exporting UEFI runtime service interfaces") Link: http://lkml.kernel.org/r/20171025100448.26056-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/test/efi_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c index 08129b7..41c48a1 100644 --- a/drivers/firmware/efi/test/efi_test.c +++ b/drivers/firmware/efi/test/efi_test.c @@ -593,6 +593,9 @@ static long efi_runtime_query_capsulecaps(unsigned long arg) if (copy_from_user(&qcaps, qcaps_user, sizeof(qcaps))) return -EFAULT; + if (qcaps.capsule_count == ULONG_MAX) + return -EINVAL; + capsules = kcalloc(qcaps.capsule_count + 1, sizeof(efi_capsule_header_t), GFP_KERNEL); if (!capsules) -- cgit v1.1 From 38fb6652229c2149e8694d57db442878fdf8a1bd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Oct 2017 11:04:48 +0100 Subject: efi/libstub/arm: Don't randomize runtime regions when CONFIG_HIBERNATION=y Commit: e69176d68d26 ("ef/libstub/arm/arm64: Randomize the base of the UEFI rt services region") implemented randomization of the virtual mapping that the OS chooses for the UEFI runtime services. This was motivated by the fact that UEFI usually does not bother to specify any permission restrictions for those regions, making them prime real estate for exploitation now that the OS is getting more and more careful not to leave any R+W+X mapped regions lying around. However, this randomization breaks assumptions in the resume from hibernation code, which expects all memory regions populated by UEFI to remain in the same place, including their virtual mapping into the OS memory space. While this assumption may not be entirely reasonable in the first place, breaking it deliberately does not make a lot of sense either. So let's refrain from this randomization pass if CONFIG_HIBERNATION=y. Signed-off-by: Ard Biesheuvel Cc: James Morse Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20171025100448.26056-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/arm-stub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 1cb2d1c..a94601d 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -238,7 +238,8 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, efi_random_get_seed(sys_table); - if (!nokaslr()) { + /* hibernation expects the runtime regions to stay in the same place */ + if (!IS_ENABLED(CONFIG_HIBERNATION) && !nokaslr()) { /* * Randomize the base of the UEFI runtime services region. * Preserve the 2 MB alignment of the region by taking a -- cgit v1.1 From d3daa2c7865cbfa830651b11c8ad1df23465b46e Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Mon, 23 Oct 2017 11:27:35 -0400 Subject: drm/amd/amdgpu: Remove workaround check for UVD6 on APUs On APUs the uvd6 driver was skipping proper suspend/resume routines resulting in a broken state upon resume. Signed-off-by: Tom St Denis Acked-by: Alex Deucher Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 31db356..430a6b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -225,11 +225,7 @@ static int uvd_v6_0_suspend(void *handle) if (r) return r; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) - r = amdgpu_uvd_suspend(adev); - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v6_0_resume(void *handle) @@ -237,12 +233,10 @@ static int uvd_v6_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_uvd_resume(adev); - if (r) - return r; - } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + return uvd_v6_0_hw_init(adev); } -- cgit v1.1 From c6cdd51404b7ac12dd95173ddfc548c59ecf037f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 25 Oct 2017 16:34:27 +0200 Subject: fuse: fix READDIRPLUS skipping an entry Marios Titas running a Haskell program noticed a problem with fuse's readdirplus: when it is interrupted by a signal, it skips one directory entry. The reason is that fuse erronously updates ctx->pos after a failed dir_emit(). The issue originates from the patch adding readdirplus support. Reported-by: Jakob Unterwurzacher Tested-by: Marios Titas Signed-off-by: Miklos Szeredi Fixes: 0b05b18381ee ("fuse: implement NFS-like readdirplus support") Cc: # v3.9 --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 622081b..2496738 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1308,7 +1308,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, */ over = !dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, dirent->type); - ctx->pos = dirent->off; + if (!over) + ctx->pos = dirent->off; } buf += reclen; -- cgit v1.1 From 7277f755048da562eb2489becacd38d0d05e1e06 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 24 Oct 2017 16:27:28 +0100 Subject: drm/i915/perf: fix perf enable/disable ioctls with 32bits userspace The compat callback was missing and triggered failures in 32bits userspace when enabling/disable the perf stream. We don't require any particular processing here as these ioctls don't take any argument. Signed-off-by: Lionel Landwerlin Fixes: eec688e1420 ("drm/i915: Add i915 perf infrastructure") Cc: linux-stable Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171024152728.4873-1-lionel.g.landwerlin@intel.com (cherry picked from commit 191f896085cf3b5d85920d58a759da4eea141721) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 94185d6..370b9d2 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2537,6 +2537,10 @@ static const struct file_operations fops = { .poll = i915_perf_poll, .read = i915_perf_read, .unlocked_ioctl = i915_perf_ioctl, + /* Our ioctl have no arguments, so it's safe to use the same function + * to handle 32bits compatibility. + */ + .compat_ioctl = i915_perf_ioctl, }; -- cgit v1.1 From 298d275d4d9bea3524ff4bc76678c140611d8a8d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 25 Oct 2017 17:08:07 +0200 Subject: xen/gntdev: avoid out of bounds access in case of partial gntdev_mmap() In case gntdev_mmap() succeeds only partially in mapping grant pages it will leave some vital information uninitialized needed later for cleanup. This will lead to an out of bounds array access when unmapping the already mapped pages. So just initialize the data needed for unmapping the pages a little bit earlier. Cc: Reported-by: Arthur Borsboom Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 8236059..57efbd3 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1024,6 +1024,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) mutex_unlock(&priv->lock); if (use_ptemod) { + map->pages_vm_start = vma->vm_start; err = apply_to_page_range(vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, find_grant_ptes, map); @@ -1061,7 +1062,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) set_grant_ptes_as_special, NULL); } #endif - map->pages_vm_start = vma->vm_start; } return 0; -- cgit v1.1 From 5b454a64555055aaa5769b3ba877bd911d375d5a Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Thu, 19 Oct 2017 13:09:29 -0700 Subject: cifs: Select all required crypto modules Some dependencies were lost when CIFS_SMB2 was merged into CIFS. Fixes: 2a38e12053b7 ("[SMB3] Remove ifdef since SMB3 (and later) now STRONGLY preferred") Signed-off-by: Benjamin Gilbert Reviewed-by: Aurelien Aptel CC: Stable Signed-off-by: Steve French --- fs/cifs/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index f724361..d5b2e12 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -5,9 +5,14 @@ config CIFS select CRYPTO select CRYPTO_MD4 select CRYPTO_MD5 + select CRYPTO_SHA256 + select CRYPTO_CMAC select CRYPTO_HMAC select CRYPTO_ARC4 + select CRYPTO_AEAD2 + select CRYPTO_CCM select CRYPTO_ECB + select CRYPTO_AES select CRYPTO_DES help This is the client VFS module for the SMB3 family of NAS protocols, -- cgit v1.1 From 48923d2a9d4f6ca909102061a4240b9896ff8ea2 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 17 Oct 2017 14:47:17 +0200 Subject: CIFS: do not send invalid input buffer on QUERY_INFO requests query_info() doesn't use the InputBuffer field of the QUERY_INFO request, therefore according to [MS-SMB2] it must: a) set the InputBufferOffset to 0 b) send a zero-length InputBuffer Doing a) is trivial but b) is a bit more tricky. The packet is allocated according to it's StructureSize, which takes into account an extra 1 byte buffer which we don't need here. StructureSize fields must have constant values no matter the actual length of the whole packet so we can't just edit that constant. Both the NetBIOS-over-TCP message length ("rfc1002 length") L and the iovec length L' have to be updated. Since L' is computed from L we just update L by decrementing it by one. Signed-off-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ba3865b..fa17caa 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2191,9 +2191,13 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; req->AdditionalInformation = cpu_to_le32(additional_info); - /* 4 for rfc1002 length field and 1 for Buffer */ - req->InputBufferOffset = - cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + + /* + * We do not use the input buffer (do not send extra byte) + */ + req->InputBufferOffset = 0; + inc_rfc1001_len(req, -1); + req->OutputBufferLength = cpu_to_le32(output_len); iov[0].iov_base = (char *)req; -- cgit v1.1 From db3b5474f462e77b82ca1e27627f03c47b622c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Aptel?= Date: Wed, 11 Oct 2017 13:23:36 +0200 Subject: CIFS: Fix NULL pointer deref on SMB2_tcon() failure If SendReceive2() fails rsp is set to NULL but is dereferenced in the error handling code. Cc: stable@vger.kernel.org Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index fa17caa..3efcd96 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1255,7 +1255,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct smb2_tree_connect_req *req; struct smb2_tree_connect_rsp *rsp = NULL; struct kvec iov[2]; - struct kvec rsp_iov; + struct kvec rsp_iov = { NULL, 0 }; int rc = 0; int resp_buftype; int unc_path_len; @@ -1372,7 +1372,7 @@ tcon_exit: return rc; tcon_error_exit: - if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { + if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); } goto tcon_exit; -- cgit v1.1 From fe83bebc05228e838ed5cbbc62712ab50dd40e18 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 20 Oct 2017 14:49:37 +0200 Subject: SMB: fix leak of validate negotiate info response buffer Fixes: ff1c038addc4 ("Check SMB3 dialects against downgrade attacks") Signed-off-by: David Disseldorp Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3efcd96..6b03e2d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -648,7 +648,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) { int rc = 0; struct validate_negotiate_info_req vneg_inbuf; - struct validate_negotiate_info_rsp *pneg_rsp; + struct validate_negotiate_info_rsp *pneg_rsp = NULL; u32 rsplen; u32 inbuflen; /* max of 4 dialects */ @@ -728,7 +728,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* relax check since Mac returns max bufsize allowed on ioctl */ if (rsplen > CIFSMaxBufSize) - return -EIO; + goto err_rsp_free; } /* check validate negotiate info response matches what we got earlier */ @@ -747,10 +747,13 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* validate negotiate successful */ cifs_dbg(FYI, "validate negotiate info successful\n"); + kfree(pneg_rsp); return 0; vneg_out: cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); +err_rsp_free: + kfree(pneg_rsp); return -EIO; } -- cgit v1.1 From a2d9daad1d2dfbd307ab158044d1c323d7babbde Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 20 Oct 2017 14:49:38 +0200 Subject: SMB: fix validate negotiate info uninitialised memory use An undersize validate negotiate info server response causes the client to use uninitialised memory for struct validate_negotiate_info_rsp comparisons of Dialect, SecurityMode and/or Capabilities members. Link: https://bugzilla.samba.org/show_bug.cgi?id=13092 Fixes: 7db0a6efdc3e ("SMB3: Work around mount failure when using SMB3 dialect to Macs") Signed-off-by: David Disseldorp Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6b03e2d..ba58af7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -727,7 +727,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if (rsplen > CIFSMaxBufSize) + if ((rsplen > CIFSMaxBufSize) + || (rsplen < sizeof(struct validate_negotiate_info_rsp))) goto err_rsp_free; } -- cgit v1.1 From b4d91aeb6e120b7e2f207021c31b914895c69bc4 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 24 Oct 2017 08:41:01 -0400 Subject: RDMA/netlink: OOPs in rdma_nl_rcv_msg() from misinterpreted flag rdma_nl_rcv_msg() checks to see if it should use the .dump() callback or the .doit() callback. The check is done with this check: if (flags & NLM_F_DUMP) ... The NLM_F_DUMP flag is two bits (NLM_F_ROOT | NLM_F_MATCH). When an RDMA_NL_LS message (response) is received, the bit used for indicating an error is the same bit as NLM_F_ROOT. NLM_F_ROOT == (0x100) == RDMA_NL_LS_F_ERR. ibacm sends a response with the RDMA_NL_LS_F_ERR bit set if an error occurs in the service. The current code then misinterprets the NLM_F_DUMP bit and trys to call the .dump() callback. If the .dump() callback for the specified request is not available (which is true for the RDMA_NL_LS messages) the following Oops occurs: [ 4555.960256] BUG: unable to handle kernel NULL pointer dereference at (null) [ 4555.969046] IP: (null) [ 4555.972664] PGD 10543f1067 P4D 10543f1067 PUD 1033f93067 PMD 0 [ 4555.979287] Oops: 0010 [#1] SMP [ 4555.982809] Modules linked in: rpcrdma ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm dm_mirror dm_region_hash dm_log dm_mod dax sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd hfi1 rdmavt iTCO_wdt iTCO_vendor_support ib_core mei_me lpc_ich pcspkr mei ioatdma sg shpchp i2c_i801 mfd_core wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm igb ahci crc32c_intel ptp libahci pps_core drm dca libata i2c_algo_bit i2c_core [ 4556.061190] CPU: 54 PID: 9841 Comm: ibacm Tainted: G I 4.14.0-rc2+ #6 [ 4556.069667] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015 [ 4556.081339] task: ffff880855f42d00 task.stack: ffffc900246b4000 [ 4556.087967] RIP: 0010: (null) [ 4556.092166] RSP: 0018:ffffc900246b7bc8 EFLAGS: 00010246 [ 4556.098018] RAX: ffffffff81dbe9e0 RBX: ffff881058bb1000 RCX: 0000000000000000 [ 4556.105997] RDX: 0000000000001100 RSI: ffff881058bb1320 RDI: ffff881056362000 [ 4556.113984] RBP: ffffc900246b7bf8 R08: 0000000000000ec0 R09: 0000000000001100 [ 4556.121971] R10: ffff8810573a5000 R11: 0000000000000000 R12: ffff881056362000 [ 4556.129957] R13: 0000000000000ec0 R14: ffff881058bb1320 R15: 0000000000000ec0 [ 4556.137945] FS: 00007fe0ba5a38c0(0000) GS:ffff88105f080000(0000) knlGS:0000000000000000 [ 4556.147000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4556.153433] CR2: 0000000000000000 CR3: 0000001056f5d003 CR4: 00000000001606e0 [ 4556.161419] Call Trace: [ 4556.164167] ? netlink_dump+0x12c/0x290 [ 4556.168468] __netlink_dump_start+0x186/0x1f0 [ 4556.173357] rdma_nl_rcv_msg+0x193/0x1b0 [ib_core] [ 4556.178724] rdma_nl_rcv+0xdc/0x130 [ib_core] [ 4556.183604] netlink_unicast+0x181/0x240 [ 4556.187998] netlink_sendmsg+0x2c2/0x3b0 [ 4556.192392] sock_sendmsg+0x38/0x50 [ 4556.196299] SYSC_sendto+0x102/0x190 [ 4556.200308] ? __audit_syscall_entry+0xaf/0x100 [ 4556.205387] ? syscall_trace_enter+0x1d0/0x2b0 [ 4556.210366] ? __audit_syscall_exit+0x209/0x290 [ 4556.215442] SyS_sendto+0xe/0x10 [ 4556.219060] do_syscall_64+0x67/0x1b0 [ 4556.223165] entry_SYSCALL64_slow_path+0x25/0x25 [ 4556.228328] RIP: 0033:0x7fe0b9db2a63 [ 4556.232333] RSP: 002b:00007ffc55edc260 EFLAGS: 00000293 ORIG_RAX: 000000000000002c [ 4556.240808] RAX: ffffffffffffffda RBX: 0000000000000010 RCX: 00007fe0b9db2a63 [ 4556.248796] RDX: 0000000000000010 RSI: 00007ffc55edc280 RDI: 000000000000000d [ 4556.256782] RBP: 00007ffc55edc670 R08: 00007ffc55edc270 R09: 000000000000000c [ 4556.265321] R10: 0000000000000000 R11: 0000000000000293 R12: 00007ffc55edc280 [ 4556.273846] R13: 000000000260b400 R14: 000000000000000d R15: 0000000000000001 [ 4556.282368] Code: Bad RIP value. [ 4556.286629] RIP: (null) RSP: ffffc900246b7bc8 [ 4556.293013] CR2: 0000000000000000 [ 4556.297292] ---[ end trace 8d67abcfd10ec209 ]--- [ 4556.305465] Kernel panic - not syncing: Fatal exception [ 4556.313786] Kernel Offset: disabled [ 4556.321563] ---[ end Kernel panic - not syncing: Fatal exception [ 4556.328960] ------------[ cut here ]------------ Special case RDMA_NL_LS response messages to call the appropriate callback. Additionally, make sure that the .dump() callback is not NULL before calling it. Fixes: 647c75ac59a48a54 ("RDMA/netlink: Convert LS to doit callback") Reviewed-by: Mike Marciniszyn Reviewed-by: Kaike Wan Reviewed-by: Alex Estrin Signed-off-by: Michael J. Ruhl Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/core/netlink.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index b12e587..1fb72c3 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -175,13 +175,24 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; + /* + * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't + * mistakenly call the .dump() function. + */ + if (index == RDMA_NL_LS) { + if (cb_table[op].doit) + return cb_table[op].doit(skb, nlh, extack); + return -EINVAL; + } /* FIXME: Convert IWCM to properly handle doit callbacks */ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM || index == RDMA_NL_IWCM) { struct netlink_dump_control c = { .dump = cb_table[op].dump, }; - return netlink_dump_start(nls, skb, nlh, &c); + if (c.dump) + return netlink_dump_start(nls, skb, nlh, &c); + return -EINVAL; } if (cb_table[op].doit) -- cgit v1.1 From 0f5da659d8f1810f44de14acf2c80cd6499623a0 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 25 Oct 2017 10:16:42 -0700 Subject: net/unix: don't show information about sockets from other namespaces socket_diag shows information only about sockets from a namespace where a diag socket lives. But if we request information about one unix socket, the kernel don't check that its netns is matched with a diag socket namespace, so any user can get information about any unix socket in a system. This looks like a bug. v2: add a Fixes tag Fixes: 51d7cccf0723 ("net: make sock diag per-namespace") Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- net/unix/diag.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/unix/diag.c b/net/unix/diag.c index 4d96797..384c84e 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, err = -ENOENT; if (sk == NULL) goto out_nosk; + if (!net_eq(sock_net(sk), net)) + goto out; err = sock_diag_check_cookie(sk, req->udiag_cookie); if (err) -- cgit v1.1 From d309ae5c6a00648198d1932e6db483d612c2e260 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Wed, 25 Oct 2017 11:47:05 -0700 Subject: nfp: refuse offloading filters that redirects to upper devices Previously we did not ensure that a netdev is a representative netdev before dereferencing its private data. This can occur when an upper netdev is created on a representative netdev. This patch corrects this by first ensuring that the netdev is a representative netdev before using it. Checking only switchdev_port_same_parent_id is not sufficient to ensure that we can safely use the netdev. Failing to check that the netdev is also a representative netdev would result in incorrect dereferencing. Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads") Signed-off-by: Jakub Kicinski Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index db97506..8ea9320 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -110,6 +110,8 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, */ if (!switchdev_port_same_parent_id(in_dev, out_dev)) return -EOPNOTSUPP; + if (!nfp_netdev_is_nfp_repr(out_dev)) + return -EOPNOTSUPP; output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); if (!output->port) -- cgit v1.1 From 5c25f65fd1e42685f7ccd80e0621829c105785d9 Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Wed, 25 Oct 2017 11:50:50 -0700 Subject: tun: allow positive return values on dev_get_valid_name() call If the name argument of dev_get_valid_name() contains "%d", it will try to assign it a unit number in __dev__alloc_name() and return either the unit number (>= 0) or an error code (< 0). Considering positive values as error values prevent tun device creations relying this mechanism, therefor we should only consider negative values as errors here. Signed-off-by: Julien Gomes Acked-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e21bf90..b9973fb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2028,7 +2028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; err = dev_get_valid_name(net, dev, name); - if (err) + if (err < 0) goto err_free_dev; dev_net_set(dev, net); -- cgit v1.1 From 6377ed0bbae6fa28853e1679d068a9106c8a8908 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 19 Oct 2017 14:14:29 +0300 Subject: net/mlx5: Fix health work queue spin lock to IRQ safe spin_lock/unlock of health->wq_lock should be IRQ safe. It was changed to spin_lock_irqsave since adding commit 0179720d6be2 ("net/mlx5: Introduce trigger_health_work function") which uses spin_lock from asynchronous event (IRQ) context. Thus, all spin_lock/unlock of health->wq_lock should have been moved to IRQ safe mode. However, one occurrence on new code using this lock missed that change, resulting in possible deadlock: kernel: Possible unsafe locking scenario: kernel: CPU0 kernel: ---- kernel: lock(&(&health->wq_lock)->rlock); kernel: kernel: lock(&(&health->wq_lock)->rlock); kernel: #012 *** DEADLOCK *** Fixes: 2a0165a034ac ("net/mlx5: Cancel delayed recovery work when unloading the driver") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8aea0a0..db86e15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -356,10 +356,11 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; - spin_lock(&health->wq_lock); + spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock(&health->wq_lock); + spin_unlock_irqrestore(&health->wq_lock, flags); cancel_delayed_work_sync(&dev->priv.health.recover_work); } -- cgit v1.1 From 4ca637a20a524cd8ddbca696f12bfa92111c96e3 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 4 Oct 2017 17:58:21 -0500 Subject: net/mlx5: Delay events till mlx5 interface's add complete for pci resume mlx5_ib_add is called during mlx5_pci_resume after a pci error. Before mlx5_ib_add completes, there are multiple events which trigger function mlx5_ib_event. This cause kernel panic because mlx5_ib_event accesses unitialized resources. The fix is to extend Erez Shitrit's patch <97834eba7c19> ("net/mlx5: Delay events till ib registration ends") to cover the pci resume code path. Trace: mlx5_core 0001:01:00.6: mlx5_pci_resume was called mlx5_core 0001:01:00.6: firmware version: 16.20.1011 mlx5_core 0001:01:00.6: mlx5_attach_interface:164:(pid 779): mlx5_ib_event:2996:(pid 34777): warning: event on port 1 mlx5_ib_event:2996:(pid 34782): warning: event on port 1 Unable to handle kernel paging request for data at address 0x0001c104 Faulting instruction address: 0xd000000008f411fc Oops: Kernel access of bad area, sig: 11 [#1] ... ... Call Trace: [c000000fff77bb70] [d000000008f4119c] mlx5_ib_event+0x64/0x470 [mlx5_ib] (unreliable) [c000000fff77bc60] [d000000008e67130] mlx5_core_event+0xb8/0x210 [mlx5_core] [c000000fff77bd10] [d000000008e4bd00] mlx5_eq_int+0x528/0x860[mlx5_core] Fixes: 97834eba7c19 ("net/mlx5: Delay events till ib registration ends") Signed-off-by: Huy Nguyen Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 70 ++++++++++++++++----------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ff60cf7..fc28171 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -77,35 +77,41 @@ static void add_delayed_event(struct mlx5_priv *priv, list_add_tail(&delayed_event->list, &priv->waiting_events_list); } -static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx, - struct mlx5_core_dev *dev, - struct mlx5_priv *priv) +static void delayed_event_release(struct mlx5_device_context *dev_ctx, + struct mlx5_priv *priv) { + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); struct mlx5_delayed_event *de; struct mlx5_delayed_event *n; + struct list_head temp; - /* stop delaying events */ - priv->is_accum_events = false; + INIT_LIST_HEAD(&temp); + + spin_lock_irq(&priv->ctx_lock); - /* fire all accumulated events before new event comes */ - list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { + priv->is_accum_events = false; + list_splice_init(&priv->waiting_events_list, &temp); + if (!dev_ctx->context) + goto out; + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); + +out: + spin_unlock_irq(&priv->ctx_lock); + + list_for_each_entry_safe(de, n, &temp, list) { list_del(&de->list); kfree(de); } } -static void cleanup_delayed_evets(struct mlx5_priv *priv) +/* accumulating events that can come after mlx5_ib calls to + * ib_register_device, till adding that interface to the events list. + */ +static void delayed_event_start(struct mlx5_priv *priv) { - struct mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = false; - list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { - list_del(&de->list); - kfree(de); - } + priv->is_accum_events = true; spin_unlock_irq(&priv->ctx_lock); } @@ -122,11 +128,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) return; dev_ctx->intf = intf; - /* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. - */ - priv->is_accum_events = true; + delayed_event_start(priv); dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -137,8 +140,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - fire_delayed_event_locked(dev_ctx, dev, priv); - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (dev_ctx->intf->pfault) { if (priv->pfault) { @@ -150,11 +151,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) } #endif spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - /* delete all accumulated events */ - cleanup_delayed_evets(priv); } + + delayed_event_release(dev_ctx, priv); + + if (!dev_ctx->context) + kfree(dev_ctx); } static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, @@ -205,17 +207,21 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; + delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; + goto out; intf->attach(dev, dev_ctx->context); set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; + goto out; dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } + +out: + delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -414,8 +420,14 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, if (priv->is_accum_events) add_delayed_event(priv, dev, event, param); + /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is + * still in priv->ctx_list. In this case, only notify the dev_ctx if its + * ADDED or ATTACHED bit are set. + */ list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) + if (dev_ctx->intf->event && + (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || + test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); -- cgit v1.1 From 3c37745ec614ff048d5dce38f976804b05d307ee Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 17 Oct 2017 12:33:43 +0200 Subject: net/mlx5e: Properly deal with encap flows add/del under neigh update Currently, the encap action offload is handled in the actions parse function and not in mlx5e_tc_add_fdb_flow() where we deal with all the other aspects of offloading actions (vlan, modify header) and the rule itself. When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the encap entry and offloads the related flows, we wrongly call again into mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle again the offloading of vlans and header re-write which puts things in non consistent state and step on freed memory (e.g the modify header parse buffer which is already freed). Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the encap entry, it causes a corruption at the neigh update code which goes over the list of flows associated with this encap entry, or double free when the tc flow is later deleted by user-space. When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related to an encap entry which is now invalid, we do a partial repeat of the eswitch flow removal code which is wrong too. To fix things up we do the following: (1) handle the encap action offload in the eswitch flow add function mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself. (2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only deal with the encap entry and rules delete/add and not with any of the other offloaded actions. Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 89 +++++++++++++++---------- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1aa2028..9ba1f720 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -78,9 +78,11 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { + struct ip_tunnel_info tun_info; struct mlx5_flow_spec spec; int num_mod_hdr_actions; void *mod_hdr_actions; + int mirred_ifindex; }; enum { @@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5_flow_handle *rule; + struct net_device *out_dev, *encap_dev = NULL; + struct mlx5_flow_handle *rule = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; int err; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + out_dev = __dev_get_by_index(dev_net(priv->netdev), + attr->parse_attr->mirred_ifindex); + err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow); + if (err) { + rule = ERR_PTR(err); + if (err != -EAGAIN) + goto err_attach_encap; + } + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + } + err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) { rule = ERR_PTR(err); @@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } } - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - + /* we get here if (1) there's no error (rule being null) or when + * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + */ + if (rule != ERR_PTR(-EAGAIN)) { + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + } return rule; err_add_rule: @@ -361,6 +391,7 @@ err_mod_hdr: err_add_vlan: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) mlx5e_detach_encap(priv, flow); +err_attach_encap: return rule; } @@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_tc_flow *flow; int err; @@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, mlx5e_rep_queue_neigh_stats_work(priv); list_for_each_entry(flow, &e->flows, encap) { - flow->esw_attr->encap_id = e->encap_id; - flow->rule = mlx5e_tc_add_fdb_flow(priv, - flow->esw_attr->parse_attr, - flow); + esw_attr = flow->esw_attr; + esw_attr->encap_id = e->encap_id; + flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", @@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow *flow; - struct mlx5_fc *counter; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); } } @@ -1942,7 +1972,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev, *encap_dev = NULL; + struct net_device *out_dev; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1955,17 +1985,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, rpriv = out_priv->ppriv; attr->out_rep = rpriv->rep; } else if (encap) { - err = mlx5e_attach_encap(priv, info, - out_dev, &encap_dev, flow); - if (err && err != -EAGAIN) - return err; + parse_attr->mirred_ifindex = ifindex; + parse_attr->tun_info = *info; + attr->parse_attr = parse_attr; attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - attr->out_rep = rpriv->rep; - attr->parse_attr = parse_attr; + /* attr->out_rep is resolved when we handle encap */ } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -2047,7 +2073,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_handle_encap_flow; + goto err_free; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); @@ -2058,10 +2084,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_free; + if (err != -EAGAIN) + goto err_free; } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + if (err != -EAGAIN) + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) @@ -2075,16 +2104,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, err_del_rule: mlx5e_tc_del_flow(priv, flow); -err_handle_encap_flow: - if (err == -EAGAIN) { - err = rhashtable_insert_fast(&tc->ht, &flow->node, - tc->ht_params); - if (err) - mlx5e_tc_del_flow(priv, flow); - else - return 0; - } - err_free: kvfree(parse_attr); kfree(flow); -- cgit v1.1 From be0f161ef141e4df368aa3f417a1c2ab9c362e75 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 28 Sep 2017 15:33:50 -0500 Subject: net/mlx5e: DCBNL, Implement tc with ets type and zero bandwidth Previously, tc with ets type and zero bandwidth is not accepted by driver. This behavior does not follow the IEEE802.1qaz spec. If there are tcs with ets type and zero bandwidth, these tcs are assigned to the lowest priority tc_group #0. We equally distribute 100% bw of the tc_group #0 to these zero bandwidth ets tcs. Also, the non zero bandwidth ets tcs are assigned to tc_group #1. If there is no zero bandwidth ets tc, the non zero bandwidth ets tcs are assigned to tc_group #0. Fixes: cdcf11212b22 ("net/mlx5e: Validate BW weight values of ETS") Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 113 +++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/port.c | 21 ++++ include/linux/mlx5/port.h | 2 + 3 files changed, 106 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index c1d384f..51c4cc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -41,6 +41,11 @@ #define MLX5E_CEE_STATE_UP 1 #define MLX5E_CEE_STATE_DOWN 0 +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_LOWEST_PRIO_GROUP = 0, +}; + /* If dcbx mode is non-host set the dcbx mode to host. */ static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, @@ -85,6 +90,9 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + bool is_tc_group_6_exist = false; + bool is_zero_bw_ets_tc = false; int err = 0; int i; @@ -96,37 +104,64 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); if (err) return err; - } - for (i = 0; i < ets->ets_cap; i++) { + err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); + if (err) + return err; + err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]); if (err) return err; + + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC && + tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1)) + is_zero_bw_ets_tc = true; + + if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1)) + is_tc_group_6_exist = true; + } + + /* Report 0% ets tc if exits*/ + if (is_zero_bw_ets_tc) { + for (i = 0; i < ets->ets_cap; i++) + if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP) + ets->tc_tx_bw[i] = 0; + } + + /* Update tc_tsa based on fw setting*/ + for (i = 0; i < ets->ets_cap; i++) { if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC) priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM && + !is_tc_group_6_exist) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; } - memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa)); return err; } -enum { - MLX5E_VENDOR_TC_GROUP_NUM = 7, - MLX5E_ETS_TC_GROUP_NUM = 0, -}; - static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) { bool any_tc_mapped_to_ets = false; + bool ets_zero_bw = false; int strict_group; int i; - for (i = 0; i <= max_tc; i++) - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { any_tc_mapped_to_ets = true; + if (!ets->tc_tx_bw[i]) + ets_zero_bw = true; + } + } - strict_group = any_tc_mapped_to_ets ? 1 : 0; + /* strict group has higher priority than ets group */ + strict_group = MLX5E_LOWEST_PRIO_GROUP; + if (any_tc_mapped_to_ets) + strict_group++; + if (ets_zero_bw) + strict_group++; for (i = 0; i <= max_tc; i++) { switch (ets->tc_tsa[i]) { @@ -137,7 +172,9 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) tc_group[i] = strict_group++; break; case IEEE_8021QAZ_TSA_ETS: - tc_group[i] = MLX5E_ETS_TC_GROUP_NUM; + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP; + if (ets->tc_tx_bw[i] && ets_zero_bw) + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1; break; } } @@ -146,9 +183,23 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, u8 *tc_group, int max_tc) { + int bw_for_ets_zero_bw_tc = 0; + int last_ets_zero_bw_tc = -1; + int num_ets_zero_bw = 0; int i; for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS && + !ets->tc_tx_bw[i]) { + num_ets_zero_bw++; + last_ets_zero_bw_tc = i; + } + } + + if (num_ets_zero_bw) + bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw; + + for (i = 0; i <= max_tc; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_VENDOR: tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; @@ -157,12 +208,26 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i]; + tc_tx_bw[i] = ets->tc_tx_bw[i] ? + ets->tc_tx_bw[i] : + bw_for_ets_zero_bw_tc; break; } } + + /* Make sure the total bw for ets zero bw group is 100% */ + if (last_ets_zero_bw_tc != -1) + tc_tx_bw[last_ets_zero_bw_tc] += + MLX5E_MAX_BW_ALLOC % num_ets_zero_bw; } +/* If there are ETS BW 0, + * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%. + * Set group #0 to all the ETS BW 0 tcs and + * equally splits the 100% BW between them + * Report both group #0 and #1 as ETS type. + * All the tcs in group #0 will be reported with 0% BW. + */ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) { struct mlx5_core_dev *mdev = priv->mdev; @@ -188,7 +253,6 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return err; memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); - return err; } @@ -209,17 +273,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } /* Validate Bandwidth Sum */ - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { - if (!ets->tc_tx_bw[i]) { - netdev_err(netdev, - "Failed to validate ETS: BW 0 is illegal\n"); - return -EINVAL; - } - + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) bw_sum += ets->tc_tx_bw[i]; - } - } if (bw_sum != 0 && bw_sum != 100) { netdev_err(netdev, @@ -533,8 +589,7 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, int pgid, u8 *bw_pct) { - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; + struct ieee_ets ets; if (pgid >= CEE_DCBX_MAX_PGS) { netdev_err(netdev, @@ -542,8 +597,8 @@ static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, return; } - if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct)) - *bw_pct = 0; + mlx5e_dcbnl_ieee_getets(netdev, &ets); + *bw_pct = ets.tc_tx_bw[pgid]; } static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev, @@ -739,8 +794,6 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) ets.prio_tc[i] = i; } - memcpy(priv->dcbx.tc_tsa, ets.tc_tsa, sizeof(ets.tc_tsa)); - /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ ets.prio_tc[0] = 1; ets.prio_tc[1] = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1975d43..e07061f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -677,6 +677,27 @@ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) } EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + group); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); + int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index c57d4b7..c59af8a 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -157,6 +157,8 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, u8 prio, u8 *tc); int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 tc, u8 *bw_pct); -- cgit v1.1 From ef4816f0ee576d4a27ed35cd1090904121391cb9 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 24 Oct 2017 11:41:26 +0200 Subject: net: mvpp2: fix typo in the tcam setup This patch fixes a typo in the mvpp2_prs_tcam_data_cmp() function, as the shift value is inverted with the data. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 28c6e8a..0b20171 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1539,7 +1539,7 @@ static bool mvpp2_prs_tcam_data_cmp(struct mvpp2_prs_entry *pe, int offs, int off = MVPP2_PRS_TCAM_DATA_BYTE(offs); u16 tcam_data; - tcam_data = (8 << pe->tcam.byte[off + 1]) | pe->tcam.byte[off]; + tcam_data = (pe->tcam.byte[off + 1] << 8) | pe->tcam.byte[off]; if (tcam_data != data) return false; return true; -- cgit v1.1 From 20746d717ea390ac6ac3aa531f27ac156bf2e747 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 24 Oct 2017 11:41:27 +0200 Subject: net: mvpp2: fix invalid parameters order when calling the tcam init When calling mvpp2_prs_mac_multi_set() from mvpp2_prs_mac_init(), two parameters (the port index and the table index) are inverted. Fixes this. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 0b20171..c8ce652 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -2614,8 +2614,8 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) /* place holders only - no ports */ mvpp2_prs_mac_drop_all_set(priv, 0, false); mvpp2_prs_mac_promisc_set(priv, 0, false); - mvpp2_prs_mac_multi_set(priv, MVPP2_PE_MAC_MC_ALL, 0, false); - mvpp2_prs_mac_multi_set(priv, MVPP2_PE_MAC_MC_IP6, 0, false); + mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false); + mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false); } /* Set default entries for various types of dsa packets */ -- cgit v1.1 From 239dd4ee4838523419ad16e05b16a2003b71d317 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 24 Oct 2017 11:41:28 +0200 Subject: net: mvpp2: do not sleep in set_rx_mode This patch replaces GFP_KERNEL by GFP_ATOMIC to avoid sleeping in the ndo_set_rx_mode() call which is called with BH disabled. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index c8ce652..a37af58 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -3396,7 +3396,7 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da, struct mvpp2_prs_entry *pe; int tid; - pe = kzalloc(sizeof(*pe), GFP_KERNEL); + pe = kzalloc(sizeof(*pe), GFP_ATOMIC); if (!pe) return NULL; mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC); @@ -3458,7 +3458,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port, if (tid < 0) return tid; - pe = kzalloc(sizeof(*pe), GFP_KERNEL); + pe = kzalloc(sizeof(*pe), GFP_ATOMIC); if (!pe) return -ENOMEM; mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC); -- cgit v1.1 From e9a0b99804ff662d02b78a556a84e22308066fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Tue, 24 Oct 2017 18:17:18 +0200 Subject: rds: ib: Fix uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit send_flags needs to be initialized before calling rds_ib_set_wr_signal_state(). Signed-off-by: Håkon Bugge Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_send.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 6ab39db..8f46755 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -792,6 +792,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; send->s_atomic_wr.swap_mask = 0; } + send->s_wr.send_flags = 0; nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_atomic_wr.wr.num_sge = 1; send->s_atomic_wr.wr.next = NULL; -- cgit v1.1 From a0c0865fa0abcbc142c11fabec3a2bffc1a4229d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20Bugge?= Date: Tue, 24 Oct 2017 16:16:28 +0200 Subject: rds: Fix inaccurate accounting of unsignaled wrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of unsignaled work-requests posted to the IB send queue is tracked by a counter in the rds_ib_connection struct. When it reaches zero, or the caller explicitly asks for it, the send-signaled bit is set in send_flags and the counter is reset. This is performed by the rds_ib_set_wr_signal_state() function. However, this function is not always used which yields inaccurate accounting. This commit fixes this, re-factors a code bloat related to the matter, and makes the actual parameter type to the function consistent. Signed-off-by: Håkon Bugge Signed-off-by: David S. Miller --- net/rds/ib_send.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 8f46755..8557a1c 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, } } - rds_ib_set_wr_signal_state(ic, send, 0); + rds_ib_set_wr_signal_state(ic, send, false); /* * Always signal the last one if we're stopping due to flow control. */ - if (ic->i_flowctl && flow_controlled && i == (work_alloc-1)) - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) { + rds_ib_set_wr_signal_state(ic, send, true); + send->s_wr.send_flags |= IB_SEND_SOLICITED; + } if (send->s_wr.send_flags & IB_SEND_SIGNALED) nr_sig++; @@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_op = ic->i_data_op; prev->s_wr.send_flags |= IB_SEND_SOLICITED; - if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) { - ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; - prev->s_wr.send_flags |= IB_SEND_SIGNALED; - nr_sig++; - } + if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) + nr_sig += rds_ib_set_wr_signal_state(ic, prev, true); ic->i_data_op = NULL; } -- cgit v1.1 From 06f877d613be3621604c2520ec0351d9fbdca15f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Oct 2017 08:20:31 -0700 Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my first attempt to fix the lockdep splat, I forgot we could enter inet_csk_route_req() with a freshly allocated request socket, for which refcount has not yet been elevated, due to complex SLAB_TYPESAFE_BY_RCU rules. We either are in rcu_read_lock() section _or_ we own a refcount on the request. Correct RCU verb to use here is rcu_dereference_check(), although it is not possible to prove we actually own a reference on a shared refcount :/ In v2, I added ireq_opt_deref() helper and use in three places, to fix other possible splats. [ 49.844590] lockdep_rcu_suspicious+0xea/0xf3 [ 49.846487] inet_csk_route_req+0x53/0x14d [ 49.848334] tcp_v4_route_req+0xe/0x10 [ 49.850174] tcp_conn_request+0x31c/0x6a0 [ 49.851992] ? __lock_acquire+0x614/0x822 [ 49.854015] tcp_v4_conn_request+0x5a/0x79 [ 49.855957] ? tcp_v4_conn_request+0x5a/0x79 [ 49.858052] tcp_rcv_state_process+0x98/0xdcc [ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307 [ 49.862085] tcp_v4_do_rcv+0xfc/0x145 [ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145 [ 49.866173] tcp_v4_rcv+0x5ab/0xaf9 [ 49.868029] ip_local_deliver_finish+0x1af/0x2e7 [ 49.870064] ip_local_deliver+0x1b2/0x1c5 [ 49.871775] ? inet_del_offload+0x45/0x45 [ 49.873916] ip_rcv_finish+0x3f7/0x471 [ 49.875476] ip_rcv+0x3f1/0x42f [ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7 [ 49.878791] __netif_receive_skb_core+0x6d3/0x950 [ 49.880701] ? process_backlog+0x7e/0x216 [ 49.882589] __netif_receive_skb+0x1d/0x5e [ 49.884122] process_backlog+0x10c/0x216 [ 49.885812] net_rx_action+0x147/0x3df Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()") Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") Signed-off-by: Eric Dumazet Reported-by: kernel test robot Reported-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/inet_sock.h | 6 ++++++ net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 425752f..db8162d 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -132,6 +132,12 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } +static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +{ + return rcu_dereference_check(ireq->ireq_opt, + refcount_read(&ireq->req.rsk_refcnt) > 0); +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0490916..e65fcb4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -495,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req ireq->ir_rmt_addr); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - rcu_dereference(ireq->ireq_opt)); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 18cd2ea..b47a59c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -543,8 +543,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = rcu_dereference_protected(ireq->ireq_opt, - refcount_read(&req->rsk_refcnt) > 0); + opt = ireq_opt_deref(ireq); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c43365..5b027c6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - rcu_dereference(ireq->ireq_opt)); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } -- cgit v1.1 From 5889e2c0e441d84060e66211ed5c4517ca591167 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Tue, 24 Oct 2017 16:44:42 -0700 Subject: tcp: call tcp_rate_skb_sent() when retransmit with unaligned skb->data Current implementation calls tcp_rate_skb_sent() when tcp_transmit_skb() is called when it clones skb only. Not calling tcp_rate_skb_sent() is OK for all such code paths except from __tcp_retransmit_skb() which happens when skb->data address is not aligned. This may rarely happen e.g. when small amount of data is sent initially and the receiver partially acks odd number of bytes for some reason, possibly malicious. Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 973befc..1151870 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2843,8 +2843,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; - if (!err) + if (!err) { skb->skb_mstamp = tp->tcp_mstamp; + tcp_rate_skb_sent(sk, skb); + } } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } -- cgit v1.1 From 78e0ea6791d7baafb8a0ca82b1bd0c7b3453c919 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Wed, 25 Oct 2017 00:23:04 -0700 Subject: tap: double-free in error path in tap_open() Double free of skb_array in tap module is causing kernel panic. When tap_set_queue() fails we free skb_array right away by calling skb_array_cleanup(). However, later on skb_array_cleanup() is called again by tap_sock_destruct through sock_put(). This patch fixes that issue. Fixes: 362899b8725b35e3 (macvtap: switch to use skb array) Signed-off-by: Girish Moodalbail Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tap.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 21b71ae..98ee6cc 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -517,6 +517,10 @@ static int tap_open(struct inode *inode, struct file *file) &tap_proto, 0); if (!q) goto err; + if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { + sk_free(&q->sk); + goto err; + } RCU_INIT_POINTER(q->sock.wq, &q->wq); init_waitqueue_head(&q->wq.wait); @@ -540,22 +544,18 @@ static int tap_open(struct inode *inode, struct file *file) if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) sock_set_flag(&q->sk, SOCK_ZEROCOPY); - err = -ENOMEM; - if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) - goto err_array; - err = tap_set_queue(tap, file, q); - if (err) - goto err_queue; + if (err) { + /* tap_sock_destruct() will take care of freeing skb_array */ + goto err_put; + } dev_put(tap->dev); rtnl_unlock(); return err; -err_queue: - skb_array_cleanup(&q->skb_array); -err_array: +err_put: sock_put(&q->sk); err: if (tap) -- cgit v1.1 From 5266b8e4445cc836c46689d80a9ff539fa3bfbda Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 26 Oct 2017 11:50:56 +0200 Subject: xen: fix booting ballooned down hvm guest Commit 96edd61dcf44362d3ef0bed1a5361e0ac7886a63 ("xen/balloon: don't online new memory initially") introduced a regression when booting a HVM domain with memory less than mem-max: instead of ballooning down immediately the system would try to use the memory up to mem-max resulting in Xen crashing the domain. For HVM domains the current size will be reflected in Xenstore node memory/static-max instead of memory/target. Additionally we have to trigger the ballooning process at once. Cc: # 4.13 Fixes: 96edd61dcf44362d3ef0bed1a5361e0ac7886a63 ("xen/balloon: don't online new memory initially") Reported-by: Simon Gaiser Suggested-by: Boris Ostrovsky Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-balloon.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index e89136a..b437fcc 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -57,7 +57,7 @@ static int register_balloon(struct device *dev); static void watch_target(struct xenbus_watch *watch, const char *path, const char *token) { - unsigned long long new_target; + unsigned long long new_target, static_max; int err; static bool watch_fired; static long target_diff; @@ -72,13 +72,20 @@ static void watch_target(struct xenbus_watch *watch, * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ new_target >>= PAGE_SHIFT - 10; - if (watch_fired) { - balloon_set_new_target(new_target - target_diff); - return; + + if (!watch_fired) { + watch_fired = true; + err = xenbus_scanf(XBT_NIL, "memory", "static-max", "%llu", + &static_max); + if (err != 1) + static_max = new_target; + else + static_max >>= PAGE_SHIFT - 10; + target_diff = xen_pv_domain() ? 0 + : static_max - balloon_stats.target_pages; } - watch_fired = true; - target_diff = new_target - balloon_stats.target_pages; + balloon_set_new_target(new_target - target_diff); } static struct xenbus_watch target_watch = { .node = "memory/target", -- cgit v1.1 From 44c445c3d1b4eacff23141fa7977c3b2ec3a45c9 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Sat, 16 Sep 2017 18:00:00 +0200 Subject: e1000: fix race condition between e1000_down() and e1000_watchdog This patch fixes a race condition that can result into the interface being up and carrier on, but with transmits disabled in the hardware. The bug may show up by repeatedly IFF_DOWN+IFF_UP the interface, which allows e1000_watchdog() interleave with e1000_down(). CPU x CPU y -------------------------------------------------------------------- e1000_down(): netif_carrier_off() e1000_watchdog(): if (carrier == off) { netif_carrier_on(); enable_hw_transmit(); } disable_hw_transmit(); e1000_watchdog(): /* carrier on, do nothing */ Signed-off-by: Vincenzo Maffione Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 98375e1..1982f79 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -520,8 +520,6 @@ void e1000_down(struct e1000_adapter *adapter) struct net_device *netdev = adapter->netdev; u32 rctl, tctl; - netif_carrier_off(netdev); - /* disable receives in the hardware */ rctl = er32(RCTL); ew32(RCTL, rctl & ~E1000_RCTL_EN); @@ -537,6 +535,15 @@ void e1000_down(struct e1000_adapter *adapter) E1000_WRITE_FLUSH(); msleep(10); + /* Set the carrier off after transmits have been disabled in the + * hardware, to avoid race conditions with e1000_watchdog() (which + * may be running concurrently to us, checking for the carrier + * bit to decide whether it should enable transmits again). Such + * a race condition would result into transmission being disabled + * in the hardware until the next IFF_DOWN+IFF_UP cycle. + */ + netif_carrier_off(netdev); + napi_disable(&adapter->napi); e1000_irq_disable(adapter); -- cgit v1.1 From 5983587c8c5ef00d6886477544ad67d495bc5479 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 18:13:48 +0100 Subject: e1000: avoid null pointer dereference on invalid stat type Currently if the stat type is invalid then data[i] is being set either by dereferencing a null pointer p, or it is reading from an incorrect previous location if we had a valid stat type previously. Fix this by skipping over the read of p on an invalid stat type. Detected by CoverityScan, CID#113385 ("Explicit null dereferenced") Signed-off-by: Colin Ian King Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index ec8aa45..3b3983a 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1824,11 +1824,12 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); int i; - char *p = NULL; const struct e1000_stats *stat = e1000_gstrings_stats; e1000_update_stats(adapter); - for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { + for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++, stat++) { + char *p; + switch (stat->type) { case NETDEV_STATS: p = (char *)netdev + stat->stat_offset; @@ -1839,15 +1840,13 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, default: WARN_ONCE(1, "Invalid E1000 stat type: %u index %d\n", stat->type, i); - break; + continue; } if (stat->sizeof_stat == sizeof(u64)) data[i] = *(u64 *)p; else data[i] = *(u32 *)p; - - stat++; } /* BUG_ON(i != E1000_STATS_LEN); */ } -- cgit v1.1 From 104ba83363d1d42af62abb247f1426c09a80fced Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 19 Oct 2017 20:07:36 +0100 Subject: igb: Fix TX map failure path When the driver cannot map a TX buffer, instead of rolling back gracefully and retrying later, we currently get a panic: [ 159.885994] igb 0000:00:00.0: TX DMA map failed [ 159.886588] Unable to handle kernel paging request at virtual address ffff00000a08c7a8 ... [ 159.897031] PC is at igb_xmit_frame_ring+0x9c8/0xcb8 Fix the erroneous test that leads to this situation. Signed-off-by: Jean-Philippe Brucker Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd4a46b..ea69af2 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5326,7 +5326,7 @@ dma_error: DMA_TO_DEVICE); dma_unmap_len_set(tx_buffer, len, 0); - if (i--) + if (i-- == 0) i += tx_ring->count; tx_buffer = &tx_ring->tx_buffer_info[i]; } -- cgit v1.1 From 069db9cd0bbde92d3aa947ed86a09cbd4ceb5f67 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Oct 2017 17:07:13 -0400 Subject: ixgbe: Fix Tx map failure path This patch is a partial revert of "ixgbe: Don't bother clearing buffer memory for descriptor rings". Specifically I messed up the exception handling path a bit and this resulted in us incorrectly adding the count back in when we didn't need to. In order to make this simpler I am reverting most of the exception handling path change and instead just replacing the bit that was handled by the unmap_and_free call. Fixes: ffed21bcee7a ("ixgbe: Don't bother clearing buffer memory for descriptor rings") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4d76afd..6d5f31e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8020,29 +8020,23 @@ static int ixgbe_tx_map(struct ixgbe_ring *tx_ring, return 0; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); - tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ - while (tx_buffer != first) { + for (;;) { + tx_buffer = &tx_ring->tx_buffer_info[i]; if (dma_unmap_len(tx_buffer, len)) dma_unmap_page(tx_ring->dev, dma_unmap_addr(tx_buffer, dma), dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); dma_unmap_len_set(tx_buffer, len, 0); - - if (i--) + if (tx_buffer == first) + break; + if (i == 0) i += tx_ring->count; - tx_buffer = &tx_ring->tx_buffer_info[i]; + i--; } - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); - dev_kfree_skb_any(first->skb); first->skb = NULL; -- cgit v1.1 From 10781348cadebbd5291c8fb193e850365c914da8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 20 Oct 2017 13:59:20 -0700 Subject: i40e: Fix incorrect use of tx_itr_setting when checking for Rx ITR setup It looks like there was either a copy/paste error or just a typo that resulted in the Tx ITR setting being used to determine if we were using adaptive Rx interrupt moderation or not. This patch fixes the typo. Fixes: 65e87c0398f5 ("i40evf: support queue-specific settings for interrupt moderation") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2756131..ab142e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2269,7 +2269,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, goto enable_int; } - if (ITR_IS_DYNAMIC(tx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } -- cgit v1.1 From 62b4c6694dfd3821bd5ea5bed48238bbabd5fe8b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 21 Oct 2017 18:12:29 -0700 Subject: i40e: Add programming descriptors to cleaned_count This patch updates the i40e driver to include programming descriptors in the cleaned_count. Without this change it becomes possible for us to leak memory as we don't trigger a large enough allocation when the time comes to allocate new buffers and we end up overwriting a number of rx_buffers equal to the number of programming descriptors we encountered. Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status") Signed-off-by: Alexander Duyck Tested-by: Anders K. Pedersen Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ab142e0..120c68f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2102,6 +2102,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (unlikely(i40e_rx_is_programming_status(qword))) { i40e_clean_programming_status(rx_ring, rx_desc, qword); + cleaned_count++; continue; } size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> -- cgit v1.1 From 814eae5982cc55988f642f7f1d10eaf340845c00 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 26 Oct 2017 15:54:15 +0100 Subject: alpha/PCI: Move pci_map_irq()/pci_swizzle() out of initdata The introduction of {map/swizzle}_irq() hooks in the struct pci_host_bridge allowed to replace the pci_fixup_irqs() PCI IRQ allocation in alpha arch PCI code with per-bridge map/swizzle functions with commit 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks"). As a side effect of converting PCI IRQ allocation to the struct pci_host_bridge {map/swizzle}_irq() hooks mechanism, the actual PCI IRQ allocation function (ie pci_assign_irq()) is carried out per-device in pci_device_probe() that is called when a PCI device driver is about to be probed. This means that, for drivers compiled as loadable modules, the actual PCI device IRQ allocation can now happen after the system has booted so the struct pci_host_bridge {map/swizzle}_irq() hooks pci_assign_irq() relies on must stay valid after the system has booted so that PCI core can carry out PCI IRQ allocation correctly. Most of the alpha board structures pci_map_irq() and pci_swizzle() hooks (that are used to initialize their struct pci_host_bridge equivalent through the alpha_mv global variable - that represents the struct alpha_machine_vector of the running kernel) are marked as __init/__initdata; this causes freed memory dereferences when PCI IRQ allocation is carried out after the kernel has booted (ie when loading PCI drivers as loadable module) because when the kernel tries to bind the PCI device to its (module) driver, the function pci_assign_irq() is called, that in turn retrieves the struct pci_host_bridge {map/swizzle}_irq() hooks to carry out PCI IRQ allocation; if those hooks are marked as __init code/__initdata they point at freed/invalid memory. Fix the issue by removing the __init/__initdata markers from all subarch struct alpha_machine_vector.pci_map_irq()/pci_swizzle() functions (and data). Fixes: 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Link: http://lkml.kernel.org/r/alpine.LRH.2.21.1710251043170.7098@math.ut.ee Reported-by: Meelis Roos Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Meelis Roos Cc: Matt Turner --- arch/alpha/kernel/sys_alcor.c | 4 ++-- arch/alpha/kernel/sys_cabriolet.c | 12 ++++++------ arch/alpha/kernel/sys_dp264.c | 20 ++++++++++---------- arch/alpha/kernel/sys_eb64p.c | 4 ++-- arch/alpha/kernel/sys_eiger.c | 4 ++-- arch/alpha/kernel/sys_miata.c | 6 +++--- arch/alpha/kernel/sys_mikasa.c | 4 ++-- arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/kernel/sys_noritake.c | 6 +++--- arch/alpha/kernel/sys_rawhide.c | 4 ++-- arch/alpha/kernel/sys_ruffian.c | 6 +++--- arch/alpha/kernel/sys_rx164.c | 4 ++-- arch/alpha/kernel/sys_sable.c | 10 +++++----- arch/alpha/kernel/sys_sio.c | 8 ++++---- arch/alpha/kernel/sys_sx164.c | 4 ++-- arch/alpha/kernel/sys_takara.c | 6 +++--- arch/alpha/kernel/sys_wildfire.c | 4 ++-- 17 files changed, 54 insertions(+), 54 deletions(-) diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 118dc6a..7ad074f 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -181,10 +181,10 @@ alcor_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int alcor_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[7][5] __initdata = { + static char irq_tab[7][5] = { /*INT INTA INTB INTC INTD */ /* note: IDSEL 17 is XLT only */ {16+13, 16+13, 16+13, 16+13, 16+13}, /* IdSel 17, TULIP */ diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index 4c50f8f4..c0fa1fe 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -173,10 +173,10 @@ pc164_init_irq(void) * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. */ -static inline int __init +static inline int eb66p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ {16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J25 */ {16+1, 16+1, 16+6, 16+10, 16+14}, /* IdSel 7, slot 1, J26 */ @@ -203,10 +203,10 @@ eb66p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. */ -static inline int __init +static inline int cabriolet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ { 16+2, 16+2, 16+7, 16+11, 16+15}, /* IdSel 5, slot 2, J21 */ { 16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J19 */ @@ -287,10 +287,10 @@ cia_cab_init_pci(void) * */ -static inline int __init +static inline int alphapc164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[7][5] __initdata = { + static char irq_tab[7][5] = { /*INT INTA INTB INTC INTD */ { 16+2, 16+2, 16+9, 16+13, 16+17}, /* IdSel 5, slot 2, J20 */ { 16+0, 16+0, 16+7, 16+11, 16+15}, /* IdSel 6, slot 0, J29 */ diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 6c35159..9e1e40e 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -356,7 +356,7 @@ clipper_init_irq(void) * 10 64 bit PCI option slot 3 (not bus 0) */ -static int __init +static int isa_irq_fixup(const struct pci_dev *dev, int irq) { u8 irq8; @@ -372,10 +372,10 @@ isa_irq_fixup(const struct pci_dev *dev, int irq) return irq8 & 0xf; } -static int __init +static int dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[6][5] __initdata = { + static char irq_tab[6][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 5 ISA Bridge */ { 16+ 3, 16+ 3, 16+ 2, 16+ 2, 16+ 2}, /* IdSel 6 SCSI builtin*/ @@ -394,10 +394,10 @@ dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return isa_irq_fixup(dev, irq); } -static int __init +static int monet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[13][5] __initdata = { + static char irq_tab[13][5] = { /*INT INTA INTB INTC INTD */ { 45, 45, 45, 45, 45}, /* IdSel 3 21143 PCI1 */ { -1, -1, -1, -1, -1}, /* IdSel 4 unused */ @@ -423,7 +423,7 @@ monet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); } -static u8 __init +static u8 monet_swizzle(struct pci_dev *dev, u8 *pinp) { struct pci_controller *hose = dev->sysdata; @@ -456,10 +456,10 @@ monet_swizzle(struct pci_dev *dev, u8 *pinp) return slot; } -static int __init +static int webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[13][5] __initdata = { + static char irq_tab[13][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 7 ISA Bridge */ { -1, -1, -1, -1, -1}, /* IdSel 8 unused */ @@ -478,10 +478,10 @@ webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); } -static int __init +static int clipper_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[7][5] __initdata = { + static char irq_tab[7][5] = { /*INT INTA INTB INTC INTD */ { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 1 slot 1 */ { 16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 2 slot 2 */ diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index ad40a42..372661c 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -167,10 +167,10 @@ eb64p_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int eb64p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ {16+7, 16+7, 16+7, 16+7, 16+7}, /* IdSel 5, slot ?, ?? */ {16+0, 16+0, 16+2, 16+4, 16+9}, /* IdSel 6, slot ?, ?? */ diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 15f4208..2731738 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -141,7 +141,7 @@ eiger_init_irq(void) } } -static int __init +static int eiger_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u8 irq_orig; @@ -158,7 +158,7 @@ eiger_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq_orig - 0x80; } -static u8 __init +static u8 eiger_swizzle(struct pci_dev *dev, u8 *pinp) { struct pci_controller *hose = dev->sysdata; diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c index d5b9776..731d693f 100644 --- a/arch/alpha/kernel/sys_miata.c +++ b/arch/alpha/kernel/sys_miata.c @@ -149,10 +149,10 @@ miata_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int miata_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[18][5] __initdata = { + static char irq_tab[18][5] = { /*INT INTA INTB INTC INTD */ {16+ 8, 16+ 8, 16+ 8, 16+ 8, 16+ 8}, /* IdSel 14, DC21142 */ { -1, -1, -1, -1, -1}, /* IdSel 15, EIDE */ @@ -196,7 +196,7 @@ miata_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 miata_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 5e82dc1..350ec9c 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -145,10 +145,10 @@ mikasa_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int mikasa_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[8][5] __initdata = { + static char irq_tab[8][5] = { /*INT INTA INTB INTC INTD */ {16+12, 16+12, 16+12, 16+12, 16+12}, /* IdSel 17, SCSI */ { -1, -1, -1, -1, -1}, /* IdSel 18, PCEB */ diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 8ae04a1..d019e4c 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -62,7 +62,7 @@ nautilus_init_irq(void) common_init_isa_dma(); } -static int __init +static int nautilus_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { /* Preserve the IRQ set up by the console. */ diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 063e594..2301678 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -193,10 +193,10 @@ noritake_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int noritake_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[15][5] __initdata = { + static char irq_tab[15][5] = { /*INT INTA INTB INTC INTD */ /* note: IDSELs 16, 17, and 25 are CORELLE only */ { 16+1, 16+1, 16+1, 16+1, 16+1}, /* IdSel 16, QLOGIC */ @@ -221,7 +221,7 @@ noritake_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 noritake_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index dfd510a..546822d 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -221,10 +221,10 @@ rawhide_init_irq(void) * */ -static int __init +static int rawhide_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ { 16+16, 16+16, 16+16, 16+16, 16+16}, /* IdSel 1 SCSI PCI 1 */ { 16+ 0, 16+ 0, 16+ 1, 16+ 2, 16+ 3}, /* IdSel 2 slot 2 */ diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index a3f4852..3b35e19 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -117,10 +117,10 @@ ruffian_kill_arch (int mode) * */ -static int __init +static int ruffian_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[11][5] __initdata = { + static char irq_tab[11][5] = { /*INT INTA INTB INTC INTD */ {-1, -1, -1, -1, -1}, /* IdSel 13, 21052 */ {-1, -1, -1, -1, -1}, /* IdSel 14, SIO */ @@ -139,7 +139,7 @@ ruffian_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 ruffian_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index 08ee737..e178007 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -142,7 +142,7 @@ rx164_init_irq(void) * */ -static int __init +static int rx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { #if 0 @@ -156,7 +156,7 @@ rx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { 16+1, 16+1, 16+6, 16+11, 16+16}, /* IdSel 10, slot 4 */ }; #else - static char irq_tab[6][5] __initdata = { + static char irq_tab[6][5] = { /*INT INTA INTB INTC INTD */ { 16+0, 16+0, 16+6, 16+11, 16+16}, /* IdSel 5, slot 0 */ { 16+1, 16+1, 16+7, 16+12, 16+17}, /* IdSel 6, slot 1 */ diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 8a0aa6d..86d259c 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -192,10 +192,10 @@ sable_init_irq(void) * with the values in the irq swizzling tables above. */ -static int __init +static int sable_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[9][5] __initdata = { + static char irq_tab[9][5] = { /*INT INTA INTB INTC INTD */ { 32+0, 32+0, 32+0, 32+0, 32+0}, /* IdSel 0, TULIP */ { 32+1, 32+1, 32+1, 32+1, 32+1}, /* IdSel 1, SCSI */ @@ -374,10 +374,10 @@ lynx_init_irq(void) * with the values in the irq swizzling tables above. */ -static int __init +static int lynx_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[19][5] __initdata = { + static char irq_tab[19][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 13, PCEB */ { -1, -1, -1, -1, -1}, /* IdSel 14, PPB */ @@ -404,7 +404,7 @@ lynx_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 lynx_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index febd24e..9fd2895 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -144,7 +144,7 @@ sio_fixup_irq_levels(unsigned int level_bits) outb((level_bits >> 8) & 0xff, 0x4d1); } -static inline int __init +static inline int noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { /* @@ -165,7 +165,7 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) * that they use the default INTA line, if they are interrupt * driven at all). */ - static char irq_tab[][5] __initdata = { + static char irq_tab[][5] = { /*INT A B C D */ { 3, 3, 3, 3, 3}, /* idsel 6 (53c810) */ {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ @@ -183,10 +183,10 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq >= 0 ? tmp : -1; } -static inline int __init +static inline int p2k_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[][5] __initdata = { + static char irq_tab[][5] = { /*INT A B C D */ { 0, 0, -1, -1, -1}, /* idsel 6 (53c810) */ {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ diff --git a/arch/alpha/kernel/sys_sx164.c b/arch/alpha/kernel/sys_sx164.c index d063b36..23eee54 100644 --- a/arch/alpha/kernel/sys_sx164.c +++ b/arch/alpha/kernel/sys_sx164.c @@ -94,10 +94,10 @@ sx164_init_irq(void) * 9 32 bit PCI option slot 3 */ -static int __init +static int sx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ { 16+ 9, 16+ 9, 16+13, 16+17, 16+21}, /* IdSel 5 slot 2 J17 */ { 16+11, 16+11, 16+15, 16+19, 16+23}, /* IdSel 6 slot 0 J19 */ diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index dd0f1ea..9101f2b 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -155,10 +155,10 @@ takara_init_irq(void) * assign it whatever the hell IRQ we like and it doesn't matter. */ -static int __init +static int takara_map_irq_srm(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[15][5] __initdata = { + static char irq_tab[15][5] = { { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 6 == device 3 */ { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 7 == device 2 */ { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 8 == device 1 */ @@ -210,7 +210,7 @@ takara_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 takara_swizzle(struct pci_dev *dev, u8 *pinp) { int slot = PCI_SLOT(dev->devfn); diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index ee18748..c3f8b79 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -288,10 +288,10 @@ wildfire_device_interrupt(unsigned long vector) * 7 64 bit PCI 1 option slot 7 */ -static int __init +static int wildfire_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[8][5] __initdata = { + static char irq_tab[8][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 0 ISA Bridge */ { 36, 36, 36+1, 36+2, 36+3}, /* IdSel 1 SCSI builtin */ -- cgit v1.1 From 4587eee04e2ac7ac3ac9fa2bc164fb6e548f99cd Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 25 Oct 2017 15:58:31 -0500 Subject: SMB3: Validate negotiate request must always be signed According to MS-SMB2 3.2.55 validate_negotiate request must always be signed. Some Windows can fail the request if you send it unsigned See kernel bugzilla bug 197311 CC: Stable Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ba58af7..5331631 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1979,6 +1979,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, } else iov[0].iov_len = get_rfc1002_length(req) + 4; + /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); -- cgit v1.1 From 80c094a47dd4ea63375e3f60b5e076064f16e857 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 Oct 2017 19:35:35 +0200 Subject: Revert "apparmor: add base infastructure for socket mediation" This reverts commit 651e28c5537abb39076d3949fb7618536f1d242e. This caused a regression: "The specific problem is that dnsmasq refuses to start on openSUSE Leap 42.2. The specific cause is that and attempt to open a PF_LOCAL socket gets EACCES. This means that networking doesn't function on a system with a 4.14-rc2 system." Sadly, the developers involved seemed to be in denial for several weeks about this, delaying the revert. This has not been a good release for the security subsystem, and this area needs to change development practices. Reported-and-bisected-by: James Bottomley Tracked-by: Thorsten Leemhuis Cc: John Johansen Cc: Vlastimil Babka Cc: Seth Arnold Signed-off-by: Linus Torvalds --- security/apparmor/.gitignore | 1 - security/apparmor/Makefile | 43 +---- security/apparmor/apparmorfs.c | 1 - security/apparmor/file.c | 30 --- security/apparmor/include/audit.h | 26 +-- security/apparmor/include/net.h | 114 ----------- security/apparmor/include/perms.h | 5 +- security/apparmor/include/policy.h | 13 -- security/apparmor/lib.c | 5 +- security/apparmor/lsm.c | 387 ------------------------------------- security/apparmor/net.c | 184 ------------------ security/apparmor/policy_unpack.c | 47 +---- 12 files changed, 16 insertions(+), 840 deletions(-) delete mode 100644 security/apparmor/include/net.h delete mode 100644 security/apparmor/net.c diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index d5b291e..9cdec70 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore @@ -1,6 +1,5 @@ # # Generated include files # -net_names.h capability_names.h rlim_names.h diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index dafdd38..81a3442 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -4,44 +4,11 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ - resource.o secid.o file.o policy_ns.o label.o mount.o net.o + resource.o secid.o file.o policy_ns.o label.o mount.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o -clean-files := capability_names.h rlim_names.h net_names.h +clean-files := capability_names.h rlim_names.h -# Build a lower case string table of address family names -# Transform lines from -# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ -# #define AF_INET 2 /* Internet IP Protocol */ -# to -# [1] = "local", -# [2] = "inet", -# -# and build the securityfs entries for the mapping. -# Transforms lines from -# #define AF_INET 2 /* Internet IP Protocol */ -# to -# #define AA_SFS_AF_MASK "local inet" -quiet_cmd_make-af = GEN $@ -cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ - sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ - 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ - echo "};" >> $@ ;\ - printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\ - sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ - 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\ - $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@ - -# Build a lower case string table of sock type names -# Transform lines from -# SOCK_STREAM = 1, -# to -# [1] = "stream", -quiet_cmd_make-sock = GEN $@ -cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\ - sed $^ >>$@ -r -n \ - -e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ - echo "};" >> $@ # Build a lower case string table of capability names # Transforms lines from @@ -94,7 +61,6 @@ cmd_make-rlim = echo "static const char *const rlim_names[RLIM_NLIMITS] = {" \ tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@ $(obj)/capability.o : $(obj)/capability_names.h -$(obj)/net.o : $(obj)/net_names.h $(obj)/resource.o : $(obj)/rlim_names.h $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \ $(src)/Makefile @@ -102,8 +68,3 @@ $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \ $(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \ $(src)/Makefile $(call cmd,make-rlim) -$(obj)/net_names.h : $(srctree)/include/linux/socket.h \ - $(srctree)/include/linux/net.h \ - $(src)/Makefile - $(call cmd,make-af) - $(call cmd,make-sock) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 518d592..caaf51d 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2202,7 +2202,6 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("policy", aa_sfs_entry_policy), AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), - AA_SFS_DIR("network", aa_sfs_entry_network), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), diff --git a/security/apparmor/file.c b/security/apparmor/file.c index db80221..3382518 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -21,7 +21,6 @@ #include "include/context.h" #include "include/file.h" #include "include/match.h" -#include "include/net.h" #include "include/path.h" #include "include/policy.h" #include "include/label.h" @@ -567,32 +566,6 @@ static int __file_path_perm(const char *op, struct aa_label *label, return error; } -static int __file_sock_perm(const char *op, struct aa_label *label, - struct aa_label *flabel, struct file *file, - u32 request, u32 denied) -{ - struct socket *sock = (struct socket *) file->private_data; - int error; - - AA_BUG(!sock); - - /* revalidation due to label out of date. No revocation at this time */ - if (!denied && aa_label_is_subset(flabel, label)) - return 0; - - /* TODO: improve to skip profiles cached in flabel */ - error = aa_sock_file_perm(label, op, request, sock); - if (denied) { - /* TODO: improve to skip profiles checked above */ - /* check every profile in file label to is cached */ - last_error(error, aa_sock_file_perm(flabel, op, request, sock)); - } - if (!error) - update_file_ctx(file_ctx(file), label, request); - - return error; -} - /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -637,9 +610,6 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file, error = __file_path_perm(op, label, flabel, file, request, denied); - else if (S_ISSOCK(file_inode(file)->i_mode)) - error = __file_sock_perm(op, label, flabel, file, request, - denied); done: rcu_read_unlock(); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index ff4316e..620e811 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -121,29 +121,21 @@ struct apparmor_audit_data { /* these entries require a custom callback fn */ struct { struct aa_label *peer; - union { - struct { - kuid_t ouid; - const char *target; - } fs; - struct { - int type, protocol; - struct sock *peer_sk; - void *addr; - int addrlen; - } net; - int signal; - struct { - int rlim; - unsigned long max; - } rlim; - }; + struct { + const char *target; + kuid_t ouid; + } fs; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; + int signal; + struct { + int rlim; + unsigned long max; + } rlim; struct { const char *src_name; const char *type; diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h deleted file mode 100644 index 140c8efc..0000000 --- a/security/apparmor/include/net.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * AppArmor security module - * - * This file contains AppArmor network mediation definitions. - * - * Copyright (C) 1998-2008 Novell/SUSE - * Copyright 2009-2017 Canonical Ltd. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - */ - -#ifndef __AA_NET_H -#define __AA_NET_H - -#include -#include - -#include "apparmorfs.h" -#include "label.h" -#include "perms.h" -#include "policy.h" - -#define AA_MAY_SEND AA_MAY_WRITE -#define AA_MAY_RECEIVE AA_MAY_READ - -#define AA_MAY_SHUTDOWN AA_MAY_DELETE - -#define AA_MAY_CONNECT AA_MAY_OPEN -#define AA_MAY_ACCEPT 0x00100000 - -#define AA_MAY_BIND 0x00200000 -#define AA_MAY_LISTEN 0x00400000 - -#define AA_MAY_SETOPT 0x01000000 -#define AA_MAY_GETOPT 0x02000000 - -#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \ - AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN | \ - AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \ - AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT) - -#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \ - AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\ - AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD | \ - AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK | \ - AA_MAY_MPROT) - -#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \ - AA_MAY_ACCEPT) -struct aa_sk_ctx { - struct aa_label *label; - struct aa_label *peer; - struct path path; -}; - -#define SK_CTX(X) ((X)->sk_security) -#define SOCK_ctx(X) SOCK_INODE(X)->i_security -#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \ - struct lsm_network_audit NAME ## _net = { .sk = (SK), \ - .family = (F)}; \ - DEFINE_AUDIT_DATA(NAME, \ - ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \ - LSM_AUDIT_DATA_NONE, \ - OP); \ - NAME.u.net = &(NAME ## _net); \ - aad(&NAME)->net.type = (T); \ - aad(&NAME)->net.protocol = (P) - -#define DEFINE_AUDIT_SK(NAME, OP, SK) \ - DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ - (SK)->sk_protocol) - -/* struct aa_net - network confinement data - * @allow: basic network families permissions - * @audit: which network permissions to force audit - * @quiet: which network permissions to quiet rejects - */ -struct aa_net { - u16 allow[AF_MAX]; - u16 audit[AF_MAX]; - u16 quiet[AF_MAX]; -}; - - -extern struct aa_sfs_entry aa_sfs_entry_network[]; - -void audit_net_cb(struct audit_buffer *ab, void *va); -int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, - u32 request, u16 family, int type); -int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, - int type, int protocol); -static inline int aa_profile_af_sk_perm(struct aa_profile *profile, - struct common_audit_data *sa, - u32 request, - struct sock *sk) -{ - return aa_profile_af_perm(profile, sa, request, sk->sk_family, - sk->sk_type); -} -int aa_sk_perm(const char *op, u32 request, struct sock *sk); - -int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, - struct socket *sock); - - -static inline void aa_free_net_rules(struct aa_net *new) -{ - /* NOP */ -} - -#endif /* __AA_NET_H */ diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index af04d5a..2b27bb7 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -135,10 +135,9 @@ extern struct aa_perms allperms; void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask); -void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, - u32 mask); +void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask); void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, - u32 chrsmask, const char * const *names, u32 namesmask); + u32 chrsmask, const char **names, u32 namesmask); void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms); void aa_compute_perms(struct aa_dfa *dfa, unsigned int state, diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 4364088..17fe41a 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -30,7 +30,6 @@ #include "file.h" #include "lib.h" #include "label.h" -#include "net.h" #include "perms.h" #include "resource.h" @@ -112,7 +111,6 @@ struct aa_data { * @policy: general match rules governing policy * @file: The set of rules governing basic file access and domain transitions * @caps: capabilities for the profile - * @net: network controls for the profile * @rlimits: rlimits for the profile * * @dents: dentries for the profiles file entries in apparmorfs @@ -150,7 +148,6 @@ struct aa_profile { struct aa_policydb policy; struct aa_file_rules file; struct aa_caps caps; - struct aa_net net; struct aa_rlimit rlimits; struct aa_loaddata *rawdata; @@ -223,16 +220,6 @@ static inline unsigned int PROFILE_MEDIATES_SAFE(struct aa_profile *profile, return 0; } -static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile, - u16 AF) { - unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET); - u16 be_af = cpu_to_be16(AF); - - if (!state) - return 0; - return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2); -} - /** * aa_get_profile - increment refcount on profile @p * @p: profile (MAYBE NULL) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 8818621..08ca26b 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -211,8 +211,7 @@ void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask) *str = '\0'; } -void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, - u32 mask) +void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask) { const char *fmt = "%s"; unsigned int i, perm = 1; @@ -230,7 +229,7 @@ void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, } void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, - u32 chrsmask, const char * const *names, u32 namesmask) + u32 chrsmask, const char **names, u32 namesmask) { char str[33]; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 72b915d..1346ee5 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -33,7 +33,6 @@ #include "include/context.h" #include "include/file.h" #include "include/ipc.h" -#include "include/net.h" #include "include/path.h" #include "include/label.h" #include "include/policy.h" @@ -737,368 +736,6 @@ static int apparmor_task_kill(struct task_struct *target, struct siginfo *info, return error; } -/** - * apparmor_sk_alloc_security - allocate and attach the sk_security field - */ -static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) -{ - struct aa_sk_ctx *ctx; - - ctx = kzalloc(sizeof(*ctx), flags); - if (!ctx) - return -ENOMEM; - - SK_CTX(sk) = ctx; - - return 0; -} - -/** - * apparmor_sk_free_security - free the sk_security field - */ -static void apparmor_sk_free_security(struct sock *sk) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - - SK_CTX(sk) = NULL; - aa_put_label(ctx->label); - aa_put_label(ctx->peer); - path_put(&ctx->path); - kfree(ctx); -} - -/** - * apparmor_clone_security - clone the sk_security field - */ -static void apparmor_sk_clone_security(const struct sock *sk, - struct sock *newsk) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - struct aa_sk_ctx *new = SK_CTX(newsk); - - new->label = aa_get_label(ctx->label); - new->peer = aa_get_label(ctx->peer); - new->path = ctx->path; - path_get(&new->path); -} - -static int aa_sock_create_perm(struct aa_label *label, int family, int type, - int protocol) -{ - AA_BUG(!label); - AA_BUG(in_interrupt()); - - return aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, family, type, - protocol); -} - - -/** - * apparmor_socket_create - check perms before creating a new socket - */ -static int apparmor_socket_create(int family, int type, int protocol, int kern) -{ - struct aa_label *label; - int error = 0; - - label = begin_current_label_crit_section(); - if (!(kern || unconfined(label))) - error = aa_sock_create_perm(label, family, type, protocol); - end_current_label_crit_section(label); - - return error; -} - -/** - * apparmor_socket_post_create - setup the per-socket security struct - * - * Note: - * - kernel sockets currently labeled unconfined but we may want to - * move to a special kernel label - * - socket may not have sk here if created with sock_create_lite or - * sock_alloc. These should be accept cases which will be handled in - * sock_graft. - */ -static int apparmor_socket_post_create(struct socket *sock, int family, - int type, int protocol, int kern) -{ - struct aa_label *label; - - if (kern) { - struct aa_ns *ns = aa_get_current_ns(); - - label = aa_get_label(ns_unconfined(ns)); - aa_put_ns(ns); - } else - label = aa_get_current_label(); - - if (sock->sk) { - struct aa_sk_ctx *ctx = SK_CTX(sock->sk); - - aa_put_label(ctx->label); - ctx->label = aa_get_label(label); - } - aa_put_label(label); - - return 0; -} - -/** - * apparmor_socket_bind - check perms before bind addr to socket - */ -static int apparmor_socket_bind(struct socket *sock, - struct sockaddr *address, int addrlen) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!address); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); -} - -/** - * apparmor_socket_connect - check perms before connecting @sock to @address - */ -static int apparmor_socket_connect(struct socket *sock, - struct sockaddr *address, int addrlen) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!address); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); -} - -/** - * apparmor_socket_list - check perms before allowing listen - */ -static int apparmor_socket_listen(struct socket *sock, int backlog) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); -} - -/** - * apparmor_socket_accept - check perms before accepting a new connection. - * - * Note: while @newsock is created and has some information, the accept - * has not been done. - */ -static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!newsock); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); -} - -static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, - struct msghdr *msg, int size) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!msg); - AA_BUG(in_interrupt()); - - return aa_sk_perm(op, request, sock->sk); -} - -/** - * apparmor_socket_sendmsg - check perms before sending msg to another socket - */ -static int apparmor_socket_sendmsg(struct socket *sock, - struct msghdr *msg, int size) -{ - return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); -} - -/** - * apparmor_socket_recvmsg - check perms before receiving a message - */ -static int apparmor_socket_recvmsg(struct socket *sock, - struct msghdr *msg, int size, int flags) -{ - return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size); -} - -/* revaliation, get/set attr, shutdown */ -static int aa_sock_perm(const char *op, u32 request, struct socket *sock) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(in_interrupt()); - - return aa_sk_perm(op, request, sock->sk); -} - -/** - * apparmor_socket_getsockname - check perms before getting the local address - */ -static int apparmor_socket_getsockname(struct socket *sock) -{ - return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock); -} - -/** - * apparmor_socket_getpeername - check perms before getting remote address - */ -static int apparmor_socket_getpeername(struct socket *sock) -{ - return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock); -} - -/* revaliation, get/set attr, opt */ -static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, - int level, int optname) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(in_interrupt()); - - return aa_sk_perm(op, request, sock->sk); -} - -/** - * apparmor_getsockopt - check perms before getting socket options - */ -static int apparmor_socket_getsockopt(struct socket *sock, int level, - int optname) -{ - return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock, - level, optname); -} - -/** - * apparmor_setsockopt - check perms before setting socket options - */ -static int apparmor_socket_setsockopt(struct socket *sock, int level, - int optname) -{ - return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock, - level, optname); -} - -/** - * apparmor_socket_shutdown - check perms before shutting down @sock conn - */ -static int apparmor_socket_shutdown(struct socket *sock, int how) -{ - return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); -} - -/** - * apparmor_socket_sock_recv_skb - check perms before associating skb to sk - * - * Note: can not sleep may be called with locks held - * - * dont want protocol specific in __skb_recv_datagram() - * to deny an incoming connection socket_sock_rcv_skb() - */ -static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - - -static struct aa_label *sk_peer_label(struct sock *sk) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - - if (ctx->peer) - return ctx->peer; - - return ERR_PTR(-ENOPROTOOPT); -} - -/** - * apparmor_socket_getpeersec_stream - get security context of peer - * - * Note: for tcp only valid if using ipsec or cipso on lan - */ -static int apparmor_socket_getpeersec_stream(struct socket *sock, - char __user *optval, - int __user *optlen, - unsigned int len) -{ - char *name; - int slen, error = 0; - struct aa_label *label; - struct aa_label *peer; - - label = begin_current_label_crit_section(); - peer = sk_peer_label(sock->sk); - if (IS_ERR(peer)) { - error = PTR_ERR(peer); - goto done; - } - slen = aa_label_asxprint(&name, labels_ns(label), peer, - FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | - FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); - /* don't include terminating \0 in slen, it breaks some apps */ - if (slen < 0) { - error = -ENOMEM; - } else { - if (slen > len) { - error = -ERANGE; - } else if (copy_to_user(optval, name, slen)) { - error = -EFAULT; - goto out; - } - if (put_user(slen, optlen)) - error = -EFAULT; -out: - kfree(name); - - } - -done: - end_current_label_crit_section(label); - - return error; -} - -/** - * apparmor_socket_getpeersec_dgram - get security label of packet - * @sock: the peer socket - * @skb: packet data - * @secid: pointer to where to put the secid of the packet - * - * Sets the netlabel socket state on sk from parent - */ -static int apparmor_socket_getpeersec_dgram(struct socket *sock, - struct sk_buff *skb, u32 *secid) - -{ - /* TODO: requires secid support */ - return -ENOPROTOOPT; -} - -/** - * apparmor_sock_graft - Initialize newly created socket - * @sk: child sock - * @parent: parent socket - * - * Note: could set off of SOCK_CTX(parent) but need to track inode and we can - * just set sk security information off of current creating process label - * Labeling of sk for accept case - probably should be sock based - * instead of task, because of the case where an implicitly labeled - * socket is shared by different tasks. - */ -static void apparmor_sock_graft(struct sock *sk, struct socket *parent) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - - if (!ctx->label) - ctx->label = aa_get_current_label(); -} - static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -1133,30 +770,6 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), - LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), - LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), - LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), - - LSM_HOOK_INIT(socket_create, apparmor_socket_create), - LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), - LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), - LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), - LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), - LSM_HOOK_INIT(socket_accept, apparmor_socket_accept), - LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg), - LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg), - LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname), - LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername), - LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt), - LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt), - LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown), - LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb), - LSM_HOOK_INIT(socket_getpeersec_stream, - apparmor_socket_getpeersec_stream), - LSM_HOOK_INIT(socket_getpeersec_dgram, - apparmor_socket_getpeersec_dgram), - LSM_HOOK_INIT(sock_graft, apparmor_sock_graft), - LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), LSM_HOOK_INIT(cred_free, apparmor_cred_free), LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare), diff --git a/security/apparmor/net.c b/security/apparmor/net.c deleted file mode 100644 index 33d5443..0000000 --- a/security/apparmor/net.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * AppArmor security module - * - * This file contains AppArmor network mediation - * - * Copyright (C) 1998-2008 Novell/SUSE - * Copyright 2009-2017 Canonical Ltd. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - */ - -#include "include/apparmor.h" -#include "include/audit.h" -#include "include/context.h" -#include "include/label.h" -#include "include/net.h" -#include "include/policy.h" - -#include "net_names.h" - - -struct aa_sfs_entry aa_sfs_entry_network[] = { - AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), - { } -}; - -static const char * const net_mask_names[] = { - "unknown", - "send", - "receive", - "unknown", - - "create", - "shutdown", - "connect", - "unknown", - - "setattr", - "getattr", - "setcred", - "getcred", - - "chmod", - "chown", - "chgrp", - "lock", - - "mmap", - "mprot", - "unknown", - "unknown", - - "accept", - "bind", - "listen", - "unknown", - - "setopt", - "getopt", - "unknown", - "unknown", - - "unknown", - "unknown", - "unknown", - "unknown", -}; - - -/* audit callback for net specific fields */ -void audit_net_cb(struct audit_buffer *ab, void *va) -{ - struct common_audit_data *sa = va; - - audit_log_format(ab, " family="); - if (address_family_names[sa->u.net->family]) - audit_log_string(ab, address_family_names[sa->u.net->family]); - else - audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family); - audit_log_format(ab, " sock_type="); - if (sock_type_names[aad(sa)->net.type]) - audit_log_string(ab, sock_type_names[aad(sa)->net.type]); - else - audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type); - audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol); - - if (aad(sa)->request & NET_PERMS_MASK) { - audit_log_format(ab, " requested_mask="); - aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0, - net_mask_names, NET_PERMS_MASK); - - if (aad(sa)->denied & NET_PERMS_MASK) { - audit_log_format(ab, " denied_mask="); - aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0, - net_mask_names, NET_PERMS_MASK); - } - } - if (aad(sa)->peer) { - audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, - FLAGS_NONE, GFP_ATOMIC); - } -} - - -/* Generic af perm */ -int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, - u32 request, u16 family, int type) -{ - struct aa_perms perms = { }; - - AA_BUG(family >= AF_MAX); - AA_BUG(type < 0 || type >= SOCK_MAX); - - if (profile_unconfined(profile)) - return 0; - - perms.allow = (profile->net.allow[family] & (1 << type)) ? - ALL_PERMS_MASK : 0; - perms.audit = (profile->net.audit[family] & (1 << type)) ? - ALL_PERMS_MASK : 0; - perms.quiet = (profile->net.quiet[family] & (1 << type)) ? - ALL_PERMS_MASK : 0; - aa_apply_modes_to_perms(profile, &perms); - - return aa_check_perms(profile, &perms, request, sa, audit_net_cb); -} - -int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, - int type, int protocol) -{ - struct aa_profile *profile; - DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol); - - return fn_for_each_confined(label, profile, - aa_profile_af_perm(profile, &sa, request, family, - type)); -} - -static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, - struct sock *sk) -{ - struct aa_profile *profile; - DEFINE_AUDIT_SK(sa, op, sk); - - AA_BUG(!label); - AA_BUG(!sk); - - if (unconfined(label)) - return 0; - - return fn_for_each_confined(label, profile, - aa_profile_af_sk_perm(profile, &sa, request, sk)); -} - -int aa_sk_perm(const char *op, u32 request, struct sock *sk) -{ - struct aa_label *label; - int error; - - AA_BUG(!sk); - AA_BUG(in_interrupt()); - - /* TODO: switch to begin_current_label ???? */ - label = begin_current_label_crit_section(); - error = aa_label_sk_perm(label, op, request, sk); - end_current_label_crit_section(label); - - return error; -} - - -int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, - struct socket *sock) -{ - AA_BUG(!label); - AA_BUG(!sock); - AA_BUG(!sock->sk); - - return aa_label_sk_perm(label, op, request, sock->sk); -} diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5a2aec3..4ede87c 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -275,19 +275,6 @@ fail: return 0; } -static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name) -{ - if (unpack_nameX(e, AA_U16, name)) { - if (!inbounds(e, sizeof(u16))) - return 0; - if (data) - *data = le16_to_cpu(get_unaligned((__le16 *) e->pos)); - e->pos += sizeof(u16); - return 1; - } - return 0; -} - static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) { if (unpack_nameX(e, AA_U32, name)) { @@ -597,7 +584,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) struct aa_profile *profile = NULL; const char *tmpname, *tmpns = NULL, *name = NULL; const char *info = "failed to unpack profile"; - size_t size = 0, ns_len; + size_t ns_len; struct rhashtable_params params = { 0 }; char *key = NULL; struct aa_data *data; @@ -730,38 +717,6 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } - size = unpack_array(e, "net_allowed_af"); - if (size) { - - for (i = 0; i < size; i++) { - /* discard extraneous rules that this kernel will - * never request - */ - if (i >= AF_MAX) { - u16 tmp; - - if (!unpack_u16(e, &tmp, NULL) || - !unpack_u16(e, &tmp, NULL) || - !unpack_u16(e, &tmp, NULL)) - goto fail; - continue; - } - if (!unpack_u16(e, &profile->net.allow[i], NULL)) - goto fail; - if (!unpack_u16(e, &profile->net.audit[i], NULL)) - goto fail; - if (!unpack_u16(e, &profile->net.quiet[i], NULL)) - goto fail; - } - if (!unpack_nameX(e, AA_ARRAYEND, NULL)) - goto fail; - } - if (VERSION_LT(e->version, v7)) { - /* pre v7 policy always allowed these */ - profile->net.allow[AF_UNIX] = 0xffff; - profile->net.allow[AF_NETLINK] = 0xffff; - } - if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ info = "failed to unpack policydb"; -- cgit v1.1 From 8f63fc2bc64716c16e269ab951130eeda78fe37a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 19 Oct 2017 13:54:06 +0800 Subject: drm/i915/gvt: properly check per_ctx bb valid state Need to check valid state for per_ctx bb and bypass batch buffer combine for scan if necessary. Otherwise adding invalid MI batch buffer start cmd for per_ctx bb will cause scan failure, which is taken as -EFAULT now so vGPU would be put in failsafe. This trys to fix that by checking per_ctx bb valid state. Also remove old invalid WARNING that indirect ctx bb shouldn't depend on valid per_ctx bb. Signed-off-by: Zhenyu Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 3 +++ drivers/gpu/drm/i915/gvt/execlist.c | 3 +-- drivers/gpu/drm/i915/gvt/scheduler.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 21c36e2..d4726a3 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2723,6 +2723,9 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; unsigned char *bb_start_sva; + if (!wa_ctx->per_ctx.valid) + return 0; + per_ctx_start[0] = 0x18800001; per_ctx_start[1] = wa_ctx->per_ctx.guest_gma; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 91b4300..e5320b4 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -701,8 +701,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, CACHELINE_BYTES; workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; - - WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); + workload->wa_ctx.per_ctx.valid = per_ctx & 1; } if (emulate_schedule_in) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 0d431a9..93a49eb 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -68,6 +68,7 @@ struct shadow_indirect_ctx { struct shadow_per_ctx { unsigned long guest_gma; unsigned long shadow_gma; + unsigned valid; }; struct intel_shadow_wa_ctx { -- cgit v1.1 From edee7ecdb4d7311f351feaeb53e269f416bb1b57 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sat, 30 Sep 2017 17:32:16 +0800 Subject: drm/i915/gvt: Refine MMIO_RING_F() Inspect if the host has VCS2 ring by host i915 macro in MMIO_RING_F(). Also this helps on reducing some LOCs. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 44 ++----------------------------------- drivers/gpu/drm/i915/gvt/reg.h | 3 --- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 2294466..812f411 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1589,6 +1589,8 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \ + if (HAS_BSD2(dev_priv)) \ + MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \ } while (0) #define MMIO_RING_D(prefix, d) \ @@ -1636,7 +1638,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #define RING_REG(base) (base + 0x6c) MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_ALL, instdone_mmio_read, NULL); #undef RING_REG MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); @@ -2411,9 +2412,6 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, - intel_vgpu_reg_imr_handler); - MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IER(0), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler); MMIO_DH(GEN8_GT_IIR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler); @@ -2476,68 +2474,33 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, - ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, - ring_reset_ctl_write); #undef RING_REG #define RING_REG(base) (base + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write); #undef RING_REG #define RING_REG(base) (base + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, - ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 48, F_RO, 0, ~0, D_BDW_PLUS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); @@ -2557,11 +2520,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #define RING_REG(base) (base + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2849,7 +2810,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL | D_KBL); MMIO_D(0x320f0, D_SKL | D_KBL); - MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL_PLUS); MMIO_D(0x71034, D_SKL_PLUS); MMIO_D(0x72034, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index fbd023a..7d01c77 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -54,9 +54,6 @@ #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) -#define _REG_VECS_EXCC 0x1A028 -#define _REG_VCS2_EXCC 0x1c028 - #define _REG_701C0(pipe, plane) (0x701c0 + pipe * 0x1000 + (plane - 1) * 0x100) #define _REG_701C4(pipe, plane) (0x701c4 + pipe * 0x1000 + (plane - 1) * 0x100) -- cgit v1.1 From 20a2bcdec5071f78bebe48c5eecdb89de6e96acb Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Sat, 14 Oct 2017 06:34:46 +0800 Subject: drm/i915/gvt: Extract mmio_read_from_hw() common function The mmio read handler for ring timestmap / instdone register are same as reading hw value directly. Extract it as common function to reduce code duplications. Signed-off-by: Xiong Zhang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 812f411..c24341f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1429,18 +1429,7 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } -static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu, - unsigned int offset, void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); - return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); -} - -static int instdone_mmio_read(struct intel_vgpu *vgpu, +static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -1637,9 +1626,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x6c) - MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); + MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG - MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); + MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); @@ -1663,9 +1652,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, -- cgit v1.1 From 894e287b3dcc8bfc8d974f883dab3b5c66344089 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Sat, 14 Oct 2017 06:34:47 +0800 Subject: drm/i915/gvt: Adding ACTHD mmio read handler When a workload is too heavy to finish it in gpu hang check timer intervals(1.5), gpu hang check function will check ACTHD register value to decide whether gpu is real dead or not. On real hw, ACTHD is updated by HW when workload is running, then host kernel won't think it is gpu hang. while guest kernel always read a constant ACTHD value as GVT doesn't supply ACTHD emulate handler, then guest kernel detects a fake gpu hang. To remove such guest fake gpu hang, this patch supply ACTHD mmio read handler which read real HW ACTHD register directly. Signed-off-by: Xiong Zhang Signed-off-by: Zhi Wang Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/b4c9a097-3e62-124e-6856-b0c37764df7b@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c24341f..a5bed2e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1638,7 +1638,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, mmio_read_from_hw, NULL); MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ @@ -2463,7 +2463,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, + mmio_read_from_hw, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, -- cgit v1.1 From a494ee6cfd771c2b37721a18a1c9209b61bdf81d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 5 Oct 2017 14:56:53 +0200 Subject: arm/xen: don't inclide rwlock.h directly. rwlock.h should not be included directly. Instead linux/splinlock.h should be included. One thing it does is to break the RT build. Cc: Stefano Stabellini Cc: xen-devel@lists.xenproject.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- arch/arm/xen/p2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index e71eefa..0641ba5 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.1 From 8fc669ea1639f3f6fb2c3a4a4a1e31d975d0e254 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 26 Oct 2017 12:31:54 +0200 Subject: maintainers: drop Chris Wright from pvops Mails to chrisw@sous-sol.org are not deliverable since several months. Drop him as PARAVIRT_OPS maintainer. Signed-off-by: Juergen Gross Acked-by: Chris Wright Signed-off-by: Juergen Gross --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4..7ebc2a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10156,7 +10156,6 @@ F: Documentation/parport*.txt PARAVIRT_OPS INTERFACE M: Juergen Gross -M: Chris Wright M: Alok Kataria M: Rusty Russell L: virtualization@lists.linux-foundation.org -- cgit v1.1 From 90edaac62729d3b9cbb97756261a0049a7fdd6a0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 27 Oct 2017 10:03:13 +0200 Subject: Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses" This reverts commit ce56a86e2ade45d052b3228cdfebe913a1ae7381. There's unanticipated interaction with some boot parameters like 'mem=', which now cause the new checks via valid_mmap_phys_addr_range() to be too restrictive, crashing a Qemu bootup in fact, as reported by Fengguang Wu. So while the motivation of the change is still entirely valid, we need a few more rounds of testing to get it right - it's way too late after -rc6, so revert it for now. Reported-by: Fengguang Wu Signed-off-by: Ingo Molnar Acked-by: Craig Bergstrom Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dsafonov@virtuozzo.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: oleg@redhat.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 4 ---- arch/x86/mm/mmap.c | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 322d25a..c40a95c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -110,10 +110,6 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #endif -#define ARCH_HAS_VALID_PHYS_ADDR_RANGE -extern int valid_phys_addr_range(phys_addr_t addr, size_t size); -extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); - /** * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 320c623..a996798 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -174,15 +174,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } - -int valid_phys_addr_range(phys_addr_t addr, size_t count) -{ - return addr + count <= __pa(high_memory); -} - -int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) -{ - phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; - - return valid_phys_addr_range(addr, count); -} -- cgit v1.1 From 4894ac6b6c25b2693dc21c611621dc9fd21f4090 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 26 Oct 2017 09:51:33 +0100 Subject: net: stmmac: dwc-qos-eth: Fix typo in DT bindings parsing According to DT bindings documentation we are expecting a property called "snps,read-requests" but we are parsing instead a property called "read,read-requests". This is clearly a typo. Fix it. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 5efef80..3256e5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -74,7 +74,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->axi->axi_wr_osr_lmt--; } - if (of_property_read_u32(np, "read,read-requests", + if (of_property_read_u32(np, "snps,read-requests", &plat_dat->axi->axi_rd_osr_lmt)) { /** * Since the register has a reset value of 1, if property -- cgit v1.1 From 6d9f0790af8d33476f936ac84a07cac42f808f6c Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 26 Oct 2017 10:07:12 +0100 Subject: net: stmmac: First Queue must always be in DCB mode According to DWMAC databook the first queue operating mode must always be in DCB. As MTL_QUEUE_DCB = 1, we need to always set the first queue operating mode to DCB otherwise driver will think that queue is in AVB mode (because MTL_QUEUE_AVB = 0). Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 8a280b4..6383695 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -150,6 +150,13 @@ static void stmmac_mtl_setup(struct platform_device *pdev, plat->rx_queues_to_use = 1; plat->tx_queues_to_use = 1; + /* First Queue must always be in DCB mode. As MTL_QUEUE_DCB = 1 we need + * to always set this, otherwise Queue will be classified as AVB + * (because MTL_QUEUE_AVB = 0). + */ + plat->rx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB; + plat->tx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB; + rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0); if (!rx_node) return; -- cgit v1.1 From f3594f0a7ea36661d7fd942facd7f31a64245f1a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Oct 2017 19:19:56 +0800 Subject: ipip: only increase err_count for some certain type icmp in ipip_err t->err_count is used to count the link failure on tunnel and an err will be reported to user socket in tx path if t->err_count is not 0. udp socket could even return EHOSTUNREACH to users. Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed the 'switch check' for icmp type in ipip_err(), err_count would be increased by the icmp packet with ICMP_EXC_FRAGTIME code. an link failure would be reported out due to this. In Jianlin's case, when receiving ICMP_EXC_FRAGTIME a icmp packet, udp netperf failed with the err: send_data: data send error: No route to host (errno 113) We expect this error reported from tunnel to socket when receiving some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED or ICMP_PARAMETERPROB ones. This patch is to bring 'switch check' for icmp type back to ipip_err so that it only reports link failure for the right type icmp, just as in ipgre_err() and ipip6_err(). Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 59 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fb1ad22..cdd6273 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly; static int ipip_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + */ struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - struct ip_tunnel *t; - int err; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + struct ip_tunnel *t; + int err = 0; + + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + /* Impossible event. */ + goto out; + default: + /* All others are translated to HOST_UNREACH. + * rfc2003 contains "deep thoughts" about NET_UNREACH, + * I believe they are just ether pollution. --ANK + */ + break; + } + break; + + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + goto out; + break; + + case ICMP_REDIRECT: + break; + + default: + goto out; + } - err = -ENOENT; t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); - if (!t) + if (!t) { + err = -ENOENT; goto out; + } if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, iph->protocol, 0); - err = 0; + ipv4_update_pmtu(skb, net, info, t->parms.link, 0, + iph->protocol, 0); goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - iph->protocol, 0); - err = 0; + ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); goto out; } - if (t->parms.iph.daddr == 0) + if (t->parms.iph.daddr == 0) { + err = -ENOENT; goto out; + } - err = 0; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; -- cgit v1.1 From f8d20b46ce55cf40afb30dcef6d9288f7ef46d9b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Oct 2017 19:23:27 +0800 Subject: ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err The similar fix in patch 'ipip: only increase err_count for some certain type icmp in ipip_err' is needed for ip6gre_err. In Jianlin's case, udp netperf broke even when receiving a TooBig icmpv6 packet. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1602b49..fb595e8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); - break; + if (code != ICMPV6_PORT_UNREACH) + break; + return; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); + break; } - break; + return; case ICMPV6_PARAMPROB: teli = 0; if (code == ICMPV6_HDR_FIELD) @@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } - break; + return; case ICMPV6_PKT_TOOBIG: mtu = be32_to_cpu(info) - offset - t->tun_hlen; if (t->dev->type == ARPHRD_ETHER) @@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - break; + return; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) -- cgit v1.1 From 8aec4959d832bae0889a8e2f348973b5e4abffef Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Oct 2017 19:27:17 +0800 Subject: ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit When receiving a Toobig icmpv6 packet, ip6gre_err would just set tunnel dev's mtu, that's not enough. For skb_dst(skb)'s pmtu may still be using the old value, it has no chance to be updated with tunnel dev's mtu. Jianlin found this issue by reducing route's mtu while running netperf, the performance went to 0. ip6ip6 and ip4ip6 tunnel can work well with this, as they lookup the upper dst and update_pmtu it's pmtu or icmpv6_send a Toobig to upper socket after setting tunnel dev's mtu. We couldn't do that for ip6_gre, as gre's inner packet could be any protocol, it's difficult to handle them (like lookup upper dst) in a good way. So this patch is to fix it by updating skb_dst(skb)'s pmtu when dev->mtu < skb_dst(skb)'s pmtu in tx path. It's safe to do this update there, as usually dev->mtu <= skb_dst(skb)'s pmtu and no performance regression can be caused by this. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fb595e8..59c121b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -503,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); - __be16 protocol = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : proto; + struct dst_entry *dst = skb_dst(skb); + __be16 protocol; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -518,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, tunnel->o_seqno++; /* Push GRE header. */ + protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + /* TooBig packet may have updated dst->dev's mtu */ + if (dst && dst_mtu(dst) > dst->dev->mtu) + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); } -- cgit v1.1 From a50829479f58416a013a4ccca791336af3c584c7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 23 Oct 2017 16:46:00 -0700 Subject: Input: gtco - fix potential out-of-bound access parse_hid_report_descriptor() has a while (i < length) loop, which only guarantees that there's at least 1 byte in the buffer, but the loop body can read multiple bytes which causes out-of-bounds access. Reported-by: Andrey Konovalov Reviewed-by: Andrey Konovalov Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/gtco.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index b796e89..4b8b9d7 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -230,13 +230,17 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, /* Walk this report and pull out the info we need */ while (i < length) { - prefix = report[i]; - - /* Skip over prefix */ - i++; + prefix = report[i++]; /* Determine data size and save the data in the proper variable */ - size = PREF_SIZE(prefix); + size = (1U << PREF_SIZE(prefix)) >> 1; + if (i + size > length) { + dev_err(ddev, + "Not enough data (need %d, have %d)\n", + i + size, length); + break; + } + switch (size) { case 1: data = report[i]; @@ -244,8 +248,7 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, case 2: data16 = get_unaligned_le16(&report[i]); break; - case 3: - size = 4; + case 4: data32 = get_unaligned_le32(&report[i]); break; } -- cgit v1.1 From 3e64fcbdbd10e46dede502d507dbcc104837cd59 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Wed, 25 Oct 2017 09:30:16 -0700 Subject: Input: synaptics-rmi4 - limit the range of what GPIOs are buttons By convention the first 6 bits of F30 Ctrl 2 and 3 are used to signify GPIOs which are connected to buttons. Additional GPIOs may be used as input GPIOs to signal the touch controller of some event (ie disable touchpad). These additional GPIOs may meet the criteria of a button in rmi_f30_is_valid_button() but should not be considered buttons. This patch limits the GPIOs which are mapped to buttons to just the first 6. Signed-off-by: Andrew Duggan Reported-by: Daniel Martin Tested-by: Daniel Martin Acked-By: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f30.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c index 34dfee5..82e0f0d 100644 --- a/drivers/input/rmi4/rmi_f30.c +++ b/drivers/input/rmi4/rmi_f30.c @@ -232,9 +232,10 @@ static int rmi_f30_map_gpios(struct rmi_function *fn, unsigned int trackstick_button = BTN_LEFT; bool button_mapped = false; int i; + int button_count = min_t(u8, f30->gpioled_count, TRACKSTICK_RANGE_END); f30->gpioled_key_map = devm_kcalloc(&fn->dev, - f30->gpioled_count, + button_count, sizeof(f30->gpioled_key_map[0]), GFP_KERNEL); if (!f30->gpioled_key_map) { @@ -242,7 +243,7 @@ static int rmi_f30_map_gpios(struct rmi_function *fn, return -ENOMEM; } - for (i = 0; i < f30->gpioled_count; i++) { + for (i = 0; i < button_count; i++) { if (!rmi_f30_is_valid_button(i, f30->ctrl)) continue; -- cgit v1.1 From a0cb2b5c390151837b08e5f7bca4a6ecddbcd39c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 27 Oct 2017 20:35:31 -0700 Subject: Fix tracing sample code warning. Commit 6575257c60e1 ("tracing/samples: Fix creation and deletion of simple_thread_fn creation") introduced a new warning due to using a boolean as a counter. Just make it "int". Fixes: 6575257c60e1 ("tracing/samples: Fix creation and deletion of simple_thread_fn creation") Cc: Steven Rostedt Signed-off-by: Linus Torvalds --- samples/trace_events/trace-events-sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 446beb7..5522692 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -78,7 +78,7 @@ static int simple_thread_fn(void *arg) } static DEFINE_MUTEX(thread_mutex); -static bool simple_thread_cnt; +static int simple_thread_cnt; int foo_bar_reg(void) { -- cgit v1.1 From 63b9ab65bd76e5de6479bb14b4014b64aa1a317a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 Oct 2017 11:05:44 +0800 Subject: tuntap: properly align skb->head before building skb An unaligned alloc_frag->offset caused by previous allocation will result an unaligned skb->head. This will lead unaligned skb_shared_info and then unaligned dataref which requires to be aligned for accessing on some architecture. Fix this by aligning alloc_frag->offset before the frag refilling. Fixes: 0bbd7dad34f8 ("tun: make tun_build_skb() thread safe") Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Wei Wei Cc: Dmitry Vyukov Cc: Mark Rutland Reported-by: Wei Wei Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b9973fb..5550f56 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1286,6 +1286,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, buflen += SKB_DATA_ALIGN(len + pad); rcu_read_unlock(); + alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) return ERR_PTR(-ENOMEM); -- cgit v1.1 From ee1836aec4f5a977c1699a311db4d9027ef21ac8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:21:40 -0700 Subject: tcp: refresh tp timestamp before tcp_mtu_probe() In the unlikely event tcp_mtu_probe() is sending a packet, we want tp->tcp_mstamp being as accurate as possible. This means we need to call tcp_mstamp_refresh() a bit earlier in tcp_write_xmit(). Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1151870..ae60dd3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2239,6 +2239,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, sent_pkts = 0; + tcp_mstamp_refresh(tp); if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); @@ -2250,7 +2251,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); - tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit; -- cgit v1.1 From dea6e19f4ef746aa18b4c33d1a7fed54356796ed Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Fri, 27 Oct 2017 00:00:16 -0700 Subject: tap: reference to KVA of an unloaded module causes kernel panic The commit 9a393b5d5988 ("tap: tap as an independent module") created a separate tap module that implements tap functionality and exports interfaces that will be used by macvtap and ipvtap modules to create create respective tap devices. However, that patch introduced a regression wherein the modules macvtap and ipvtap can be removed (through modprobe -r) while there are applications using the respective /dev/tapX devices. These applications cause kernel to hold reference to /dev/tapX through 'struct cdev macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap modules respectively. So, when the application is later closed the kernel panics because we are referencing KVA that is present in the unloaded modules. ----------8<------- Example ----------8<---------- $ sudo ip li add name mv0 link enp7s0 type macvtap $ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}' 14:mv0@enp7s0: $ cat /dev/tap14 & $ lsmod |egrep -i 'tap|vlan' macvtap 16384 0 macvlan 24576 1 macvtap tap 24576 3 macvtap $ sudo modprobe -r macvtap $ fg cat /dev/tap14 ^C <...system panics...> BUG: unable to handle kernel paging request at ffffffffa038c500 IP: cdev_put+0xf/0x30 ----------8<-----------------8<---------- The fix is to set cdev.owner to the module that creates the tap device (either macvtap or ipvtap). With this set, the operations (in fs/char_dev.c) on char device holds and releases the module through cdev_get() and cdev_put() and will not allow the module to unload prematurely. Fixes: 9a393b5d5988ea4e (tap: tap as an independent module) Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvtap.c | 4 ++-- drivers/net/macvtap.c | 4 ++-- drivers/net/tap.c | 5 +++-- include/linux/if_tap.h | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 5dea206..0bcc07f 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -197,8 +197,8 @@ static int ipvtap_init(void) { int err; - err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap"); - + err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap", + THIS_MODULE); if (err) goto out1; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index c2d0ea2..cba5cb3 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -204,8 +204,8 @@ static int macvtap_init(void) { int err; - err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap"); - + err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap", + THIS_MODULE); if (err) goto out1; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 98ee6cc..1b10fcc 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1249,8 +1249,8 @@ static int tap_list_add(dev_t major, const char *device_name) return 0; } -int tap_create_cdev(struct cdev *tap_cdev, - dev_t *tap_major, const char *device_name) +int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, + const char *device_name, struct module *module) { int err; @@ -1259,6 +1259,7 @@ int tap_create_cdev(struct cdev *tap_cdev, goto out1; cdev_init(tap_cdev, &tap_fops); + tap_cdev->owner = module; err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); if (err) goto out2; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 4837157..9ae41cd 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap); int tap_get_minor(dev_t major, struct tap_dev *tap); void tap_free_minor(dev_t major, struct tap_dev *tap); int tap_queue_resize(struct tap_dev *tap); -int tap_create_cdev(struct cdev *tap_cdev, - dev_t *tap_major, const char *device_name); +int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, + const char *device_name, struct module *module); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); #endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.1 From ea6789980fdaa610d7eb63602c746bf6ec70cd2b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Oct 2017 23:32:27 +0100 Subject: assoc_array: Fix a buggy node-splitting case This fixes CVE-2017-12193. Fix a case in the assoc_array implementation in which a new leaf is added that needs to go into a node that happens to be full, where the existing leaves in that node cluster together at that level to the exclusion of new leaf. What needs to happen is that the existing leaves get moved out to a new node, N1, at level + 1 and the existing node needs replacing with one, N0, that has pointers to the new leaf and to N1. The code that tries to do this gets this wrong in two ways: (1) The pointer that should've pointed from N0 to N1 is set to point recursively to N0 instead. (2) The backpointer from N0 needs to be set correctly in the case N0 is either the root node or reached through a shortcut. Fix this by removing this path and using the split_node path instead, which achieves the same end, but in a more general way (thanks to Eric Biggers for spotting the redundancy). The problem manifests itself as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: assoc_array_apply_edit+0x59/0xe5 Fixes: 3cb989501c26 ("Add a generic associative array implementation.") Reported-and-tested-by: WU Fan Signed-off-by: David Howells Cc: stable@vger.kernel.org [v3.13-rc1+] Signed-off-by: Linus Torvalds --- lib/assoc_array.c | 51 +++++++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 155c55d..4e53be8 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -598,21 +598,31 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) goto all_leaves_cluster_together; - /* Otherwise we can just insert a new node ahead of the old - * one. + /* Otherwise all the old leaves cluster in the same slot, but + * the new leaf wants to go into a different slot - so we + * create a new node (n0) to hold the new leaf and a pointer to + * a new node (n1) holding all the old leaves. + * + * This can be done by falling through to the node splitting + * path. */ - goto present_leaves_cluster_but_not_new_leaf; + pr_devel("present leaves cluster but not new leaf\n"); } split_node: pr_devel("split node\n"); - /* We need to split the current node; we know that the node doesn't - * simply contain a full set of leaves that cluster together (it - * contains meta pointers and/or non-clustering leaves). + /* We need to split the current node. The node must contain anything + * from a single leaf (in the one leaf case, this leaf will cluster + * with the new leaf) and the rest meta-pointers, to all leaves, some + * of which may cluster. + * + * It won't contain the case in which all the current leaves plus the + * new leaves want to cluster in the same slot. * * We need to expel at least two leaves out of a set consisting of the - * leaves in the node and the new leaf. + * leaves in the node and the new leaf. The current meta pointers can + * just be copied as they shouldn't cluster with any of the leaves. * * We need a new node (n0) to replace the current one and a new node to * take the expelled nodes (n1). @@ -717,33 +727,6 @@ found_slot_for_multiple_occupancy: pr_devel("<--%s() = ok [split node]\n", __func__); return true; -present_leaves_cluster_but_not_new_leaf: - /* All the old leaves cluster in the same slot, but the new leaf wants - * to go into a different slot, so we create a new node to hold the new - * leaf and a pointer to a new node holding all the old leaves. - */ - pr_devel("present leaves cluster but not new leaf\n"); - - new_n0->back_pointer = node->back_pointer; - new_n0->parent_slot = node->parent_slot; - new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; - new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); - new_n1->parent_slot = edit->segment_cache[0]; - new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; - edit->adjust_count_on = new_n0; - - for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) - new_n1->slots[i] = node->slots[i]; - - new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); - edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; - - edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; - edit->set[0].to = assoc_array_node_to_ptr(new_n0); - edit->excised_meta[0] = assoc_array_node_to_ptr(node); - pr_devel("<--%s() = ok [insert node before]\n", __func__); - return true; - all_leaves_cluster_together: /* All the leaves, new and old, want to cluster together in this node * in the same slot, so we have to replace this node with a shortcut to -- cgit v1.1 From 8108a77515126f6db4374e8593956e20430307c0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:34 -0700 Subject: bpf: bpf_compute_data uses incorrect cb structure SK_SKB program types use bpf_compute_data to store the end of the packet data. However, bpf_compute_data assumes the cb is stored in the qdisc layer format. But, for SK_SKB this is the wrong layer of the stack for this type. It happens to work (sort of!) because in most cases nothing happens to be overwritten today. This is very fragile and error prone. Fortunately, we have another hole in tcp_skb_cb we can use so lets put the data_end value there. Note, SK_SKB program types do not use data_meta, they are failed by sk_skb_is_valid_access(). Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + kernel/bpf/sockmap.c | 12 ++++++++++-- net/core/filter.c | 27 ++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index b1ef98e..33599d17 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -844,6 +844,7 @@ struct tcp_skb_cb { __u32 key; __u32 flags; struct bpf_map *map; + void *data_end; } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 2b6eb35..6778fb7 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -93,6 +93,14 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +/* compute the linear packet data range [data, data_end) for skb when + * sk_skb type programs are in use. + */ +static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); +} + static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) { struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict); @@ -108,7 +116,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) */ TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_end_sk_skb(skb); preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); preempt_enable(); @@ -368,7 +376,7 @@ static int smap_parse_func_strparser(struct strparser *strp, * any socket yet. */ skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_end_sk_skb(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; rcu_read_unlock(); diff --git a/net/core/filter.c b/net/core/filter.c index aa02659..68eaa2f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4243,6 +4243,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + int off; + + switch (si->off) { + case offsetof(struct __sk_buff, data_end): + off = si->off; + off -= offsetof(struct __sk_buff, data_end); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct tcp_skb_cb, bpf.data_end); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + default: + return bpf_convert_ctx_access(type, si, insn_buf, prog, + target_size); + } + + return insn - insn_buf; +} + const struct bpf_verifier_ops sk_filter_prog_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -4301,7 +4326,7 @@ const struct bpf_verifier_ops sock_ops_prog_ops = { const struct bpf_verifier_ops sk_skb_prog_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, - .convert_ctx_access = bpf_convert_ctx_access, + .convert_ctx_access = sk_skb_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; -- cgit v1.1 From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:53 -0700 Subject: bpf: rename sk_actions to align with bpf infrastructure Recent additions to support multiple programs in cgroups impose a strict requirement, "all yes is yes, any no is no". To enforce this the infrastructure requires the 'no' return code, SK_DROP in this case, to be 0. To apply these rules to SK_SKB program types the sk_actions return codes need to be adjusted. This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove SK_ABORTED to remove any chance that the API may allow aborted program flows to be passed up the stack. This would be incorrect behavior and allow programs to break existing policies. Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 +++--- kernel/bpf/sockmap.c | 3 ++- net/core/filter.c | 5 +++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f90860d..0d7948c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -575,7 +575,7 @@ union bpf_attr { * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use - * Return: SK_REDIRECT + * Return: SK_PASS * * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops @@ -786,8 +786,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6778fb7..66f00a2 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -122,7 +122,8 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) preempt_enable(); skb->sk = NULL; - return rc; + return rc == SK_PASS ? + (TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP; } static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 68eaa2f..6ae94f8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1844,14 +1844,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + /* If user passes invalid input drop the packet. */ if (unlikely(flags)) - return SK_ABORTED; + return SK_DROP; tcb->bpf.key = key; tcb->bpf.flags = flags; tcb->bpf.map = map; - return SK_REDIRECT; + return SK_PASS; } struct sock *do_sk_redirect_map(struct sk_buff *skb) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 24b35a1..c174971 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -787,8 +787,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; -- cgit v1.1 From d04adf1b355181e737b6b1e23d801b07f0b7c4c0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 02:13:29 +0800 Subject: sctp: reset owner sk for data chunks on out queues when migrating a sock Now when migrating sock to another one in sctp_sock_migrate(), it only resets owner sk for the data in receive queues, not the chunks on out queues. It would cause that data chunks length on the sock is not consistent with sk sk_wmem_alloc. When closing the sock or freeing these chunks, the old sk would never be freed, and the new sock may crash due to the overflow sk_wmem_alloc. syzbot found this issue with this series: r0 = socket$inet_sctp() sendto$inet(r0) listen(r0) accept4(r0) close(r0) Although listen() should have returned error when one TCP-style socket is in connecting (I may fix this one in another patch), it could also be reproduced by peeling off an assoc. This issue is there since very beginning. This patch is to reset owner sk for the chunks on out queues so that sk sk_wmem_alloc has correct value after accept one sock or peeloff an assoc to one sock. Note that when resetting owner sk for chunks on outqueue, it has to sctp_clear_owner_w/skb_orphan chunks before changing assoc->base.sk first and then sctp_set_owner_w them after changing assoc->base.sk, due to that sctp_wfree and it's callees are using assoc->base.sk. Reported-by: Dmitry Vyukov Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 17841ab..6f45d17 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -170,6 +170,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sk_mem_charge(sk, chunk->skb->truesize); } +static void sctp_clear_owner_w(struct sctp_chunk *chunk) +{ + skb_orphan(chunk->skb); +} + +static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, + void (*cb)(struct sctp_chunk *)) + +{ + struct sctp_outq *q = &asoc->outqueue; + struct sctp_transport *t; + struct sctp_chunk *chunk; + + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) + list_for_each_entry(chunk, &t->transmitted, transmitted_list) + cb(chunk); + + list_for_each_entry(chunk, &q->retransmit, list) + cb(chunk); + + list_for_each_entry(chunk, &q->sacked, list) + cb(chunk); + + list_for_each_entry(chunk, &q->abandoned, list) + cb(chunk); + + list_for_each_entry(chunk, &q->out_chunk_list, list) + cb(chunk); +} + /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) @@ -8212,7 +8242,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); + sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); + sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. -- cgit v1.1 From 50317fce2cc70a2bbbc4b42c31bbad510382a53c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 27 Oct 2017 22:08:56 -0700 Subject: net_sched: avoid matching qdisc with zero handle Davide found the following script triggers a NULL pointer dereference: ip l a name eth0 type dummy tc q a dev eth0 parent :1 handle 1: htb This is because for a freshly created netdevice noop_qdisc is attached and when passing 'parent :1', kernel actually tries to match the major handle which is 0 and noop_qdisc has handle 0 so is matched by mistake. Commit 69012ae425d7 tries to fix a similar bug but still misses this case. Handle 0 is not a valid one, should be just skipped. In fact, kernel uses it as TC_H_UNSPEC. Fixes: 69012ae425d7 ("net: sched: fix handling of singleton qdiscs with qdisc_hash") Fixes: 59cc1f61f09c ("net: sched:convert qdisc linked list to hashtable") Reported-by: Davide Caratti Cc: Jiri Kosina Cc: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c6deb74..22bc6fc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -301,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; + if (!handle) + return NULL; q = qdisc_match_from_root(dev->qdisc, handle); if (q) goto out; -- cgit v1.1 From 1da4fc97cbf89514e417a3df46eaec864a9b8a48 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:54 +0800 Subject: sctp: fix some type cast warnings introduced by stream reconf These warnings were found by running 'make C=2 M=net/sctp/'. They are introduced by not aware of Endian when coding stream reconf patches. Since commit c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") enabled stream reconf feature for users, the Fixes tag below would use it. Fixes: c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 32 ++++++++++++++++---------------- include/net/sctp/sm.h | 2 +- include/net/sctp/ulpevent.h | 2 +- net/sctp/sm_make_chunk.c | 5 +++-- net/sctp/stream.c | 26 +++++++++++++++++--------- net/sctp/ulpevent.c | 2 +- 6 files changed, 39 insertions(+), 30 deletions(-) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 82b171e..09d7412 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -716,28 +716,28 @@ struct sctp_reconf_chunk { struct sctp_strreset_outreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u32 response_seq; - __u32 send_reset_at_tsn; - __u16 list_of_streams[0]; + __be32 request_seq; + __be32 response_seq; + __be32 send_reset_at_tsn; + __be16 list_of_streams[0]; }; struct sctp_strreset_inreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u16 list_of_streams[0]; + __be32 request_seq; + __be16 list_of_streams[0]; }; struct sctp_strreset_tsnreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; + __be32 request_seq; }; struct sctp_strreset_addstrm { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u16 number_of_streams; - __u16 reserved; + __be32 request_seq; + __be16 number_of_streams; + __be16 reserved; }; enum { @@ -752,16 +752,16 @@ enum { struct sctp_strreset_resp { struct sctp_paramhdr param_hdr; - __u32 response_seq; - __u32 result; + __be32 response_seq; + __be32 result; }; struct sctp_strreset_resptsn { struct sctp_paramhdr param_hdr; - __u32 response_seq; - __u32 result; - __u32 senders_next_tsn; - __u32 receivers_next_tsn; + __be32 response_seq; + __be32 result; + __be32 senders_next_tsn; + __be32 receivers_next_tsn; }; #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2db3d3a..88233cf 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -261,7 +261,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, + __u16 stream_num, __be16 *stream_list, bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( const struct sctp_association *asoc); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index b8c86ec..231dc42 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -130,7 +130,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, - __u16 stream_num, __u16 *stream_list, gfp_t gfp); + __u16 stream_num, __be16 *stream_list, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( const struct sctp_association *asoc, __u16 flags, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ca8f196..57c5504 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3591,7 +3591,7 @@ static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc, */ struct sctp_chunk *sctp_make_strreset_req( const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, + __u16 stream_num, __be16 *stream_list, bool out, bool in) { struct sctp_strreset_outreq outreq; @@ -3788,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc, { struct sctp_reconf_chunk *hdr; union sctp_params param; - __u16 last = 0, cnt = 0; + __be16 last = 0; + __u16 cnt = 0; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; sctp_walk_params(param, hdr, params) { diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 63ea155..fa8371f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -118,6 +118,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc, __u16 i, str_nums, *str_list; struct sctp_chunk *chunk; int retval = -EINVAL; + __be16 *nstr_list; bool out, in; if (!asoc->peer.reconf_capable || @@ -148,13 +149,18 @@ int sctp_send_reset_streams(struct sctp_association *asoc, if (str_list[i] >= stream->incnt) goto out; + nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL); + if (!nstr_list) { + retval = -ENOMEM; + goto out; + } + for (i = 0; i < str_nums; i++) - str_list[i] = htons(str_list[i]); + nstr_list[i] = htons(str_list[i]); - chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); + chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); - for (i = 0; i < str_nums; i++) - str_list[i] = ntohs(str_list[i]); + kfree(nstr_list); if (!chunk) { retval = -ENOMEM; @@ -305,7 +311,7 @@ out: } static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( - struct sctp_association *asoc, __u32 resp_seq, + struct sctp_association *asoc, __be32 resp_seq, __be16 type) { struct sctp_chunk *chunk = asoc->strreset_chunk; @@ -345,8 +351,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( { struct sctp_strreset_outreq *outreq = param.v; struct sctp_stream *stream = &asoc->stream; - __u16 i, nums, flags = 0, *str_p = NULL; __u32 result = SCTP_STRRESET_DENIED; + __u16 i, nums, flags = 0; + __be16 *str_p = NULL; __u32 request_seq; request_seq = ntohl(outreq->request_seq); @@ -439,8 +446,9 @@ struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_chunk *chunk = NULL; - __u16 i, nums, *str_p; __u32 request_seq; + __u16 i, nums; + __be16 *str_p; request_seq = ntohl(inreq->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || @@ -769,7 +777,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { struct sctp_strreset_outreq *outreq; - __u16 *str_p; + __be16 *str_p; outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; @@ -794,7 +802,7 @@ struct sctp_chunk *sctp_process_strreset_resp( nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { struct sctp_strreset_inreq *inreq; - __u16 *str_p; + __be16 *str_p; /* if the result is performed, it's impossible for inreq */ if (result == SCTP_STRRESET_PERFORMED) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 67abc01..5447228 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, __u16 stream_num, - __u16 *stream_list, gfp_t gfp) + __be16 *stream_list, gfp_t gfp) { struct sctp_stream_reset_event *sreset; struct sctp_ulpevent *event; -- cgit v1.1 From 8d32503efde82db4e0a370981e90628ebd6718b5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:55 +0800 Subject: sctp: fix some type cast warnings introduced by transport rhashtable These warnings were found by running 'make C=2 M=net/sctp/'. They are introduced by not aware of Endian for the port when coding transport rhashtable patches. Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 34f10e7..621b5ca 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -794,7 +794,7 @@ hit: struct sctp_hash_cmp_arg { const union sctp_addr *paddr; const struct net *net; - u16 lport; + __be16 lport; }; static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, @@ -820,37 +820,37 @@ out: return err; } -static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) +static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; const union sctp_addr *paddr = &t->ipaddr; const struct net *net = sock_net(t->asoc->base.sk); - u16 lport = htons(t->asoc->base.bind_addr.port); - u32 addr; + __be16 lport = htons(t->asoc->base.bind_addr.port); + __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else - addr = paddr->v4.sin_addr.s_addr; + addr = (__force __u32)paddr->v4.sin_addr.s_addr; - return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } -static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) +static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed) { const struct sctp_hash_cmp_arg *x = data; const union sctp_addr *paddr = x->paddr; const struct net *net = x->net; - u16 lport = x->lport; - u32 addr; + __be16 lport = x->lport; + __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else - addr = paddr->v4.sin_addr.s_addr; + addr = (__force __u32)paddr->v4.sin_addr.s_addr; - return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } -- cgit v1.1 From f6fc6bc0b8e0bb13a210bd7386ffdcb1a5f30ef1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:56 +0800 Subject: sctp: fix a type cast warnings that causes a_rwnd gets the wrong value These warnings were found by running 'make C=2 M=net/sctp/'. Commit d4d6fb5787a6 ("sctp: Try not to change a_rwnd when faking a SACK from SHUTDOWN.") expected to use the peers old rwnd and add our flight size to the a_rwnd. But with the wrong Endian, it may not work as well as expected. So fix it by converting to the right value. Fixes: d4d6fb5787a6 ("sctp: Try not to change a_rwnd when faking a SACK from SHUTDOWN.") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e6a2974..8f2762b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1680,8 +1680,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, case SCTP_CMD_PROCESS_CTSN: /* Dummy up a SACK for processing. */ sackh.cum_tsn_ack = cmd->obj.be32; - sackh.a_rwnd = asoc->peer.rwnd + - asoc->outqueue.outstanding_bytes; + sackh.a_rwnd = htonl(asoc->peer.rwnd + + asoc->outqueue.outstanding_bytes); sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; chunk->subh.sack_hdr = &sackh; -- cgit v1.1 From 978aa0474115f3f5848949f2efce4def0766a5cb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:57 +0800 Subject: sctp: fix some type cast warnings introduced since very beginning These warnings were found by running 'make C=2 M=net/sctp/'. They are there since very beginning. Note after this patch, there still one warning left in sctp_outq_flush(): sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM) Since it has been moved to sctp_stream_outq_migrate on net-next, to avoid the extra job when merging net-next to net, I will post the fix for it after the merging is done. Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 +- include/uapi/linux/sctp.h | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/sm_make_chunk.c | 4 ++-- net/sctp/sm_sideeffect.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 09d7412..da803df 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -231,7 +231,7 @@ struct sctp_datahdr { __be32 tsn; __be16 stream; __be16 ssn; - __be32 ppid; + __u32 ppid; __u8 payload[0]; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6217ff8..84fc291 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -376,7 +376,7 @@ struct sctp_remote_error { __u16 sre_type; __u16 sre_flags; __u32 sre_length; - __u16 sre_error; + __be16 sre_error; sctp_assoc_t sre_assoc_id; __u8 sre_data[0]; }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7fe9e1d1..a6dfa86 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -738,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb) /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v6_is_ce(const struct sk_buff *skb) { - return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20); + return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20); } /* Dump the v6 addr to the seq file. */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 57c5504..514465b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2854,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); @@ -2867,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = SCTP_PARAM_DEL_IP; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8f2762b..e2d9a4b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1607,12 +1607,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.err); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.err); + subtype, chunk, cmd->obj.u32); break; case SCTP_CMD_INIT_COUNTER_INC: -- cgit v1.1 From 7aa0045dadb6ef37485ea9f2a7d28278ca588b51 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:28 -0700 Subject: net_sched: introduce a workqueue for RCU callbacks of tc filter This patch introduces a dedicated workqueue for tc filters so that each tc filter's RCU callback could defer their action destroy work to this workqueue. The helper tcf_queue_work() is introduced for them to use. Because we hold RTNL lock when calling tcf_block_put(), we can not simply flush works inside it, therefore we have to defer it again to this workqueue and make sure all flying RCU callbacks have already queued their work before this one, in other words, to ensure this is the last one to execute to prevent any use-after-free. On the other hand, this makes tcf_block_put() ugly and harder to understand. Since David and Eric strongly dislike adding synchronize_rcu(), this is probably the only solution that could make everyone happy. Please also see the code comments below. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 +++ include/net/sch_generic.h | 2 ++ net/sched/cls_api.c | 68 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8..3009547 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -2,6 +2,7 @@ #define __NET_PKT_CLS_H #include +#include #include #include @@ -17,6 +18,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +bool tcf_queue_work(struct work_struct *work); + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2..0dec8a2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct work_struct work; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0b2219a..045d136 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -77,6 +77,8 @@ out: } EXPORT_SYMBOL(register_tcf_proto_ops); +static struct workqueue_struct *tc_filter_wq; + int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; @@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) * tcf_proto_ops's destroy() handler. */ rcu_barrier(); + flush_workqueue(tc_filter_wq); write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { @@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); +bool tcf_queue_work(struct work_struct *work) +{ + return queue_work(tc_filter_wq, work); +} +EXPORT_SYMBOL(tcf_queue_work); + /* Select new prio value from the range, managed by kernel. */ static inline u32 tcf_auto_prio(struct tcf_proto *tp) @@ -266,23 +275,30 @@ err_chain_create: } EXPORT_SYMBOL(tcf_block_get); -void tcf_block_put(struct tcf_block *block) +static void tcf_block_put_final(struct work_struct *work) { + struct tcf_block *block = container_of(work, struct tcf_block, work); struct tcf_chain *chain, *tmp; - if (!block) - return; - - /* XXX: Standalone actions are not allowed to jump to any chain, and - * bound actions should be all removed after flushing. However, - * filters are destroyed in RCU callbacks, we have to hold the chains - * first, otherwise we would always race with RCU callbacks on this list - * without proper locking. - */ + /* At this point, all the chains should have refcnt == 1. */ + rtnl_lock(); + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_put(chain); + rtnl_unlock(); + kfree(block); +} - /* Wait for existing RCU callbacks to cool down. */ - rcu_barrier(); +/* XXX: Standalone actions are not allowed to jump to any chain, and bound + * actions should be all removed after flushing. However, filters are destroyed + * in RCU callbacks, we have to hold the chains first, otherwise we would + * always race with RCU callbacks on this list without proper locking. + */ +static void tcf_block_put_deferred(struct work_struct *work) +{ + struct tcf_block *block = container_of(work, struct tcf_block, work); + struct tcf_chain *chain; + rtnl_lock(); /* Hold a refcnt for all chains, except 0, in case they are gone. */ list_for_each_entry(chain, &block->chain_list, list) if (chain->index) @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block) list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); - /* Wait for RCU callbacks to release the reference count. */ + INIT_WORK(&block->work, tcf_block_put_final); + /* Wait for RCU callbacks to release the reference count and make + * sure their works have been queued before this. + */ rcu_barrier(); + tcf_queue_work(&block->work); + rtnl_unlock(); +} - /* At this point, all the chains should have refcnt == 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) - tcf_chain_put(chain); - kfree(block); +void tcf_block_put(struct tcf_block *block) +{ + if (!block) + return; + + INIT_WORK(&block->work, tcf_block_put_deferred); + /* Wait for existing RCU callbacks to cool down, make sure their works + * have been queued before this. We can not flush pending works here + * because we are holding the RTNL lock. + */ + rcu_barrier(); + tcf_queue_work(&block->work); } EXPORT_SYMBOL(tcf_block_put); @@ -1030,6 +1060,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev); static int __init tc_filter_init(void) { + tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); + if (!tc_filter_wq) + return -ENOMEM; + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, -- cgit v1.1 From c96a48385d53089ee9977dd0bce82a9493984484 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:29 -0700 Subject: net_sched: use tcf_queue_work() in basic filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d89ebaf..f177649 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -34,7 +34,10 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -82,15 +85,26 @@ static int basic_init(struct tcf_proto *tp) return 0; } -static void basic_delete_filter(struct rcu_head *head) +static void basic_delete_filter_work(struct work_struct *work) { - struct basic_filter *f = container_of(head, struct basic_filter, rcu); + struct basic_filter *f = container_of(work, struct basic_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + rtnl_unlock(); + kfree(f); } +static void basic_delete_filter(struct rcu_head *head) +{ + struct basic_filter *f = container_of(head, struct basic_filter, rcu); + + INIT_WORK(&f->work, basic_delete_filter_work); + tcf_queue_work(&f->work); +} + static void basic_destroy(struct tcf_proto *tp) { struct basic_head *head = rtnl_dereference(tp->root); -- cgit v1.1 From e910af676b565ecc16bcd6c896ecb68157396ecc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:30 -0700 Subject: net_sched: use tcf_queue_work() in bpf filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 520c502..037a3ae 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -49,7 +49,10 @@ struct cls_bpf_prog { struct sock_filter *bpf_ops; const char *bpf_name; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -257,9 +260,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog); } +static void cls_bpf_delete_prog_work(struct work_struct *work) +{ + struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work); + + rtnl_lock(); + __cls_bpf_delete_prog(prog); + rtnl_unlock(); +} + static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) { - __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); + struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + + INIT_WORK(&prog->work, cls_bpf_delete_prog_work); + tcf_queue_work(&prog->work); } static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) -- cgit v1.1 From b1b5b04fdb6da262aef37ef83b9f2e41326720ef Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:31 -0700 Subject: net_sched: use tcf_queue_work() in cgroup filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index d48452f..a97e069 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -23,7 +23,10 @@ struct cls_cgroup_head { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void cls_cgroup_destroy_work(struct work_struct *work) +{ + struct cls_cgroup_head *head = container_of(work, + struct cls_cgroup_head, + work); + rtnl_lock(); + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + kfree(head); + rtnl_unlock(); +} + static void cls_cgroup_destroy_rcu(struct rcu_head *root) { struct cls_cgroup_head *head = container_of(root, struct cls_cgroup_head, rcu); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + INIT_WORK(&head->work, cls_cgroup_destroy_work); + tcf_queue_work(&head->work); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, -- cgit v1.1 From 94cdb47566b799649e996e1fb9de2a503dada763 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:32 -0700 Subject: net_sched: use tcf_queue_work() in flow filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2a3a60e..67f3a2a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -57,7 +57,10 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline u32 addr_fold(void *addr) @@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter(struct rcu_head *head) +static void flow_destroy_filter_work(struct work_struct *work) { - struct flow_filter *f = container_of(head, struct flow_filter, rcu); + struct flow_filter *f = container_of(work, struct flow_filter, work); + rtnl_lock(); del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); kfree(f); + rtnl_unlock(); +} + +static void flow_destroy_filter(struct rcu_head *head) +{ + struct flow_filter *f = container_of(head, struct flow_filter, rcu); + + INIT_WORK(&f->work, flow_destroy_filter_work); + tcf_queue_work(&f->work); } static int flow_change(struct net *net, struct sk_buff *in_skb, -- cgit v1.1 From 0552c8afa077889b4704ef5ee88b03063ad45023 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:33 -0700 Subject: net_sched: use tcf_queue_work() in flower filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b480d7c..5b5722c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -87,7 +87,10 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct net_device *hw_dev; }; @@ -215,12 +218,22 @@ static int fl_init(struct tcf_proto *tp) return 0; } -static void fl_destroy_filter(struct rcu_head *head) +static void fl_destroy_filter_work(struct work_struct *work) { - struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + INIT_WORK(&f->work, fl_destroy_filter_work); + tcf_queue_work(&f->work); } static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) -- cgit v1.1 From e071dff2a6beeccb6f9744f9a0251ab773ca2ab8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:34 -0700 Subject: net_sched: use tcf_queue_work() in fw filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 941245a..99183b8 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -46,7 +46,10 @@ struct fw_filter { #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static u32 fw_hash(u32 handle) @@ -119,12 +122,22 @@ static int fw_init(struct tcf_proto *tp) return 0; } -static void fw_delete_filter(struct rcu_head *head) +static void fw_delete_filter_work(struct work_struct *work) { - struct fw_filter *f = container_of(head, struct fw_filter, rcu); + struct fw_filter *f = container_of(work, struct fw_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void fw_delete_filter(struct rcu_head *head) +{ + struct fw_filter *f = container_of(head, struct fw_filter, rcu); + + INIT_WORK(&f->work, fw_delete_filter_work); + tcf_queue_work(&f->work); } static void fw_destroy(struct tcf_proto *tp) -- cgit v1.1 From df2735ee8e6ca202a8630f237b59401a25193be1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:35 -0700 Subject: net_sched: use tcf_queue_work() in matchall filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index eeac606..c33f711 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,7 +21,10 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -41,13 +44,23 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void mall_destroy_work(struct work_struct *work) +{ + struct cls_mall_head *head = container_of(work, struct cls_mall_head, + work); + rtnl_lock(); + tcf_exts_destroy(&head->exts); + kfree(head); + rtnl_unlock(); +} + static void mall_destroy_rcu(struct rcu_head *rcu) { struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, rcu); - tcf_exts_destroy(&head->exts); - kfree(head); + INIT_WORK(&head->work, mall_destroy_work); + tcf_queue_work(&head->work); } static int mall_replace_hw_filter(struct tcf_proto *tp, -- cgit v1.1 From c0d378ef1266546a39f2df00a56ff1f74166a2b7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:36 -0700 Subject: net_sched: use tcf_queue_work() in u32 filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 10b8d85..dadd1b3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -68,7 +68,10 @@ struct tc_u_knode { u32 __percpu *pcpu_success; #endif struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. */ @@ -418,11 +421,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, * this the u32_delete_key_rcu variant does not free the percpu * statistics. */ +static void u32_delete_key_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, false); + rtnl_unlock(); +} + static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, false); + INIT_WORK(&key->work, u32_delete_key_work); + tcf_queue_work(&key->work); } /* u32_delete_key_freepf_rcu is the rcu callback variant @@ -432,11 +445,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu) * for the variant that should be used with keys return from * u32_init_knode() */ +static void u32_delete_key_freepf_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, true); + rtnl_unlock(); +} + static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, true); + INIT_WORK(&key->work, u32_delete_key_freepf_work); + tcf_queue_work(&key->work); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) -- cgit v1.1 From c2f3f31d402be4849b06282c3a5278f2865c9fcc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:37 -0700 Subject: net_sched: use tcf_queue_work() in route filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_route.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 9ddde65..4b14ccd 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -57,7 +57,10 @@ struct route4_filter { u32 handle; struct route4_bucket *bkt; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -254,12 +257,22 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter_work(struct work_struct *work) { - struct route4_filter *f = container_of(head, struct route4_filter, rcu); + struct route4_filter *f = container_of(work, struct route4_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void route4_delete_filter(struct rcu_head *head) +{ + struct route4_filter *f = container_of(head, struct route4_filter, rcu); + + INIT_WORK(&f->work, route4_delete_filter_work); + tcf_queue_work(&f->work); } static void route4_destroy(struct tcf_proto *tp) -- cgit v1.1 From d4f84a41dc615c166555cd332b0235bf6b9bcb4a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:38 -0700 Subject: net_sched: use tcf_queue_work() in rsvp filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index b1f6ed4..bdbc541 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -97,7 +97,10 @@ struct rsvp_filter { u32 handle; struct rsvp_session *sess; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -282,12 +285,22 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } -static void rsvp_delete_filter_rcu(struct rcu_head *head) +static void rsvp_delete_filter_work(struct work_struct *work) { - struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void rsvp_delete_filter_rcu(struct rcu_head *head) +{ + struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + + INIT_WORK(&f->work, rsvp_delete_filter_work); + tcf_queue_work(&f->work); } static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -- cgit v1.1 From 27ce4f05e2abbe2d3ec7434e456619a5178cd3bd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:39 -0700 Subject: net_sched: use tcf_queue_work() in tcindex filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 14a7e08..beaa95e 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -27,14 +27,20 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct tcindex_filter { u16 key; struct tcindex_filter_result result; struct tcindex_filter __rcu *next; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; @@ -133,12 +139,34 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void tcindex_destroy_rexts_work(struct work_struct *work) +{ + struct tcindex_filter_result *r; + + r = container_of(work, struct tcindex_filter_result, work); + rtnl_lock(); + tcf_exts_destroy(&r->exts); + rtnl_unlock(); +} + static void tcindex_destroy_rexts(struct rcu_head *head) { struct tcindex_filter_result *r; r = container_of(head, struct tcindex_filter_result, rcu); - tcf_exts_destroy(&r->exts); + INIT_WORK(&r->work, tcindex_destroy_rexts_work); + tcf_queue_work(&r->work); +} + +static void tcindex_destroy_fexts_work(struct work_struct *work) +{ + struct tcindex_filter *f = container_of(work, struct tcindex_filter, + work); + + rtnl_lock(); + tcf_exts_destroy(&f->result.exts); + kfree(f); + rtnl_unlock(); } static void tcindex_destroy_fexts(struct rcu_head *head) @@ -146,8 +174,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head) struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); - tcf_exts_destroy(&f->result.exts); - kfree(f); + INIT_WORK(&f->work, tcindex_destroy_fexts_work); + tcf_queue_work(&f->work); } static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) -- cgit v1.1 From 2d132eba1d972ea6c0e47286e4c821b4a3c5b84d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:40 -0700 Subject: net_sched: add rtnl assertion to tcf_exts_destroy() After previous patches, it is now safe to claim that tcf_exts_destroy() is always called with RTNL lock. Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 045d136..231181c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -909,6 +909,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT LIST_HEAD(actions); + ASSERT_RTNL(); tcf_exts_to_list(exts, &actions); tcf_action_destroy(&actions, TCA_ACT_UNBIND); kfree(exts->actions); -- cgit v1.1 From 46e235c15ca44f34cb79f4dbec909b8c51999dc1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:41 -0700 Subject: net_sched: fix call_rcu() race on act_sample module removal Similar to commit c78e1746d3ad ("net: sched: fix call_rcu() race on classifier module unloads"), we need to wait for flying RCU callback tcf_sample_cleanup_rcu(). Cc: Yotam Gigi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_sample.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index ec986ae..a9f9a2cc 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -264,6 +264,7 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { + rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } -- cgit v1.1 From 7f071998474a9e5f7b98103d3058a1b8ca5887e6 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 26 Oct 2017 18:24:42 -0700 Subject: selftests: Introduce a new script to generate tc batch file # ./tdc_batch.py -h usage: tdc_batch.py [-h] [-n NUMBER] [-o] [-s] [-p] device file TC batch file generator positional arguments: device device name file batch file name optional arguments: -h, --help show this help message and exit -n NUMBER, --number NUMBER how many lines in batch file -o, --skip_sw skip_sw (offload), by default skip_hw -s, --share_action all filters share the same action -p, --prio all filters have different prio Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc_batch.py | 62 +++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/tdc_batch.py diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py new file mode 100755 index 0000000..707c6bf --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -0,0 +1,62 @@ +#!/usr/bin/python3 + +""" +tdc_batch.py - a script to generate TC batch file + +Copyright (C) 2017 Chris Mi +""" + +import argparse + +parser = argparse.ArgumentParser(description='TC batch file generator') +parser.add_argument("device", help="device name") +parser.add_argument("file", help="batch file name") +parser.add_argument("-n", "--number", type=int, + help="how many lines in batch file") +parser.add_argument("-o", "--skip_sw", + help="skip_sw (offload), by default skip_hw", + action="store_true") +parser.add_argument("-s", "--share_action", + help="all filters share the same action", + action="store_true") +parser.add_argument("-p", "--prio", + help="all filters have different prio", + action="store_true") +args = parser.parse_args() + +device = args.device +file = open(args.file, 'w') + +number = 1 +if args.number: + number = args.number + +skip = "skip_hw" +if args.skip_sw: + skip = "skip_sw" + +share_action = "" +if args.share_action: + share_action = "index 1" + +prio = "prio 1" +if args.prio: + prio = "" + if number > 0x4000: + number = 0x4000 + +index = 0 +for i in range(0x100): + for j in range(0x100): + for k in range(0x100): + mac = ("%02x:%02x:%02x" % (i, j, k)) + src_mac = "e4:11:00:" + mac + dst_mac = "e4:12:00:" + mac + cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s " + "src_mac %s dst_mac %s action drop %s" % + (device, prio, skip, src_mac, dst_mac, share_action)) + file.write("%s\n" % cmd) + index += 1 + if index >= number: + file.close() + exit(0) -- cgit v1.1 From 31c2611b66e01378b54f7ef641cb0d23fcd8502f Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 26 Oct 2017 18:24:43 -0700 Subject: selftests: Introduce a new test case to tc testsuite In this patchset, we fixed a tc bug. This patch adds the test case that reproduces the bug. To run this test case, user should specify an existing NIC device: # sudo ./tdc.py -d enp4s0f0 This test case belongs to category "flower". If user doesn't specify a NIC device, the test cases belong to "flower" will not be run. In this test case, we create 1M filters and all filters share the same action. When destroying all filters, kernel should not panic. It takes about 18s to run it. Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/tests.json | 23 +++++++++++++++++++++- tools/testing/selftests/tc-testing/tdc.py | 20 +++++++++++++++---- tools/testing/selftests/tc-testing/tdc_config.py | 2 ++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index c727b96..5fa02d8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -17,5 +17,26 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "d052", + "name": "Add 1M filters with the same action", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" + ], + "cmdUnderTest": "$TC -b $BATCH_FILE", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm $BATCH_FILE" + ] } -] \ No newline at end of file +] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index cd61b78..5f11f5d 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -88,7 +88,7 @@ def prepare_env(cmdlist): exit(1) -def test_runner(filtered_tests): +def test_runner(filtered_tests, args): """ Driver function for the unit tests. @@ -105,6 +105,8 @@ def test_runner(filtered_tests): for tidx in testlist: result = True tresult = "" + if "flower" in tidx["category"] and args.device == None: + continue print("Test " + tidx["id"] + ": " + tidx["name"]) prepare_env(tidx["setup"]) (p, procout) = exec_cmd(tidx["cmdUnderTest"]) @@ -152,6 +154,10 @@ def ns_create(): exec_cmd(cmd, False) cmd = 'ip -s $NS link set $DEV1 up' exec_cmd(cmd, False) + cmd = 'ip link set $DEV2 netns $NS' + exec_cmd(cmd, False) + cmd = 'ip -s $NS link set $DEV2 up' + exec_cmd(cmd, False) def ns_destroy(): @@ -211,7 +217,8 @@ def set_args(parser): help='Execute the single test case with specified ID') parser.add_argument('-i', '--id', action='store_true', dest='gen_id', help='Generate ID numbers for new test cases') - return parser + parser.add_argument('-d', '--device', + help='Execute the test case in flower category') return parser @@ -225,6 +232,8 @@ def check_default_settings(args): if args.path != None: NAMES['TC'] = args.path + if args.device != None: + NAMES['DEV2'] = args.device if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -381,14 +390,17 @@ def set_operation_mode(args): if (len(alltests) == 0): print("Cannot find a test case with ID matching " + target_id) exit(1) - catresults = test_runner(alltests) + catresults = test_runner(alltests, args) print("All test results: " + "\n\n" + catresults) elif (len(target_category) > 0): + if (target_category == "flower") and args.device == None: + print("Please specify a NIC device (-d) to run category flower") + exit(1) if (target_category not in ucat): print("Specified category is not present in this file.") exit(1) else: - catresults = test_runner(testcases[target_category]) + catresults = test_runner(testcases[target_category], args) print("Category " + target_category + "\n\n" + catresults) ns_destroy() diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index 0108737..b635251 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -12,6 +12,8 @@ NAMES = { # Name of veth devices to be created for the namespace 'DEV0': 'v0p0', 'DEV1': 'v0p1', + 'DEV2': '', + 'BATCH_FILE': './batch.txt', # Name of the namespace to use 'NS': 'tcut' } -- cgit v1.1 From 0b07194bb55ed836c2cc7c22e866b87a14681984 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Oct 2017 13:58:38 -0700 Subject: Linux 4.14-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2900c54..5f91a28 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.1