From f21e0e81d81b649ad309cedc7226f1bed72982e0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 24 May 2011 08:12:40 +0800 Subject: regulator: Do bulk enables of regulators in parallel In order to reduce the impact of ramp times rather than enabling the regulators for a device in series use async tasks to run the actual enables. This means that the delays which the enables implement can all run in parallel, though it does mean that the order in which the supplies come on may be unstable. For super bonus fun points if any of the regulators are shared between multiple supplies on the same device (as is rather likely) then this will test our locking. Note that in this case we only delay once for each physical regulator so the threads shouldn't block each other while delaying. It'd be even nicer if we could coalesce writes to a shared enable registers in PMICs but that's definitely future work, and it may also be useful and is certainly more achievable to optimise out the parallelism if none of the regulators implement ramp delays. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 32 ++++++++++++++++++++++++++------ include/linux/regulator/consumer.h | 3 +++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index d3e3879..7b38af9 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2264,6 +2265,13 @@ err: } EXPORT_SYMBOL_GPL(regulator_bulk_get); +static void regulator_bulk_enable_async(void *data, async_cookie_t cookie) +{ + struct regulator_bulk_data *bulk = data; + + bulk->ret = regulator_enable(bulk->consumer); +} + /** * regulator_bulk_enable - enable multiple regulator consumers * @@ -2279,21 +2287,33 @@ EXPORT_SYMBOL_GPL(regulator_bulk_get); int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { + LIST_HEAD(async_domain); int i; - int ret; + int ret = 0; + + for (i = 0; i < num_consumers; i++) + async_schedule_domain(regulator_bulk_enable_async, + &consumers[i], &async_domain); + + async_synchronize_full_domain(&async_domain); + /* If any consumer failed we need to unwind any that succeeded */ for (i = 0; i < num_consumers; i++) { - ret = regulator_enable(consumers[i].consumer); - if (ret != 0) + if (consumers[i].ret != 0) { + ret = consumers[i].ret; goto err; + } } return 0; err: - pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret); - for (--i; i >= 0; --i) - regulator_disable(consumers[i].consumer); + for (i = 0; i < num_consumers; i++) + if (consumers[i].ret == 0) + regulator_disable(consumers[i].consumer); + else + pr_err("Failed to enable %s: %d\n", + consumers[i].supply, consumers[i].ret); return ret; } diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 9e87c1c..26f6ea4 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -122,6 +122,9 @@ struct regulator; struct regulator_bulk_data { const char *supply; struct regulator *consumer; + + /* Internal use */ + int ret; }; #if defined(CONFIG_REGULATOR) -- cgit v1.1 From 2ae3636b79aee1a69b2e84eff68bb123090796d3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 24 May 2011 23:14:40 +0800 Subject: regulator: Use _cansleep() for WM8994 regulator GPIOs The WM8994 regulator driver is perfectly happy if the GPIO used to enable the regulator sleeps so call the appropriate GPIO API. 
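For context, a minimal sketch of why the _cansleep variant matters, assuming the enable line sits behind a controller that may sleep (for example an I2C GPIO expander); the function below is hypothetical and only illustrates the consumer-side pattern:

#include <linux/gpio.h>

static int example_ldo_enable(unsigned int gpio)
{
	int ret;

	/*
	 * gpio_set_value() is only valid for GPIOs whose controller never
	 * sleeps; gpio_set_value_cansleep() is the process-context variant
	 * for controllers that can sleep, which matches the context the
	 * regulator enable/disable ops run in.
	 */
	ret = gpio_request(gpio, "example-ldo-enable");
	if (ret)
		return ret;

	ret = gpio_direction_output(gpio, 0);
	if (ret) {
		gpio_free(gpio);
		return ret;
	}

	gpio_set_value_cansleep(gpio, 1);
	return 0;
}
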
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm8994-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index 35b2958..1a6a690 100644 --- a/drivers/regulator/wm8994-regulator.c +++ b/drivers/regulator/wm8994-regulator.c @@ -43,7 +43,7 @@ static int wm8994_ldo_enable(struct regulator_dev *rdev) if (!ldo->enable) return 0; - gpio_set_value(ldo->enable, 1); + gpio_set_value_cansleep(ldo->enable, 1); ldo->is_enabled = true; return 0; @@ -57,7 +57,7 @@ static int wm8994_ldo_disable(struct regulator_dev *rdev) if (!ldo->enable) return -EINVAL; - gpio_set_value(ldo->enable, 0); + gpio_set_value_cansleep(ldo->enable, 0); ldo->is_enabled = false; return 0; -- cgit v1.1 From 7736f11dbadce33d3f12bf0e8114d0f1da5e8622 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Fri, 27 May 2011 12:25:27 -0700 Subject: regulator: twl-regulator: fix n_voltages for twl6030 variable LDOs The n_voltages initializer for the TWL6030_ADJUSTABLE_LDO macro is off by one, causing the the highest supported voltage to be unreachable. Setting the machine constraints to only allow the highest voltage causes errors: machine_constraints_voltage: VAUX3_6030: unsupportable voltage constraints twl_reg twl_reg.39: can't register VAUX3_6030, -22 twl_reg: probe of twl_reg.39 failed with error -22 This patch fixes the off by one error. Tested by setting VAUX3_6030 to 3.3V. Signed-off-by: Colin Cross Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/twl-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 87fe0f7..503c2bc 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -864,7 +864,7 @@ static struct regulator_ops twlsmps_ops = { .desc = { \ .name = #label, \ .id = TWL6030_REG_##label, \ - .n_voltages = (max_mVolts - min_mVolts)/100, \ + .n_voltages = (max_mVolts - min_mVolts)/100 + 1, \ .ops = &twl6030ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ -- cgit v1.1 From c3d4913cd4cd469cbf29d411293e937729e83f3a Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Tue, 31 May 2011 10:34:45 +0900 Subject: pch_dma: fix DMA issue(ch8-ch11) ISSUE: In case PCH_DMA with I2S communications with ch8~ch11, sometimes I2S data is not send correctly. CAUSE: The following patch I submitted before was not enough modification for supporting DMA ch8~ch11. The modification for status register of ch8~11 was not enough. pch_dma: Support I2S for ML7213 IOH author Tomoya MORINAGA Mon, 9 May 2011 07:09:38 +0000 (16:09 +0900) committer Vinod Koul Mon, 9 May 2011 11:42:23 +0000 (16:42 +0530) commit 194f5f2706c7472f9c6bb2d17fa788993606581f tree c9d4903ea02b18939a4f390956a48be1a3734517 parent 60092d0bde4c8741198da4a69b693d3709385bf1 This patch fixes the issue. We can confirm PCH_DMA with I2S communications with ch8~ch11 works well. 
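For reference, a minimal sketch of the channel-to-bit mapping this fix relies on: channels 0-7 are enabled and reported through the low half of CTL2 and through STS0, while channels 8-11 use the upper half of CTL2 and the separate STS2 register. The helpers below are hypothetical; DMA_STATUS_SHIFT_BITS and DMA_STATUS_BITS_PER_CH are the driver's existing constants.

/* Enable-bit position in CTL2: ch0..7 -> bits 0..7, ch8..11 -> bits 16..19 */
static inline int pdc_ctl2_pos(unsigned int chan_id)
{
	return (chan_id < 8) ? chan_id : chan_id + 8;
}

/* Status shift: STS0 indexes ch0..7 directly, STS2 indexes ch8..11 from 0 */
static inline unsigned int pdc_status_shift(unsigned int chan_id)
{
	unsigned int idx = (chan_id < 8) ? chan_id : chan_id - 8;

	return DMA_STATUS_SHIFT_BITS + DMA_STATUS_BITS_PER_CH * idx;
}
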
Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 69 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index ff5b38f..65c32f8 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -45,7 +45,8 @@ #define DMA_STATUS_MASK_BITS 0x3 #define DMA_STATUS_SHIFT_BITS 16 #define DMA_STATUS_IRQ(x) (0x1 << (x)) -#define DMA_STATUS_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8)) +#define DMA_STATUS2_ERR(x) (0x1 << (x)) #define DMA_DESC_WIDTH_SHIFT_BITS 12 #define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS) @@ -133,6 +134,7 @@ struct pch_dma { #define PCH_DMA_CTL3 0x0C #define PCH_DMA_STS0 0x10 #define PCH_DMA_STS1 0x14 +#define PCH_DMA_STS2 0x18 #define dma_readl(pd, name) \ readl((pd)->membase + PCH_DMA_##name) @@ -183,13 +185,19 @@ static void pdc_enable_irq(struct dma_chan *chan, int enable) { struct pch_dma *pd = to_pd(chan->device); u32 val; + int pos; + + if (chan->chan_id < 8) + pos = chan->chan_id; + else + pos = chan->chan_id + 8; val = dma_readl(pd, CTL2); if (enable) - val |= 0x1 << chan->chan_id; + val |= 0x1 << pos; else - val &= ~(0x1 << chan->chan_id); + val &= ~(0x1 << pos); dma_writel(pd, CTL2, val); @@ -262,7 +270,7 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) chan->chan_id, val); } -static u32 pdc_get_status(struct pch_dma_chan *pd_chan) +static u32 pdc_get_status0(struct pch_dma_chan *pd_chan) { struct pch_dma *pd = to_pd(pd_chan->chan.device); u32 val; @@ -272,9 +280,27 @@ static u32 pdc_get_status(struct pch_dma_chan *pd_chan) DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); } +static u32 pdc_get_status2(struct pch_dma_chan *pd_chan) +{ + struct pch_dma *pd = to_pd(pd_chan->chan.device); + u32 val; + + val = dma_readl(pd, STS2); + return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS + + DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8))); +} + static bool pdc_is_idle(struct pch_dma_chan *pd_chan) { - if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE) + u32 sts; + + if (pd_chan->chan.chan_id < 8) + sts = pdc_get_status0(pd_chan); + else + sts = pdc_get_status2(pd_chan); + + + if (sts == DMA_STATUS_IDLE) return true; else return false; @@ -693,30 +719,45 @@ static irqreturn_t pd_irq(int irq, void *devid) struct pch_dma *pd = (struct pch_dma *)devid; struct pch_dma_chan *pd_chan; u32 sts0; + u32 sts2; int i; - int ret = IRQ_NONE; + int ret0 = IRQ_NONE; + int ret2 = IRQ_NONE; sts0 = dma_readl(pd, STS0); + sts2 = dma_readl(pd, STS2); dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); for (i = 0; i < pd->dma.chancnt; i++) { pd_chan = &pd->channels[i]; - if (sts0 & DMA_STATUS_IRQ(i)) { - if (sts0 & DMA_STATUS_ERR(i)) - set_bit(0, &pd_chan->err_status); + if (i < 8) { + if (sts0 & DMA_STATUS_IRQ(i)) { + if (sts0 & DMA_STATUS0_ERR(i)) + set_bit(0, &pd_chan->err_status); - tasklet_schedule(&pd_chan->tasklet); - ret = IRQ_HANDLED; - } + tasklet_schedule(&pd_chan->tasklet); + ret0 = IRQ_HANDLED; + } + } else { + if (sts2 & DMA_STATUS_IRQ(i - 8)) { + if (sts2 & DMA_STATUS2_ERR(i)) + set_bit(0, &pd_chan->err_status); + tasklet_schedule(&pd_chan->tasklet); + ret2 = IRQ_HANDLED; + } + } } /* clear interrupt bits in status register */ - dma_writel(pd, STS0, sts0); + if (ret0) + dma_writel(pd, STS0, sts0); + if (ret2) + dma_writel(pd, STS2, sts2); - return ret; + return ret0 | ret2; } #ifdef CONFIG_PM -- cgit v1.1 From badc1446891c158f065c5a9726febdae74eb5ac5 Mon Sep 17 00:00:00 2001 
From: Mark Brown Date: Wed, 1 Jun 2011 21:25:47 +0100 Subject: [IA64] Hook up gpiolib support Allow people to use gpiolib on ia64, mostly for build coverage as it seems more useful to standardise on availablity of the API than handle it being optional. Signed-off-by: Mark Brown Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 4 ++++ arch/ia64/include/asm/gpio.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 arch/ia64/include/asm/gpio.h diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 38280ef..578701e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -27,6 +27,7 @@ config IA64 select GENERIC_PENDING_IRQ if SMP select IRQ_PER_CPU select GENERIC_IRQ_SHOW + select ARCH_WANT_OPTIONAL_GPIOLIB default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -89,6 +90,9 @@ config GENERIC_TIME_VSYSCALL config HAVE_SETUP_PER_CPU_AREA def_bool y +config GENERIC_GPIO + def_bool y + config DMI bool default y diff --git a/arch/ia64/include/asm/gpio.h b/arch/ia64/include/asm/gpio.h new file mode 100644 index 0000000..590a20d --- /dev/null +++ b/arch/ia64/include/asm/gpio.h @@ -0,0 +1,55 @@ +/* + * Generic GPIO API implementation for IA-64. + * + * A stright copy of that for PowerPC which was: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_IA64_GPIO_H +#define _ASM_IA64_GPIO_H + +#include +#include + +#ifdef CONFIG_GPIOLIB + +/* + * We don't (yet) implement inlined/rapid versions for on-chip gpios. + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +static inline int gpio_to_irq(unsigned int gpio) +{ + return __gpio_to_irq(gpio); +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_IA64_GPIO_H */ -- cgit v1.1 From 9f14517bd6f38cf4b48d742a0ac5db9d17edecba Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 1 Jun 2011 00:32:50 -0700 Subject: clocksource: tile: convert to use clocksource_register_hz Convert tile to use clocksource_register_hz. CC: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Chris Metcalf --- arch/tile/kernel/time.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c4be58c..f6f50f2a 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, /* typical value, e.g. 
x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -91,8 +90,6 @@ void __init setup_clock(void) cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); sched_clock_mult = clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); - cycle_counter_cs.mult = - clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -107,7 +104,7 @@ void __init calibrate_delay(void) void __init time_init(void) { /* Initialize and register the clock source. */ - clocksource_register(&cycle_counter_cs); + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); /* Start up the tile-timer interrupt source on the boot cpu. */ setup_tile_timer(); -- cgit v1.1 From 2c007a9d8cb018a3ce2106342f2aff2abca2a3bd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 3 Jun 2011 17:36:18 -0400 Subject: tile: use generic-y format for one-line asm-generic headers This lets us remove a lot of one-line wrapper header files. See commit d8ecc5cd8e227bc318513b5306ae88a474b8886d for context. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/Kbuild | 38 +++++++++++++++++++++++++++++++ arch/tile/include/asm/bug.h | 1 - arch/tile/include/asm/bugs.h | 1 - arch/tile/include/asm/cputime.h | 1 - arch/tile/include/asm/device.h | 1 - arch/tile/include/asm/div64.h | 1 - arch/tile/include/asm/emergency-restart.h | 1 - arch/tile/include/asm/errno.h | 1 - arch/tile/include/asm/fb.h | 1 - arch/tile/include/asm/fcntl.h | 1 - arch/tile/include/asm/ioctl.h | 1 - arch/tile/include/asm/ioctls.h | 1 - arch/tile/include/asm/ipc.h | 1 - arch/tile/include/asm/ipcbuf.h | 1 - arch/tile/include/asm/irq_regs.h | 1 - arch/tile/include/asm/kdebug.h | 1 - arch/tile/include/asm/local.h | 1 - arch/tile/include/asm/module.h | 1 - arch/tile/include/asm/msgbuf.h | 1 - arch/tile/include/asm/mutex.h | 1 - arch/tile/include/asm/param.h | 1 - arch/tile/include/asm/parport.h | 1 - arch/tile/include/asm/poll.h | 1 - arch/tile/include/asm/posix_types.h | 1 - arch/tile/include/asm/resource.h | 1 - arch/tile/include/asm/scatterlist.h | 1 - arch/tile/include/asm/sembuf.h | 1 - arch/tile/include/asm/serial.h | 1 - arch/tile/include/asm/shmbuf.h | 1 - arch/tile/include/asm/shmparam.h | 1 - arch/tile/include/asm/socket.h | 1 - arch/tile/include/asm/sockios.h | 1 - arch/tile/include/asm/statfs.h | 1 - arch/tile/include/asm/termbits.h | 1 - arch/tile/include/asm/termios.h | 1 - arch/tile/include/asm/types.h | 1 - arch/tile/include/asm/ucontext.h | 1 - arch/tile/include/asm/xor.h | 1 - 38 files changed, 38 insertions(+), 37 deletions(-) delete mode 100644 arch/tile/include/asm/bug.h delete mode 100644 arch/tile/include/asm/bugs.h delete mode 100644 arch/tile/include/asm/cputime.h delete mode 100644 arch/tile/include/asm/device.h delete mode 100644 arch/tile/include/asm/div64.h delete mode 100644 arch/tile/include/asm/emergency-restart.h delete mode 100644 arch/tile/include/asm/errno.h delete mode 100644 arch/tile/include/asm/fb.h delete mode 100644 arch/tile/include/asm/fcntl.h delete mode 100644 arch/tile/include/asm/ioctl.h delete mode 100644 arch/tile/include/asm/ioctls.h delete mode 100644 arch/tile/include/asm/ipc.h delete mode 100644 arch/tile/include/asm/ipcbuf.h delete mode 100644 arch/tile/include/asm/irq_regs.h delete mode 100644 arch/tile/include/asm/kdebug.h delete mode 100644 arch/tile/include/asm/local.h delete mode 100644 arch/tile/include/asm/module.h delete mode 100644 arch/tile/include/asm/msgbuf.h delete mode 100644 arch/tile/include/asm/mutex.h delete mode 100644 arch/tile/include/asm/param.h delete mode 
100644 arch/tile/include/asm/parport.h delete mode 100644 arch/tile/include/asm/poll.h delete mode 100644 arch/tile/include/asm/posix_types.h delete mode 100644 arch/tile/include/asm/resource.h delete mode 100644 arch/tile/include/asm/scatterlist.h delete mode 100644 arch/tile/include/asm/sembuf.h delete mode 100644 arch/tile/include/asm/serial.h delete mode 100644 arch/tile/include/asm/shmbuf.h delete mode 100644 arch/tile/include/asm/shmparam.h delete mode 100644 arch/tile/include/asm/socket.h delete mode 100644 arch/tile/include/asm/sockios.h delete mode 100644 arch/tile/include/asm/statfs.h delete mode 100644 arch/tile/include/asm/termbits.h delete mode 100644 arch/tile/include/asm/termios.h delete mode 100644 arch/tile/include/asm/types.h delete mode 100644 arch/tile/include/asm/ucontext.h delete mode 100644 arch/tile/include/asm/xor.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 849ab2f..aec60dc 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm header-y += ucontext.h header-y += hardwall.h + +generic-y += bug.h +generic-y += bugs.h +generic-y += cputime.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipc.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += local.h +generic-y += module.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += parport.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += socket.h +generic-y += sockios.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += types.h +generic-y += ucontext.h +generic-y += xor.h diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h deleted file mode 100644 index b12fd89..0000000 --- a/arch/tile/include/asm/bug.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h deleted file mode 100644 index 61791e1..0000000 --- a/arch/tile/include/asm/bugs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/tile/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h deleted file mode 100644 index f0a4c25..0000000 --- a/arch/tile/include/asm/device.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h deleted file mode 100644 index 6cd978c..0000000 --- a/arch/tile/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h deleted file mode 100644 index 3711bd9..0000000 --- a/arch/tile/include/asm/emergency-restart.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h deleted file mode 100644 index 4c82b50..0000000 --- a/arch/tile/include/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h deleted file mode 100644 index 3a4988e..0000000 --- 
a/arch/tile/include/asm/fb.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h deleted file mode 100644 index 46ab12d..0000000 --- a/arch/tile/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h deleted file mode 100644 index b279fe0..0000000 --- a/arch/tile/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h deleted file mode 100644 index ec34c76..0000000 --- a/arch/tile/include/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h deleted file mode 100644 index a46e3d9..0000000 --- a/arch/tile/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51..0000000 --- a/arch/tile/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b..0000000 --- a/arch/tile/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h deleted file mode 100644 index 6ece1b0..0000000 --- a/arch/tile/include/asm/kdebug.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h deleted file mode 100644 index c11c530..0000000 --- a/arch/tile/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h deleted file mode 100644 index 1e4b79f..0000000 --- a/arch/tile/include/asm/module.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h deleted file mode 100644 index 809134c..0000000 --- a/arch/tile/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h deleted file mode 100644 index ff6101a..0000000 --- a/arch/tile/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h deleted file mode 100644 index 965d454..0000000 --- a/arch/tile/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h deleted file mode 100644 index cf252af..0000000 --- a/arch/tile/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h deleted file mode 100644 index c98509d..0000000 --- a/arch/tile/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h deleted file mode 100644 index 22cae62..0000000 --- a/arch/tile/include/asm/posix_types.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h deleted file mode 100644 index 04bc4db..0000000 --- a/arch/tile/include/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h deleted file mode 100644 index 35d786f..0000000 --- a/arch/tile/include/asm/scatterlist.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/sembuf.h 
b/arch/tile/include/asm/sembuf.h deleted file mode 100644 index 7673b83..0000000 --- a/arch/tile/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h deleted file mode 100644 index a0cb0caf..0000000 --- a/arch/tile/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc..0000000 --- a/arch/tile/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h deleted file mode 100644 index 93f30de..0000000 --- a/arch/tile/include/asm/shmparam.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h deleted file mode 100644 index 6b71384..0000000 --- a/arch/tile/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h deleted file mode 100644 index def6d47..0000000 --- a/arch/tile/include/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h deleted file mode 100644 index 0b91fe1..0000000 --- a/arch/tile/include/asm/statfs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h deleted file mode 100644 index 3935b10..0000000 --- a/arch/tile/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h deleted file mode 100644 index 280d78a..0000000 --- a/arch/tile/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h deleted file mode 100644 index b9e79bc..0000000 --- a/arch/tile/include/asm/types.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9..0000000 --- a/arch/tile/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h deleted file mode 100644 index c82eb12..0000000 --- a/arch/tile/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v1.1 From 5fa29a17fabfe204fa9f20edd5fc81ab2364eb4b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:02 +0300 Subject: dmaengine: add ep93xx DMA support The ep93xx DMA controller has 10 independent memory to peripheral (M2P) channels, and 2 dedicated memory to memory (M2M) channels. M2M channels can also be used by SPI and IDE to perform DMA transfers to/from their memory mapped FIFOs. This driver supports both M2P and M2M channels with DMA_SLAVE, DMA_CYCLIC and DMA_MEMCPY (M2M only) capabilities. 
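For context, a minimal consumer-side sketch of how a slave channel could be requested from this driver, using the struct ep93xx_dma_data filter parameter documented in the header below; the filter function, the requesting function and the "i2s1-tx" name are hypothetical and assume the dmaengine filter convention of this kernel generation:

#include <linux/dmaengine.h>
#include <mach/dma.h>

static bool ep93xx_example_filter(struct dma_chan *chan, void *filter_param)
{
	struct ep93xx_dma_data *data = filter_param;

	/* M2P channels have a fixed direction; reject a mismatch */
	if (data->direction != ep93xx_dma_chan_direction(chan))
		return false;

	/* The driver reads its channel configuration from chan->private */
	chan->private = data;
	return true;
}

static struct dma_chan *ep93xx_example_request_tx(void)
{
	static struct ep93xx_dma_data data = {
		.port		= EP93XX_DMA_I2S1,
		.direction	= DMA_TO_DEVICE,
		.name		= "i2s1-tx",
	};
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	dma_cap_set(DMA_CYCLIC, mask);

	return dma_request_channel(mask, ep93xx_example_filter, &data);
}

A NULL return means no matching channel was free; otherwise the channel can be used with the driver's slave and cyclic prep callbacks described in the patch.
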
Signed-off-by: Mika Westerberg Signed-off-by: Ryan Mallon Acked-by: H Hartley Sweeten Acked-by: Vinod Koul Cc: Dan Williams Signed-off-by: Vinod Koul --- arch/arm/mach-ep93xx/include/mach/dma.h | 87 ++ drivers/dma/Kconfig | 7 + drivers/dma/Makefile | 1 + drivers/dma/ep93xx_dma.c | 1355 +++++++++++++++++++++++++++++++ 4 files changed, 1450 insertions(+) create mode 100644 drivers/dma/ep93xx_dma.c diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h index 5e31b2b..6e7049a 100644 --- a/arch/arm/mach-ep93xx/include/mach/dma.h +++ b/arch/arm/mach-ep93xx/include/mach/dma.h @@ -15,6 +15,8 @@ #include #include +#include +#include /** * struct ep93xx_dma_buffer - Information about a buffer to be transferred @@ -146,4 +148,89 @@ void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *m2p, */ void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *m2p); +/* + * M2P channels. + * + * Note that these values are also directly used for setting the PPALLOC + * register. + */ +#define EP93XX_DMA_I2S1 0 +#define EP93XX_DMA_I2S2 1 +#define EP93XX_DMA_AAC1 2 +#define EP93XX_DMA_AAC2 3 +#define EP93XX_DMA_AAC3 4 +#define EP93XX_DMA_I2S3 5 +#define EP93XX_DMA_UART1 6 +#define EP93XX_DMA_UART2 7 +#define EP93XX_DMA_UART3 8 +#define EP93XX_DMA_IRDA 9 +/* M2M channels */ +#define EP93XX_DMA_SSP 10 +#define EP93XX_DMA_IDE 11 + +/** + * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine + * @port: peripheral which is requesting the channel + * @direction: TX/RX channel + * @name: optional name for the channel, this is displayed in /proc/interrupts + * + * This information is passed as private channel parameter in a filter + * function. Note that this is only needed for slave/cyclic channels. For + * memcpy channels %NULL data should be passed. + */ +struct ep93xx_dma_data { + int port; + enum dma_data_direction direction; + const char *name; +}; + +/** + * struct ep93xx_dma_chan_data - platform specific data for a DMA channel + * @name: name of the channel, used for getting the right clock for the channel + * @base: mapped registers + * @irq: interrupt number used by this channel + */ +struct ep93xx_dma_chan_data { + const char *name; + void __iomem *base; + int irq; +}; + +/** + * struct ep93xx_dma_platform_data - platform data for the dmaengine driver + * @channels: array of channels which are passed to the driver + * @num_channels: number of channels in the array + * + * This structure is passed to the DMA engine driver via platform data. For + * M2P channels, contract is that even channels are for TX and odd for RX. + * There is no requirement for the M2M channels. + */ +struct ep93xx_dma_platform_data { + struct ep93xx_dma_chan_data *channels; + size_t num_channels; +}; + +static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) +{ + return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); +} + +/** + * ep93xx_dma_chan_direction - returns direction the channel can be used + * @chan: channel + * + * This function can be used in filter functions to find out whether the + * channel supports given DMA direction. Only M2P channels have such + * limitation, for M2M channels the direction is configurable. + */ +static inline enum dma_data_direction +ep93xx_dma_chan_direction(struct dma_chan *chan) +{ + if (!ep93xx_dma_chan_is_m2p(chan)) + return DMA_NONE; + + /* even channels are for TX, odd for RX */ + return (chan->chan_id % 2 == 0) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; +} + #endif /* __ASM_ARCH_DMA_H */ diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 25cf327..2e3b3d3 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -237,6 +237,13 @@ config MXS_DMA Support the MXS DMA engine. This engine including APBH-DMA and APBX-DMA is integrated into Freescale i.MX23/28 chips. +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 836095a..30cf3b1 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_PL330_DMA) += pl330.o obj-$(CONFIG_PCH_DMA) += pch_dma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c new file mode 100644 index 0000000..0766c1e --- /dev/null +++ b/drivers/dma/ep93xx_dma.c @@ -0,0 +1,1355 @@ +/* + * Driver for the Cirrus Logic EP93xx DMA Controller + * + * Copyright (C) 2011 Mika Westerberg + * + * DMA M2P implementation is based on the original + * arch/arm/mach-ep93xx/dma-m2p.c which has following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon + * + * This driver is based on dw_dmac and amba-pl08x drivers. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include + +/* M2P registers */ +#define M2P_CONTROL 0x0000 +#define M2P_CONTROL_STALLINT BIT(0) +#define M2P_CONTROL_NFBINT BIT(1) +#define M2P_CONTROL_CH_ERROR_INT BIT(3) +#define M2P_CONTROL_ENABLE BIT(4) +#define M2P_CONTROL_ICE BIT(6) + +#define M2P_INTERRUPT 0x0004 +#define M2P_INTERRUPT_STALL BIT(0) +#define M2P_INTERRUPT_NFB BIT(1) +#define M2P_INTERRUPT_ERROR BIT(3) + +#define M2P_PPALLOC 0x0008 +#define M2P_STATUS 0x000c + +#define M2P_MAXCNT0 0x0020 +#define M2P_BASE0 0x0024 +#define M2P_MAXCNT1 0x0030 +#define M2P_BASE1 0x0034 + +#define M2P_STATE_IDLE 0 +#define M2P_STATE_STALL 1 +#define M2P_STATE_ON 2 +#define M2P_STATE_NEXT 3 + +/* M2M registers */ +#define M2M_CONTROL 0x0000 +#define M2M_CONTROL_DONEINT BIT(2) +#define M2M_CONTROL_ENABLE BIT(3) +#define M2M_CONTROL_START BIT(4) +#define M2M_CONTROL_DAH BIT(11) +#define M2M_CONTROL_SAH BIT(12) +#define M2M_CONTROL_PW_SHIFT 9 +#define M2M_CONTROL_PW_8 (0 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_16 (1 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_32 (2 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_PW_MASK (3 << M2M_CONTROL_PW_SHIFT) +#define M2M_CONTROL_TM_SHIFT 13 +#define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_RSS_SHIFT 22 +#define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_RSS_IDE (3 << M2M_CONTROL_RSS_SHIFT) +#define M2M_CONTROL_NO_HDSK BIT(24) +#define M2M_CONTROL_PWSC_SHIFT 25 + +#define M2M_INTERRUPT 0x0004 +#define M2M_INTERRUPT_DONEINT BIT(1) + +#define M2M_BCR0 0x0010 +#define M2M_BCR1 0x0014 +#define M2M_SAR_BASE0 0x0018 
+#define M2M_SAR_BASE1 0x001c +#define M2M_DAR_BASE0 0x002c +#define M2M_DAR_BASE1 0x0030 + +#define DMA_MAX_CHAN_BYTES 0xffff +#define DMA_MAX_CHAN_DESCRIPTORS 32 + +struct ep93xx_dma_engine; + +/** + * struct ep93xx_dma_desc - EP93xx specific transaction descriptor + * @src_addr: source address of the transaction + * @dst_addr: destination address of the transaction + * @size: size of the transaction (in bytes) + * @complete: this descriptor is completed + * @txd: dmaengine API descriptor + * @tx_list: list of linked descriptors + * @node: link used for putting this into a channel queue + */ +struct ep93xx_dma_desc { + u32 src_addr; + u32 dst_addr; + size_t size; + bool complete; + struct dma_async_tx_descriptor txd; + struct list_head tx_list; + struct list_head node; +}; + +/** + * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel + * @chan: dmaengine API channel + * @edma: pointer to to the engine device + * @regs: memory mapped registers + * @irq: interrupt number of the channel + * @clk: clock used by this channel + * @tasklet: channel specific tasklet used for callbacks + * @lock: lock protecting the fields following + * @flags: flags for the channel + * @buffer: which buffer to use next (0/1) + * @last_completed: last completed cookie value + * @active: flattened chain of descriptors currently being processed + * @queue: pending descriptors which are handled next + * @free_list: list of free descriptors which can be used + * @runtime_addr: physical address currently used as dest/src (M2M only). This + * is set via %DMA_SLAVE_CONFIG before slave operation is + * prepared + * @runtime_ctrl: M2M runtime values for the control register. + * + * As EP93xx DMA controller doesn't support real chained DMA descriptors we + * will have slightly different scheme here: @active points to a head of + * flattened DMA descriptor chain. + * + * @queue holds pending transactions. These are linked through the first + * descriptor in the chain. When a descriptor is moved to the @active queue, + * the first and chained descriptors are flattened into a single list. + * + * @chan.private holds pointer to &struct ep93xx_dma_data which contains + * necessary channel configuration information. For memcpy channels this must + * be %NULL. + */ +struct ep93xx_dma_chan { + struct dma_chan chan; + const struct ep93xx_dma_engine *edma; + void __iomem *regs; + int irq; + struct clk *clk; + struct tasklet_struct tasklet; + /* protects the fields following */ + spinlock_t lock; + unsigned long flags; +/* Channel is configured for cyclic transfers */ +#define EP93XX_DMA_IS_CYCLIC 0 + + int buffer; + dma_cookie_t last_completed; + struct list_head active; + struct list_head queue; + struct list_head free_list; + u32 runtime_addr; + u32 runtime_ctrl; +}; + +/** + * struct ep93xx_dma_engine - the EP93xx DMA engine instance + * @dma_dev: holds the dmaengine device + * @m2m: is this an M2M or M2P device + * @hw_setup: method which sets the channel up for operation + * @hw_shutdown: shuts the channel down and flushes whatever is left + * @hw_submit: pushes active descriptor(s) to the hardware + * @hw_interrupt: handle the interrupt + * @num_channels: number of channels for this instance + * @channels: array of channels + * + * There is one instance of this struct for the M2P channels and one for the + * M2M channels. hw_xxx() methods are used to perform operations which are + * different on M2M and M2P channels. These methods are called with channel + * lock held and interrupts disabled so they cannot sleep. 
+ */ +struct ep93xx_dma_engine { + struct dma_device dma_dev; + bool m2m; + int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_shutdown)(struct ep93xx_dma_chan *); + void (*hw_submit)(struct ep93xx_dma_chan *); + int (*hw_interrupt)(struct ep93xx_dma_chan *); +#define INTERRUPT_UNKNOWN 0 +#define INTERRUPT_DONE 1 +#define INTERRUPT_NEXT_BUFFER 2 + + size_t num_channels; + struct ep93xx_dma_chan channels[]; +}; + +static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac) +{ + return &edmac->chan.dev->device; +} + +static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct ep93xx_dma_chan, chan); +} + +/** + * ep93xx_dma_set_active - set new active descriptor chain + * @edmac: channel + * @desc: head of the new active descriptor chain + * + * Sets @desc to be the head of the new active descriptor chain. This is the + * chain which is processed next. The active list must be empty before calling + * this function. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + BUG_ON(!list_empty(&edmac->active)); + + list_add_tail(&desc->node, &edmac->active); + + /* Flatten the @desc->tx_list chain into @edmac->active list */ + while (!list_empty(&desc->tx_list)) { + struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list, + struct ep93xx_dma_desc, node); + + /* + * We copy the callback parameters from the first descriptor + * to all the chained descriptors. This way we can call the + * callback without having to find out the first descriptor in + * the chain. Useful for cyclic transfers. + */ + d->txd.callback = desc->txd.callback; + d->txd.callback_param = desc->txd.callback_param; + + list_move_tail(&d->node, &edmac->active); + } +} + +/* Called with @edmac->lock held and interrupts disabled */ +static struct ep93xx_dma_desc * +ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac) +{ + return list_first_entry(&edmac->active, struct ep93xx_dma_desc, node); +} + +/** + * ep93xx_dma_advance_active - advances to the next active descriptor + * @edmac: channel + * + * Function advances active descriptor to the next in the @edmac->active and + * returns %true if we still have descriptors in the chain to process. + * Otherwise returns %false. + * + * When the channel is in cyclic mode always returns %true. + * + * Called with @edmac->lock held and interrupts disabled. + */ +static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac) +{ + list_rotate_left(&edmac->active); + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + return true; + + /* + * If txd.cookie is set it means that we are back in the first + * descriptor in the chain and hence done with it. + */ + return !ep93xx_dma_get_active(edmac)->txd.cookie; +} + +/* + * M2P DMA implementation + */ + +static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control) +{ + writel(control, edmac->regs + M2P_CONTROL); + /* + * EP93xx User's Guide states that we must perform a dummy read after + * write to the control register. 
+ */ + readl(edmac->regs + M2P_CONTROL); +} + +static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control; + + writel(data->port & 0xf, edmac->regs + M2P_PPALLOC); + + control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE + | M2P_CONTROL_ENABLE; + m2p_set_control(edmac, control); + + return 0; +} + +static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) +{ + return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; +} + +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + while (m2p_channel_state(edmac) >= M2P_STATE_ON) + cpu_relax(); + + m2p_set_control(edmac, 0); + + while (m2p_channel_state(edmac) == M2P_STATE_STALL) + cpu_relax(); +} + +static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + u32 bus_addr; + + if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE) + bus_addr = desc->src_addr; + else + bus_addr = desc->dst_addr; + + if (edmac->buffer == 0) { + writel(desc->size, edmac->regs + M2P_MAXCNT0); + writel(bus_addr, edmac->regs + M2P_BASE0); + } else { + writel(desc->size, edmac->regs + M2P_MAXCNT1); + writel(bus_addr, edmac->regs + M2P_BASE1); + } + + edmac->buffer ^= 1; +} + +static void m2p_hw_submit(struct ep93xx_dma_chan *edmac) +{ + u32 control = readl(edmac->regs + M2P_CONTROL); + + m2p_fill_desc(edmac); + control |= M2P_CONTROL_STALLINT; + + if (ep93xx_dma_advance_active(edmac)) { + m2p_fill_desc(edmac); + control |= M2P_CONTROL_NFBINT; + } + + m2p_set_control(edmac, control); +} + +static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 irq_status = readl(edmac->regs + M2P_INTERRUPT); + u32 control; + + if (irq_status & M2P_INTERRUPT_ERROR) { + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + /* Clear the error interrupt */ + writel(1, edmac->regs + M2P_INTERRUPT); + + /* + * It seems that there is no easy way of reporting errors back + * to client so we just report the error here and continue as + * usual. + * + * Revisit this when there is a mechanism to report back the + * errors. + */ + dev_err(chan2dev(edmac), + "DMA transfer failed! Details:\n" + "\tcookie : %d\n" + "\tsrc_addr : 0x%08x\n" + "\tdst_addr : 0x%08x\n" + "\tsize : %zu\n", + desc->txd.cookie, desc->src_addr, desc->dst_addr, + desc->size); + } + + switch (irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) { + case M2P_INTERRUPT_STALL: + /* Disable interrupts */ + control = readl(edmac->regs + M2P_CONTROL); + control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); + m2p_set_control(edmac, control); + + return INTERRUPT_DONE; + + case M2P_INTERRUPT_NFB: + if (ep93xx_dma_advance_active(edmac)) + m2p_fill_desc(edmac); + + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_UNKNOWN; +} + +/* + * M2M DMA implementation + * + * For the M2M transfers we don't use NFB at all. This is because it simply + * doesn't work well with memcpy transfers. When you submit both buffers it is + * extremely unlikely that you get an NFB interrupt, but it instead reports + * DONE interrupt and both buffers are already transferred which means that we + * weren't able to update the next buffer. + * + * So for now we "simulate" NFB by just submitting buffer after buffer + * without double buffering. 
+ */ + +static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) +{ + const struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = 0; + + if (!data) { + /* This is memcpy channel, nothing to configure */ + writel(control, edmac->regs + M2M_CONTROL); + return 0; + } + + switch (data->port) { + case EP93XX_DMA_SSP: + /* + * This was found via experimenting - anything less than 5 + * causes the channel to perform only a partial transfer which + * leads to problems since we don't get DONE interrupt then. + */ + control = (5 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_NO_HDSK; + + if (data->direction == DMA_TO_DEVICE) { + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + control |= M2M_CONTROL_RSS_SSPTX; + } else { + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + control |= M2M_CONTROL_RSS_SSPRX; + } + break; + + case EP93XX_DMA_IDE: + /* + * This IDE part is totally untested. Values below are taken + * from the EP93xx Users's Guide and might not be correct. + */ + control |= M2M_CONTROL_NO_HDSK; + control |= M2M_CONTROL_RSS_IDE; + control |= M2M_CONTROL_PW_16; + + if (data->direction == DMA_TO_DEVICE) { + /* Worst case from the UG */ + control = (3 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_DAH; + control |= M2M_CONTROL_TM_TX; + } else { + control = (2 << M2M_CONTROL_PWSC_SHIFT); + control |= M2M_CONTROL_SAH; + control |= M2M_CONTROL_TM_RX; + } + break; + + default: + return -EINVAL; + } + + writel(control, edmac->regs + M2M_CONTROL); + return 0; +} + +static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ + /* Just disable the channel */ + writel(0, edmac->regs + M2M_CONTROL); +} + +static void m2m_fill_desc(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac); + + if (edmac->buffer == 0) { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0); + writel(desc->size, edmac->regs + M2M_BCR0); + } else { + writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1); + writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1); + writel(desc->size, edmac->regs + M2M_BCR1); + } + + edmac->buffer ^= 1; +} + +static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_data *data = edmac->chan.private; + u32 control = readl(edmac->regs + M2M_CONTROL); + + /* + * Since we allow clients to configure PW (peripheral width) we always + * clear PW bits here and then set them according what is given in + * the runtime configuration. + */ + control &= ~M2M_CONTROL_PW_MASK; + control |= edmac->runtime_ctrl; + + m2m_fill_desc(edmac); + control |= M2M_CONTROL_DONEINT; + + /* + * Now we can finally enable the channel. For M2M channel this must be + * done _after_ the BCRx registers are programmed. + */ + control |= M2M_CONTROL_ENABLE; + writel(control, edmac->regs + M2M_CONTROL); + + if (!data) { + /* + * For memcpy channels the software trigger must be asserted + * in order to start the memcpy operation. 
+ */ + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } +} + +static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) +{ + u32 control; + + if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_DONEINT)) + return INTERRUPT_UNKNOWN; + + /* Clear the DONE bit */ + writel(0, edmac->regs + M2M_INTERRUPT); + + /* Disable interrupts and the channel */ + control = readl(edmac->regs + M2M_CONTROL); + control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_ENABLE); + writel(control, edmac->regs + M2M_CONTROL); + + /* + * Since we only get DONE interrupt we have to find out ourselves + * whether there still is something to process. So we try to advance + * the chain an see whether it succeeds. + */ + if (ep93xx_dma_advance_active(edmac)) { + edmac->edma->hw_submit(edmac); + return INTERRUPT_NEXT_BUFFER; + } + + return INTERRUPT_DONE; +} + +/* + * DMA engine API implementation + */ + +static struct ep93xx_dma_desc * +ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_desc; + struct ep93xx_dma_desc *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) { + if (async_tx_test_ack(&desc->txd)) { + list_del_init(&desc->node); + + /* Re-initialize the descriptor */ + desc->src_addr = 0; + desc->dst_addr = 0; + desc->size = 0; + desc->complete = false; + desc->txd.cookie = 0; + desc->txd.callback = NULL; + desc->txd.callback_param = NULL; + + ret = desc; + break; + } + } + spin_unlock_irqrestore(&edmac->lock, flags); + return ret; +} + +static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac, + struct ep93xx_dma_desc *desc) +{ + if (desc) { + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + list_splice_init(&desc->tx_list, &edmac->free_list); + list_add(&desc->node, &edmac->free_list); + spin_unlock_irqrestore(&edmac->lock, flags); + } +} + +/** + * ep93xx_dma_advance_work - start processing the next pending transaction + * @edmac: channel + * + * If we have pending transactions queued and we are currently idling, this + * function takes the next queued transaction from the @edmac->queue and + * pushes it to the hardware for execution. 
+ */ +static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *new; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) { + spin_unlock_irqrestore(&edmac->lock, flags); + return; + } + + /* Take the next descriptor from the pending queue */ + new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node); + list_del_init(&new->node); + + ep93xx_dma_set_active(edmac, new); + + /* Push it to the hardware */ + edmac->edma->hw_submit(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); +} + +static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc) +{ + struct device *dev = desc->txd.chan->device->dev; + + if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + else + dma_unmap_page(dev, desc->src_addr, desc->size, + DMA_TO_DEVICE); + } + if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + else + dma_unmap_page(dev, desc->dst_addr, desc->size, + DMA_FROM_DEVICE); + } +} + +static void ep93xx_dma_tasklet(unsigned long data) +{ + struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data; + struct ep93xx_dma_desc *desc, *d; + dma_async_tx_callback callback; + void *callback_param; + LIST_HEAD(list); + + spin_lock_irq(&edmac->lock); + desc = ep93xx_dma_get_active(edmac); + if (desc->complete) { + edmac->last_completed = desc->txd.cookie; + list_splice_init(&edmac->active, &list); + } + spin_unlock_irq(&edmac->lock); + + /* Pick up the next descriptor from the queue */ + ep93xx_dma_advance_work(edmac); + + callback = desc->txd.callback; + callback_param = desc->txd.callback_param; + + /* Now we can release all the chained descriptors */ + list_for_each_entry_safe(desc, d, &list, node) { + /* + * For the memcpy channels the API requires us to unmap the + * buffers unless requested otherwise. + */ + if (!edmac->chan.private) + ep93xx_dma_unmap_buffers(desc); + + ep93xx_dma_desc_put(edmac, desc); + } + + if (callback) + callback(callback_param); +} + +static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id) +{ + struct ep93xx_dma_chan *edmac = dev_id; + irqreturn_t ret = IRQ_HANDLED; + + spin_lock(&edmac->lock); + + switch (edmac->edma->hw_interrupt(edmac)) { + case INTERRUPT_DONE: + ep93xx_dma_get_active(edmac)->complete = true; + tasklet_schedule(&edmac->tasklet); + break; + + case INTERRUPT_NEXT_BUFFER: + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) + tasklet_schedule(&edmac->tasklet); + break; + + default: + dev_warn(chan2dev(edmac), "unknown interrupt!\n"); + ret = IRQ_NONE; + break; + } + + spin_unlock(&edmac->lock); + return ret; +} + +/** + * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed + * @tx: descriptor to be executed + * + * Function will execute given descriptor on the hardware or if the hardware + * is busy, queue the descriptor to be executed later on. Returns cookie which + * can be used to poll the status of the descriptor. 
+ */ +static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan); + struct ep93xx_dma_desc *desc; + dma_cookie_t cookie; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + + cookie = edmac->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + desc = container_of(tx, struct ep93xx_dma_desc, txd); + + edmac->chan.cookie = cookie; + desc->txd.cookie = cookie; + + /* + * If nothing is currently prosessed, we push this descriptor + * directly to the hardware. Otherwise we put the descriptor + * to the pending queue. + */ + if (list_empty(&edmac->active)) { + ep93xx_dma_set_active(edmac, desc); + edmac->edma->hw_submit(edmac); + } else { + list_add_tail(&desc->node, &edmac->queue); + } + + spin_unlock_irqrestore(&edmac->lock, flags); + return cookie; +} + +/** + * ep93xx_dma_alloc_chan_resources - allocate resources for the channel + * @chan: channel to allocate resources + * + * Function allocates necessary resources for the given DMA channel and + * returns number of allocated descriptors for the channel. Negative errno + * is returned in case of failure. + */ +static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_data *data = chan->private; + const char *name = dma_chan_name(chan); + int ret, i; + + /* Sanity check the channel parameters */ + if (!edmac->edma->m2m) { + if (!data) + return -EINVAL; + if (data->port < EP93XX_DMA_I2S1 || + data->port > EP93XX_DMA_IRDA) + return -EINVAL; + if (data->direction != ep93xx_dma_chan_direction(chan)) + return -EINVAL; + } else { + if (data) { + switch (data->port) { + case EP93XX_DMA_SSP: + case EP93XX_DMA_IDE: + if (data->direction != DMA_TO_DEVICE && + data->direction != DMA_FROM_DEVICE) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + } + + if (data && data->name) + name = data->name; + + ret = clk_enable(edmac->clk); + if (ret) + return ret; + + ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac); + if (ret) + goto fail_clk_disable; + + spin_lock_irq(&edmac->lock); + edmac->last_completed = 1; + edmac->chan.cookie = 1; + ret = edmac->edma->hw_setup(edmac); + spin_unlock_irq(&edmac->lock); + + if (ret) + goto fail_free_irq; + + for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) { + struct ep93xx_dma_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + dev_warn(chan2dev(edmac), "not enough descriptors\n"); + break; + } + + INIT_LIST_HEAD(&desc->tx_list); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.flags = DMA_CTRL_ACK; + desc->txd.tx_submit = ep93xx_dma_tx_submit; + + ep93xx_dma_desc_put(edmac, desc); + } + + return i; + +fail_free_irq: + free_irq(edmac->irq, edmac); +fail_clk_disable: + clk_disable(edmac->clk); + + return ret; +} + +/** + * ep93xx_dma_free_chan_resources - release resources for the channel + * @chan: channel + * + * Function releases all the resources allocated for the given channel. + * The channel must be idle when this is called. 
+ */ +static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *d; + unsigned long flags; + LIST_HEAD(list); + + BUG_ON(!list_empty(&edmac->active)); + BUG_ON(!list_empty(&edmac->queue)); + + spin_lock_irqsave(&edmac->lock, flags); + edmac->edma->hw_shutdown(edmac); + edmac->runtime_addr = 0; + edmac->runtime_ctrl = 0; + edmac->buffer = 0; + list_splice_init(&edmac->free_list, &list); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, d, &list, node) + kfree(desc); + + clk_disable(edmac->clk); + free_irq(edmac->irq, edmac); +} + +/** + * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation + * @chan: channel + * @dest: destination bus address + * @src: source bus address + * @len: size of the transaction + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure. + */ +struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t bytes, offset; + + first = NULL; + for (offset = 0; offset < len; offset += bytes) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couln't get descriptor\n"); + goto fail; + } + + bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES); + + desc->src_addr = src + offset; + desc->dst_addr = dest + offset; + desc->size = bytes; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation + * @chan: channel + * @sgl: list of buffers to transfer + * @sg_len: number of entries in @sgl + * @dir: direction of tha DMA transfer + * @flags: flags for the descriptor + * + * Returns a valid DMA descriptor or %NULL in case of failure. 
+ */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction dir, + unsigned long flags) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + struct scatterlist *sg; + int i; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + first = NULL; + for_each_sg(sgl, sg, sg_len, i) { + size_t sg_len = sg_dma_len(sg); + + if (sg_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big transfer size %d\n", + sg_len); + goto fail; + } + + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couln't get descriptor\n"); + goto fail; + } + + if (dir == DMA_TO_DEVICE) { + desc->src_addr = sg_dma_address(sg); + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = sg_dma_address(sg); + } + desc->size = sg_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + first->txd.flags = flags; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation + * @chan: channel + * @dma_addr: DMA mapped address of the buffer + * @buf_len: length of the buffer (in bytes) + * @period_len: lenght of a single period + * @dir: direction of the operation + * + * Prepares a descriptor for cyclic DMA operation. This means that once the + * descriptor is submitted, we will be submitting in a @period_len sized + * buffers and calling callback once the period has been elapsed. Transfer + * terminates only when client calls dmaengine_terminate_all() for this + * channel. + * + * Returns a valid DMA descriptor or %NULL in case of failure. 
+ */ +static struct dma_async_tx_descriptor * +ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_data_direction dir) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct ep93xx_dma_desc *desc, *first; + size_t offset = 0; + + if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) { + dev_warn(chan2dev(edmac), + "channel was configured with different direction\n"); + return NULL; + } + + if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) { + dev_warn(chan2dev(edmac), + "channel is already used for cyclic transfers\n"); + return NULL; + } + + if (period_len > DMA_MAX_CHAN_BYTES) { + dev_warn(chan2dev(edmac), "too big period length %d\n", + period_len); + return NULL; + } + + /* Split the buffer into period size chunks */ + first = NULL; + for (offset = 0; offset < buf_len; offset += period_len) { + desc = ep93xx_dma_desc_get(edmac); + if (!desc) { + dev_warn(chan2dev(edmac), "couln't get descriptor\n"); + goto fail; + } + + if (dir == DMA_TO_DEVICE) { + desc->src_addr = dma_addr + offset; + desc->dst_addr = edmac->runtime_addr; + } else { + desc->src_addr = edmac->runtime_addr; + desc->dst_addr = dma_addr + offset; + } + + desc->size = period_len; + + if (!first) + first = desc; + else + list_add_tail(&desc->node, &first->tx_list); + } + + first->txd.cookie = -EBUSY; + + return &first->txd; + +fail: + ep93xx_dma_desc_put(edmac, first); + return NULL; +} + +/** + * ep93xx_dma_terminate_all - terminate all transactions + * @edmac: channel + * + * Stops all DMA transactions. All descriptors are put back to the + * @edmac->free_list and callbacks are _not_ called. + */ +static int ep93xx_dma_terminate_all(struct ep93xx_dma_chan *edmac) +{ + struct ep93xx_dma_desc *desc, *_d; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&edmac->lock, flags); + /* First we disable and flush the DMA channel */ + edmac->edma->hw_shutdown(edmac); + clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags); + list_splice_init(&edmac->active, &list); + list_splice_init(&edmac->queue, &list); + /* + * We then re-enable the channel. This way we can continue submitting + * the descriptors by just calling ->hw_submit() again. + */ + edmac->edma->hw_setup(edmac); + spin_unlock_irqrestore(&edmac->lock, flags); + + list_for_each_entry_safe(desc, _d, &list, node) + ep93xx_dma_desc_put(edmac, desc); + + return 0; +} + +static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac, + struct dma_slave_config *config) +{ + enum dma_slave_buswidth width; + unsigned long flags; + u32 addr, ctrl; + + if (!edmac->edma->m2m) + return -EINVAL; + + switch (config->direction) { + case DMA_FROM_DEVICE: + width = config->src_addr_width; + addr = config->src_addr; + break; + + case DMA_TO_DEVICE: + width = config->dst_addr_width; + addr = config->dst_addr; + break; + + default: + return -EINVAL; + } + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + ctrl = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + ctrl = M2M_CONTROL_PW_16; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + ctrl = M2M_CONTROL_PW_32; + break; + default: + return -EINVAL; + } + + spin_lock_irqsave(&edmac->lock, flags); + edmac->runtime_addr = addr; + edmac->runtime_ctrl = ctrl; + spin_unlock_irqrestore(&edmac->lock, flags); + + return 0; +} + +/** + * ep93xx_dma_control - manipulate all pending operations on a channel + * @chan: channel + * @cmd: control command to perform + * @arg: optional argument + * + * Controls the channel. 
Function returns %0 in case of success or negative + * error in case of failure. + */ +static int ep93xx_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + struct dma_slave_config *config; + + switch (cmd) { + case DMA_TERMINATE_ALL: + return ep93xx_dma_terminate_all(edmac); + + case DMA_SLAVE_CONFIG: + config = (struct dma_slave_config *)arg; + return ep93xx_dma_slave_config(edmac, config); + + default: + break; + } + + return -ENOSYS; +} + +/** + * ep93xx_dma_tx_status - check if a transaction is completed + * @chan: channel + * @cookie: transaction specific cookie + * @state: state of the transaction is stored here if given + * + * This function can be used to query state of a given transaction. + */ +static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + dma_cookie_t last_used, last_completed; + enum dma_status ret; + unsigned long flags; + + spin_lock_irqsave(&edmac->lock, flags); + last_used = chan->cookie; + last_completed = edmac->last_completed; + spin_unlock_irqrestore(&edmac->lock, flags); + + ret = dma_async_is_complete(cookie, last_completed, last_used); + dma_set_tx_state(state, last_completed, last_used, 0); + + return ret; +} + +/** + * ep93xx_dma_issue_pending - push pending transactions to the hardware + * @chan: channel + * + * When this function is called, all pending transactions are pushed to the + * hardware and executed. + */ +static void ep93xx_dma_issue_pending(struct dma_chan *chan) +{ + ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan)); +} + +static int __init ep93xx_dma_probe(struct platform_device *pdev) +{ + struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev); + struct ep93xx_dma_engine *edma; + struct dma_device *dma_dev; + size_t edma_size; + int ret, i; + + edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan); + edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL); + if (!edma) + return -ENOMEM; + + dma_dev = &edma->dma_dev; + edma->m2m = platform_get_device_id(pdev)->driver_data; + edma->num_channels = pdata->num_channels; + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < pdata->num_channels; i++) { + const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i]; + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + + edmac->chan.device = dma_dev; + edmac->regs = cdata->base; + edmac->irq = cdata->irq; + edmac->edma = edma; + + edmac->clk = clk_get(NULL, cdata->name); + if (IS_ERR(edmac->clk)) { + dev_warn(&pdev->dev, "failed to get clock for %s\n", + cdata->name); + continue; + } + + spin_lock_init(&edmac->lock); + INIT_LIST_HEAD(&edmac->active); + INIT_LIST_HEAD(&edmac->queue); + INIT_LIST_HEAD(&edmac->free_list); + tasklet_init(&edmac->tasklet, ep93xx_dma_tasklet, + (unsigned long)edmac); + + list_add_tail(&edmac->chan.device_node, + &dma_dev->channels); + } + + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + + dma_dev->dev = &pdev->dev; + dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources; + dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; + dma_dev->device_control = ep93xx_dma_control; + dma_dev->device_issue_pending = ep93xx_dma_issue_pending; + 
dma_dev->device_tx_status = ep93xx_dma_tx_status; + + dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES); + + if (edma->m2m) { + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy; + + edma->hw_setup = m2m_hw_setup; + edma->hw_shutdown = m2m_hw_shutdown; + edma->hw_submit = m2m_hw_submit; + edma->hw_interrupt = m2m_hw_interrupt; + } else { + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + edma->hw_setup = m2p_hw_setup; + edma->hw_shutdown = m2p_hw_shutdown; + edma->hw_submit = m2p_hw_submit; + edma->hw_interrupt = m2p_hw_interrupt; + } + + ret = dma_async_device_register(dma_dev); + if (unlikely(ret)) { + for (i = 0; i < edma->num_channels; i++) { + struct ep93xx_dma_chan *edmac = &edma->channels[i]; + if (!IS_ERR_OR_NULL(edmac->clk)) + clk_put(edmac->clk); + } + kfree(edma); + } else { + dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n", + edma->m2m ? "M" : "P"); + } + + return ret; +} + +static struct platform_device_id ep93xx_dma_driver_ids[] = { + { "ep93xx-dma-m2p", 0 }, + { "ep93xx-dma-m2m", 1 }, + { }, +}; + +static struct platform_driver ep93xx_dma_driver = { + .driver = { + .name = "ep93xx-dma", + }, + .id_table = ep93xx_dma_driver_ids, +}; + +static int __init ep93xx_dma_module_init(void) +{ + return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe); +} +subsys_initcall(ep93xx_dma_module_init); + +MODULE_AUTHOR("Mika Westerberg "); +MODULE_DESCRIPTION("EP93xx DMA driver"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From f911d026e84a137e35701a4f23732f47ce40a6b8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:03 +0300 Subject: ep93xx: add dmaengine platform code Add platform support code for the new EP93xx dmaengine driver. Signed-off-by: Mika Westerberg Signed-off-by: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Vinod Koul --- arch/arm/mach-ep93xx/Makefile | 2 + arch/arm/mach-ep93xx/dma.c | 108 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 arch/arm/mach-ep93xx/dma.c diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile index 33ee2c8..4920f7a 100644 --- a/arch/arm/mach-ep93xx/Makefile +++ b/arch/arm/mach-ep93xx/Makefile @@ -6,6 +6,8 @@ obj-m := obj-n := obj- := +obj-$(CONFIG_EP93XX_DMA) += dma.o + obj-$(CONFIG_MACH_ADSSPHERE) += adssphere.o obj-$(CONFIG_MACH_EDB93XX) += edb93xx.o obj-$(CONFIG_MACH_GESBC9312) += gesbc9312.o diff --git a/arch/arm/mach-ep93xx/dma.c b/arch/arm/mach-ep93xx/dma.c new file mode 100644 index 0000000..5a25708 --- /dev/null +++ b/arch/arm/mach-ep93xx/dma.c @@ -0,0 +1,108 @@ +/* + * arch/arm/mach-ep93xx/dma.c + * + * Platform support code for the EP93xx dmaengine driver. + * + * Copyright (C) 2011 Mika Westerberg + * + * This work is based on the original dma-m2p implementation with + * following copyrights: + * + * Copyright (C) 2006 Lennert Buytenhek + * Copyright (C) 2006 Applied Data Systems + * Copyright (C) 2009 Ryan Mallon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DMA_CHANNEL(_name, _base, _irq) \ + { .name = (_name), .base = (_base), .irq = (_irq) } + +/* + * DMA M2P channels. 
+ * + * On the EP93xx chip the following peripherals my be allocated to the 10 + * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). + * + * I2S contains 3 Tx and 3 Rx DMA Channels + * AAC contains 3 Tx and 3 Rx DMA Channels + * UART1 contains 1 Tx and 1 Rx DMA Channels + * UART2 contains 1 Tx and 1 Rx DMA Channels + * UART3 contains 1 Tx and 1 Rx DMA Channels + * IrDA contains 1 Tx and 1 Rx DMA Channels + * + * Registers are mapped statically in ep93xx_map_io(). + */ +static struct ep93xx_dma_chan_data ep93xx_dma_m2p_channels[] = { + DMA_CHANNEL("m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0), + DMA_CHANNEL("m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1), + DMA_CHANNEL("m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2), + DMA_CHANNEL("m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3), + DMA_CHANNEL("m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4), + DMA_CHANNEL("m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5), + DMA_CHANNEL("m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6), + DMA_CHANNEL("m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7), + DMA_CHANNEL("m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8), + DMA_CHANNEL("m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9), +}; + +static struct ep93xx_dma_platform_data ep93xx_dma_m2p_data = { + .channels = ep93xx_dma_m2p_channels, + .num_channels = ARRAY_SIZE(ep93xx_dma_m2p_channels), +}; + +static struct platform_device ep93xx_dma_m2p_device = { + .name = "ep93xx-dma-m2p", + .id = -1, + .dev = { + .platform_data = &ep93xx_dma_m2p_data, + }, +}; + +/* + * DMA M2M channels. + * + * There are 2 M2M channels which support memcpy/memset and in addition simple + * hardware requests from/to SSP and IDE. We do not implement an external + * hardware requests. + * + * Registers are mapped statically in ep93xx_map_io(). + */ +static struct ep93xx_dma_chan_data ep93xx_dma_m2m_channels[] = { + DMA_CHANNEL("m2m0", EP93XX_DMA_BASE + 0x0100, IRQ_EP93XX_DMAM2M0), + DMA_CHANNEL("m2m1", EP93XX_DMA_BASE + 0x0140, IRQ_EP93XX_DMAM2M1), +}; + +static struct ep93xx_dma_platform_data ep93xx_dma_m2m_data = { + .channels = ep93xx_dma_m2m_channels, + .num_channels = ARRAY_SIZE(ep93xx_dma_m2m_channels), +}; + +static struct platform_device ep93xx_dma_m2m_device = { + .name = "ep93xx-dma-m2m", + .id = -1, + .dev = { + .platform_data = &ep93xx_dma_m2m_data, + }, +}; + +static int __init ep93xx_dma_init(void) +{ + platform_device_register(&ep93xx_dma_m2p_device); + platform_device_register(&ep93xx_dma_m2m_device); + return 0; +} +arch_initcall(ep93xx_dma_init); -- cgit v1.1 From a103fc67c612bfc0f6388885fea7244967afaad4 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:04 +0300 Subject: ASoC: ep93xx: convert to use the DMA engine API Now that we have the EP93xx DMA engine driver in place, we convert the ASoC drivers (I2S, AC97 and PCM) to take advantage of this new API. There are no functional changes. 
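For readers new to the dmaengine consumer API, the conversion boils down to requesting a
channel with a filter callback (passing the EP93xx-specific parameters through
chan->private) and then using the generic dmaengine calls instead of the old M2P client
hooks. The sketch below is not part of the patch; it is a minimal illustration of that
request pattern, assuming the mach/dma.h interfaces added earlier in this series
(struct ep93xx_dma_data, ep93xx_dma_chan_direction(), EP93XX_DMA_I2S1). The my_* names
and the "my-i2s-tx" label are placeholders, not identifiers from the driver.

#include <linux/dmaengine.h>
#include <mach/dma.h>	/* struct ep93xx_dma_data, EP93XX_DMA_I2S1, ... */

/* Accept only an EP93xx channel that transfers in the wanted direction */
static bool my_dma_filter(struct dma_chan *chan, void *filter_param)
{
	struct ep93xx_dma_data *data = filter_param;

	if (data->direction != ep93xx_dma_chan_direction(chan))
		return false;

	chan->private = data;	/* consumed in device_alloc_chan_resources() */
	return true;
}

static struct dma_chan *my_request_i2s_tx_channel(void)
{
	static struct ep93xx_dma_data data = {
		.port		= EP93XX_DMA_I2S1,
		.direction	= DMA_TO_DEVICE,
		.name		= "my-i2s-tx",	/* placeholder name */
	};
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	dma_cap_set(DMA_CYCLIC, mask);

	return dma_request_channel(mask, my_dma_filter, &data);
}

The channel obtained this way is later handed back with dma_release_channel(), which is
exactly what the converted ep93xx_pcm_close() below does.
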
Signed-off-by: Mika Westerberg Acked-by: H Hartley Sweeten Acked-by: Liam Girdwood Acked-by: Mark Brown Signed-off-by: Vinod Koul --- sound/soc/ep93xx/ep93xx-ac97.c | 4 +- sound/soc/ep93xx/ep93xx-i2s.c | 4 +- sound/soc/ep93xx/ep93xx-pcm.c | 137 +++++++++++++++++++++++------------------ 3 files changed, 81 insertions(+), 64 deletions(-) diff --git a/sound/soc/ep93xx/ep93xx-ac97.c b/sound/soc/ep93xx/ep93xx-ac97.c index 104e95c..c7417c7 100644 --- a/sound/soc/ep93xx/ep93xx-ac97.c +++ b/sound/soc/ep93xx/ep93xx-ac97.c @@ -106,12 +106,12 @@ static struct ep93xx_ac97_info *ep93xx_ac97_info; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_out = { .name = "ac97-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_in = { .name = "ac97-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static inline unsigned ep93xx_ac97_read_reg(struct ep93xx_ac97_info *info, diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c index 042f4e9..30df425 100644 --- a/sound/soc/ep93xx/ep93xx-i2s.c +++ b/sound/soc/ep93xx/ep93xx-i2s.c @@ -70,11 +70,11 @@ struct ep93xx_i2s_info { struct ep93xx_pcm_dma_params ep93xx_i2s_dma_params[] = { [SNDRV_PCM_STREAM_PLAYBACK] = { .name = "i2s-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, [SNDRV_PCM_STREAM_CAPTURE] = { .name = "i2s-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, }; diff --git a/sound/soc/ep93xx/ep93xx-pcm.c b/sound/soc/ep93xx/ep93xx-pcm.c index a456e49..a07f99c 100644 --- a/sound/soc/ep93xx/ep93xx-pcm.c +++ b/sound/soc/ep93xx/ep93xx-pcm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -53,43 +54,34 @@ static const struct snd_pcm_hardware ep93xx_pcm_hardware = { struct ep93xx_runtime_data { - struct ep93xx_dma_m2p_client cl; - struct ep93xx_pcm_dma_params *params; int pointer_bytes; - struct tasklet_struct period_tasklet; int periods; - struct ep93xx_dma_buffer buf[32]; + int period_bytes; + struct dma_chan *dma_chan; + struct ep93xx_dma_data dma_data; }; -static void ep93xx_pcm_period_elapsed(unsigned long data) +static void ep93xx_pcm_dma_callback(void *data) { - struct snd_pcm_substream *substream = (struct snd_pcm_substream *)data; - snd_pcm_period_elapsed(substream); -} + struct snd_pcm_substream *substream = data; + struct ep93xx_runtime_data *rtd = substream->runtime->private_data; -static void ep93xx_pcm_buffer_started(void *cookie, - struct ep93xx_dma_buffer *buf) -{ + rtd->pointer_bytes += rtd->period_bytes; + rtd->pointer_bytes %= rtd->period_bytes * rtd->periods; + + snd_pcm_period_elapsed(substream); } -static void ep93xx_pcm_buffer_finished(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error) +static bool ep93xx_pcm_dma_filter(struct dma_chan *chan, void *filter_param) { - struct snd_pcm_substream *substream = cookie; - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - - if (buf == rtd->buf + rtd->periods - 1) - rtd->pointer_bytes = 0; - else - rtd->pointer_bytes += buf->size; + struct ep93xx_dma_data *data = filter_param; - if (!error) { - ep93xx_dma_m2p_submit_recursive(&rtd->cl, buf); - tasklet_schedule(&rtd->period_tasklet); - } else { - snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + if (data->direction == ep93xx_dma_chan_direction(chan)) { + chan->private = data; + return true; } + + return false; } static int ep93xx_pcm_open(struct snd_pcm_substream *substream) @@ -98,30 
+90,38 @@ static int ep93xx_pcm_open(struct snd_pcm_substream *substream) struct snd_soc_dai *cpu_dai = soc_rtd->cpu_dai; struct ep93xx_pcm_dma_params *dma_params; struct ep93xx_runtime_data *rtd; + dma_cap_mask_t mask; int ret; - dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + ret = snd_pcm_hw_constraint_integer(substream->runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + snd_soc_set_runtime_hwparams(substream, &ep93xx_pcm_hardware); rtd = kmalloc(sizeof(*rtd), GFP_KERNEL); if (!rtd) return -ENOMEM; - memset(&rtd->period_tasklet, 0, sizeof(rtd->period_tasklet)); - rtd->period_tasklet.func = ep93xx_pcm_period_elapsed; - rtd->period_tasklet.data = (unsigned long)substream; - - rtd->cl.name = dma_params->name; - rtd->cl.flags = dma_params->dma_port | EP93XX_DMA_M2P_IGNORE_ERROR | - ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? - EP93XX_DMA_M2P_TX : EP93XX_DMA_M2P_RX); - rtd->cl.cookie = substream; - rtd->cl.buffer_started = ep93xx_pcm_buffer_started; - rtd->cl.buffer_finished = ep93xx_pcm_buffer_finished; - ret = ep93xx_dma_m2p_client_register(&rtd->cl); - if (ret < 0) { + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + dma_cap_set(DMA_CYCLIC, mask); + + dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + rtd->dma_data.port = dma_params->dma_port; + rtd->dma_data.name = dma_params->name; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + rtd->dma_data.direction = DMA_TO_DEVICE; + else + rtd->dma_data.direction = DMA_FROM_DEVICE; + + rtd->dma_chan = dma_request_channel(mask, ep93xx_pcm_dma_filter, + &rtd->dma_data); + if (!rtd->dma_chan) { kfree(rtd); - return ret; + return -EINVAL; } substream->runtime->private_data = rtd; @@ -132,31 +132,52 @@ static int ep93xx_pcm_close(struct snd_pcm_substream *substream) { struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - ep93xx_dma_m2p_client_unregister(&rtd->cl); + dma_release_channel(rtd->dma_chan); kfree(rtd); return 0; } +static int ep93xx_pcm_dma_submit(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + struct dma_chan *chan = rtd->dma_chan; + struct dma_device *dma_dev = chan->device; + struct dma_async_tx_descriptor *desc; + + rtd->pointer_bytes = 0; + desc = dma_dev->device_prep_dma_cyclic(chan, runtime->dma_addr, + rtd->period_bytes * rtd->periods, + rtd->period_bytes, + rtd->dma_data.direction); + if (!desc) + return -EINVAL; + + desc->callback = ep93xx_pcm_dma_callback; + desc->callback_param = substream; + + dmaengine_submit(desc); + return 0; +} + +static void ep93xx_pcm_dma_flush(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + + dmaengine_terminate_all(rtd->dma_chan); +} + static int ep93xx_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime = substream->runtime; struct ep93xx_runtime_data *rtd = runtime->private_data; - size_t totsize = params_buffer_bytes(params); - size_t period = params_period_bytes(params); - int i; snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); - runtime->dma_bytes = totsize; - - rtd->periods = (totsize + period - 1) / period; - for (i = 0; i < rtd->periods; i++) { - rtd->buf[i].bus_addr = runtime->dma_addr + (i * period); - rtd->buf[i].size = period; - if ((i + 1) * period > totsize) - rtd->buf[i].size = totsize - (i * period); - } + 
rtd->periods = params_periods(params); + rtd->period_bytes = params_period_bytes(params); return 0; } @@ -168,24 +189,20 @@ static int ep93xx_pcm_hw_free(struct snd_pcm_substream *substream) static int ep93xx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; int ret; - int i; ret = 0; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - rtd->pointer_bytes = 0; - for (i = 0; i < rtd->periods; i++) - ep93xx_dma_m2p_submit(&rtd->cl, rtd->buf + i); + ret = ep93xx_pcm_dma_submit(substream); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ep93xx_dma_m2p_flush(&rtd->cl); + ep93xx_pcm_dma_flush(substream); break; default: -- cgit v1.1 From 8e4a93008db7780e45838fe65840b289f389ef4a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:05 +0300 Subject: ep93xx: remove the old M2P DMA code Since we have converted all existing users of the old DMA API to use the DMA engine API the old code can be dropped. Signed-off-by: Mika Westerberg Acked-by: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Vinod Koul --- arch/arm/mach-ep93xx/Makefile | 2 +- arch/arm/mach-ep93xx/dma-m2p.c | 411 -------------------------------- arch/arm/mach-ep93xx/include/mach/dma.h | 143 ----------- 3 files changed, 1 insertion(+), 555 deletions(-) delete mode 100644 arch/arm/mach-ep93xx/dma-m2p.c diff --git a/arch/arm/mach-ep93xx/Makefile b/arch/arm/mach-ep93xx/Makefile index 4920f7a..21e721a 100644 --- a/arch/arm/mach-ep93xx/Makefile +++ b/arch/arm/mach-ep93xx/Makefile @@ -1,7 +1,7 @@ # # Makefile for the linux kernel. # -obj-y := core.o clock.o dma-m2p.o gpio.o +obj-y := core.o clock.o gpio.o obj-m := obj-n := obj- := diff --git a/arch/arm/mach-ep93xx/dma-m2p.c b/arch/arm/mach-ep93xx/dma-m2p.c deleted file mode 100644 index a696d35..0000000 --- a/arch/arm/mach-ep93xx/dma-m2p.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * arch/arm/mach-ep93xx/dma-m2p.c - * M2P DMA handling for Cirrus EP93xx chips. - * - * Copyright (C) 2006 Lennert Buytenhek - * Copyright (C) 2006 Applied Data Systems - * - * Copyright (C) 2009 Ryan Mallon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - */ - -/* - * On the EP93xx chip the following peripherals my be allocated to the 10 - * Memory to Internal Peripheral (M2P) channels (5 transmit + 5 receive). - * - * I2S contains 3 Tx and 3 Rx DMA Channels - * AAC contains 3 Tx and 3 Rx DMA Channels - * UART1 contains 1 Tx and 1 Rx DMA Channels - * UART2 contains 1 Tx and 1 Rx DMA Channels - * UART3 contains 1 Tx and 1 Rx DMA Channels - * IrDA contains 1 Tx and 1 Rx DMA Channels - * - * SSP and IDE use the Memory to Memory (M2M) channels and are not covered - * with this implementation. 
- */ - -#define pr_fmt(fmt) "ep93xx " KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include - -#include -#include - -#define M2P_CONTROL 0x00 -#define M2P_CONTROL_STALL_IRQ_EN (1 << 0) -#define M2P_CONTROL_NFB_IRQ_EN (1 << 1) -#define M2P_CONTROL_ERROR_IRQ_EN (1 << 3) -#define M2P_CONTROL_ENABLE (1 << 4) -#define M2P_INTERRUPT 0x04 -#define M2P_INTERRUPT_STALL (1 << 0) -#define M2P_INTERRUPT_NFB (1 << 1) -#define M2P_INTERRUPT_ERROR (1 << 3) -#define M2P_PPALLOC 0x08 -#define M2P_STATUS 0x0c -#define M2P_REMAIN 0x14 -#define M2P_MAXCNT0 0x20 -#define M2P_BASE0 0x24 -#define M2P_MAXCNT1 0x30 -#define M2P_BASE1 0x34 - -#define STATE_IDLE 0 /* Channel is inactive. */ -#define STATE_STALL 1 /* Channel is active, no buffers pending. */ -#define STATE_ON 2 /* Channel is active, one buffer pending. */ -#define STATE_NEXT 3 /* Channel is active, two buffers pending. */ - -struct m2p_channel { - char *name; - void __iomem *base; - int irq; - - struct clk *clk; - spinlock_t lock; - - void *client; - unsigned next_slot:1; - struct ep93xx_dma_buffer *buffer_xfer; - struct ep93xx_dma_buffer *buffer_next; - struct list_head buffers_pending; -}; - -static struct m2p_channel m2p_rx[] = { - {"m2p1", EP93XX_DMA_BASE + 0x0040, IRQ_EP93XX_DMAM2P1}, - {"m2p3", EP93XX_DMA_BASE + 0x00c0, IRQ_EP93XX_DMAM2P3}, - {"m2p5", EP93XX_DMA_BASE + 0x0200, IRQ_EP93XX_DMAM2P5}, - {"m2p7", EP93XX_DMA_BASE + 0x0280, IRQ_EP93XX_DMAM2P7}, - {"m2p9", EP93XX_DMA_BASE + 0x0300, IRQ_EP93XX_DMAM2P9}, - {NULL}, -}; - -static struct m2p_channel m2p_tx[] = { - {"m2p0", EP93XX_DMA_BASE + 0x0000, IRQ_EP93XX_DMAM2P0}, - {"m2p2", EP93XX_DMA_BASE + 0x0080, IRQ_EP93XX_DMAM2P2}, - {"m2p4", EP93XX_DMA_BASE + 0x0240, IRQ_EP93XX_DMAM2P4}, - {"m2p6", EP93XX_DMA_BASE + 0x02c0, IRQ_EP93XX_DMAM2P6}, - {"m2p8", EP93XX_DMA_BASE + 0x0340, IRQ_EP93XX_DMAM2P8}, - {NULL}, -}; - -static void feed_buf(struct m2p_channel *ch, struct ep93xx_dma_buffer *buf) -{ - if (ch->next_slot == 0) { - writel(buf->size, ch->base + M2P_MAXCNT0); - writel(buf->bus_addr, ch->base + M2P_BASE0); - } else { - writel(buf->size, ch->base + M2P_MAXCNT1); - writel(buf->bus_addr, ch->base + M2P_BASE1); - } - ch->next_slot ^= 1; -} - -static void choose_buffer_xfer(struct m2p_channel *ch) -{ - struct ep93xx_dma_buffer *buf; - - ch->buffer_xfer = NULL; - if (!list_empty(&ch->buffers_pending)) { - buf = list_entry(ch->buffers_pending.next, - struct ep93xx_dma_buffer, list); - list_del(&buf->list); - feed_buf(ch, buf); - ch->buffer_xfer = buf; - } -} - -static void choose_buffer_next(struct m2p_channel *ch) -{ - struct ep93xx_dma_buffer *buf; - - ch->buffer_next = NULL; - if (!list_empty(&ch->buffers_pending)) { - buf = list_entry(ch->buffers_pending.next, - struct ep93xx_dma_buffer, list); - list_del(&buf->list); - feed_buf(ch, buf); - ch->buffer_next = buf; - } -} - -static inline void m2p_set_control(struct m2p_channel *ch, u32 v) -{ - /* - * The control register must be read immediately after being written so - * that the internal state machine is correctly updated. See the ep93xx - * users' guide for details. 
- */ - writel(v, ch->base + M2P_CONTROL); - readl(ch->base + M2P_CONTROL); -} - -static inline int m2p_channel_state(struct m2p_channel *ch) -{ - return (readl(ch->base + M2P_STATUS) >> 4) & 0x3; -} - -static irqreturn_t m2p_irq(int irq, void *dev_id) -{ - struct m2p_channel *ch = dev_id; - struct ep93xx_dma_m2p_client *cl; - u32 irq_status, v; - int error = 0; - - cl = ch->client; - - spin_lock(&ch->lock); - irq_status = readl(ch->base + M2P_INTERRUPT); - - if (irq_status & M2P_INTERRUPT_ERROR) { - writel(M2P_INTERRUPT_ERROR, ch->base + M2P_INTERRUPT); - error = 1; - } - - if ((irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)) == 0) { - spin_unlock(&ch->lock); - return IRQ_NONE; - } - - switch (m2p_channel_state(ch)) { - case STATE_IDLE: - pr_crit("dma interrupt without a dma buffer\n"); - BUG(); - break; - - case STATE_STALL: - cl->buffer_finished(cl->cookie, ch->buffer_xfer, 0, error); - if (ch->buffer_next != NULL) { - cl->buffer_finished(cl->cookie, ch->buffer_next, - 0, error); - } - choose_buffer_xfer(ch); - choose_buffer_next(ch); - if (ch->buffer_xfer != NULL) - cl->buffer_started(cl->cookie, ch->buffer_xfer); - break; - - case STATE_ON: - cl->buffer_finished(cl->cookie, ch->buffer_xfer, 0, error); - ch->buffer_xfer = ch->buffer_next; - choose_buffer_next(ch); - cl->buffer_started(cl->cookie, ch->buffer_xfer); - break; - - case STATE_NEXT: - pr_crit("dma interrupt while next\n"); - BUG(); - break; - } - - v = readl(ch->base + M2P_CONTROL) & ~(M2P_CONTROL_STALL_IRQ_EN | - M2P_CONTROL_NFB_IRQ_EN); - if (ch->buffer_xfer != NULL) - v |= M2P_CONTROL_STALL_IRQ_EN; - if (ch->buffer_next != NULL) - v |= M2P_CONTROL_NFB_IRQ_EN; - m2p_set_control(ch, v); - - spin_unlock(&ch->lock); - return IRQ_HANDLED; -} - -static struct m2p_channel *find_free_channel(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch; - int i; - - if (cl->flags & EP93XX_DMA_M2P_RX) - ch = m2p_rx; - else - ch = m2p_tx; - - for (i = 0; ch[i].base; i++) { - struct ep93xx_dma_m2p_client *client; - - client = ch[i].client; - if (client != NULL) { - int port; - - port = cl->flags & EP93XX_DMA_M2P_PORT_MASK; - if (port == (client->flags & - EP93XX_DMA_M2P_PORT_MASK)) { - pr_warning("DMA channel already used by %s\n", - cl->name ? : "unknown client"); - return ERR_PTR(-EBUSY); - } - } - } - - for (i = 0; ch[i].base; i++) { - if (ch[i].client == NULL) - return ch + i; - } - - pr_warning("No free DMA channel for %s\n", - cl->name ? : "unknown client"); - return ERR_PTR(-ENODEV); -} - -static void channel_enable(struct m2p_channel *ch) -{ - struct ep93xx_dma_m2p_client *cl = ch->client; - u32 v; - - clk_enable(ch->clk); - - v = cl->flags & EP93XX_DMA_M2P_PORT_MASK; - writel(v, ch->base + M2P_PPALLOC); - - v = cl->flags & EP93XX_DMA_M2P_ERROR_MASK; - v |= M2P_CONTROL_ENABLE | M2P_CONTROL_ERROR_IRQ_EN; - m2p_set_control(ch, v); -} - -static void channel_disable(struct m2p_channel *ch) -{ - u32 v; - - v = readl(ch->base + M2P_CONTROL); - v &= ~(M2P_CONTROL_STALL_IRQ_EN | M2P_CONTROL_NFB_IRQ_EN); - m2p_set_control(ch, v); - - while (m2p_channel_state(ch) >= STATE_ON) - cpu_relax(); - - m2p_set_control(ch, 0x0); - - while (m2p_channel_state(ch) == STATE_STALL) - cpu_relax(); - - clk_disable(ch->clk); -} - -int ep93xx_dma_m2p_client_register(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch; - int err; - - ch = find_free_channel(cl); - if (IS_ERR(ch)) - return PTR_ERR(ch); - - err = request_irq(ch->irq, m2p_irq, 0, cl->name ? 
: "dma-m2p", ch); - if (err) - return err; - - ch->client = cl; - ch->next_slot = 0; - ch->buffer_xfer = NULL; - ch->buffer_next = NULL; - INIT_LIST_HEAD(&ch->buffers_pending); - - cl->channel = ch; - - channel_enable(ch); - - return 0; -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_client_register); - -void ep93xx_dma_m2p_client_unregister(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch = cl->channel; - - channel_disable(ch); - free_irq(ch->irq, ch); - ch->client = NULL; -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_client_unregister); - -void ep93xx_dma_m2p_submit(struct ep93xx_dma_m2p_client *cl, - struct ep93xx_dma_buffer *buf) -{ - struct m2p_channel *ch = cl->channel; - unsigned long flags; - u32 v; - - spin_lock_irqsave(&ch->lock, flags); - v = readl(ch->base + M2P_CONTROL); - if (ch->buffer_xfer == NULL) { - ch->buffer_xfer = buf; - feed_buf(ch, buf); - cl->buffer_started(cl->cookie, buf); - - v |= M2P_CONTROL_STALL_IRQ_EN; - m2p_set_control(ch, v); - - } else if (ch->buffer_next == NULL) { - ch->buffer_next = buf; - feed_buf(ch, buf); - - v |= M2P_CONTROL_NFB_IRQ_EN; - m2p_set_control(ch, v); - } else { - list_add_tail(&buf->list, &ch->buffers_pending); - } - spin_unlock_irqrestore(&ch->lock, flags); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_submit); - -void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *cl, - struct ep93xx_dma_buffer *buf) -{ - struct m2p_channel *ch = cl->channel; - - list_add_tail(&buf->list, &ch->buffers_pending); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_submit_recursive); - -void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *cl) -{ - struct m2p_channel *ch = cl->channel; - - channel_disable(ch); - ch->next_slot = 0; - ch->buffer_xfer = NULL; - ch->buffer_next = NULL; - INIT_LIST_HEAD(&ch->buffers_pending); - channel_enable(ch); -} -EXPORT_SYMBOL_GPL(ep93xx_dma_m2p_flush); - -static int init_channel(struct m2p_channel *ch) -{ - ch->clk = clk_get(NULL, ch->name); - if (IS_ERR(ch->clk)) - return PTR_ERR(ch->clk); - - spin_lock_init(&ch->lock); - ch->client = NULL; - - return 0; -} - -static int __init ep93xx_dma_m2p_init(void) -{ - int i; - int ret; - - for (i = 0; m2p_rx[i].base; i++) { - ret = init_channel(m2p_rx + i); - if (ret) - return ret; - } - - for (i = 0; m2p_tx[i].base; i++) { - ret = init_channel(m2p_tx + i); - if (ret) - return ret; - } - - pr_info("M2P DMA subsystem initialized\n"); - return 0; -} -arch_initcall(ep93xx_dma_m2p_init); diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h index 6e7049a..46d4d87 100644 --- a/arch/arm/mach-ep93xx/include/mach/dma.h +++ b/arch/arm/mach-ep93xx/include/mach/dma.h @@ -1,153 +1,10 @@ -/** - * DOC: EP93xx DMA M2P memory to peripheral and peripheral to memory engine - * - * The EP93xx DMA M2P subsystem handles DMA transfers between memory and - * peripherals. DMA M2P channels are available for audio, UARTs and IrDA. - * See chapter 10 of the EP93xx users guide for full details on the DMA M2P - * engine. - * - * See sound/soc/ep93xx/ep93xx-pcm.c for an example use of the DMA M2P code. 
- * - */ - #ifndef __ASM_ARCH_DMA_H #define __ASM_ARCH_DMA_H -#include #include #include #include -/** - * struct ep93xx_dma_buffer - Information about a buffer to be transferred - * using the DMA M2P engine - * - * @list: Entry in DMA buffer list - * @bus_addr: Physical address of the buffer - * @size: Size of the buffer in bytes - */ -struct ep93xx_dma_buffer { - struct list_head list; - u32 bus_addr; - u16 size; -}; - -/** - * struct ep93xx_dma_m2p_client - Information about a DMA M2P client - * - * @name: Unique name for this client - * @flags: Client flags - * @cookie: User data to pass to callback functions - * @buffer_started: Non NULL function to call when a transfer is started. - * The arguments are the user data cookie and the DMA - * buffer which is starting. - * @buffer_finished: Non NULL function to call when a transfer is completed. - * The arguments are the user data cookie, the DMA buffer - * which has completed, and a boolean flag indicating if - * the transfer had an error. - */ -struct ep93xx_dma_m2p_client { - char *name; - u8 flags; - void *cookie; - void (*buffer_started)(void *cookie, - struct ep93xx_dma_buffer *buf); - void (*buffer_finished)(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error); - - /* private: Internal use only */ - void *channel; -}; - -/* DMA M2P ports */ -#define EP93XX_DMA_M2P_PORT_I2S1 0x00 -#define EP93XX_DMA_M2P_PORT_I2S2 0x01 -#define EP93XX_DMA_M2P_PORT_AAC1 0x02 -#define EP93XX_DMA_M2P_PORT_AAC2 0x03 -#define EP93XX_DMA_M2P_PORT_AAC3 0x04 -#define EP93XX_DMA_M2P_PORT_I2S3 0x05 -#define EP93XX_DMA_M2P_PORT_UART1 0x06 -#define EP93XX_DMA_M2P_PORT_UART2 0x07 -#define EP93XX_DMA_M2P_PORT_UART3 0x08 -#define EP93XX_DMA_M2P_PORT_IRDA 0x09 -#define EP93XX_DMA_M2P_PORT_MASK 0x0f - -/* DMA M2P client flags */ -#define EP93XX_DMA_M2P_TX 0x00 /* Memory to peripheral */ -#define EP93XX_DMA_M2P_RX 0x10 /* Peripheral to memory */ - -/* - * DMA M2P client error handling flags. See the EP93xx users guide - * documentation on the DMA M2P CONTROL register for more details - */ -#define EP93XX_DMA_M2P_ABORT_ON_ERROR 0x20 /* Abort on peripheral error */ -#define EP93XX_DMA_M2P_IGNORE_ERROR 0x40 /* Ignore peripheral errors */ -#define EP93XX_DMA_M2P_ERROR_MASK 0x60 /* Mask of error bits */ - -/** - * ep93xx_dma_m2p_client_register - Register a client with the DMA M2P - * subsystem - * - * @m2p: Client information to register - * returns 0 on success - * - * The DMA M2P subsystem allocates a channel and an interrupt line for the DMA - * client - */ -int ep93xx_dma_m2p_client_register(struct ep93xx_dma_m2p_client *m2p); - -/** - * ep93xx_dma_m2p_client_unregister - Unregister a client from the DMA M2P - * subsystem - * - * @m2p: Client to unregister - * - * Any transfers currently in progress will be completed in hardware, but - * ignored in software. - */ -void ep93xx_dma_m2p_client_unregister(struct ep93xx_dma_m2p_client *m2p); - -/** - * ep93xx_dma_m2p_submit - Submit a DMA M2P transfer - * - * @m2p: DMA Client to submit the transfer on - * @buf: DMA Buffer to submit - * - * If the current or next transfer positions are free on the M2P client then - * the transfer is started immediately. If not, the transfer is added to the - * list of pending transfers. This function must not be called from the - * buffer_finished callback for an M2P channel. 
- * - */ -void ep93xx_dma_m2p_submit(struct ep93xx_dma_m2p_client *m2p, - struct ep93xx_dma_buffer *buf); - -/** - * ep93xx_dma_m2p_submit_recursive - Put a DMA transfer on the pending list - * for an M2P channel - * - * @m2p: DMA Client to submit the transfer on - * @buf: DMA Buffer to submit - * - * This function must only be called from the buffer_finished callback for an - * M2P channel. It is commonly used to add the next transfer in a chained list - * of DMA transfers. - */ -void ep93xx_dma_m2p_submit_recursive(struct ep93xx_dma_m2p_client *m2p, - struct ep93xx_dma_buffer *buf); - -/** - * ep93xx_dma_m2p_flush - Flush all pending transfers on a DMA M2P client - * - * @m2p: DMA client to flush transfers on - * - * Any transfers currently in progress will be completed in hardware, but - * ignored in software. - * - */ -void ep93xx_dma_m2p_flush(struct ep93xx_dma_m2p_client *m2p); - /* * M2P channels. * -- cgit v1.1 From d41071575b0b20b780bb0e8e7e70c62c1b07a883 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sun, 29 May 2011 13:10:06 +0300 Subject: spi/ep93xx: add DMA support This patch adds DMA support for the EP93xx SPI driver. By default the DMA is not enabled but it can be enabled by setting ep93xx_spi_info.use_dma to true in board configuration file. Note that the SPI driver still uses PIO for small transfers (<= 8 bytes) for performance reasons. Signed-off-by: Mika Westerberg Acked-by: H Hartley Sweeten Cc: Grant Likely Acked-by: Grant Likely Signed-off-by: Vinod Koul --- Documentation/spi/ep93xx_spi | 10 + arch/arm/mach-ep93xx/core.c | 6 +- arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h | 2 + drivers/spi/ep93xx_spi.c | 303 ++++++++++++++++++++++++- 4 files changed, 308 insertions(+), 13 deletions(-) diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi index 6325f5b..d8eb01c 100644 --- a/Documentation/spi/ep93xx_spi +++ b/Documentation/spi/ep93xx_spi @@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void) ARRAY_SIZE(ts72xx_spi_devices)); } +The driver can use DMA for the transfers also. In this case ts72xx_spi_info +becomes: + +static struct ep93xx_spi_info ts72xx_spi_info = { + .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices), + .use_dma = true; +}; + +Note that CONFIG_EP93XX_DMA should be enabled as well. + Thanks to ========= Martin Guy, H. 
Hartley Sweeten and others who helped me during development of diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 8207954..cc9f1d4 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -488,11 +488,15 @@ static struct resource ep93xx_spi_resources[] = { }, }; +static u64 ep93xx_spi_dma_mask = DMA_BIT_MASK(32); + static struct platform_device ep93xx_spi_device = { .name = "ep93xx-spi", .id = 0, .dev = { - .platform_data = &ep93xx_spi_master_data, + .platform_data = &ep93xx_spi_master_data, + .coherent_dma_mask = DMA_BIT_MASK(32), + .dma_mask = &ep93xx_spi_dma_mask, }, .num_resources = ARRAY_SIZE(ep93xx_spi_resources), .resource = ep93xx_spi_resources, diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h index 0a37961..9bb63ac 100644 --- a/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h +++ b/arch/arm/mach-ep93xx/include/mach/ep93xx_spi.h @@ -7,9 +7,11 @@ struct spi_device; * struct ep93xx_spi_info - EP93xx specific SPI descriptor * @num_chipselect: number of chip selects on this board, must be * at least one + * @use_dma: use DMA for the transfers */ struct ep93xx_spi_info { int num_chipselect; + bool use_dma; }; /** diff --git a/drivers/spi/ep93xx_spi.c b/drivers/spi/ep93xx_spi.c index d357007..1cf64547 100644 --- a/drivers/spi/ep93xx_spi.c +++ b/drivers/spi/ep93xx_spi.c @@ -1,7 +1,7 @@ /* * Driver for Cirrus Logic EP93xx SPI controller. * - * Copyright (c) 2010 Mika Westerberg + * Copyright (C) 2010-2011 Mika Westerberg * * Explicit FIFO handling code was inspired by amba-pl022 driver. * @@ -21,13 +21,16 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include #include #define SSPCR0 0x0000 @@ -71,6 +74,7 @@ * @pdev: pointer to platform device * @clk: clock for the controller * @regs_base: pointer to ioremap()'d registers + * @sspdr_phys: physical address of the SSPDR register * @irq: IRQ number used by the driver * @min_rate: minimum clock rate (in Hz) supported by the controller * @max_rate: maximum clock rate (in Hz) supported by the controller @@ -84,6 +88,14 @@ * @rx: current byte in transfer to receive * @fifo_level: how full is FIFO (%0..%SPI_FIFO_SIZE - %1). Receiving one * frame decreases this level and sending one frame increases it. + * @dma_rx: RX DMA channel + * @dma_tx: TX DMA channel + * @dma_rx_data: RX parameters passed to the DMA engine + * @dma_tx_data: TX parameters passed to the DMA engine + * @rx_sgt: sg table for RX transfers + * @tx_sgt: sg table for TX transfers + * @zeropage: dummy page used as RX buffer when only TX buffer is passed in by + * the client * * This structure holds EP93xx SPI controller specific information. When * @running is %true, driver accepts transfer requests from protocol drivers. 
@@ -100,6 +112,7 @@ struct ep93xx_spi { const struct platform_device *pdev; struct clk *clk; void __iomem *regs_base; + unsigned long sspdr_phys; int irq; unsigned long min_rate; unsigned long max_rate; @@ -112,6 +125,13 @@ struct ep93xx_spi { size_t tx; size_t rx; size_t fifo_level; + struct dma_chan *dma_rx; + struct dma_chan *dma_tx; + struct ep93xx_dma_data dma_rx_data; + struct ep93xx_dma_data dma_tx_data; + struct sg_table rx_sgt; + struct sg_table tx_sgt; + void *zeropage; }; /** @@ -496,14 +516,195 @@ static int ep93xx_spi_read_write(struct ep93xx_spi *espi) espi->fifo_level++; } - if (espi->rx == t->len) { - msg->actual_length += t->len; + if (espi->rx == t->len) return 0; - } return -EINPROGRESS; } +static void ep93xx_spi_pio_transfer(struct ep93xx_spi *espi) +{ + /* + * Now everything is set up for the current transfer. We prime the TX + * FIFO, enable interrupts, and wait for the transfer to complete. + */ + if (ep93xx_spi_read_write(espi)) { + ep93xx_spi_enable_interrupts(espi); + wait_for_completion(&espi->wait); + } +} + +/** + * ep93xx_spi_dma_prepare() - prepares a DMA transfer + * @espi: ep93xx SPI controller struct + * @dir: DMA transfer direction + * + * Function configures the DMA, maps the buffer and prepares the DMA + * descriptor. Returns a valid DMA descriptor in case of success and ERR_PTR + * in case of failure. + */ +static struct dma_async_tx_descriptor * +ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir) +{ + struct spi_transfer *t = espi->current_msg->state; + struct dma_async_tx_descriptor *txd; + enum dma_slave_buswidth buswidth; + struct dma_slave_config conf; + struct scatterlist *sg; + struct sg_table *sgt; + struct dma_chan *chan; + const void *buf, *pbuf; + size_t len = t->len; + int i, ret, nents; + + if (bits_per_word(espi) > 8) + buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES; + else + buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE; + + memset(&conf, 0, sizeof(conf)); + conf.direction = dir; + + if (dir == DMA_FROM_DEVICE) { + chan = espi->dma_rx; + buf = t->rx_buf; + sgt = &espi->rx_sgt; + + conf.src_addr = espi->sspdr_phys; + conf.src_addr_width = buswidth; + } else { + chan = espi->dma_tx; + buf = t->tx_buf; + sgt = &espi->tx_sgt; + + conf.dst_addr = espi->sspdr_phys; + conf.dst_addr_width = buswidth; + } + + ret = dmaengine_slave_config(chan, &conf); + if (ret) + return ERR_PTR(ret); + + /* + * We need to split the transfer into PAGE_SIZE'd chunks. This is + * because we are using @espi->zeropage to provide a zero RX buffer + * for the TX transfers and we have only allocated one page for that. + * + * For performance reasons we allocate a new sg_table only when + * needed. Otherwise we will re-use the current one. Eventually the + * last sg_table is released in ep93xx_spi_release_dma(). 
+ */ + + nents = DIV_ROUND_UP(len, PAGE_SIZE); + if (nents != sgt->nents) { + sg_free_table(sgt); + + ret = sg_alloc_table(sgt, nents, GFP_KERNEL); + if (ret) + return ERR_PTR(ret); + } + + pbuf = buf; + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t bytes = min_t(size_t, len, PAGE_SIZE); + + if (buf) { + sg_set_page(sg, virt_to_page(pbuf), bytes, + offset_in_page(pbuf)); + } else { + sg_set_page(sg, virt_to_page(espi->zeropage), + bytes, 0); + } + + pbuf += bytes; + len -= bytes; + } + + if (WARN_ON(len)) { + dev_warn(&espi->pdev->dev, "len = %d expected 0!", len); + return ERR_PTR(-EINVAL); + } + + nents = dma_map_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); + if (!nents) + return ERR_PTR(-ENOMEM); + + txd = chan->device->device_prep_slave_sg(chan, sgt->sgl, nents, + dir, DMA_CTRL_ACK); + if (!txd) { + dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); + return ERR_PTR(-ENOMEM); + } + return txd; +} + +/** + * ep93xx_spi_dma_finish() - finishes with a DMA transfer + * @espi: ep93xx SPI controller struct + * @dir: DMA transfer direction + * + * Function finishes with the DMA transfer. After this, the DMA buffer is + * unmapped. + */ +static void ep93xx_spi_dma_finish(struct ep93xx_spi *espi, + enum dma_data_direction dir) +{ + struct dma_chan *chan; + struct sg_table *sgt; + + if (dir == DMA_FROM_DEVICE) { + chan = espi->dma_rx; + sgt = &espi->rx_sgt; + } else { + chan = espi->dma_tx; + sgt = &espi->tx_sgt; + } + + dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir); +} + +static void ep93xx_spi_dma_callback(void *callback_param) +{ + complete(callback_param); +} + +static void ep93xx_spi_dma_transfer(struct ep93xx_spi *espi) +{ + struct spi_message *msg = espi->current_msg; + struct dma_async_tx_descriptor *rxd, *txd; + + rxd = ep93xx_spi_dma_prepare(espi, DMA_FROM_DEVICE); + if (IS_ERR(rxd)) { + dev_err(&espi->pdev->dev, "DMA RX failed: %ld\n", PTR_ERR(rxd)); + msg->status = PTR_ERR(rxd); + return; + } + + txd = ep93xx_spi_dma_prepare(espi, DMA_TO_DEVICE); + if (IS_ERR(txd)) { + ep93xx_spi_dma_finish(espi, DMA_FROM_DEVICE); + dev_err(&espi->pdev->dev, "DMA TX failed: %ld\n", PTR_ERR(rxd)); + msg->status = PTR_ERR(txd); + return; + } + + /* We are ready when RX is done */ + rxd->callback = ep93xx_spi_dma_callback; + rxd->callback_param = &espi->wait; + + /* Now submit both descriptors and wait while they finish */ + dmaengine_submit(rxd); + dmaengine_submit(txd); + + dma_async_issue_pending(espi->dma_rx); + dma_async_issue_pending(espi->dma_tx); + + wait_for_completion(&espi->wait); + + ep93xx_spi_dma_finish(espi, DMA_TO_DEVICE); + ep93xx_spi_dma_finish(espi, DMA_FROM_DEVICE); +} + /** * ep93xx_spi_process_transfer() - processes one SPI transfer * @espi: ep93xx SPI controller struct @@ -556,13 +757,14 @@ static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi, espi->tx = 0; /* - * Now everything is set up for the current transfer. We prime the TX - * FIFO, enable interrupts, and wait for the transfer to complete. + * There is no point of setting up DMA for the transfers which will + * fit into the FIFO and can be transferred with a single interrupt. + * So in these cases we will be using PIO and don't bother for DMA. 
*/ - if (ep93xx_spi_read_write(espi)) { - ep93xx_spi_enable_interrupts(espi); - wait_for_completion(&espi->wait); - } + if (espi->dma_rx && t->len > SPI_FIFO_SIZE) + ep93xx_spi_dma_transfer(espi); + else + ep93xx_spi_pio_transfer(espi); /* * In case of error during transmit, we bail out from processing @@ -571,6 +773,8 @@ static void ep93xx_spi_process_transfer(struct ep93xx_spi *espi, if (msg->status) return; + msg->actual_length += t->len; + /* * After this transfer is finished, perform any possible * post-transfer actions requested by the protocol driver. @@ -752,6 +956,75 @@ static irqreturn_t ep93xx_spi_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static bool ep93xx_spi_dma_filter(struct dma_chan *chan, void *filter_param) +{ + if (ep93xx_dma_chan_is_m2p(chan)) + return false; + + chan->private = filter_param; + return true; +} + +static int ep93xx_spi_setup_dma(struct ep93xx_spi *espi) +{ + dma_cap_mask_t mask; + int ret; + + espi->zeropage = (void *)get_zeroed_page(GFP_KERNEL); + if (!espi->zeropage) + return -ENOMEM; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + espi->dma_rx_data.port = EP93XX_DMA_SSP; + espi->dma_rx_data.direction = DMA_FROM_DEVICE; + espi->dma_rx_data.name = "ep93xx-spi-rx"; + + espi->dma_rx = dma_request_channel(mask, ep93xx_spi_dma_filter, + &espi->dma_rx_data); + if (!espi->dma_rx) { + ret = -ENODEV; + goto fail_free_page; + } + + espi->dma_tx_data.port = EP93XX_DMA_SSP; + espi->dma_tx_data.direction = DMA_TO_DEVICE; + espi->dma_tx_data.name = "ep93xx-spi-tx"; + + espi->dma_tx = dma_request_channel(mask, ep93xx_spi_dma_filter, + &espi->dma_tx_data); + if (!espi->dma_tx) { + ret = -ENODEV; + goto fail_release_rx; + } + + return 0; + +fail_release_rx: + dma_release_channel(espi->dma_rx); + espi->dma_rx = NULL; +fail_free_page: + free_page((unsigned long)espi->zeropage); + + return ret; +} + +static void ep93xx_spi_release_dma(struct ep93xx_spi *espi) +{ + if (espi->dma_rx) { + dma_release_channel(espi->dma_rx); + sg_free_table(&espi->rx_sgt); + } + if (espi->dma_tx) { + dma_release_channel(espi->dma_tx); + sg_free_table(&espi->tx_sgt); + } + + if (espi->zeropage) + free_page((unsigned long)espi->zeropage); +} + static int __init ep93xx_spi_probe(struct platform_device *pdev) { struct spi_master *master; @@ -818,6 +1091,7 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) goto fail_put_clock; } + espi->sspdr_phys = res->start + SSPDR; espi->regs_base = ioremap(res->start, resource_size(res)); if (!espi->regs_base) { dev_err(&pdev->dev, "failed to map resources\n"); @@ -832,10 +1106,13 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) goto fail_unmap_regs; } + if (info->use_dma && ep93xx_spi_setup_dma(espi)) + dev_warn(&pdev->dev, "DMA setup failed. 
Falling back to PIO\n"); + espi->wq = create_singlethread_workqueue("ep93xx_spid"); if (!espi->wq) { dev_err(&pdev->dev, "unable to create workqueue\n"); - goto fail_free_irq; + goto fail_free_dma; } INIT_WORK(&espi->msg_work, ep93xx_spi_work); INIT_LIST_HEAD(&espi->msg_queue); @@ -857,7 +1134,8 @@ static int __init ep93xx_spi_probe(struct platform_device *pdev) fail_free_queue: destroy_workqueue(espi->wq); -fail_free_irq: +fail_free_dma: + ep93xx_spi_release_dma(espi); free_irq(espi->irq, espi); fail_unmap_regs: iounmap(espi->regs_base); @@ -901,6 +1179,7 @@ static int __exit ep93xx_spi_remove(struct platform_device *pdev) } spin_unlock_irq(&espi->lock); + ep93xx_spi_release_dma(espi); free_irq(espi->irq, espi); iounmap(espi->regs_base); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- cgit v1.1 From 7dab35c0c01c5d960d7b551a607270adccfadb42 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 1 Jun 2011 15:10:30 -0700 Subject: dma: ipu_idmac.c: use resource_size in ioremap Signed-off-by: H Hartley Sweeten Cc: Dan Williams Cc: Vinod Koul Cc: Guennadi Liakhovetski Cc: Anatolij Gustschin Signed-off-by: Vinod Koul --- drivers/dma/ipu/ipu_idmac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index c1a125e7..25447a8 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -1705,16 +1705,14 @@ static int __init ipu_probe(struct platform_device *pdev) ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base); /* Remap IPU common registers */ - ipu_data.reg_ipu = ioremap(mem_ipu->start, - mem_ipu->end - mem_ipu->start + 1); + ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu)); if (!ipu_data.reg_ipu) { ret = -ENOMEM; goto err_ioremap_ipu; } /* Remap Image Converter and Image DMA Controller registers */ - ipu_data.reg_ic = ioremap(mem_ic->start, - mem_ic->end - mem_ic->start + 1); + ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic)); if (!ipu_data.reg_ic) { ret = -ENOMEM; goto err_ioremap_ic; -- cgit v1.1 From 114df7d66efd5c23561782f38e97c48fb30d4f5d Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 1 Jun 2011 15:16:09 -0700 Subject: dma: at_hdmac.c: use resource_size Signed-off-by: H Hartley Sweeten Cc: Dan Williams Cc: Vinod Koul Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 36144f8..6a483ea 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1216,7 +1216,7 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.cap_mask = pdata->cap_mask; atdma->all_chan_mask = (1 << pdata->nr_channels) - 1; - size = io->end - io->start + 1; + size = resource_size(io); if (!request_mem_region(io->start, size, pdev->dev.driver->name)) { err = -EBUSY; goto err_kfree; @@ -1362,7 +1362,7 @@ static int __exit at_dma_remove(struct platform_device *pdev) atdma->regs = NULL; io = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(io->start, io->end - io->start + 1); + release_mem_region(io->start, resource_size(io)); kfree(atdma); -- cgit v1.1 From c08957a2cf3c4a14e68d72c845d3c52cf3d826e1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Jun 2011 23:36:18 +0100 Subject: regulator: Properly register dummy regulator driver Recent changes in the driver core appear to mean that the data structures for the driver core are not fully initialised unless the driver is bound. 
Make sure the driver core knows the dummy driver is in use by binding it to a driver. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/dummy.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c index c7410bd..f6ef669 100644 --- a/drivers/regulator/dummy.c +++ b/drivers/regulator/dummy.c @@ -36,6 +36,29 @@ static struct regulator_desc dummy_desc = { .ops = &dummy_ops, }; +static int __devinit dummy_regulator_probe(struct platform_device *pdev) +{ + int ret; + + dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, + &dummy_initdata, NULL); + if (IS_ERR(dummy_regulator_rdev)) { + ret = PTR_ERR(dummy_regulator_rdev); + pr_err("Failed to register regulator: %d\n", ret); + return ret; + } + + return 0; +} + +static struct platform_driver dummy_regulator_driver = { + .probe = dummy_regulator_probe, + .driver = { + .name = "reg-dummy", + .owner = THIS_MODULE, + }, +}; + static struct platform_device *dummy_pdev; void __init regulator_dummy_init(void) @@ -55,12 +78,9 @@ void __init regulator_dummy_init(void) return; } - dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, - &dummy_initdata, NULL); - if (IS_ERR(dummy_regulator_rdev)) { - ret = PTR_ERR(dummy_regulator_rdev); - pr_err("Failed to register regulator: %d\n", ret); + ret = platform_driver_register(&dummy_regulator_driver); + if (ret != 0) { + pr_err("Failed to register dummy regulator driver: %d\n", ret); platform_device_unregister(dummy_pdev); - return; } } -- cgit v1.1 From f5726ae33c382366ea1b23240d5620dcf675d81d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:22:20 +0100 Subject: regulator: Increase the limit on sysfs file names With verbose filenames we can easily hit 32 characters. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7b38af9..75312bd 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1033,7 +1033,7 @@ static void unset_regulator_supplies(struct regulator_dev *rdev) } } -#define REG_STR_SIZE 32 +#define REG_STR_SIZE 64 static struct regulator *create_regulator(struct regulator_dev *rdev, struct device *dev, -- cgit v1.1 From e0eaedefda8e14ed3f445f382c568c5d69e4223f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:22:21 +0100 Subject: regulator: Include the device name in the microamps_requested_ file We may have multiple devices requesting a supply with the same name so include the device name in the generated filename for microamps_requested to avoid duplicate files. 
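Taken together with the REG_STR_SIZE increase above, the effect of the new naming scheme
is easiest to see in isolation. The fragment below is only an illustration, not code from
the patch: the device names "1-001a" and "spi0.0" and the supply name "DCVDD" are invented,
and plain snprintf()/printf() stand in for the kernel's scnprintf() and sysfs attribute
creation. It mirrors the format string the patch introduces in create_regulator() and shows
both why two devices sharing a supply name no longer collide and why the old 32-byte buffer
was too small.

#include <stdio.h>

#define REG_STR_SIZE 64	/* raised from 32 by the previous patch */

int main(void)
{
	/* two invented consumers requesting a supply with the same name */
	const char *devs[] = { "1-001a", "spi0.0" };
	const char *supply = "DCVDD";
	char buf[REG_STR_SIZE];
	int i;

	for (i = 0; i < 2; i++) {
		/* format used after this patch: microamps_requested_<dev>-<supply> */
		int len = snprintf(buf, sizeof(buf), "microamps_requested_%s-%s",
				   devs[i], supply);
		printf("%s (%d characters)\n", buf, len);
	}
	return 0;
}
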
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 75312bd..e3b67ee 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1053,8 +1053,9 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, if (dev) { /* create a 'requested_microamps_name' sysfs entry */ - size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s", - supply_name); + size = scnprintf(buf, REG_STR_SIZE, + "microamps_requested_%s-%s", + dev_name(dev), supply_name); if (size >= REG_STR_SIZE) goto overflow_err; -- cgit v1.1 From 3801b86aa482d26a8ae460f67fca29e016491a86 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:22:22 +0100 Subject: regulator: Refactor supply implementation to work as regular consumers Currently the regulator supply implementation is somewhat complex and fragile as it doesn't look like standard consumers but is instead a parallel implementation. This causes issues with locking and reference counting. Move the implementation over to using standard consumers to address this. Rather than only notifying the supply on the first enable/disable we do so every time the regulator is enabled or disabled, simplifying locking as we don't need to hold a lock on the consumer we are about to enable. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 101 ++++++++++++++------------------------- include/linux/regulator/driver.h | 4 +- 2 files changed, 37 insertions(+), 68 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index e3b67ee..f0cc398 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -82,8 +82,7 @@ struct regulator { }; static int _regulator_is_enabled(struct regulator_dev *rdev); -static int _regulator_disable(struct regulator_dev *rdev, - struct regulator_dev **supply_rdev_ptr); +static int _regulator_disable(struct regulator_dev *rdev); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); @@ -91,6 +90,9 @@ static void _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); +static struct regulator *create_regulator(struct regulator_dev *rdev, + struct device *dev, + const char *supply_name); static const char *rdev_get_name(struct regulator_dev *rdev) { @@ -930,21 +932,20 @@ out: * core if it's child is enabled. 
*/ static int set_supply(struct regulator_dev *rdev, - struct regulator_dev *supply_rdev) + struct regulator_dev *supply_rdev) { int err; - err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj, - "supply"); - if (err) { - rdev_err(rdev, "could not add device link %s err %d\n", - supply_rdev->dev.kobj.name, err); - goto out; + rdev_info(rdev, "supplied by %s\n", rdev_get_name(supply_rdev)); + + rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); + if (IS_ERR(rdev->supply)) { + err = PTR_ERR(rdev->supply); + rdev->supply = NULL; + return err; } - rdev->supply = supply_rdev; - list_add(&rdev->slist, &supply_rdev->supply_list); -out: - return err; + + return 0; } /** @@ -1303,19 +1304,6 @@ static int _regulator_enable(struct regulator_dev *rdev) { int ret, delay; - if (rdev->use_count == 0) { - /* do we need to enable the supply regulator first */ - if (rdev->supply) { - mutex_lock(&rdev->supply->mutex); - ret = _regulator_enable(rdev->supply); - mutex_unlock(&rdev->supply->mutex); - if (ret < 0) { - rdev_err(rdev, "failed to enable: %d\n", ret); - return ret; - } - } - } - /* check voltage and requested load before enabling */ if (rdev->constraints && (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) @@ -1390,19 +1378,27 @@ int regulator_enable(struct regulator *regulator) struct regulator_dev *rdev = regulator->rdev; int ret = 0; + if (rdev->supply) { + ret = regulator_enable(rdev->supply); + if (ret != 0) + return ret; + } + mutex_lock(&rdev->mutex); ret = _regulator_enable(rdev); mutex_unlock(&rdev->mutex); + + if (ret != 0) + regulator_disable(rdev->supply); + return ret; } EXPORT_SYMBOL_GPL(regulator_enable); /* locks held by regulator_disable() */ -static int _regulator_disable(struct regulator_dev *rdev, - struct regulator_dev **supply_rdev_ptr) +static int _regulator_disable(struct regulator_dev *rdev) { int ret = 0; - *supply_rdev_ptr = NULL; if (WARN(rdev->use_count <= 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) @@ -1429,9 +1425,6 @@ static int _regulator_disable(struct regulator_dev *rdev, NULL); } - /* decrease our supplies ref count and disable if required */ - *supply_rdev_ptr = rdev->supply; - rdev->use_count = 0; } else if (rdev->use_count > 1) { @@ -1442,6 +1435,7 @@ static int _regulator_disable(struct regulator_dev *rdev, rdev->use_count--; } + return ret; } @@ -1460,29 +1454,21 @@ static int _regulator_disable(struct regulator_dev *rdev, int regulator_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; - struct regulator_dev *supply_rdev = NULL; int ret = 0; mutex_lock(&rdev->mutex); - ret = _regulator_disable(rdev, &supply_rdev); + ret = _regulator_disable(rdev); mutex_unlock(&rdev->mutex); - /* decrease our supplies ref count and disable if required */ - while (supply_rdev != NULL) { - rdev = supply_rdev; - - mutex_lock(&rdev->mutex); - _regulator_disable(rdev, &supply_rdev); - mutex_unlock(&rdev->mutex); - } + if (ret == 0 && rdev->supply) + regulator_disable(rdev->supply); return ret; } EXPORT_SYMBOL_GPL(regulator_disable); /* locks held by regulator_force_disable() */ -static int _regulator_force_disable(struct regulator_dev *rdev, - struct regulator_dev **supply_rdev_ptr) +static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; @@ -1499,10 +1485,6 @@ static int _regulator_force_disable(struct regulator_dev *rdev, REGULATOR_EVENT_DISABLE, NULL); } - /* decrease our supplies ref count and disable if required */ - *supply_rdev_ptr = rdev->supply; - - 
rdev->use_count = 0; return ret; } @@ -1518,16 +1500,16 @@ static int _regulator_force_disable(struct regulator_dev *rdev, int regulator_force_disable(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; - struct regulator_dev *supply_rdev = NULL; int ret; mutex_lock(&rdev->mutex); regulator->uA_load = 0; - ret = _regulator_force_disable(rdev, &supply_rdev); + ret = _regulator_force_disable(regulator->rdev); mutex_unlock(&rdev->mutex); - if (supply_rdev) - regulator_disable(get_device_regulator(rdev_get_dev(supply_rdev))); + if (rdev->supply) + while (rdev->open_count--) + regulator_disable(rdev->supply); return ret; } @@ -2138,7 +2120,7 @@ int regulator_set_optimum_mode(struct regulator *regulator, int uA_load) /* get input voltage */ input_uV = 0; if (rdev->supply) - input_uV = _regulator_get_voltage(rdev->supply); + input_uV = regulator_get_voltage(rdev->supply); if (input_uV <= 0) input_uV = rdev->constraints->input_uV; if (input_uV <= 0) { @@ -2208,17 +2190,8 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier); static void _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { - struct regulator_dev *_rdev; - /* call rdev chain first */ blocking_notifier_call_chain(&rdev->notifier, event, NULL); - - /* now notify regulator we supply */ - list_for_each_entry(_rdev, &rdev->supply_list, slist) { - mutex_lock(&_rdev->mutex); - _notifier_call_chain(_rdev, event, data); - mutex_unlock(&_rdev->mutex); - } } /** @@ -2610,9 +2583,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, rdev->owner = regulator_desc->owner; rdev->desc = regulator_desc; INIT_LIST_HEAD(&rdev->consumer_list); - INIT_LIST_HEAD(&rdev->supply_list); INIT_LIST_HEAD(&rdev->list); - INIT_LIST_HEAD(&rdev->slist); BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); /* preform any regulator specific init */ @@ -2724,7 +2695,7 @@ void regulator_unregister(struct regulator_dev *rdev) unset_regulator_supplies(rdev); list_del(&rdev->list); if (rdev->supply) - sysfs_remove_link(&rdev->dev.kobj, "supply"); + regulator_put(rdev->supply); device_unregister(&rdev->dev); kfree(rdev->constraints); mutex_unlock(®ulator_list_mutex); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 6c433b8..1a80bc7 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -188,18 +188,16 @@ struct regulator_dev { /* lists we belong to */ struct list_head list; /* list of all regulators */ - struct list_head slist; /* list of supplied regulators */ /* lists we own */ struct list_head consumer_list; /* consumers we supply */ - struct list_head supply_list; /* regulators we supply */ struct blocking_notifier_head notifier; struct mutex mutex; /* consumer lock */ struct module *owner; struct device dev; struct regulation_constraints *constraints; - struct regulator_dev *supply; /* for tree */ + struct regulator *supply; /* for tree */ void *reg_data; /* regulator_dev data */ -- cgit v1.1 From 7d51a0dbe51282f3ed13cadf6e7f13a974374be2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 9 Jun 2011 16:06:37 +0100 Subject: regulator: Add rdev_crit() macro No actual users but provide the macro so there's less surprise when it's not there. 
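For reference, a hypothetical caller inside core.c (none exists yet, as noted above) would use the new macro exactly like the existing rdev_err()/rdev_warn() helpers; a minimal sketch, with an invented function name:

/* Illustrative only: report a catastrophic failure against a regulator.
 * The macro prefixes the message with the regulator's name, the same
 * way rdev_err() and rdev_warn() already do. */
static void example_report_failure(struct regulator_dev *rdev, int ret)
{
	if (ret != 0)
		rdev_crit(rdev, "failed to shut down regulator: %d\n", ret);
}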
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f0cc398..cc3dfd6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -34,6 +34,8 @@ #include "dummy.h" +#define rdev_crit(rdev, fmt, ...) \ + pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) #define rdev_err(rdev, fmt, ...) \ pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) #define rdev_warn(rdev, fmt, ...) \ -- cgit v1.1 From ea41b1e5440442cea5c029b192e9ebbe68e423f6 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 10 Jun 2011 10:14:03 -0400 Subject: ioctl-number.txt: add the tile hardwall ioctl range Signed-off-by: Chris Metcalf --- Documentation/ioctl/ioctl-number.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 3a46e36..b7dc374 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments 0xA0 all linux/sdp/sdp.h Industrial Device Project +0xA2 00-0F arch/tile/include/asm/hardwall.h 0xA3 80-8F Port ACL in development: 0xA3 90-9F linux/dtlk.h -- cgit v1.1 From dbcb4a1a3f16702918caa4d4ab7062965050a780 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 10 Jun 2011 13:07:48 -0400 Subject: arch/tile: add hypervisor-based character driver for SPI flash ROM The first version of this patch proposed an arch/tile/drivers/ directory, but the consensus was that this was probably a poor choice for a place to group Tilera-specific drivers, and that in any case grouping by platform was discouraged, and grouping by function was preferred. This version of the patch addresses various issues raised in the community, primarily the absence of sysfs integration. The sysfs integration now handles passing information on sector size, page size, and total partition size to userspace as well. In addition, we now use a single "struct cdev" to manage all the partition minor devices, and dynamically discover the correct number of partitions from the hypervisor rather than using a module_param with a default value. This driver has no particular "peer" drivers it can be grouped with. It is sort of like an MTD driver for SPI ROM, but it doesn't group well with the other MTD devices since it relies on hypervisor virtualization to handle many of the irritating aspects of flash ROM management: sector awareness, background read for sub-sector writes, bit examination to determine whether a sector erase needs to be issued, etc. It is in fact more like an EEPROM driver, but the hypervisor virtualization does require a "flush" command if you wish to commit a sector write prior to writing to a different sector, and this is sufficiently different from generic I2C/SPI EEPROMs that as a result it doesn't group well with them either. The simple character device is already in use by a range of Tilera SPI ROM management tools, as well as by customers. In addition, using the simple character device actually simplifies the userspace tools, since they don't need to manage sector erase, background read, etc. This both simplifies the code (since we can uniformly manage plain files and the SPI ROM) as well as makes the user code portable to non-Linux platforms that don't offer the same MTD ioctls. 
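As an illustration of how the interface is meant to be consumed from userspace (not part of the patch; the paths assume udev honours the driver's devnode callback, giving /dev/srom/0, and the "srom" sysfs class registered below, giving /sys/class/srom/0/):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Read a single numeric sysfs attribute, returning 0 on any failure. */
static unsigned long read_attr(const char *path)
{
	unsigned long val = 0;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%lu", &val) != 1)
			val = 0;
		fclose(f);
	}
	return val;
}

int main(void)
{
	unsigned long total = read_attr("/sys/class/srom/0/total_size");
	unsigned long page = read_attr("/sys/class/srom/0/page_size");
	char buf[4096];
	int fd;

	if (page == 0 || page > sizeof(buf))
		return 1;

	fd = open("/dev/srom/0", O_RDONLY);
	if (fd < 0)
		return 1;

	/* Read the first page of partition 0. */
	if (read(fd, buf, page) != (ssize_t)page) {
		close(fd);
		return 1;
	}

	printf("partition 0: %lu bytes total, %lu-byte pages\n", total, page);
	close(fd);
	return 0;
}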
Signed-off-by: Chris Metcalf Reviewed-by: Arnd Bergmann --- arch/tile/include/hv/drv_srom_intf.h | 41 +++ drivers/char/Kconfig | 11 + drivers/char/Makefile | 2 + drivers/char/tile-srom.c | 481 +++++++++++++++++++++++++++++++++++ 4 files changed, 535 insertions(+) create mode 100644 arch/tile/include/hv/drv_srom_intf.h create mode 100644 drivers/char/tile-srom.c diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 0000000..6395faa --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 49502bc..423fd56 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -616,5 +616,16 @@ config MSM_SMD_PKT Enables userspace clients to read and write to some packet SMD ports via device interface for MSM chipset. +config TILE_SROM + bool "Character-device access via hypervisor to the Tilera SPI ROM" + depends on TILE + default y + ---help--- + This device provides character-level read-write access + to the SROM, typically via the "0", "1", and "2" devices + in /dev/srom/. The Tilera hypervisor makes the flash + device appear much like a simple EEPROM, and knows + how to partition a single ROM for multiple purposes. + endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 7a00672..32762ba 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o + +obj-$(CONFIG_TILE_SROM) += tile-srom.o diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c new file mode 100644 index 0000000..cf3ee00 --- /dev/null +++ b/drivers/char/tile-srom.c @@ -0,0 +1,481 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * SPI Flash ROM driver + * + * This source code is derived from code provided in "Linux Device + * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and + * Greg Kroah-Hartman, published by O'Reilly Media, Inc. + */ + +#include +#include +#include +#include /* printk() */ +#include /* kmalloc() */ +#include /* everything... */ +#include /* error codes */ +#include /* size_t */ +#include +#include /* O_ACCMODE */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Size of our hypervisor I/O requests. We break up large transfers + * so that we don't spend large uninterrupted spans of time in the + * hypervisor. Erasing an SROM sector takes a significant fraction of + * a second, so if we allowed the user to, say, do one I/O to write the + * entire ROM, we'd get soft lockup timeouts, or worse. + */ +#define SROM_CHUNK_SIZE ((size_t)4096) + +/* + * When hypervisor is busy (e.g. erasing), poll the status periodically. + */ + +/* + * Interval to poll the state in msec + */ +#define SROM_WAIT_TRY_INTERVAL 20 + +/* + * Maximum times to poll the state + */ +#define SROM_MAX_WAIT_TRY_TIMES 1000 + +struct srom_dev { + int hv_devhdl; /* Handle for hypervisor device */ + u32 total_size; /* Size of this device */ + u32 sector_size; /* Size of a sector */ + u32 page_size; /* Size of a page */ + struct mutex lock; /* Allow only one accessor at a time */ +}; + +static int srom_major; /* Dynamic major by default */ +module_param(srom_major, int, 0); +MODULE_AUTHOR("Tilera Corporation"); +MODULE_LICENSE("GPL"); + +static int srom_devs; /* Number of SROM partitions */ +static struct cdev srom_cdev; +static struct class *srom_class; +static struct srom_dev *srom_devices; + +/* + * Handle calling the hypervisor and managing EAGAIN/EBUSY. + */ + +static ssize_t _srom_read(int hv_devhdl, void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_read: error %d\n", retval); + return -EIO; + } +} + +static ssize_t _srom_write(int hv_devhdl, const void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_write: error %d\n", retval); + return -EIO; + } +} + +/** + * srom_open() - Device open routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. + */ +static int srom_open(struct inode *inode, struct file *filp) +{ + filp->private_data = &srom_devices[iminor(inode)]; + return 0; +} + + +/** + * srom_release() - Device release routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. 
+ */ +static int srom_release(struct inode *inode, struct file *filp) +{ + struct srom_dev *srom = filp->private_data; + char dummy; + + /* Make sure we've flushed anything written to the ROM. */ + mutex_lock(&srom->lock); + if (srom->hv_devhdl >= 0) + _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1); + mutex_unlock(&srom->lock); + + filp->private_data = NULL; + + return 0; +} + + +/** + * srom_read() - Read data from the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes read, or an error code. + */ +static ssize_t srom_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + hv_retval = _srom_read(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval > 0) { + if (copy_to_user(buf, kernbuf, hv_retval) != 0) { + retval = -EFAULT; + break; + } + } else if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/** + * srom_write() - Write data to the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes written, or an error code. + */ +static ssize_t srom_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) { + retval = -EFAULT; + break; + } + + hv_retval = _srom_write(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/* Provide our own implementation so we can use srom->total_size. 
*/ +loff_t srom_llseek(struct file *filp, loff_t offset, int origin) +{ + struct srom_dev *srom = filp->private_data; + + if (mutex_lock_interruptible(&srom->lock)) + return -ERESTARTSYS; + + switch (origin) { + case SEEK_END: + offset += srom->total_size; + break; + case SEEK_CUR: + offset += filp->f_pos; + break; + } + + if (offset < 0 || offset > srom->total_size) { + offset = -EINVAL; + } else { + filp->f_pos = offset; + filp->f_version = 0; + } + + mutex_unlock(&srom->lock); + + return offset; +} + +static ssize_t total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->total_size); +} + +static ssize_t sector_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->sector_size); +} + +static ssize_t page_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->page_size); +} + +static struct device_attribute srom_dev_attrs[] = { + __ATTR(total_size, S_IRUGO, total_show, NULL), + __ATTR(sector_size, S_IRUGO, sector_show, NULL), + __ATTR(page_size, S_IRUGO, page_show, NULL), + __ATTR_NULL +}; + +static char *srom_devnode(struct device *dev, mode_t *mode) +{ + *mode = S_IRUGO | S_IWUSR; + return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev)); +} + +/* + * The fops + */ +static const struct file_operations srom_fops = { + .owner = THIS_MODULE, + .llseek = srom_llseek, + .read = srom_read, + .write = srom_write, + .open = srom_open, + .release = srom_release, +}; + +/** + * srom_setup_minor() - Initialize per-minor information. + * @srom: Per-device SROM state. + * @index: Device to set up. + */ +static int srom_setup_minor(struct srom_dev *srom, int index) +{ + struct device *dev; + int devhdl = srom->hv_devhdl; + + mutex_init(&srom->lock); + + if (_srom_read(devhdl, &srom->total_size, + SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->sector_size, + SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->page_size, + SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0) + return -EIO; + + dev = device_create(srom_class, &platform_bus, + MKDEV(srom_major, index), srom, "%d", index); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +/** srom_init() - Initialize the driver's module. */ +static int srom_init(void) +{ + int result, i; + dev_t dev = MKDEV(srom_major, 0); + + /* + * Start with a plausible number of partitions; the krealloc() call + * below will yield about log(srom_devs) additional allocations. + */ + srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL); + + /* Discover the number of srom partitions. */ + for (i = 0; ; i++) { + int devhdl; + char buf[20]; + struct srom_dev *new_srom_devices = + krealloc(srom_devices, (i+1) * sizeof(struct srom_dev), + GFP_KERNEL | __GFP_ZERO); + if (!new_srom_devices) { + result = -ENOMEM; + goto fail_mem; + } + srom_devices = new_srom_devices; + sprintf(buf, "srom/0/%d", i); + devhdl = hv_dev_open((HV_VirtAddr)buf, 0); + if (devhdl < 0) { + if (devhdl != HV_ENODEV) + pr_notice("srom/%d: hv_dev_open failed: %d.\n", + i, devhdl); + break; + } + srom_devices[i].hv_devhdl = devhdl; + } + srom_devs = i; + + /* Bail out early if we have no partitions at all. 
*/ + if (srom_devs == 0) { + result = -ENODEV; + goto fail_mem; + } + + /* Register our major, and accept a dynamic number. */ + if (srom_major) + result = register_chrdev_region(dev, srom_devs, "srom"); + else { + result = alloc_chrdev_region(&dev, 0, srom_devs, "srom"); + srom_major = MAJOR(dev); + } + if (result < 0) + goto fail_mem; + + /* Register a character device. */ + cdev_init(&srom_cdev, &srom_fops); + srom_cdev.owner = THIS_MODULE; + srom_cdev.ops = &srom_fops; + result = cdev_add(&srom_cdev, dev, srom_devs); + if (result < 0) + goto fail_chrdev; + + /* Create a sysfs class. */ + srom_class = class_create(THIS_MODULE, "srom"); + if (IS_ERR(srom_class)) { + result = PTR_ERR(srom_class); + goto fail_cdev; + } + srom_class->dev_attrs = srom_dev_attrs; + srom_class->devnode = srom_devnode; + + /* Do per-partition initialization */ + for (i = 0; i < srom_devs; i++) { + result = srom_setup_minor(srom_devices + i, i); + if (result < 0) + goto fail_class; + } + + return 0; + +fail_class: + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); +fail_cdev: + cdev_del(&srom_cdev); +fail_chrdev: + unregister_chrdev_region(dev, srom_devs); +fail_mem: + kfree(srom_devices); + return result; +} + +/** srom_cleanup() - Clean up the driver's module. */ +static void srom_cleanup(void) +{ + int i; + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); + cdev_del(&srom_cdev); + unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs); + kfree(srom_devices); +} + +module_init(srom_init); +module_exit(srom_cleanup); -- cgit v1.1 From cf8e98d15361f8c594da00a3f7a500787fc1a426 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 15 Jun 2011 10:35:38 -0400 Subject: arch/tile: remove useless set_fixmap_nocache() macro TILE doesn't support PAGE_KERNEL_NOCACHE so the macro isn't useful; it's a copy-and-paste from the first version of this header in 2007. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/fixmap.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index 51537ff..c66f793 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx, #define set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - #define clear_fixmap(idx) \ __set_fixmap(idx, 0, __pgprot(0)) -- cgit v1.1 From e2f5e5a71dfe6bf155590de0fdd6d748ac79bf76 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 10 Jun 2011 15:15:05 -0700 Subject: dma/ep93xx_dma.c: local symbols should be static The symbol 'ep93xx_dma_prep_dma_memcpy' is only used in this driver and should be marked static. Signed-off-by: H Hartley Sweeten Cc: Mika Westerberg Cc: Dan Williams Cc: Vinod Koul Acked-by: Mika Westerberg Signed-off-by: Vinod Koul --- drivers/dma/ep93xx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 0766c1e..5d7a49b 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -902,7 +902,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan) * * Returns a valid DMA descriptor or %NULL in case of failure. 
*/ -struct dma_async_tx_descriptor * +static struct dma_async_tx_descriptor * ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) { -- cgit v1.1 From a03a202e95fdaa3ff52ccfc2594ec531e5917816 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 20 Jun 2011 17:02:47 +0200 Subject: dmaengine: failure to get a specific DMA channel is not critical There exist systems with multiple DMA controllers with different capabilities. For example, on some sh-mobile / rmobile systems there are DMA controllers, whose channels can be configured to be used with SD- and MMC-host controllers, serial ports etc. Besides, there are also DMA controllers that can only be used for one special function, e.g. for USB. In such cases the DMA client filter function can just choose to specify to the DMA driver which channel it needs. Then the .device_alloc_chan_resources() method of the DMA driver will check whether it can provide that function. If not, it will fail and the loop in __dma_request_channel() will continue to the next DMA device, until it finds a suitable one. This works fine with just one minor glitch: the kernel logs error messages like dmaengine: failed to get : (-) after each such non-critical failure. This patch lowers the priority of this message to the debug level. Reported-by: Kuninori Morimoto Signed-off-by: Guennadi Liakhovetski Tested-by: Kuninori Morimoto Tested-by: Magnus Damm Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8bcb15f..f7f21a5 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -509,8 +509,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v dma_chan_name(chan)); list_del_rcu(&device->global_node); } else if (err) - pr_err("dmaengine: failed to get %s: (%d)\n", - dma_chan_name(chan), err); + pr_debug("dmaengine: failed to get %s: (%d)\n", + dma_chan_name(chan), err); else break; if (--device->privatecnt == 0) -- cgit v1.1 From d3ad8434aa83ef7c88bc91edcfe012cdcbab9f3e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 27 Jun 2011 12:36:29 -0400 Subject: jbd2: use WRITE_SYNC in journal checkpoint In journal checkpoint, we write the buffer and wait for its finish. But in cfq, the async queue has a very low priority, and in our test, if there are too many sync queues and every queue is filled up with requests, the write request will be delayed for quite a long time and all the tasks which are waiting for journal space will end with errors like: INFO: task attr_set:3816 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. attr_set D ffff880028393480 0 3816 1 0x00000000 ffff8802073fbae8 0000000000000086 ffff8802140847c8 ffff8800283934e8 ffff8802073fb9d8 ffffffff8103e456 ffff8802140847b8 ffff8801ed728080 ffff8801db4bc080 ffff8801ed728450 ffff880028393480 0000000000000002 Call Trace: [] ? __dequeue_entity+0x33/0x38 [] ? need_resched+0x23/0x2d [] ? thread_return+0xa2/0xbc [] ? jbd2_journal_dirty_metadata+0x116/0x126 [jbd2] [] ? jbd2_journal_dirty_metadata+0x116/0x126 [jbd2] [] __mutex_lock_common+0x14e/0x1a9 [] ? brelse+0x13/0x15 [ext4] [] __mutex_lock_slowpath+0x19/0x1b [] mutex_lock+0x1b/0x32 [] __jbd2_journal_insert_checkpoint+0xe3/0x20c [jbd2] [] start_this_handle+0x438/0x527 [jbd2] [] ?
autoremove_wake_function+0x0/0x3e [] jbd2_journal_start+0xa1/0xcc [jbd2] [] ext4_journal_start_sb+0x57/0x81 [ext4] [] ext4_xattr_set+0x6c/0xe3 [ext4] [] ext4_xattr_user_set+0x42/0x4b [ext4] [] generic_setxattr+0x6b/0x76 [] __vfs_setxattr_noperm+0x47/0xc0 [] vfs_setxattr+0x7f/0x9a [] setxattr+0xb5/0xe8 [] ? do_filp_open+0x571/0xa6e [] sys_fsetxattr+0x6b/0x91 [] system_call_fastpath+0x16/0x1b So this patch tries to use WRITE_SYNC in __flush_batch so that the request will be moved into the sync queue and handled by cfq in a timely manner. We also use the new plug, so that all the WRITE_SYNC requests can be submitted as a whole when we unplug it. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: Jan Kara Reported-by: Robin Dong --- fs/jbd2/checkpoint.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 2c62c5a..16a698b 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -257,9 +257,12 @@ static void __flush_batch(journal_t *journal, int *batch_count) { int i; + struct blk_plug plug; + blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); + blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; -- cgit v1.1 From ed7a7e16724a4123fce1fc0ff1f5131a0596f189 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Mon, 27 Jun 2011 15:35:53 -0400 Subject: ext4: fix incorrect error msg in ext4_ext_insert_index In function ext4_ext_insert_index, when eh_entries of curp is bigger than eh_max, an error message will be printed out, but its content is about logical and ei_block, which is incorrect. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f815cc8..eb63c7b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -808,8 +808,9 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) > le16_to_cpu(curp->p_hdr->eh_max))) { EXT4_ERROR_INODE(inode, - "logical %d == ei_block %d!", - logical, le32_to_cpu(curp->p_idx->ei_block)); + "eh_entries %d > eh_max %d!", + le16_to_cpu(curp->p_hdr->eh_entries), + le16_to_cpu(curp->p_hdr->eh_max)); return -EIO; } if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { -- cgit v1.1 From ff9893dc8aa622a4f122293a6861566a284edea5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2011 16:36:31 -0400 Subject: ext4: split ext4_ind_truncate from ext4_truncate We are about to move all indirect inode functions to a new file. Before we do that, let's split ext4_ind_truncate() out of ext4_truncate(), leaving only generic code in the latter, so we will be able to move ext4_ind_truncate() to the new file.
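Reduced to a skeleton, the split described above leaves ext4_truncate() as a thin dispatcher (illustrative only; the complete code, including the tracepoints and flag handling, is in the diff below):

void ext4_truncate(struct inode *inode)
{
	if (!ext4_can_truncate(inode))
		return;

	/* ... generic flag and tracepoint handling elided ... */

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		ext4_ext_truncate(inode);	/* extent-mapped file */
	else
		ext4_ind_truncate(inode);	/* block-mapped file: the new helper */
}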
Signed-off-by: Amir Goldstein Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ fs/ext4/inode.c | 36 ++++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1921392..8532dd4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1834,6 +1834,8 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); +extern void ext4_ind_truncate(struct inode *inode); + /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e3126c0..a8f310b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4471,6 +4471,26 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) */ void ext4_truncate(struct inode *inode) { + trace_ext4_truncate_enter(inode); + + if (!ext4_can_truncate(inode)) + return; + + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); + + if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) + ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); + + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + ext4_ext_truncate(inode); + else + ext4_ind_truncate(inode); + + trace_ext4_truncate_exit(inode); +} + +void ext4_ind_truncate(struct inode *inode) +{ handle_t *handle; struct ext4_inode_info *ei = EXT4_I(inode); __le32 *i_data = ei->i_data; @@ -4484,22 +4504,6 @@ void ext4_truncate(struct inode *inode) ext4_lblk_t last_block, max_block; unsigned blocksize = inode->i_sb->s_blocksize; - trace_ext4_truncate_enter(inode); - - if (!ext4_can_truncate(inode)) - return; - - ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); - - if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) - ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); - - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - ext4_ext_truncate(inode); - trace_ext4_truncate_exit(inode); - return; - } - handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ -- cgit v1.1 From 8bb2b247124ba6093455d4aef26743b1bef27bc5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2011 17:10:28 -0400 Subject: ext4: rename ext4_indirect_* funcs to ext4_ind_* We are going to move all ext4_ind_* functions to indirect.c. Before we do that, let's rename 2 functions called ext4_indirect_* to ext4_ind_*, to keep to the naming convention. 
Signed-off-by: Amir Goldstein Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a8f310b..6c1d28e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1075,8 +1075,7 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) * Calculate the number of metadata blocks need to reserve * to allocate a new block at @lblocks for non extent file based file */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, - sector_t lblock) +static int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) { struct ext4_inode_info *ei = EXT4_I(inode); sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); @@ -1107,7 +1106,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return ext4_ext_calc_metadata_amount(inode, lblock); - return ext4_indirect_calc_metadata_amount(inode, lblock); + return ext4_ind_calc_metadata_amount(inode, lblock); } /* @@ -5456,8 +5455,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, - int chunk) +static int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) { int indirects; @@ -5483,7 +5481,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return ext4_indirect_trans_blocks(inode, nrblocks, chunk); + return ext4_ind_trans_blocks(inode, nrblocks, chunk); return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); } -- cgit v1.1 From 1f7d1e77419050831a905353683807fa69a26625 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Jun 2011 19:16:02 -0400 Subject: ext4: move __ext4_check_blockref to block_validity.c In preparation for moving the indirect functions to a separate file, move __ext4_check_blockref() to block_validity.c and rename it to ext4_check_blockref() which is exported as globally visible function. Also, rename the cpp macro ext4_check_inode_blockref() to ext4_ind_check_inode(), to make it clear that it is only valid for use with non-extent mapped inodes. 
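As a usage sketch, condensed from ext4_get_branch() (which is moved later in this series; the helper name here is invented), the block-mapped read path validates an indirect block immediately after reading it:

static int example_read_ind_block(struct inode *inode, __le32 key,
				  struct buffer_head **bhp)
{
	struct buffer_head *bh = sb_getblk(inode->i_sb, le32_to_cpu(key));

	if (!bh)
		return -EIO;
	if (!bh_uptodate_or_lock(bh)) {
		/* Read the block, then check every block number it holds. */
		if (bh_submit_read(bh) < 0 ||
		    ext4_check_indirect_blockref(inode, bh)) {
			put_bh(bh);
			return -EIO;
		}
	}
	*bhp = bh;
	return 0;
}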
Signed-off-by: "Theodore Ts'o" --- fs/ext4/block_validity.c | 20 ++++++++++++++++++++ fs/ext4/ext4.h | 15 +++++++++++++++ fs/ext4/inode.c | 35 +---------------------------------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index fac90f3..af103be 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -246,3 +246,23 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, return 1; } +int ext4_check_blockref(const char *function, unsigned int line, + struct inode *inode, __le32 *p, unsigned int max) +{ + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + __le32 *bref = p; + unsigned int blk; + + while (bref < p+max) { + blk = le32_to_cpu(*bref++); + if (blk && + unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), + blk, 1))) { + es->s_last_error_block = cpu_to_le64(blk); + ext4_error_inode(inode, function, line, blk, + "invalid block"); + return -EIO; + } + } + return 0; +} diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8532dd4..82ba7eb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2125,6 +2125,19 @@ static inline void ext4_mark_super_dirty(struct super_block *sb) } /* + * Block validity checking + */ +#define ext4_check_indirect_blockref(inode, bh) \ + ext4_check_blockref(__func__, __LINE__, inode, \ + (__le32 *)(bh)->b_data, \ + EXT4_ADDR_PER_BLOCK((inode)->i_sb)) + +#define ext4_ind_check_inode(inode) \ + ext4_check_blockref(__func__, __LINE__, inode, \ + EXT4_I(inode)->i_data, \ + EXT4_NDIR_BLOCKS) + +/* * Inodes and files operations */ @@ -2153,6 +2166,8 @@ extern void ext4_exit_system_zone(void); extern int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, unsigned int count); +extern int ext4_check_blockref(const char *, unsigned int, + struct inode *, __le32 *, unsigned int); /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6c1d28e..3dca526 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -360,39 +360,6 @@ static int ext4_block_to_path(struct inode *inode, return n; } -static int __ext4_check_blockref(const char *function, unsigned int line, - struct inode *inode, - __le32 *p, unsigned int max) -{ - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - __le32 *bref = p; - unsigned int blk; - - while (bref < p+max) { - blk = le32_to_cpu(*bref++); - if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), - blk, 1))) { - es->s_last_error_block = cpu_to_le64(blk); - ext4_error_inode(inode, function, line, blk, - "invalid block"); - return -EIO; - } - } - return 0; -} - - -#define ext4_check_indirect_blockref(inode, bh) \ - __ext4_check_blockref(__func__, __LINE__, inode, \ - (__le32 *)(bh)->b_data, \ - EXT4_ADDR_PER_BLOCK((inode)->i_sb)) - -#define ext4_check_inode_blockref(inode) \ - __ext4_check_blockref(__func__, __LINE__, inode, \ - EXT4_I(inode)->i_data, \ - EXT4_NDIR_BLOCKS) - /** * ext4_get_branch - read the chain of indirect blocks leading to data * @inode: inode in question @@ -5010,7 +4977,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) { /* Validate block references which are part of inode */ - ret = ext4_check_inode_blockref(inode); + ret = ext4_ind_check_inode(inode); } if (ret) goto bad_inode; -- cgit v1.1 From 9f125d641beb898f5bf2fe69583192c18043517a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Jun 2011 19:16:04 
-0400 Subject: ext4: move common truncate functions to header file Move two functions that will be needed by the indirect functions to be moved to indirect.c as well as inode.c to truncate.h as inline functions, so that we can avoid having duplicate copies of the function (which can be a maintenance problem) without having to expose them as globally functions. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 48 ++++++------------------------------------------ fs/ext4/truncate.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 42 deletions(-) create mode 100644 fs/ext4/truncate.h diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3dca526..9b82ac7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,6 +47,7 @@ #include "xattr.h" #include "acl.h" #include "ext4_extents.h" +#include "truncate.h" #include @@ -89,33 +90,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) } /* - * Work out how many blocks we need to proceed with the next chunk of a - * truncate transaction. - */ -static unsigned long blocks_for_truncate(struct inode *inode) -{ - ext4_lblk_t needed; - - needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); - - /* Give ourselves just enough room to cope with inodes in which - * i_blocks is corrupt: we've seen disk corruptions in the past - * which resulted in random data in an inode which looked enough - * like a regular file for ext4 to try to delete it. Things - * will go a bit crazy if that happens, but at least we should - * try not to panic the whole kernel. */ - if (needed < 2) - needed = 2; - - /* But we need to bound the transaction so we don't overflow the - * journal. */ - if (needed > EXT4_MAX_TRANS_DATA) - needed = EXT4_MAX_TRANS_DATA; - - return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -} - -/* * Truncate transactions can be complex and absolutely huge. So we need to * be able to restart the transaction at a conventient checkpoint to make * sure we don't overflow the journal. @@ -129,7 +103,7 @@ static handle_t *start_transaction(struct inode *inode) { handle_t *result; - result = ext4_journal_start(inode, blocks_for_truncate(inode)); + result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); if (!IS_ERR(result)) return result; @@ -149,7 +123,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) return 0; if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) return 0; - if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) + if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) return 0; return 1; } @@ -204,7 +178,7 @@ void ext4_evict_inode(struct inode *inode) if (is_bad_inode(inode)) goto no_delete; - handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3); + handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* @@ -1555,16 +1529,6 @@ static int do_journal_get_write_access(handle_t *handle, return ret; } -/* - * Truncate blocks that were not used by write. We have to truncate the - * pagecache as well so that corresponding buffers get properly unmapped. 
- */ -static void ext4_truncate_failed_write(struct inode *inode) -{ - truncate_inode_pages(inode->i_mapping, inode->i_size); - ext4_truncate(inode); -} - static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); static int ext4_write_begin(struct file *file, struct address_space *mapping, @@ -4134,7 +4098,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, if (unlikely(err)) goto out_err; err = ext4_truncate_restart_trans(handle, inode, - blocks_for_truncate(inode)); + ext4_blocks_for_truncate(inode)); if (unlikely(err)) goto out_err; if (bh) { @@ -4329,7 +4293,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (try_to_extend_transaction(handle, inode)) { ext4_mark_inode_dirty(handle, inode); ext4_truncate_restart_trans(handle, inode, - blocks_for_truncate(inode)); + ext4_blocks_for_truncate(inode)); } /* diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h new file mode 100644 index 0000000..011ba66 --- /dev/null +++ b/fs/ext4/truncate.h @@ -0,0 +1,43 @@ +/* + * linux/fs/ext4/truncate.h + * + * Common inline functions needed for truncate support + */ + +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static inline void ext4_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); +} + +/* + * Work out how many blocks we need to proceed with the next chunk of a + * truncate transaction. + */ +static inline unsigned long ext4_blocks_for_truncate(struct inode *inode) +{ + ext4_lblk_t needed; + + needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); + + /* Give ourselves just enough room to cope with inodes in which + * i_blocks is corrupt: we've seen disk corruptions in the past + * which resulted in random data in an inode which looked enough + * like a regular file for ext4 to try to delete it. Things + * will go a bit crazy if that happens, but at least we should + * try not to panic the whole kernel. */ + if (needed < 2) + needed = 2; + + /* But we need to bound the transaction so we don't overflow the + * journal. */ + if (needed > EXT4_MAX_TRANS_DATA) + needed = EXT4_MAX_TRANS_DATA; + + return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; +} + -- cgit v1.1 From dae1e52cb1267bf8f52e5e47a80fab566d7e8aa4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 27 Jun 2011 19:40:50 -0400 Subject: ext4: move ext4_ind_* functions from inode.c to indirect.c This patch moves functions from inode.c to indirect.c. The moved functions are ext4_ind_* functions and their helpers. Functions called from inode.c are declared extern. 
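Concretely, inode.c keeps calling the relocated helpers through the prototypes added to ext4.h (two of them reproduced here from the hunk below), while their definitions now live in the new fs/ext4/indirect.c that the Makefile change links into ext4.o:

/* fs/ext4/ext4.h (excerpt of the new "indirect.c" declarations) */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
			       struct ext4_map_blocks *map, int flags);
extern void ext4_ind_truncate(struct inode *inode);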
Signed-off-by: Amir Goldstein Signed-off-by: "Theodore Ts'o" --- fs/ext4/Makefile | 2 +- fs/ext4/block_validity.c | 1 + fs/ext4/ext4.h | 9 + fs/ext4/indirect.c | 1510 ++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/inode.c | 1486 --------------------------------------------- 5 files changed, 1521 insertions(+), 1487 deletions(-) create mode 100644 fs/ext4/indirect.c diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 04109460..56fd8f86 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ - mmp.o + mmp.o indirect.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index af103be..8efb2f0 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -266,3 +266,4 @@ int ext4_check_blockref(const char *function, unsigned int line, } return 0; } + diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 82ba7eb..ddaf504 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1834,6 +1834,15 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); + +/* indirect.c */ +extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags); +extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs); +extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); +extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); extern void ext4_ind_truncate(struct inode *inode); /* ioctl.c */ diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c new file mode 100644 index 0000000..c3e85a8 --- /dev/null +++ b/fs/ext4/indirect.c @@ -0,0 +1,1510 @@ +/* + * linux/fs/ext4/indirect.c + * + * from + * + * linux/fs/ext4/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie + * (sct@redhat.com), 1993, 1998 + */ + +#include +#include "ext4_jbd2.h" +#include "truncate.h" + +#include + +typedef struct { + __le32 *p; + __le32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +/** + * ext4_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. + * + * To store the locations of file's data ext4 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. 
+ * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. + */ + +static int ext4_block_to_path(struct inode *inode, + ext4_lblk_t i_block, + ext4_lblk_t offsets[4], int *boundary) +{ + int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT4_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + int final = 0; + + if (i_block < direct_blocks) { + offsets[n++] = i_block; + final = direct_blocks; + } else if ((i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT4_IND_BLOCK; + offsets[n++] = i_block; + final = ptrs; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT4_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT4_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else { + ext4_warning(inode->i_sb, "block %lu > max in inode %lu", + i_block + direct_blocks + + indirect_blocks + double_blocks, inode->i_ino); + } + if (boundary) + *boundary = final - 1 - (i_block & (ptrs - 1)); + return n; +} + +/** + * ext4_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) + * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. 
+ * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + * + * Need to be called with + * down_read(&EXT4_I(inode)->i_data_sem) + */ +static Indirect *ext4_get_branch(struct inode *inode, int depth, + ext4_lblk_t *offsets, + Indirect chain[4], int *err) +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = sb_getblk(sb, le32_to_cpu(p->key)); + if (unlikely(!bh)) + goto failure; + + if (!bh_uptodate_or_lock(bh)) { + if (bh_submit_read(bh) < 0) { + put_bh(bh); + goto failure; + } + /* validate block references */ + if (ext4_check_indirect_blockref(inode, bh)) { + put_bh(bh); + goto failure; + } + } + + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext4_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the preferred place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same + * cylinder group. + * + * In the latter case we colour the starting block by the callers PID to + * prevent it from clashing with concurrent allocations for a different inode + * in the same block group. The PID is used here so that functionally related + * files will be close-by on-disk. + * + * Caller must make sure that @ind is valid and will stay that way. + */ +static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; + __le32 *p; + ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; + ext4_grpblk_t colour; + ext4_group_t block_group; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) { + if (*p) + return le32_to_cpu(*p); + } + + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; + + /* + * It is going to be referred to from the inode itself? OK, just put it + * into the same cylinder group then. + */ + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + /* + * If we are doing delayed allocation, we don't need take + * colour into account. 
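
The chain walk in ext4_get_branch() above can be pictured with an in-memory model: each Indirect triple remembers where a pointer lives and what it held, and the walk stops early at the first zero pointer (a hole). The sketch below is a deliberately tiny stand-in with four-pointer "blocks" and a fake in-core "disk"; none of these names exist in the kernel.

/* toy_get_branch.c -- illustrative chain walk, not the kernel code */
#include <stdio.h>
#include <stdint.h>

#define PTRS 4   /* tiny "block" of 4 pointers so the example stays small */

struct indirect {              /* stand-in for the Indirect triple above  */
    uint32_t *p;               /* where the pointer lives                 */
    uint32_t  key;             /* the block number it holds               */
};

/* A fake two-level tree: "inode" slot 0 points at block 1, block 1 slot 2
 * points at data block 77. */
static uint32_t blk1[PTRS] = { 0, 0, 77, 0 };
static uint32_t *blocks_on_disk[] = { NULL, blk1 };
static uint32_t inode_i_data[PTRS] = { 1, 0, 0, 0 };

/* Fill chain[]; return index of first missing link, or depth if complete. */
static int get_branch(int depth, const int *offsets, struct indirect chain[4])
{
    uint32_t *cur = inode_i_data;
    int i;

    for (i = 0; i < depth; i++) {
        chain[i].p   = &cur[offsets[i]];
        chain[i].key = *chain[i].p;
        if (!chain[i].key)
            return i;                   /* hole: branch ends here         */
        if (i + 1 < depth)
            cur = blocks_on_disk[chain[i].key];   /* "read" next level    */
    }
    return depth;                       /* full chain down to the data    */
}

int main(void)
{
    struct indirect chain[4];
    int offsets[2] = { 0, 2 };          /* level-0 slot 0, level-1 slot 2 */
    int got = get_branch(2, offsets, chain);

    if (got == 2)
        printf("mapped: data block %u\n", chain[1].key);  /* prints 77    */
    else
        printf("hole at depth %d, allocation needed\n", got);
    return 0;
}
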
+ */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); + return bg_start + colour; +} + +/** + * ext4_find_goal - find a preferred place for allocation. + * @inode: owner + * @block: block we want + * @partial: pointer to the last triple within a chain + * + * Normally this function find the preferred place for block allocation, + * returns it. + * Because this is only used for non-extent files, we limit the block nr + * to 32 bits. + */ +static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, + Indirect *partial) +{ + ext4_fsblk_t goal; + + /* + * XXX need to get goal block from mballoc's data structures + */ + + goal = ext4_find_near(inode, partial); + goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; + return goal; +} + +/** + * ext4_blks_to_allocate - Look up the block map and count the number + * of direct blocks need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks need for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocate, including the + * direct and indirect blocks. + */ +static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, + int blocks_to_boundary) +{ + unsigned int count = 0; + + /* + * Simple case, [t,d]Indirect block(s) has not allocated yet + * then it's clear blocks on that path have not allocated + */ + if (k > 0) { + /* right now we don't handle cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary && + le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext4_alloc_blocks: multiple allocate blocks needed for a branch + * @handle: handle for this transaction + * @inode: inode which needs allocated blocks + * @iblock: the logical block to start allocated at + * @goal: preferred physical block of allocation + * @indirect_blks: the number of blocks need to allocate for indirect + * blocks + * @blks: number of desired blocks + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @err: on return it will store the error code + * + * This function will return the number of blocks allocated as + * requested by the passed-in parameters. + */ +static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, ext4_fsblk_t goal, + int indirect_blks, int blks, + ext4_fsblk_t new_blocks[4], int *err) +{ + struct ext4_allocation_request ar; + int target, i; + unsigned long count = 0, blk_allocated = 0; + int index = 0; + ext4_fsblk_t current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. 
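
ext4_blks_to_allocate() above only counts; the rule is easier to see in isolation. The sketch below reproduces that counting for an assumed 8-slot leaf block with made-up block numbers; it is not the kernel function, just the same arithmetic.

/* toy_blks_to_allocate.c -- illustrative count of direct blocks to allocate */
#include <stdio.h>
#include <stdint.h>

/*
 * Same rule as above: if indirect blocks are still missing (k > 0) we can
 * only take up to the boundary; otherwise we extend the run until we hit
 * the boundary, the request length, or an already-mapped slot.
 */
static unsigned blks_to_allocate(int k, unsigned blks,
                                 int blocks_to_boundary,
                                 const uint32_t *direct_slots)
{
    unsigned count = 0;

    if (k > 0)
        return blks < (unsigned)blocks_to_boundary + 1 ?
               blks : (unsigned)blocks_to_boundary + 1;

    count++;
    while (count < blks && count <= (unsigned)blocks_to_boundary &&
           direct_slots[count] == 0)
        count++;
    return count;
}

int main(void)
{
    /* slot 0 is the one being mapped; slot 3 is already in use */
    uint32_t slots[8] = { 0, 0, 0, 555, 0, 0, 0, 0 };

    printf("missing indirect, want 10, boundary 3 -> %u\n",
           blks_to_allocate(1, 10, 3, slots));          /* 4             */
    printf("leaf present,    want 10, boundary 7 -> %u\n",
           blks_to_allocate(0, 10, 7, slots));          /* 3 (hits 555)  */
    return 0;
}
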
That's the + * minimum number of blocks need to allocate(required) + */ + /* first we try to allocate the indirect blocks */ + target = indirect_blks; + while (target > 0) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext4_new_meta_blocks(handle, inode, goal, + 0, &count, err); + if (*err) + goto failed_out; + + if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + count %lu > %d!", + current_block, count, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + if (count > 0) { + /* + * save the new block number + * for the first direct block + */ + new_blocks[index] = current_block; + printk(KERN_INFO "%s returned more blocks than " + "requested\n", __func__); + WARN_ON(1); + break; + } + } + + target = blks - count ; + blk_allocated = count; + if (!target) + goto allocated; + /* Now allocate data blocks */ + memset(&ar, 0, sizeof(ar)); + ar.inode = inode; + ar.goal = goal; + ar.len = target; + ar.logical = iblock; + if (S_ISREG(inode->i_mode)) + /* enable in-core preallocation only for regular files */ + ar.flags = EXT4_MB_HINT_DATA; + + current_block = ext4_mb_new_blocks(handle, &ar, err); + if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + ar.len %d > %d!", + current_block, ar.len, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } + + if (*err && (target == blks)) { + /* + * if the allocation failed and we didn't allocate + * any blocks before + */ + goto failed_out; + } + if (!*err) { + if (target == blks) { + /* + * save the new block number + * for the first direct block + */ + new_blocks[index] = current_block; + } + blk_allocated += ar.len; + } +allocated: + /* total number of blocks allocated for direct blocks */ + ret = blk_allocated; + *err = 0; + return ret; +failed_out: + for (i = 0; i < index; i++) + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); + return ret; +} + +/** + * ext4_alloc_branch - allocate and set up a chain of blocks. + * @handle: handle for this transaction + * @inode: owner + * @indirect_blks: number of allocated indirect blocks + * @blks: number of allocated direct blocks + * @goal: preferred place for allocation + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext4_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). Upon the exit we have the same + * picture as after the successful ext4_get_block(), except that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. + * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value the from failed + * ext4_alloc_block() (normally -ENOSPC). 
Otherwise we set the chain + * as described above and return 0. + */ +static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + ext4_lblk_t iblock, int indirect_blks, + int *blks, ext4_fsblk_t goal, + ext4_lblk_t *offsets, Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int i, n = 0; + int err = 0; + struct buffer_head *bh; + int num; + ext4_fsblk_t new_blocks[4]; + ext4_fsblk_t current_block; + + num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; + + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. + */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + if (unlikely(!bh)) { + err = -EIO; + goto failed; + } + + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext4_journal_get_create_access(handle, bh); + if (err) { + /* Don't brelse(bh) here; it's done in + * ext4_journal_forget() below */ + unlock_buffer(bh); + goto failed; + } + + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; + if (n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i = 1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (err) + goto failed; + } + *blks = num; + return err; +failed: + /* Allocation failed, free what we already allocated */ + ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); + for (i = 1; i <= n ; i++) { + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, + EXT4_FREE_BLOCKS_FORGET); + } + for (i = n+1; i < indirect_blks; i++) + ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); + + ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); + + return err; +} + +/** + * ext4_splice_branch - splice the allocated branch onto inode. + * @handle: handle for this transaction + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext4_alloc_branch) + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding + * + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. + */ +static int ext4_splice_branch(handle_t *handle, struct inode *inode, + ext4_lblk_t block, Indirect *where, int num, + int blks) +{ + int i; + int err = 0; + ext4_fsblk_t current_block; + + /* + * If we're splicing into a [td]indirect block (as opposed to the + * inode) then we need to get write access to the [td]indirect block + * before the splice. 
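
The linking step in ext4_alloc_branch() -- zero the fresh metadata block, store the key for the next level at its offset, and let the last metadata block record the run of data blocks -- can be modelled without buffer_heads or journalling. The sketch below uses tiny four-pointer blocks and invented block numbers purely for illustration.

/* toy_alloc_branch.c -- sketch of wiring freshly allocated blocks together */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define PTRS 4                      /* tiny indirect block: 4 pointers */

int main(void)
{
    /* Pretend the allocator handed back 2 indirect blocks (200, 201) and a
     * run of 3 data blocks starting at 501; all numbers are made up. */
    uint32_t new_blocks[3] = { 200, 201, 501 };
    int indirect_blks = 2, num_data = 3;
    /* offsets[0] is the inode slot; the splice step fills that one later */
    int offsets[3] = { 13, 1, 0 };
    uint32_t branch_blk[2][PTRS];   /* in-memory stand-ins for the buffers */

    for (int n = 1; n <= indirect_blks; n++) {
        uint32_t *buf = branch_blk[n - 1];

        memset(buf, 0, sizeof(branch_blk[n - 1]));   /* zero the new block */
        buf[offsets[n]] = new_blocks[n];             /* link to next level */
        if (n == indirect_blks) {
            /* last metablock also records the run of data block numbers */
            for (int i = 1; i < num_data; i++)
                buf[offsets[n] + i] = new_blocks[n] + i;
        }
    }

    for (int n = 0; n < indirect_blks; n++) {
        printf("indirect block %u:", new_blocks[n]);
        for (int i = 0; i < PTRS; i++)
            printf(" %u", branch_blk[n][i]);
        printf("\n");
    }
    return 0;
}
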
+ */ + if (where->bh) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, where->bh); + if (err) + goto err_out; + } + /* That's it */ + + *where->p = where->key; + + /* + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key) + 1; + for (i = 1; i < blks; i++) + *(where->p + i) = cpu_to_le32(current_block++); + } + + /* We are done with atomic stuff, now do the rest of housekeeping */ + /* had we spliced it onto indirect block? */ + if (where->bh) { + /* + * If we spliced it onto an indirect block, we haven't + * altered the inode. Note however that if it is being spliced + * onto an indirect block at the very end of the file (the + * file is growing) then we *will* alter the inode to reflect + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + */ + ext4_mark_inode_dirty(handle, inode); + jbd_debug(5, "splicing direct\n"); + } + return err; + +err_out: + for (i = 1; i <= num; i++) { + /* + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); + } + ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), + blks, 0); + + return err; +} + +/* + * The ext4_ind_map_blocks() function handles non-extents inodes + * (i.e., using the traditional indirect/double-indirect i_blocks + * scheme) for ext4_map_blocks(). + * + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + * + * `handle' can be NULL if create == 0. + * + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. + * + * The ext4_ind_get_blocks() function should be called with + * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem + * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or + * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system + * blocks. 
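
The splice itself is a single store plus, for a run of direct blocks hanging off the inode, a loop that fills the following slots. A minimal userspace rendering of that step follows, with invented block numbers; error handling and journalling are deliberately omitted.

/* toy_splice_branch.c -- sketch of filling the missing link */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Direct-block case: the chain's "missing link" is inode slot 5 and we
     * just allocated 3 contiguous data blocks starting at 777. */
    uint32_t i_data[12] = { 0 };
    struct { uint32_t *p; uint32_t key; } where = { &i_data[5], 777 };
    int num = 0, blks = 3;

    *where.p = where.key;                    /* the actual splice          */
    if (num == 0 && blks > 1)                /* extra direct blocks follow */
        for (int i = 1; i < blks; i++)
            *(where.p + i) = where.key + i;

    for (int i = 0; i < 12; i++)
        printf("i_data[%2d] = %u\n", i, i_data[i]);
    return 0;
}
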
+ */ +int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + int flags) +{ + int err = -EIO; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; + ext4_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; + int count = 0; + ext4_fsblk_t first_block = 0; + + trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); + J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + depth = ext4_block_to_path(inode, map->m_lblk, offsets, + &blocks_to_boundary); + + if (depth == 0) + goto out; + + partial = ext4_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { + first_block = le32_to_cpu(chain[depth - 1].key); + count++; + /*map more blocks*/ + while (count < map->m_len && count <= blocks_to_boundary) { + ext4_fsblk_t blk; + + blk = le32_to_cpu(*(chain[depth-1].p + count)); + + if (blk == first_block + count) + count++; + else + break; + } + goto got_it; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) + goto cleanup; + + /* + * Okay, we need to do block allocation. + */ + goal = ext4_find_goal(inode, map->m_lblk, partial); + + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + + /* + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ + count = ext4_blks_to_allocate(partial, indirect_blks, + map->m_len, blocks_to_boundary); + /* + * Block out ext4_truncate while we alter the tree + */ + err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, + &count, goal, + offsets + (partial - chain), partial); + + /* + * The ext4_splice_branch call will free and forget any buffers + * on the new chain if there is a failure, but that risks using + * up transaction credits, especially for bitmaps where the + * credits cannot be returned. Can we handle this somehow? We + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) + err = ext4_splice_branch(handle, inode, map->m_lblk, + partial, indirect_blks, count); + if (err) + goto cleanup; + + map->m_flags |= EXT4_MAP_NEW; + + ext4_update_inode_fsync_trans(handle, inode, 1); +got_it: + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); + map->m_len = count; + if (count > blocks_to_boundary) + map->m_flags |= EXT4_MAP_BOUNDARY; + err = count; + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } +out: + trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, + map->m_pblk, map->m_len, err); + return err; +} + +/* + * O_DIRECT for ext3 (or indirect map) based files + * + * If the O_DIRECT write will extend the file then add this inode to the + * orphan list. So recovery will truncate it back to the original size + * if the machine crashes during the write. + * + * If the O_DIRECT write is intantiating holes inside i_size and the machine + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. 
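
Inside ext4_ind_map_blocks(), the "map more blocks" loop simply extends the mapping while the next leaf pointer continues the same physical run, bounded by the request length and the branch boundary. The sketch below shows just that loop over a made-up leaf block; it is a model, not the kernel code.

/* toy_map_count.c -- sketch of extending a mapping across contiguous leaves */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Leaf pointers as they might sit in the last indirect block. */
    uint32_t leaf[8] = { 900, 901, 902, 950, 0, 0, 0, 0 };
    unsigned m_len = 6;             /* caller asked for up to 6 blocks  */
    int blocks_to_boundary = 7;     /* room left before the next branch */
    uint32_t first_block = leaf[0];
    unsigned count = 1;

    while (count < m_len && count <= (unsigned)blocks_to_boundary &&
           leaf[count] == first_block + count)
        count++;

    printf("mapped %u block(s): %u..%u\n", count,
           first_block, first_block + count - 1);   /* 3 blocks: 900..902 */
    return 0;
}
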
+ */ +ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct ext4_inode_info *ei = EXT4_I(inode); + handle_t *handle; + ssize_t ret; + int orphan = 0; + size_t count = iov_length(iov, nr_segs); + int retries = 0; + + if (rw == WRITE) { + loff_t final_size = offset + count; + + if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = ext4_orphan_add(handle, inode); + if (ret) { + ext4_journal_stop(handle); + goto out; + } + orphan = 1; + ei->i_disksize = inode->i_size; + ext4_journal_stop(handle); + } + } + +retry: + if (rw == READ && ext4_should_dioread_nolock(inode)) + ret = __blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL, NULL, 0); + else { + ret = blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL); + + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + ext4_truncate_failed_write(inode); + } + } + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + + if (orphan) { + int err; + + /* Credits for sb + inode write */ + handle = ext4_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... */ + ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + + goto out; + } + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + if (ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext4_mark_inode_dirty() to userspace. So + * ignore it. 
+ */ + ext4_mark_inode_dirty(handle, inode); + } + } + err = ext4_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + +/* + * Calculate the number of metadata blocks need to reserve + * to allocate a new block at @lblocks for non extent file based file + */ +int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); + int blk_bits; + + if (lblock < EXT4_NDIR_BLOCKS) + return 0; + + lblock -= EXT4_NDIR_BLOCKS; + + if (ei->i_da_metadata_calc_len && + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { + ei->i_da_metadata_calc_len++; + return 0; + } + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; + ei->i_da_metadata_calc_len = 1; + blk_bits = order_base_2(lblock); + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; +} + +int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + int indirects; + + /* if nrblocks are contiguous */ + if (chunk) { + /* + * With N contiguous data blocks, we need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, + * 2 dindirect blocks, and 1 tindirect block + */ + return DIV_ROUND_UP(nrblocks, + EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; + } + /* + * if nrblocks are not contiguous, worse case, each block touch + * a indirect block, and each indirect block touch a double indirect + * block, plus a triple indirect block + */ + indirects = nrblocks * 2 + 1; + return indirects; +} + +/* + * Truncate transactions can be complex and absolutely huge. So we need to + * be able to restart the transaction at a conventient checkpoint to make + * sure we don't overflow the journal. + * + * start_transaction gets us a new handle for a truncate transaction, + * and extend_transaction tries to extend the existing one a bit. If + * extend fails, we need to propagate the failure up and restart the + * transaction in the top-level truncate loop. --sct + */ +static handle_t *start_transaction(struct inode *inode) +{ + handle_t *result; + + result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); + if (!IS_ERR(result)) + return result; + + ext4_std_error(inode->i_sb, PTR_ERR(result)); + return result; +} + +/* + * Try to extend this transaction for the purposes of truncation. + * + * Returns 0 if we managed to create more room. If we can't create more + * room, and the transaction must be restarted we return 1. + */ +static int try_to_extend_transaction(handle_t *handle, struct inode *inode) +{ + if (!ext4_handle_valid(handle)) + return 0; + if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) + return 0; + if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) + return 0; + return 1; +} + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(__le32 *p, __le32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext4_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext4_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext4_truncate(). 
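
The two estimates above -- metadata to reserve for one new block, and journal credits per chunk -- are plain arithmetic, so a compilable model helps sanity-check them. The sketch below assumes a 4 KiB block size and skips the per-inode caching done via i_da_metadata_calc_*; the helper names are invented for the example.

/* toy_ind_worstcase.c -- sketch of the two worst-case metadata estimates */
#include <stdio.h>

#define ADDR_PER_BLOCK      1024   /* 4 KiB block / 4-byte pointer (assumed) */
#define ADDR_PER_BLOCK_BITS 10
#define NDIR_BLOCKS         12

/* order_base_2(): smallest n with 2^n >= x, like the kernel helper */
static int order_base_2(unsigned long x)
{
    int n = 0;
    while ((1UL << n) < x)
        n++;
    return n;
}

/* Rough model of ext4_ind_calc_metadata_amount(), minus the per-inode cache */
static int metadata_for_new_block(unsigned long lblock)
{
    if (lblock < NDIR_BLOCKS)
        return 0;
    lblock -= NDIR_BLOCKS;
    return order_base_2(lblock) / ADDR_PER_BLOCK_BITS + 1;
}

/* Model of ext4_ind_trans_blocks() */
static int trans_blocks(int nrblocks, int contiguous)
{
    if (contiguous)
        return (nrblocks + ADDR_PER_BLOCK - 1) / ADDR_PER_BLOCK + 4;
    return nrblocks * 2 + 1;
}

int main(void)
{
    printf("new block at lblock 5    -> %d metadata block(s)\n",
           metadata_for_new_block(5));       /* 0: still a direct slot   */
    printf("new block at lblock 2000 -> %d metadata block(s)\n",
           metadata_for_new_block(2000));    /* 2: dindirect + indirect  */
    printf("credits, 2048 contiguous -> %d\n", trans_blocks(2048, 1));
    printf("credits, 16 scattered    -> %d\n", trans_blocks(16, 0));
    return 0;
}
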
+ * + * When we do truncate() we may have to clean the ends of several + * indirect blocks but leave the blocks themselves alive. Block is + * partially truncated if some data below the new i_size is referred + * from it (and it is on the path to the first completely truncated + * data block, indeed). We have to free the top of that path along + * with everything to the right of the path. Since no allocation + * past the truncation point is possible until ext4_truncate() + * finishes, we may safely do the latter, but top of branch may + * require special attention - pageout below the truncation point + * might try to populate it. + * + * We atomically detach the top of branch from the tree, store the + * block number of its root in *@top, pointers to buffer_heads of + * partially truncated blocks - in @chain[].bh and pointers to + * their last elements that should not be removed - in + * @chain[].p. Return value is the pointer to last filled element + * of @chain. + * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0]. + * (no partially truncated stuff there). */ + +static Indirect *ext4_find_shared(struct inode *inode, int depth, + ext4_lblk_t offsets[4], Indirect chain[4], + __le32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + /* Make k index the deepest non-null offset + 1 */ + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext4_get_branch(inode, k, offsets, chain, &err); + /* Writer: pointers */ + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. + */ + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; + for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + /* Nope, don't do this in ext4. Must leave the tree intact */ +#if 0 + *p->p = 0; +#endif + } + /* Writer: end */ + + while (partial > p) { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/* + * Zero a number of block pointers in either an inode or an indirect block. + * If we restart the transaction we must again get write access to the + * indirect block for further modification. + * + * We release `count' blocks on disk, but (last - first) may be greater + * than `count' because there can be holes in there. + * + * Return 0 on success, 1 on invalid block range + * and < 0 on fatal error. 
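
The decision ext4_find_shared() makes with all_zeroes() -- detach the whole branch only if nothing to the left of the truncation point is still in use -- boils down to a scan over one pointer block. The sketch below shows the two possible outcomes on invented data; it is a conceptual model, not the kernel routine.

/* toy_find_shared.c -- sketch of the "is the rest of this block empty?" test */
#include <stdio.h>
#include <stdint.h>

/* same idea as all_zeroes(): 1 if every word in [p, q) is zero */
static int all_zeroes(const uint32_t *p, const uint32_t *q)
{
    while (p < q)
        if (*p++)
            return 0;
    return 1;
}

int main(void)
{
    /* An indirect block being partially truncated at slot 2. */
    uint32_t blk_a[6] = { 0, 0, 830, 831, 832, 833 };   /* head is empty   */
    uint32_t blk_b[6] = { 810, 0, 830, 831, 832, 833 }; /* slot 0 survives */

    /* If nothing before the truncation point is in use, the whole block can
     * be detached and freed; otherwise only the tail entries are cleared. */
    printf("block A: %s\n", all_zeroes(blk_a, blk_a + 2) ?
           "detach whole branch" : "keep block, clear tail only");
    printf("block B: %s\n", all_zeroes(blk_b, blk_b + 2) ?
           "detach whole branch" : "keep block, clear tail only");
    return 0;
}
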
+ */ +static int ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, + ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, + __le32 *last) +{ + __le32 *p; + int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; + int err; + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + flags |= EXT4_FREE_BLOCKS_METADATA; + + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, + count)) { + EXT4_ERROR_INODE(inode, "attempt to clear invalid " + "blocks %llu len %lu", + (unsigned long long) block_to_free, count); + return 1; + } + + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (unlikely(err)) + goto out_err; + } + err = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err)) + goto out_err; + err = ext4_truncate_restart_trans(handle, inode, + ext4_blocks_for_truncate(inode)); + if (unlikely(err)) + goto out_err; + if (bh) { + BUFFER_TRACE(bh, "retaking write access"); + err = ext4_journal_get_write_access(handle, bh); + if (unlikely(err)) + goto out_err; + } + } + + for (p = first; p < last; p++) + *p = 0; + + ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); + return 0; +out_err: + ext4_std_error(inode->i_sb, err); + return err; +} + +/** + * ext4_free_data - free a list of data blocks + * @handle: handle for this transaction + * @inode: inode we are dealing with + * @this_bh: indirect buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: points immediately past the end of array + * + * We are freeing all blocks referred from that array (numbers are stored as + * little-endian 32-bit) and updating @inode->i_blocks appropriately. + * + * We accumulate contiguous runs of blocks to free. Conveniently, if these + * blocks are contiguous then releasing them at one time will only affect one + * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't + * actually use a lot of journal space. + * + * @this_bh will be %NULL if @first and @last point into the inode's direct + * block pointers. + */ +static void ext4_free_data(handle_t *handle, struct inode *inode, + struct buffer_head *this_bh, + __le32 *first, __le32 *last) +{ + ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ + __le32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + ext4_fsblk_t nr; /* Current block # */ + __le32 *p; /* Pointer into inode/ind + for current block */ + int err = 0; + + if (this_bh) { /* For indirect block */ + BUFFER_TRACE(this_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, this_bh); + /* Important: if we can't update the indirect pointers + * to the blocks, we can't free them. 
*/ + if (err) + return; + } + + for (p = first; p < last; p++) { + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ + if (count == 0) { + block_to_free = nr; + block_to_free_p = p; + count = 1; + } else if (nr == block_to_free + count) { + count++; + } else { + err = ext4_clear_blocks(handle, inode, this_bh, + block_to_free, count, + block_to_free_p, p); + if (err) + break; + block_to_free = nr; + block_to_free_p = p; + count = 1; + } + } + } + + if (!err && count > 0) + err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, + count, block_to_free_p, p); + if (err < 0) + /* fatal error */ + return; + + if (this_bh) { + BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); + + /* + * The buffer head should have an attached journal head at this + * point. However, if the data is corrupted and an indirect + * block pointed to itself, it would have been detached when + * the block was cleared. Check for this instead of OOPSing. + */ + if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) + ext4_handle_dirty_metadata(handle, inode, this_bh); + else + EXT4_ERROR_INODE(inode, + "circular indirect block detected at " + "block %llu", + (unsigned long long) this_bh->b_blocknr); + } +} + +/** + * ext4_free_branches - free an array of branches + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @parent_bh: the buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks referred from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext4_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, + __le32 *first, __le32 *last, int depth) +{ + ext4_fsblk_t nr; + __le32 *p; + + if (ext4_handle_is_aborted(handle)) + return; + + if (depth--) { + struct buffer_head *bh; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + p = last; + while (--p >= first) { + nr = le32_to_cpu(*p); + if (!nr) + continue; /* A hole */ + + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), + nr, 1)) { + EXT4_ERROR_INODE(inode, + "invalid indirect mapped " + "block %lu (level %d)", + (unsigned long) nr, depth); + break; + } + + /* Go read the buffer for the next level down */ + bh = sb_bread(inode->i_sb, nr); + + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + EXT4_ERROR_INODE_BLOCK(inode, nr, + "Read failure"); + continue; + } + + /* This zaps the entire block. Bottom up. */ + BUFFER_TRACE(bh, "free child branches"); + ext4_free_branches(handle, inode, bh, + (__le32 *) bh->b_data, + (__le32 *) bh->b_data + addr_per_block, + depth); + brelse(bh); + + /* + * Everything below this this pointer has been + * released. Now let this top-of-subtree go. + * + * We want the freeing of this indirect block to be + * atomic in the journal with the updating of the + * bitmap block which owns it. So make some room in + * the journal. + * + * We zero the parent pointer *after* freeing its + * pointee in the bitmaps, so if extend_transaction() + * for some reason fails to put the bitmap changes and + * the release into the same transaction, recovery + * will merely complain about releasing a free block, + * rather than leaking blocks. 
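
ext4_free_data() frees in batches: it grows a run while pointers stay physically contiguous and flushes it whenever the run breaks or the scan ends. The same accumulation, minus transactions and bitmaps, looks like the sketch below (block numbers invented, free_run() is a stand-in for ext4_clear_blocks()).

/* toy_free_data.c -- sketch of batching contiguous blocks before freeing */
#include <stdio.h>
#include <stdint.h>

static void free_run(uint32_t start, unsigned long count)
{
    printf("free %lu block(s) starting at %u\n", count, start);
}

int main(void)
{
    /* Block pointers as they might appear in an indirect block being freed. */
    uint32_t p[] = { 300, 301, 302, 0, 500, 501, 90, 0 };
    unsigned n = sizeof(p) / sizeof(p[0]);
    uint32_t run_start = 0;
    unsigned long count = 0;

    for (unsigned i = 0; i < n; i++) {
        if (!p[i])
            continue;                       /* hole: nothing to free      */
        if (count == 0) {
            run_start = p[i];
            count = 1;
        } else if (p[i] == run_start + count) {
            count++;                        /* extends the current run    */
        } else {
            free_run(run_start, count);     /* flush and start a new run  */
            run_start = p[i];
            count = 1;
        }
    }
    if (count)
        free_run(run_start, count);
    return 0;
}
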
+ */ + if (ext4_handle_is_aborted(handle)) + return; + if (try_to_extend_transaction(handle, inode)) { + ext4_mark_inode_dirty(handle, inode); + ext4_truncate_restart_trans(handle, inode, + ext4_blocks_for_truncate(inode)); + } + + /* + * The forget flag here is critical because if + * we are journaling (and not doing data + * journaling), we have to make sure a revoke + * record is written to prevent the journal + * replay from overwriting the (former) + * indirect block if it gets reallocated as a + * data block. This must happen in the same + * transaction where the data blocks are + * actually freed. + */ + ext4_free_blocks(handle, inode, NULL, nr, 1, + EXT4_FREE_BLOCKS_METADATA| + EXT4_FREE_BLOCKS_FORGET); + + if (parent_bh) { + /* + * The block which we have just freed is + * pointed to by an indirect block: journal it + */ + BUFFER_TRACE(parent_bh, "get_write_access"); + if (!ext4_journal_get_write_access(handle, + parent_bh)){ + *p = 0; + BUFFER_TRACE(parent_bh, + "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, + inode, + parent_bh); + } + } + } + } else { + /* We have reached the bottom of the tree. */ + BUFFER_TRACE(parent_bh, "free data blocks"); + ext4_free_data(handle, inode, parent_bh, first, last); + } +} + +void ext4_ind_truncate(struct inode *inode) +{ + handle_t *handle; + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *i_data = ei->i_data; + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; + __le32 nr = 0; + int n = 0; + ext4_lblk_t last_block, max_block; + unsigned blocksize = inode->i_sb->s_blocksize; + + handle = start_transaction(inode); + if (IS_ERR(handle)) + return; /* AKPM: return what? */ + + last_block = (inode->i_size + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + + if (inode->i_size & (blocksize - 1)) + if (ext4_block_truncate_page(handle, mapping, inode->i_size)) + goto out_stop; + + if (last_block != max_block) { + n = ext4_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + } + + /* + * OK. This truncate is going to happen. We add the inode to the + * orphan list, so that if this truncate spans multiple transactions, + * and we crash, we will resume the truncate when the filesystem + * recovers. It also marks the inode dirty, to catch the new size. + * + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. + */ + if (ext4_orphan_add(handle, inode)) + goto out_stop; + + /* + * From here we block out all ext4_get_block() callers who want to + * modify the block allocation tree. + */ + down_write(&ei->i_data_sem); + + ext4_discard_preallocations(inode); + + /* + * The orphan list entry will now protect us from any crash which + * occurs before the truncate completes, so it is now safe to propagate + * the new, shorter inode size (held for now in i_size) into the + * on-disk inode. We do this via i_disksize, which is the value which + * ext4 *really* writes onto the disk inode. + */ + ei->i_disksize = inode->i_size; + + if (last_block == max_block) { + /* + * It is unnecessary to free any data blocks if last_block is + * equal to the indirect block limit. 
+ */ + goto out_unlock; + } else if (n == 1) { /* direct blocks */ + ext4_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT4_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext4_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (not detached) */ + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext4_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + /* + * We mark the inode dirty prior to restart, + * and prior to stop. No need for it here. + */ + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext4_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); + } + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext4_free_branches(handle, inode, partial->bh, partial->p + 1, + (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT4_IND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT4_IND_BLOCK] = 0; + } + case EXT4_IND_BLOCK: + nr = i_data[EXT4_DIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT4_DIND_BLOCK] = 0; + } + case EXT4_DIND_BLOCK: + nr = i_data[EXT4_TIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT4_TIND_BLOCK] = 0; + } + case EXT4_TIND_BLOCK: + ; + } + +out_unlock: + up_write(&ei->i_data_sem); + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + ext4_mark_inode_dirty(handle, inode); + + /* + * In a multi-transaction truncate, we only make the final transaction + * synchronous + */ + if (IS_SYNC(inode)) + ext4_handle_sync(handle); +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext4_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + + ext4_journal_stop(handle); + trace_ext4_truncate_exit(inode); +} + diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9b82ac7..de50b16 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -12,10 +12,6 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * - * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) * @@ -90,45 +86,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) } /* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. - * - * start_transaction gets us a new handle for a truncate transaction, - * and extend_transaction tries to extend the existing one a bit. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. 
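
The do_indirects switch at the end of ext4_ind_truncate() relies on deliberate fall-through: whichever branch holds the new end of file is handled by the partial-truncation code, and every deeper tree is freed wholesale. The sketch below mimics only that dispatch, with invented block numbers and a stub in place of ext4_free_branches().

/* toy_truncate_tail.c -- sketch of the fall-through that kills whole trees */
#include <stdio.h>
#include <stdint.h>

enum { IND_SLOT = 12, DIND_SLOT = 13, TIND_SLOT = 14 };

static void free_subtree(const char *name, uint32_t root, int depth)
{
    if (root)
        printf("free %s subtree rooted at %u (depth %d)\n", name, root, depth);
}

int main(void)
{
    /* i_data tail for an inode whose truncation point landed in the
     * single-indirect region, so offsets[0] == IND_SLOT. */
    uint32_t i_data[15] = { 0 };
    int offsets0 = IND_SLOT;

    i_data[IND_SLOT]  = 4000;
    i_data[DIND_SLOT] = 4100;
    i_data[TIND_SLOT] = 4200;

    /* Everything *after* the branch holding the new EOF goes away wholesale;
     * the deliberate fall-through mirrors the switch above. */
    switch (offsets0) {
    default:
        free_subtree("indirect", i_data[IND_SLOT], 1);
        i_data[IND_SLOT] = 0;
        /* fall through */
    case IND_SLOT:
        free_subtree("double-indirect", i_data[DIND_SLOT], 2);
        i_data[DIND_SLOT] = 0;
        /* fall through */
    case DIND_SLOT:
        free_subtree("triple-indirect", i_data[TIND_SLOT], 3);
        i_data[TIND_SLOT] = 0;
        /* fall through */
    case TIND_SLOT:
        ;
    }
    return 0;
}
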
--sct - */ -static handle_t *start_transaction(struct inode *inode) -{ - handle_t *result; - - result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; - - ext4_std_error(inode->i_sb, PTR_ERR(result)); - return result; -} - -/* - * Try to extend this transaction for the purposes of truncation. - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. - */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -{ - if (!ext4_handle_valid(handle)) - return 0; - if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) - return 0; - if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) - return 0; - return 1; -} - -/* * Restart the transaction associated with *handle. This does a commit, * so before we call here everything must be consistently dirtied against * this transaction. @@ -251,760 +208,6 @@ no_delete: ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } -typedef struct { - __le32 *p; - __le32 key; - struct buffer_head *bh; -} Indirect; - -static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -{ - p->key = *(p->p = v); - p->bh = bh; -} - -/** - * ext4_block_to_path - parse the block number into array of offsets - * @inode: inode in question (we are only interested in its superblock) - * @i_block: block number to be parsed - * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. - * - * To store the locations of file's data ext4 uses a data structure common - * for UNIX filesystems - tree of pointers anchored in the inode, with - * data blocks at leaves and indirect blocks in intermediate nodes. - * This function translates the block number into path in that tree - - * return value is the path length and @offsets[n] is the offset of - * pointer to (n+1)th node in the nth one. If @block is out of range - * (negative or too large) warning is printed and zero returned. - * - * Note: function doesn't find node addresses, so no IO is needed. All - * we need to know is the capacity of indirect blocks (taken from the - * inode->i_sb). - */ - -/* - * Portability note: the last comparison (check that we fit into triple - * indirect block) is spelled differently, because otherwise on an - * architecture with 32-bit longs and 8Kb pages we might get into trouble - * if our filesystem had 8Kb blocks. We might use long long, but that would - * kill us on x86. Oh, well, at least the sign propagation does not matter - - * i_block would have to be negative in the very beginning, so we would not - * get there at all. 
- */ - -static int ext4_block_to_path(struct inode *inode, - ext4_lblk_t i_block, - ext4_lblk_t offsets[4], int *boundary) -{ - int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT4_NDIR_BLOCKS, - indirect_blocks = ptrs, - double_blocks = (1 << (ptrs_bits * 2)); - int n = 0; - int final = 0; - - if (i_block < direct_blocks) { - offsets[n++] = i_block; - final = direct_blocks; - } else if ((i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT4_IND_BLOCK; - offsets[n++] = i_block; - final = ptrs; - } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT4_DIND_BLOCK; - offsets[n++] = i_block >> ptrs_bits; - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT4_TIND_BLOCK; - offsets[n++] = i_block >> (ptrs_bits * 2); - offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else { - ext4_warning(inode->i_sb, "block %lu > max in inode %lu", - i_block + direct_blocks + - indirect_blocks + double_blocks, inode->i_ino); - } - if (boundary) - *boundary = final - 1 - (i_block & (ptrs - 1)); - return n; -} - -/** - * ext4_get_branch - read the chain of indirect blocks leading to data - * @inode: inode in question - * @depth: depth of the chain (1 - direct pointer, etc.) - * @offsets: offsets of pointers in inode/indirect blocks - * @chain: place to store the result - * @err: here we store the error value - * - * Function fills the array of triples and returns %NULL - * if everything went OK or the pointer to the last filled triple - * (incomplete one) otherwise. Upon the return chain[i].key contains - * the number of (i+1)-th block in the chain (as it is stored in memory, - * i.e. little-endian 32-bit), chain[i].p contains the address of that - * number (it points into struct inode for i==0 and into the bh->b_data - * for i>0) and chain[i].bh points to the buffer_head of i-th indirect - * block for i>0 and NULL for i==0. In other words, it holds the block - * numbers of the chain, addresses they were taken from (and where we can - * verify that chain did not change) and buffer_heads hosting these - * numbers. - * - * Function stops when it stumbles upon zero pointer (absent block) - * (pointer to last triple returned, *@err == 0) - * or when it gets an IO error reading an indirect block - * (ditto, *@err == -EIO) - * or when it reads all @depth-1 indirect blocks successfully and finds - * the whole chain, all way to the data (returns %NULL, *err == 0). 
- * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) - */ -static Indirect *ext4_get_branch(struct inode *inode, int depth, - ext4_lblk_t *offsets, - Indirect chain[4], int *err) -{ - struct super_block *sb = inode->i_sb; - Indirect *p = chain; - struct buffer_head *bh; - - *err = 0; - /* i_data is not going away, no lock needed */ - add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { - bh = sb_getblk(sb, le32_to_cpu(p->key)); - if (unlikely(!bh)) - goto failure; - - if (!bh_uptodate_or_lock(bh)) { - if (bh_submit_read(bh) < 0) { - put_bh(bh); - goto failure; - } - /* validate block references */ - if (ext4_check_indirect_blockref(inode, bh)) { - put_bh(bh); - goto failure; - } - } - - add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); - /* Reader: end */ - if (!p->key) - goto no_block; - } - return NULL; - -failure: - *err = -EIO; -no_block: - return p; -} - -/** - * ext4_find_near - find a place for allocation with sufficient locality - * @inode: owner - * @ind: descriptor of indirect block. - * - * This function returns the preferred place for block allocation. - * It is used when heuristic for sequential allocation fails. - * Rules are: - * + if there is a block to the left of our position - allocate near it. - * + if pointer will live in indirect block - allocate near that block. - * + if pointer will live in inode - allocate in the same - * cylinder group. - * - * In the latter case we colour the starting block by the callers PID to - * prevent it from clashing with concurrent allocations for a different inode - * in the same block group. The PID is used here so that functionally related - * files will be close-by on-disk. - * - * Caller must make sure that @ind is valid and will stay that way. - */ -static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; - __le32 *p; - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - ext4_grpblk_t colour; - ext4_group_t block_group; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); - - /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) { - if (*p) - return le32_to_cpu(*p); - } - - /* No such thing, so let's try location of indirect block */ - if (ind->bh) - return ind->bh->b_blocknr; - - /* - * It is going to be referred to from the inode itself? OK, just put it - * into the same cylinder group then. - */ - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. - */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour; -} - -/** - * ext4_find_goal - find a preferred place for allocation. - * @inode: owner - * @block: block we want - * @partial: pointer to the last triple within a chain - * - * Normally this function find the preferred place for block allocation, - * returns it. 
- * Because this is only used for non-extent files, we limit the block nr - * to 32 bits. - */ -static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect *partial) -{ - ext4_fsblk_t goal; - - /* - * XXX need to get goal block from mballoc's data structures - */ - - goal = ext4_find_near(inode, partial); - goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - return goal; -} - -/** - * ext4_blks_to_allocate - Look up the block map and count the number - * of direct blocks need to be allocated for the given branch. - * - * @branch: chain of indirect blocks - * @k: number of blocks need for indirect blocks - * @blks: number of data blocks to be mapped. - * @blocks_to_boundary: the offset in the indirect block - * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. - */ -static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, - int blocks_to_boundary) -{ - unsigned int count = 0; - - /* - * Simple case, [t,d]Indirect block(s) has not allocated yet - * then it's clear blocks on that path have not allocated - */ - if (k > 0) { - /* right now we don't handle cross boundary allocation */ - if (blks < blocks_to_boundary + 1) - count += blks; - else - count += blocks_to_boundary + 1; - return count; - } - - count++; - while (count < blks && count <= blocks_to_boundary && - le32_to_cpu(*(branch[0].p + count)) == 0) { - count++; - } - return count; -} - -/** - * ext4_alloc_blocks: multiple allocate blocks needed for a branch - * @handle: handle for this transaction - * @inode: inode which needs allocated blocks - * @iblock: the logical block to start allocated at - * @goal: preferred physical block of allocation - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks - * @blks: number of desired blocks - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @err: on return it will store the error code - * - * This function will return the number of blocks allocated as - * requested by the passed-in parameters. - */ -static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, ext4_fsblk_t goal, - int indirect_blks, int blks, - ext4_fsblk_t new_blocks[4], int *err) -{ - struct ext4_allocation_request ar; - int target, i; - unsigned long count = 0, blk_allocated = 0; - int index = 0; - ext4_fsblk_t current_block = 0; - int ret = 0; - - /* - * Here we try to allocate the requested multiple blocks at once, - * on a best-effort basis. - * To build a branch, we should allocate blocks for - * the indirect blocks(if not allocated yet), and at least - * the first direct block of this branch. 
That's the - * minimum number of blocks need to allocate(required) - */ - /* first we try to allocate the indirect blocks */ - target = indirect_blks; - while (target > 0) { - count = target; - /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext4_new_meta_blocks(handle, inode, goal, - 0, &count, err); - if (*err) - goto failed_out; - - if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { - EXT4_ERROR_INODE(inode, - "current_block %llu + count %lu > %d!", - current_block, count, - EXT4_MAX_BLOCK_FILE_PHYS); - *err = -EIO; - goto failed_out; - } - - target -= count; - /* allocate blocks for indirect blocks */ - while (index < indirect_blks && count) { - new_blocks[index++] = current_block++; - count--; - } - if (count > 0) { - /* - * save the new block number - * for the first direct block - */ - new_blocks[index] = current_block; - printk(KERN_INFO "%s returned more blocks than " - "requested\n", __func__); - WARN_ON(1); - break; - } - } - - target = blks - count ; - blk_allocated = count; - if (!target) - goto allocated; - /* Now allocate data blocks */ - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.goal = goal; - ar.len = target; - ar.logical = iblock; - if (S_ISREG(inode->i_mode)) - /* enable in-core preallocation only for regular files */ - ar.flags = EXT4_MB_HINT_DATA; - - current_block = ext4_mb_new_blocks(handle, &ar, err); - if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { - EXT4_ERROR_INODE(inode, - "current_block %llu + ar.len %d > %d!", - current_block, ar.len, - EXT4_MAX_BLOCK_FILE_PHYS); - *err = -EIO; - goto failed_out; - } - - if (*err && (target == blks)) { - /* - * if the allocation failed and we didn't allocate - * any blocks before - */ - goto failed_out; - } - if (!*err) { - if (target == blks) { - /* - * save the new block number - * for the first direct block - */ - new_blocks[index] = current_block; - } - blk_allocated += ar.len; - } -allocated: - /* total number of blocks allocated for direct blocks */ - ret = blk_allocated; - *err = 0; - return ret; -failed_out: - for (i = 0; i < index; i++) - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); - return ret; -} - -/** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. - * - * This function allocates blocks, zeroes out all but the last one, - * links them into chain and (if we are synchronous) writes them to disk. - * In other words, it prepares a branch that can be spliced onto the - * inode. It stores the information about that chain in the branch[], in - * the same format as ext4_get_branch() would do. We are calling it after - * we had read the existing part of chain and partial points to the last - * triple of that (one with zero ->key). Upon the exit we have the same - * picture as after the successful ext4_get_block(), except that in one - * place chain is disconnected - *branch->p is still zero (we did not - * set the last link), but branch->key contains the number that should - * be placed into *branch->p to fill that gap. - * - * If allocation fails we free all blocks we've allocated (and forget - * their buffer_heads) and return the error value the from failed - * ext4_alloc_block() (normally -ENOSPC). 
Otherwise we set the chain - * as described above and return 0. - */ -static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, int indirect_blks, - int *blks, ext4_fsblk_t goal, - ext4_lblk_t *offsets, Indirect *branch) -{ - int blocksize = inode->i_sb->s_blocksize; - int i, n = 0; - int err = 0; - struct buffer_head *bh; - int num; - ext4_fsblk_t new_blocks[4]; - ext4_fsblk_t current_block; - - num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks, - *blks, new_blocks, &err); - if (err) - return err; - - branch[0].key = cpu_to_le32(new_blocks[0]); - /* - * metadata blocks and data blocks are allocated. - */ - for (n = 1; n <= indirect_blks; n++) { - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. - */ - bh = sb_getblk(inode->i_sb, new_blocks[n-1]); - if (unlikely(!bh)) { - err = -EIO; - goto failed; - } - - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext4_journal_get_create_access(handle, bh); - if (err) { - /* Don't brelse(bh) here; it's done in - * ext4_journal_forget() below */ - unlock_buffer(bh); - goto failed; - } - - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32 *) bh->b_data + offsets[n]; - branch[n].key = cpu_to_le32(new_blocks[n]); - *branch[n].p = branch[n].key; - if (n == indirect_blks) { - current_block = new_blocks[n]; - /* - * End of chain, update the last new metablock of - * the chain to point to the new allocated - * data blocks numbers - */ - for (i = 1; i < num; i++) - *(branch[n].p + i) = cpu_to_le32(++current_block); - } - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (err) - goto failed; - } - *blks = num; - return err; -failed: - /* Allocation failed, free what we already allocated */ - ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); - for (i = 1; i <= n ; i++) { - /* - * branch[i].bh is newly allocated, so there is no - * need to revoke the block, which is why we don't - * need to set EXT4_FREE_BLOCKS_METADATA. - */ - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, - EXT4_FREE_BLOCKS_FORGET); - } - for (i = n+1; i < indirect_blks; i++) - ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); - - ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); - - return err; -} - -/** - * ext4_splice_branch - splice the allocated branch onto inode. - * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) - * @where: location of missing link - * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding - * - * This function fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. - */ -static int ext4_splice_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t block, Indirect *where, int num, - int blks) -{ - int i; - int err = 0; - ext4_fsblk_t current_block; - - /* - * If we're splicing into a [td]indirect block (as opposed to the - * inode) then we need to get write access to the [td]indirect block - * before the splice. 
- */ - if (where->bh) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, where->bh); - if (err) - goto err_out; - } - /* That's it */ - - *where->p = where->key; - - /* - * Update the host buffer_head or inode to point to more just allocated - * direct blocks blocks - */ - if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) - *(where->p + i) = cpu_to_le32(current_block++); - } - - /* We are done with atomic stuff, now do the rest of housekeeping */ - /* had we spliced it onto indirect block? */ - if (where->bh) { - /* - * If we spliced it onto an indirect block, we haven't - * altered the inode. Note however that if it is being spliced - * onto an indirect block at the very end of the file (the - * file is growing) then we *will* alter the inode to reflect - * the new i_size. But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. - */ - jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, where->bh); - if (err) - goto err_out; - } else { - /* - * OK, we spliced it into the inode itself on a direct block. - */ - ext4_mark_inode_dirty(handle, inode); - jbd_debug(5, "splicing direct\n"); - } - return err; - -err_out: - for (i = 1; i <= num; i++) { - /* - * branch[i].bh is newly allocated, so there is no - * need to revoke the block, which is why we don't - * need to set EXT4_FREE_BLOCKS_METADATA. - */ - ext4_free_blocks(handle, inode, where[i].bh, 0, 1, - EXT4_FREE_BLOCKS_FORGET); - } - ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), - blks, 0); - - return err; -} - -/* - * The ext4_ind_map_blocks() function handles non-extents inodes - * (i.e., using the traditional indirect/double-indirect i_blocks - * scheme) for ext4_map_blocks(). - * - * Allocation strategy is simple: if we have to allocate something, we will - * have to go the whole way to leaf. So let's do it before attaching anything - * to tree, set linkage between the newborn blocks, write them if sync is - * required, recheck the path, free and repeat if check fails, otherwise - * set the last missing link (that will protect us from any truncate-generated - * removals - all blocks on the path are immune now) and possibly force the - * write on the parent block. - * That has a nice additional property: no special recovery from the failed - * allocations is needed - we simply release blocks and do not touch anything - * reachable from inode. - * - * `handle' can be NULL if create == 0. - * - * return > 0, # of blocks mapped or allocated. - * return = 0, if plain lookup failed. - * return < 0, error case. - * - * The ext4_ind_get_blocks() function should be called with - * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem - * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system - * blocks. 
- */ -static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, - int flags) -{ - int err = -EIO; - ext4_lblk_t offsets[4]; - Indirect chain[4]; - Indirect *partial; - ext4_fsblk_t goal; - int indirect_blks; - int blocks_to_boundary = 0; - int depth; - int count = 0; - ext4_fsblk_t first_block = 0; - - trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); - depth = ext4_block_to_path(inode, map->m_lblk, offsets, - &blocks_to_boundary); - - if (depth == 0) - goto out; - - partial = ext4_get_branch(inode, depth, offsets, chain, &err); - - /* Simplest case - block found, no allocation needed */ - if (!partial) { - first_block = le32_to_cpu(chain[depth - 1].key); - count++; - /*map more blocks*/ - while (count < map->m_len && count <= blocks_to_boundary) { - ext4_fsblk_t blk; - - blk = le32_to_cpu(*(chain[depth-1].p + count)); - - if (blk == first_block + count) - count++; - else - break; - } - goto got_it; - } - - /* Next simple case - plain lookup or failed read of indirect block */ - if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) - goto cleanup; - - /* - * Okay, we need to do block allocation. - */ - goal = ext4_find_goal(inode, map->m_lblk, partial); - - /* the number of blocks need to allocate for [d,t]indirect blocks */ - indirect_blks = (chain + depth) - partial - 1; - - /* - * Next look up the indirect map to count the totoal number of - * direct blocks to allocate for this branch. - */ - count = ext4_blks_to_allocate(partial, indirect_blks, - map->m_len, blocks_to_boundary); - /* - * Block out ext4_truncate while we alter the tree - */ - err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, - &count, goal, - offsets + (partial - chain), partial); - - /* - * The ext4_splice_branch call will free and forget any buffers - * on the new chain if there is a failure, but that risks using - * up transaction credits, especially for bitmaps where the - * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. 
--sct - */ - if (!err) - err = ext4_splice_branch(handle, inode, map->m_lblk, - partial, indirect_blks, count); - if (err) - goto cleanup; - - map->m_flags |= EXT4_MAP_NEW; - - ext4_update_inode_fsync_trans(handle, inode, 1); -got_it: - map->m_flags |= EXT4_MAP_MAPPED; - map->m_pblk = le32_to_cpu(chain[depth-1].key); - map->m_len = count; - if (count > blocks_to_boundary) - map->m_flags |= EXT4_MAP_BOUNDARY; - err = count; - /* Clean up and exit */ - partial = chain + depth - 1; /* the whole chain */ -cleanup: - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } -out: - trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, - map->m_pblk, map->m_len, err); - return err; -} - #ifdef CONFIG_QUOTA qsize_t *ext4_get_reserved_space(struct inode *inode) { @@ -1014,32 +217,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) /* * Calculate the number of metadata blocks need to reserve - * to allocate a new block at @lblocks for non extent file based file - */ -static int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); - int blk_bits; - - if (lblock < EXT4_NDIR_BLOCKS) - return 0; - - lblock -= EXT4_NDIR_BLOCKS; - - if (ei->i_da_metadata_calc_len && - (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { - ei->i_da_metadata_calc_len++; - return 0; - } - ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; - ei->i_da_metadata_calc_len = 1; - blk_bits = order_base_2(lblock); - return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; -} - -/* - * Calculate the number of metadata blocks need to reserve * to allocate a block located at @lblock */ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) @@ -3380,114 +2557,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait) } /* - * O_DIRECT for ext3 (or indirect map) based files - * - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. 
- */ -static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_length(iov, nr_segs); - int retries = 0; - - if (rw == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext4_orphan_add(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext4_journal_stop(handle); - } - } - -retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL, NULL, 0); - else { - ret = blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL); - - if (unlikely((rw & WRITE) && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - ext4_truncate_failed_write(inode); - } - } - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - goto out; - } - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - -/* * ext4_get_block used when preparing for a DIO write or buffer write. * We allocate an uinitialized extent if blocks haven't been allocated. * The extent will be converted to initialized after the IO is complete. @@ -3958,383 +3027,6 @@ unlock: return err; } -/* - * Probably it should be a library function... search for first non-zero word - * or memcmp with zero_page, whatever is better for particular architecture. - * Linus? - */ -static inline int all_zeroes(__le32 *p, __le32 *q) -{ - while (p < q) - if (*p++) - return 0; - return 1; -} - -/** - * ext4_find_shared - find the indirect blocks for partial truncation. - * @inode: inode in question - * @depth: depth of the affected branch - * @offsets: offsets of pointers in that branch (see ext4_block_to_path) - * @chain: place to store the pointers to partial indirect blocks - * @top: place to the (detached) top of branch - * - * This is a helper function used by ext4_truncate(). - * - * When we do truncate() we may have to clean the ends of several - * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is referred - * from it (and it is on the path to the first completely truncated - * data block, indeed). 
We have to free the top of that path along - * with everything to the right of the path. Since no allocation - * past the truncation point is possible until ext4_truncate() - * finishes, we may safely do the latter, but top of branch may - * require special attention - pageout below the truncation point - * might try to populate it. - * - * We atomically detach the top of branch from the tree, store the - * block number of its root in *@top, pointers to buffer_heads of - * partially truncated blocks - in @chain[].bh and pointers to - * their last elements that should not be removed - in - * @chain[].p. Return value is the pointer to last filled element - * of @chain. - * - * The work left to caller to do the actual freeing of subtrees: - * a) free the subtree starting from *@top - * b) free the subtrees whose roots are stored in - * (@chain[i].p+1 .. end of @chain[i].bh->b_data) - * c) free the subtrees growing from the inode past the @chain[0]. - * (no partially truncated stuff there). */ - -static Indirect *ext4_find_shared(struct inode *inode, int depth, - ext4_lblk_t offsets[4], Indirect chain[4], - __le32 *top) -{ - Indirect *partial, *p; - int k, err; - - *top = 0; - /* Make k index the deepest non-null offset + 1 */ - for (k = depth; k > 1 && !offsets[k-1]; k--) - ; - partial = ext4_get_branch(inode, k, offsets, chain, &err); - /* Writer: pointers */ - if (!partial) - partial = chain + k-1; - /* - * If the branch acquired continuation since we've looked at it - - * fine, it should all survive and (new) top doesn't belong to us. - */ - if (!partial->key && *partial->p) - /* Writer: end */ - goto no_top; - for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) - ; - /* - * OK, we've found the last block that must survive. The rest of our - * branch should be detached before unlocking. However, if that rest - * of branch is all ours and does not grow immediately from the inode - * it's easier to cheat and just decrement partial->p. - */ - if (p == chain + k - 1 && p > chain) { - p->p--; - } else { - *top = *p->p; - /* Nope, don't do this in ext4. Must leave the tree intact */ -#if 0 - *p->p = 0; -#endif - } - /* Writer: end */ - - while (partial > p) { - brelse(partial->bh); - partial--; - } -no_top: - return partial; -} - -/* - * Zero a number of block pointers in either an inode or an indirect block. - * If we restart the transaction we must again get write access to the - * indirect block for further modification. - * - * We release `count' blocks on disk, but (last - first) may be greater - * than `count' because there can be holes in there. - * - * Return 0 on success, 1 on invalid block range - * and < 0 on fatal error. 
- */ -static int ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, - ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, - __le32 *last) -{ - __le32 *p; - int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; - int err; - - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - flags |= EXT4_FREE_BLOCKS_METADATA; - - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, - count)) { - EXT4_ERROR_INODE(inode, "attempt to clear invalid " - "blocks %llu len %lu", - (unsigned long long) block_to_free, count); - return 1; - } - - if (try_to_extend_transaction(handle, inode)) { - if (bh) { - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (unlikely(err)) - goto out_err; - } - err = ext4_mark_inode_dirty(handle, inode); - if (unlikely(err)) - goto out_err; - err = ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - if (unlikely(err)) - goto out_err; - if (bh) { - BUFFER_TRACE(bh, "retaking write access"); - err = ext4_journal_get_write_access(handle, bh); - if (unlikely(err)) - goto out_err; - } - } - - for (p = first; p < last; p++) - *p = 0; - - ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); - return 0; -out_err: - ext4_std_error(inode->i_sb, err); - return err; -} - -/** - * ext4_free_data - free a list of data blocks - * @handle: handle for this transaction - * @inode: inode we are dealing with - * @this_bh: indirect buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: points immediately past the end of array - * - * We are freeing all blocks referred from that array (numbers are stored as - * little-endian 32-bit) and updating @inode->i_blocks appropriately. - * - * We accumulate contiguous runs of blocks to free. Conveniently, if these - * blocks are contiguous then releasing them at one time will only affect one - * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't - * actually use a lot of journal space. - * - * @this_bh will be %NULL if @first and @last point into the inode's direct - * block pointers. - */ -static void ext4_free_data(handle_t *handle, struct inode *inode, - struct buffer_head *this_bh, - __le32 *first, __le32 *last) -{ - ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ - unsigned long count = 0; /* Number of blocks in the run */ - __le32 *block_to_free_p = NULL; /* Pointer into inode/ind - corresponding to - block_to_free */ - ext4_fsblk_t nr; /* Current block # */ - __le32 *p; /* Pointer into inode/ind - for current block */ - int err = 0; - - if (this_bh) { /* For indirect block */ - BUFFER_TRACE(this_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, this_bh); - /* Important: if we can't update the indirect pointers - * to the blocks, we can't free them. 
*/ - if (err) - return; - } - - for (p = first; p < last; p++) { - nr = le32_to_cpu(*p); - if (nr) { - /* accumulate blocks to free if they're contiguous */ - if (count == 0) { - block_to_free = nr; - block_to_free_p = p; - count = 1; - } else if (nr == block_to_free + count) { - count++; - } else { - err = ext4_clear_blocks(handle, inode, this_bh, - block_to_free, count, - block_to_free_p, p); - if (err) - break; - block_to_free = nr; - block_to_free_p = p; - count = 1; - } - } - } - - if (!err && count > 0) - err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, - count, block_to_free_p, p); - if (err < 0) - /* fatal error */ - return; - - if (this_bh) { - BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); - - /* - * The buffer head should have an attached journal head at this - * point. However, if the data is corrupted and an indirect - * block pointed to itself, it would have been detached when - * the block was cleared. Check for this instead of OOPSing. - */ - if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) - ext4_handle_dirty_metadata(handle, inode, this_bh); - else - EXT4_ERROR_INODE(inode, - "circular indirect block detected at " - "block %llu", - (unsigned long long) this_bh->b_blocknr); - } -} - -/** - * ext4_free_branches - free an array of branches - * @handle: JBD handle for this transaction - * @inode: inode we are dealing with - * @parent_bh: the buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: pointer immediately past the end of array - * @depth: depth of the branches to free - * - * We are freeing all blocks referred from these branches (numbers are - * stored as little-endian 32-bit) and updating @inode->i_blocks - * appropriately. - */ -static void ext4_free_branches(handle_t *handle, struct inode *inode, - struct buffer_head *parent_bh, - __le32 *first, __le32 *last, int depth) -{ - ext4_fsblk_t nr; - __le32 *p; - - if (ext4_handle_is_aborted(handle)) - return; - - if (depth--) { - struct buffer_head *bh; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - p = last; - while (--p >= first) { - nr = le32_to_cpu(*p); - if (!nr) - continue; /* A hole */ - - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), - nr, 1)) { - EXT4_ERROR_INODE(inode, - "invalid indirect mapped " - "block %lu (level %d)", - (unsigned long) nr, depth); - break; - } - - /* Go read the buffer for the next level down */ - bh = sb_bread(inode->i_sb, nr); - - /* - * A read failure? Report error and clear slot - * (should be rare). - */ - if (!bh) { - EXT4_ERROR_INODE_BLOCK(inode, nr, - "Read failure"); - continue; - } - - /* This zaps the entire block. Bottom up. */ - BUFFER_TRACE(bh, "free child branches"); - ext4_free_branches(handle, inode, bh, - (__le32 *) bh->b_data, - (__le32 *) bh->b_data + addr_per_block, - depth); - brelse(bh); - - /* - * Everything below this this pointer has been - * released. Now let this top-of-subtree go. - * - * We want the freeing of this indirect block to be - * atomic in the journal with the updating of the - * bitmap block which owns it. So make some room in - * the journal. - * - * We zero the parent pointer *after* freeing its - * pointee in the bitmaps, so if extend_transaction() - * for some reason fails to put the bitmap changes and - * the release into the same transaction, recovery - * will merely complain about releasing a free block, - * rather than leaking blocks. 
- */ - if (ext4_handle_is_aborted(handle)) - return; - if (try_to_extend_transaction(handle, inode)) { - ext4_mark_inode_dirty(handle, inode); - ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - } - - /* - * The forget flag here is critical because if - * we are journaling (and not doing data - * journaling), we have to make sure a revoke - * record is written to prevent the journal - * replay from overwriting the (former) - * indirect block if it gets reallocated as a - * data block. This must happen in the same - * transaction where the data blocks are - * actually freed. - */ - ext4_free_blocks(handle, inode, NULL, nr, 1, - EXT4_FREE_BLOCKS_METADATA| - EXT4_FREE_BLOCKS_FORGET); - - if (parent_bh) { - /* - * The block which we have just freed is - * pointed to by an indirect block: journal it - */ - BUFFER_TRACE(parent_bh, "get_write_access"); - if (!ext4_journal_get_write_access(handle, - parent_bh)){ - *p = 0; - BUFFER_TRACE(parent_bh, - "call ext4_handle_dirty_metadata"); - ext4_handle_dirty_metadata(handle, - inode, - parent_bh); - } - } - } - } else { - /* We have reached the bottom of the tree. */ - BUFFER_TRACE(parent_bh, "free data blocks"); - ext4_free_data(handle, inode, parent_bh, first, last); - } -} - int ext4_can_truncate(struct inode *inode) { if (S_ISREG(inode->i_mode)) @@ -4419,161 +3111,6 @@ void ext4_truncate(struct inode *inode) trace_ext4_truncate_exit(inode); } -void ext4_ind_truncate(struct inode *inode) -{ - handle_t *handle; - struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *i_data = ei->i_data; - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - struct address_space *mapping = inode->i_mapping; - ext4_lblk_t offsets[4]; - Indirect chain[4]; - Indirect *partial; - __le32 nr = 0; - int n = 0; - ext4_lblk_t last_block, max_block; - unsigned blocksize = inode->i_sb->s_blocksize; - - handle = start_transaction(inode); - if (IS_ERR(handle)) - return; /* AKPM: return what? */ - - last_block = (inode->i_size + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) - >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); - - if (inode->i_size & (blocksize - 1)) - if (ext4_block_truncate_page(handle, mapping, inode->i_size)) - goto out_stop; - - if (last_block != max_block) { - n = ext4_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ - } - - /* - * OK. This truncate is going to happen. We add the inode to the - * orphan list, so that if this truncate spans multiple transactions, - * and we crash, we will resume the truncate when the filesystem - * recovers. It also marks the inode dirty, to catch the new size. - * - * Implication: the file must always be in a sane, consistent - * truncatable state while each transaction commits. - */ - if (ext4_orphan_add(handle, inode)) - goto out_stop; - - /* - * From here we block out all ext4_get_block() callers who want to - * modify the block allocation tree. - */ - down_write(&ei->i_data_sem); - - ext4_discard_preallocations(inode); - - /* - * The orphan list entry will now protect us from any crash which - * occurs before the truncate completes, so it is now safe to propagate - * the new, shorter inode size (held for now in i_size) into the - * on-disk inode. We do this via i_disksize, which is the value which - * ext4 *really* writes onto the disk inode. 
- */ - ei->i_disksize = inode->i_size; - - if (last_block == max_block) { - /* - * It is unnecessary to free any data blocks if last_block is - * equal to the indirect block limit. - */ - goto out_unlock; - } else if (n == 1) { /* direct blocks */ - ext4_free_data(handle, inode, NULL, i_data+offsets[0], - i_data + EXT4_NDIR_BLOCKS); - goto do_indirects; - } - - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - /* Kill the top of shared branch (not detached) */ - if (nr) { - if (partial == chain) { - /* Shared branch grows from the inode */ - ext4_free_branches(handle, inode, NULL, - &nr, &nr+1, (chain+n-1) - partial); - *partial->p = 0; - /* - * We mark the inode dirty prior to restart, - * and prior to stop. No need for it here. - */ - } else { - /* Shared branch grows from an indirect block */ - BUFFER_TRACE(partial->bh, "get_write_access"); - ext4_free_branches(handle, inode, partial->bh, - partial->p, - partial->p+1, (chain+n-1) - partial); - } - } - /* Clear the ends of indirect blocks on the shared branch */ - while (partial > chain) { - ext4_free_branches(handle, inode, partial->bh, partial->p + 1, - (__le32*)partial->bh->b_data+addr_per_block, - (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } -do_indirects: - /* Kill the remaining (whole) subtrees */ - switch (offsets[0]) { - default: - nr = i_data[EXT4_IND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); - i_data[EXT4_IND_BLOCK] = 0; - } - case EXT4_IND_BLOCK: - nr = i_data[EXT4_DIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); - i_data[EXT4_DIND_BLOCK] = 0; - } - case EXT4_DIND_BLOCK: - nr = i_data[EXT4_TIND_BLOCK]; - if (nr) { - ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); - i_data[EXT4_TIND_BLOCK] = 0; - } - case EXT4_TIND_BLOCK: - ; - } - -out_unlock: - up_write(&ei->i_data_sem); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - ext4_mark_inode_dirty(handle, inode); - - /* - * In a multi-transaction truncate, we only make the final transaction - * synchronous - */ - if (IS_SYNC(inode)) - ext4_handle_sync(handle); -out_stop: - /* - * If this was a simple ftruncate(), and the file will remain alive - * then we need to clear up the orphan record which we created above. - * However, if this was a real unlink then we were called by - * ext4_delete_inode(), and we allow that function to clean up the - * orphan info for us. - */ - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - - ext4_journal_stop(handle); - trace_ext4_truncate_exit(inode); -} - /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. 
If 'in_mem' is true, we have all @@ -5386,29 +3923,6 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -static int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) -{ - int indirects; - - /* if nrblocks are contiguous */ - if (chunk) { - /* - * With N contiguous data blocks, we need at most - * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, - * 2 dindirect blocks, and 1 tindirect block - */ - return DIV_ROUND_UP(nrblocks, - EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; - } - /* - * if nrblocks are not contiguous, worse case, each block touch - * a indirect block, and each indirect block touch a double indirect - * block, plus a triple indirect block - */ - indirects = nrblocks * 2 + 1; - return indirects; -} - static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) -- cgit v1.1 From f86186b44b4164600cce03d0d93ad48ec21fa429 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 28 Jun 2011 10:01:31 -0400 Subject: ext4: refactor duplicated block placement code I found that ext4_ext_find_goal() and ext4_find_near() share the same code for returning a coloured start block based on i_block_group. We can refactor this into a common function so that they don't diverge in the future. Thanks to adilger for suggesting the new function name. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 37 +------------------------------------ fs/ext4/indirect.c | 28 +--------------------------- 4 files changed, 51 insertions(+), 63 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 264f694..f8224ad 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -620,3 +620,51 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) } +/** + * ext4_inode_to_goal_block - return a hint for block allocation + * @inode: inode for block allocation + * + * Return the ideal location to start allocating blocks for a + * newly created inode. + */ +ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + ext4_group_t block_group; + ext4_grpblk_t colour; + int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); + ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; + + block_group = ei->i_block_group; + if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { + /* + * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME + * block groups per flexgroup, reserve the first block + * group for directories and special files. Regular + * files will start at the second block group. This + * tends to speed up directory access and improves + * fsck times. + */ + block_group &= ~(flex_size-1); + if (S_ISREG(inode->i_mode)) + block_group++; + } + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + /* + * If we are doing delayed allocation, we don't need take + * colour into account. 
+ */ + if (test_opt(inode->i_sb, DELALLOC)) + return bg_start; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); + return bg_start + colour; +} + diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ddaf504..49d2cea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1743,6 +1743,7 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb, struct ext4_group_desc *desc); #define ext4_free_blocks_after_init(sb, group, desc) \ ext4_init_block_bitmap(sb, NULL, group, desc) +ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); /* dir.c */ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index eb63c7b..f331e50 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -114,12 +114,6 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { - struct ext4_inode_info *ei = EXT4_I(inode); - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - ext4_grpblk_t colour; - ext4_group_t block_group; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); int depth; if (path) { @@ -161,36 +155,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, } /* OK. use inode's group */ - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - /* - * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME - * block groups per flexgroup, reserve the first block - * group for directories and special files. Regular - * files will start at the second block group. This - * tends to speed up directory access and improves - * fsck times. - */ - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. - */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour + block; + return ext4_inode_to_goal_block(inode); } /* diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index c3e85a8..6c27111 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -207,11 +207,6 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) struct ext4_inode_info *ei = EXT4_I(inode); __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; __le32 *p; - ext4_fsblk_t bg_start; - ext4_fsblk_t last_block; - ext4_grpblk_t colour; - ext4_group_t block_group; - int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { @@ -227,28 +222,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. 
*/ - block_group = ei->i_block_group; - if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { - block_group &= ~(flex_size-1); - if (S_ISREG(inode->i_mode)) - block_group++; - } - bg_start = ext4_group_first_block_no(inode->i_sb, block_group); - last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; - - /* - * If we are doing delayed allocation, we don't need take - * colour into account. - */ - if (test_opt(inode->i_sb, DELALLOC)) - return bg_start; - - if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) - colour = (current->pid % 16) * - (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); - else - colour = (current->pid % 16) * ((last_block - bg_start) / 16); - return bg_start + colour; + return ext4_inode_to_goal_block(inode); } /** -- cgit v1.1 From 9331b6261058eb85ae7c57ab8ac279e7fdaa9f04 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 28 Jun 2011 10:19:05 -0400 Subject: ext4: quiet 'unused variables' compile warnings Unused variables was deleted. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 -- fs/ext4/mballoc.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f331e50..31ae5fb 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3073,12 +3073,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct ext4_ext_path *path) { struct ext4_extent *ex; - struct ext4_extent_header *eh; int depth; int err = 0; depth = ext_depth(inode); - eh = path[depth].p_hdr; ex = path[depth].p_ext; ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6ed859d..389386b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4666,12 +4666,10 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, struct ext4_buddy e4b; int err = 0, ret, blk_free_count; ext4_grpblk_t blocks_freed; - struct ext4_group_info *grp; ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); ext4_get_group_no_and_offset(sb, block, &block_group, &bit); - grp = ext4_get_group_info(sb, block_group); /* * Check to see if we are freeing blocks across a group * boundary. -- cgit v1.1 From 275d3ba6b40d0f098693b9089c6fee9bd4e55d74 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 29 Jun 2011 21:44:45 -0400 Subject: ext4: remove loop around bio_alloc() These days, bio_alloc() is guaranteed to never fail (as long as nvecs is less than BIO_MAX_PAGES), so we don't need the loop around the struct bio allocation. Signed-off-by: "Theodore Ts'o" --- fs/ext4/page-io.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7bb8f76..430c401 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -285,11 +285,7 @@ static int io_submit_init(struct ext4_io_submit *io, io_end = ext4_init_io_end(inode, GFP_NOFS); if (!io_end) return -ENOMEM; - do { - bio = bio_alloc(GFP_NOIO, nvecs); - nvecs >>= 1; - } while (bio == NULL); - + bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio->bi_bdev = bh->b_bdev; bio->bi_private = io->io_end = io_end; -- cgit v1.1 From 5de705194e9883a39f993e2ff96028d5aab99b37 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 19 Jun 2011 13:33:16 +0100 Subject: regulator: Add basic per consumer debugfs Report the requested load and voltage for each consumer in debugfs when it is enabled. 
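For context, the values exported here are simply whatever a consumer has most recently requested through the standard consumer API. A minimal consumer-side sketch (the supply name "vcc", the probe function and the requested values are assumptions for illustration, not taken from this patch):

	#include <linux/err.h>
	#include <linux/regulator/consumer.h>

	static int example_probe(struct device *dev)
	{
		struct regulator *vcc;

		vcc = regulator_get(dev, "vcc");	/* hypothetical supply name */
		if (IS_ERR(vcc))
			return PTR_ERR(vcc);

		/* reflected in the new min_uV/max_uV debugfs files */
		regulator_set_voltage(vcc, 1800000, 1800000);
		/* reflected in the new uA_load debugfs file */
		regulator_set_optimum_mode(vcc, 10000);

		return regulator_enable(vcc);
	}

Each consumer gets its own read-only directory under the regulator's debugfs directory, so the requests made by individual consumers of a supply can be inspected independently.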
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cc3dfd6..f59821f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -81,6 +81,9 @@ struct regulator { char *supply_name; struct device_attribute dev_attr; struct regulator_dev *rdev; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; +#endif }; static int _regulator_is_enabled(struct regulator_dev *rdev); @@ -1093,7 +1096,28 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, dev->kobj.name, err); goto link_name_err; } + } else { + regulator->supply_name = kstrdup(supply_name, GFP_KERNEL); + if (regulator->supply_name == NULL) + goto attr_err; + } + +#ifdef CONFIG_DEBUG_FS + regulator->debugfs = debugfs_create_dir(regulator->supply_name, + rdev->debugfs); + if (IS_ERR_OR_NULL(regulator->debugfs)) { + rdev_warn(rdev, "Failed to create debugfs directory\n"); + regulator->debugfs = NULL; + } else { + debugfs_create_u32("uA_load", 0444, regulator->debugfs, + ®ulator->uA_load); + debugfs_create_u32("min_uV", 0444, regulator->debugfs, + ®ulator->min_uV); + debugfs_create_u32("max_uV", 0444, regulator->debugfs, + ®ulator->max_uV); } +#endif + mutex_unlock(&rdev->mutex); return regulator; link_name_err: @@ -1272,13 +1296,17 @@ void regulator_put(struct regulator *regulator) mutex_lock(®ulator_list_mutex); rdev = regulator->rdev; +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(regulator->debugfs); +#endif + /* remove any sysfs entries */ if (regulator->dev) { sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); - kfree(regulator->supply_name); device_remove_file(regulator->dev, ®ulator->dev_attr); kfree(regulator->dev_attr.attr.name); } + kfree(regulator->supply_name); list_del(®ulator->list); kfree(regulator); -- cgit v1.1 From 909c2f32ca0629678e353343d69089f4e94ea974 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 26 May 2011 11:37:02 +0300 Subject: ASoC: tlv320aic3x: Add correct hw registers to Line1 cross connect muxes Commit af46800 ("ASoC: Implement mux control sharing") revealed that "Left Line1[L | R] Mux" and "Right Line1[L | R] Mux" widgets were pointing to the same kcontrols and codec registers and thus soc-core falsely detected them as shared controls. This is actually wrong since there are separate registers in hardware that configure Line1L to RADC and Line1R to LADC cross connects so these muxes should not be shared. 
Signed-off-by: Jarkko Nikula Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- sound/soc/codecs/tlv320aic3x.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index c3d96fc..6e35b51 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -226,11 +226,13 @@ static const char *aic3x_adc_hpf[] = #define RDAC_ENUM 1 #define LHPCOM_ENUM 2 #define RHPCOM_ENUM 3 -#define LINE1L_ENUM 4 -#define LINE1R_ENUM 5 -#define LINE2L_ENUM 6 -#define LINE2R_ENUM 7 -#define ADC_HPF_ENUM 8 +#define LINE1L_2_L_ENUM 4 +#define LINE1L_2_R_ENUM 5 +#define LINE1R_2_L_ENUM 6 +#define LINE1R_2_R_ENUM 7 +#define LINE2L_ENUM 8 +#define LINE2R_ENUM 9 +#define ADC_HPF_ENUM 10 static const struct soc_enum aic3x_enum[] = { SOC_ENUM_SINGLE(DAC_LINE_MUX, 6, 3, aic3x_left_dac_mux), @@ -238,6 +240,8 @@ static const struct soc_enum aic3x_enum[] = { SOC_ENUM_SINGLE(HPLCOM_CFG, 4, 3, aic3x_left_hpcom_mux), SOC_ENUM_SINGLE(HPRCOM_CFG, 3, 5, aic3x_right_hpcom_mux), SOC_ENUM_SINGLE(LINE1L_2_LADC_CTRL, 7, 2, aic3x_linein_mode_mux), + SOC_ENUM_SINGLE(LINE1L_2_RADC_CTRL, 7, 2, aic3x_linein_mode_mux), + SOC_ENUM_SINGLE(LINE1R_2_LADC_CTRL, 7, 2, aic3x_linein_mode_mux), SOC_ENUM_SINGLE(LINE1R_2_RADC_CTRL, 7, 2, aic3x_linein_mode_mux), SOC_ENUM_SINGLE(LINE2L_2_LADC_CTRL, 7, 2, aic3x_linein_mode_mux), SOC_ENUM_SINGLE(LINE2R_2_RADC_CTRL, 7, 2, aic3x_linein_mode_mux), @@ -490,12 +494,16 @@ static const struct snd_kcontrol_new aic3x_right_pga_mixer_controls[] = { }; /* Left Line1 Mux */ -static const struct snd_kcontrol_new aic3x_left_line1_mux_controls = -SOC_DAPM_ENUM("Route", aic3x_enum[LINE1L_ENUM]); +static const struct snd_kcontrol_new aic3x_left_line1l_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1L_2_L_ENUM]); +static const struct snd_kcontrol_new aic3x_right_line1l_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1L_2_R_ENUM]); /* Right Line1 Mux */ -static const struct snd_kcontrol_new aic3x_right_line1_mux_controls = -SOC_DAPM_ENUM("Route", aic3x_enum[LINE1R_ENUM]); +static const struct snd_kcontrol_new aic3x_right_line1r_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1R_2_R_ENUM]); +static const struct snd_kcontrol_new aic3x_left_line1r_mux_controls = +SOC_DAPM_ENUM("Route", aic3x_enum[LINE1R_2_L_ENUM]); /* Left Line2 Mux */ static const struct snd_kcontrol_new aic3x_left_line2_mux_controls = @@ -535,9 +543,9 @@ static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { &aic3x_left_pga_mixer_controls[0], ARRAY_SIZE(aic3x_left_pga_mixer_controls)), SND_SOC_DAPM_MUX("Left Line1L Mux", SND_SOC_NOPM, 0, 0, - &aic3x_left_line1_mux_controls), + &aic3x_left_line1l_mux_controls), SND_SOC_DAPM_MUX("Left Line1R Mux", SND_SOC_NOPM, 0, 0, - &aic3x_left_line1_mux_controls), + &aic3x_left_line1r_mux_controls), SND_SOC_DAPM_MUX("Left Line2L Mux", SND_SOC_NOPM, 0, 0, &aic3x_left_line2_mux_controls), @@ -548,9 +556,9 @@ static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { &aic3x_right_pga_mixer_controls[0], ARRAY_SIZE(aic3x_right_pga_mixer_controls)), SND_SOC_DAPM_MUX("Right Line1L Mux", SND_SOC_NOPM, 0, 0, - &aic3x_right_line1_mux_controls), + &aic3x_right_line1l_mux_controls), SND_SOC_DAPM_MUX("Right Line1R Mux", SND_SOC_NOPM, 0, 0, - &aic3x_right_line1_mux_controls), + &aic3x_right_line1r_mux_controls), SND_SOC_DAPM_MUX("Right Line2R Mux", SND_SOC_NOPM, 0, 0, &aic3x_right_line2_mux_controls), -- cgit v1.1 From 7132de744ba76930d13033061018ddd7e3e8cd91 Mon Sep 
17 00:00:00 2001 From: Maxim Patlasov Date: Sun, 10 Jul 2011 19:37:48 -0400 Subject: ext4: fix i_blocks/quota accounting when extent insertion fails The current implementation of ext4_free_blocks() always calls dquot_free_block This looks quite sensible in the most cases: blocks to be freed are associated with inode and were accounted in quota and i_blocks some time ago. However, there is a case when blocks to free were not accounted by the time calling ext4_free_blocks() yet: 1. delalloc is on, write_begin pre-allocated some space in quota 2. write-back happens, ext4 allocates some blocks in ext4_ext_map_blocks() 3. then ext4_ext_map_blocks() gets an error (e.g. ENOSPC) from ext4_ext_insert_extent() and calls ext4_free_blocks(). In this scenario, ext4_free_blocks() calls dquot_free_block() who, in turn, decrements i_blocks for blocks which were not accounted yet (due to delalloc) After clean umount, e2fsck reports something like: > Inode 21, i_blocks is 5080, should be 5128. Fix? because i_blocks was erroneously decremented as explained above. The patch fixes the problem by passing the new flag EXT4_FREE_BLOCKS_NO_QUOT_UPDATE to ext4_free_blocks(), to request that the dquot_free_block() call be skipped. Signed-off-by: Maxim Patlasov Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/extents.c | 4 +++- fs/ext4/mballoc.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 49d2cea..d13f3b5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -526,6 +526,7 @@ struct ext4_new_group_data { #define EXT4_FREE_BLOCKS_METADATA 0x0001 #define EXT4_FREE_BLOCKS_FORGET 0x0002 #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 +#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 /* * ioctl commands diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 31ae5fb..a862138 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3565,12 +3565,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { + int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? + EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; /* free data blocks we just allocated */ /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + ext4_ext_get_actual_len(&newex), fb_flags); goto out2; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 389386b..1900ec7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4637,7 +4637,7 @@ do_more: } ext4_mark_super_dirty(sb); error_return: - if (freed) + if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); -- cgit v1.1 From 575a1d4bdfa2ea9fc10733013136145b497e1be0 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Sun, 10 Jul 2011 20:07:25 -0400 Subject: ext4: free allocated and pre-allocated blocks when check_eofblocks_fl fails Upon corrupted inode or disk failures, we may fail after we already allocate some blocks from the inode or take some blocks from the inode's preallocation list, but before we successfully insert the corresponding extent to the extent tree. In this case, we should free any allocated blocks and discard the inode's preallocated blocks because the entries in the inode's preallocation list may be in an inconsistent state. 
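Taken together with the previous patch, the error handling in ext4_ext_map_blocks() ends up shaped roughly like the sketch below (simplified, not the literal function body): both a check_eofblocks_fl() failure and an ext4_ext_insert_extent() failure now fall through to the same cleanup, which drops the possibly-inconsistent preallocations and frees the just-allocated blocks, skipping the quota update on the delalloc path:

	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
	if (!err)
		err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
	if (err) {
		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;

		/* free the data blocks we just allocated and discard any
		 * preallocations that may now be inconsistent */
		ext4_discard_preallocations(inode);
		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
				 ext4_ext_get_actual_len(&newex), fb_flags);
		goto out2;
	}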
Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/extents.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a862138..c969ae2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3560,10 +3560,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, } err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); - if (err) - goto out2; - - err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (!err) + err = ext4_ext_insert_extent(handle, inode, path, + &newex, flags); if (err) { int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; -- cgit v1.1 From 4862fd6047ed02e2726667c54d35f538eecc56aa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Jul 2011 22:05:08 -0400 Subject: jbd2: remove jbd2_dev_to_name() from jbd2 tracepoints Using function calls in TP_printk causes perf heartburn, so print the MAJOR/MINOR device numbers instead. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 67 --------------------------------------------- include/linux/jbd2.h | 6 ---- include/trace/events/jbd2.h | 36 ++++++++++++------------ 3 files changed, 19 insertions(+), 90 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0dfa5b59..f24df13 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2390,73 +2390,6 @@ static void __exit journal_exit(void) jbd2_journal_destroy_caches(); } -/* - * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 - * tracing infrastructure to map a dev_t to a device name. - * - * The caller should use rcu_read_lock() in order to make sure the - * device name stays valid until its done with it. We use - * rcu_read_lock() as well to make sure we're safe in case the caller - * gets sloppy, and because rcu_read_lock() is cheap and can be safely - * nested. 
- */ -struct devname_cache { - struct rcu_head rcu; - dev_t device; - char devname[BDEVNAME_SIZE]; -}; -#define CACHE_SIZE_BITS 6 -static struct devname_cache *devcache[1 << CACHE_SIZE_BITS]; -static DEFINE_SPINLOCK(devname_cache_lock); - -static void free_devcache(struct rcu_head *rcu) -{ - kfree(rcu); -} - -const char *jbd2_dev_to_name(dev_t device) -{ - int i = hash_32(device, CACHE_SIZE_BITS); - char *ret; - struct block_device *bd; - static struct devname_cache *new_dev; - - rcu_read_lock(); - if (devcache[i] && devcache[i]->device == device) { - ret = devcache[i]->devname; - rcu_read_unlock(); - return ret; - } - rcu_read_unlock(); - - new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); - if (!new_dev) - return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ - bd = bdget(device); - spin_lock(&devname_cache_lock); - if (devcache[i]) { - if (devcache[i]->device == device) { - kfree(new_dev); - bdput(bd); - ret = devcache[i]->devname; - spin_unlock(&devname_cache_lock); - return ret; - } - call_rcu(&devcache[i]->rcu, free_devcache); - } - devcache[i] = new_dev; - devcache[i]->device = device; - if (bd) { - bdevname(bd, devcache[i]->devname); - bdput(bd); - } else - __bdevname(device, devcache[i]->devname); - ret = devcache[i]->devname; - spin_unlock(&devname_cache_lock); - return ret; -} -EXPORT_SYMBOL(jbd2_dev_to_name); - MODULE_LICENSE("GPL"); module_init(journal_init); module_exit(journal_exit); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d087c2e..38f307b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1329,12 +1329,6 @@ extern int jbd_blocks_per_page(struct inode *inode); #define BUFFER_TRACE2(bh, bh2, info) do {} while (0) #define JBUFFER_TRACE(jh, info) do {} while (0) -/* - * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 - * tracing infrastructure to map a dev_t to a device name. 
- */ -extern const char *jbd2_dev_to_name(dev_t device); - #endif /* __KERNEL__ */ #endif /* _LINUX_JBD2_H */ diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index bf16545..7596441 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -26,8 +26,8 @@ TRACE_EVENT(jbd2_checkpoint, __entry->result = result; ), - TP_printk("dev %s result %d", - jbd2_dev_to_name(__entry->dev), __entry->result) + TP_printk("dev %d,%d result %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->result) ); DECLARE_EVENT_CLASS(jbd2_commit, @@ -48,9 +48,9 @@ DECLARE_EVENT_CLASS(jbd2_commit, __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_printk("dev %d,%d transaction %d sync %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->transaction, __entry->sync_commit) ); DEFINE_EVENT(jbd2_commit, jbd2_start_commit, @@ -100,9 +100,9 @@ TRACE_EVENT(jbd2_end_commit, __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %s transaction %d sync %d head %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit, __entry->head) + TP_printk("dev %d,%d transaction %d sync %d head %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->transaction, __entry->sync_commit, __entry->head) ); TRACE_EVENT(jbd2_submit_inode_data, @@ -120,8 +120,9 @@ TRACE_EVENT(jbd2_submit_inode_data, __entry->ino = inode->i_ino; ), - TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) + TP_printk("dev %d,%d ino %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->ino) ); TRACE_EVENT(jbd2_run_stats, @@ -156,9 +157,9 @@ TRACE_EVENT(jbd2_run_stats, __entry->blocks_logged = stats->rs_blocks_logged; ), - TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " + TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u " "logging %u handle_count %u blocks %u blocks_logged %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->wait), jiffies_to_msecs(__entry->running), jiffies_to_msecs(__entry->locked), @@ -192,9 +193,9 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->dropped = stats->cs_dropped; ), - TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " + TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " "written %u dropped %u", - jbd2_dev_to_name(__entry->dev), __entry->tid, + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, jiffies_to_msecs(__entry->chp_time), __entry->forced_to_close, __entry->written, __entry->dropped) ); @@ -222,9 +223,10 @@ TRACE_EVENT(jbd2_cleanup_journal_tail, __entry->freed = freed; ), - TP_printk("dev %s from %u to %u offset %lu freed %lu", - jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, - __entry->first_tid, __entry->block_nr, __entry->freed) + TP_printk("dev %d,%d from %u to %u offset %lu freed %lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tail_sequence, __entry->first_tid, + __entry->block_nr, __entry->freed) ); #endif /* _TRACE_JBD2_H */ -- cgit v1.1 From 12706394bcaa48e3d5e19c97d7b4e5683ebb12fb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Jul 2011 22:37:50 -0400 Subject: ext4: add tracepoint for ext4_journal_start This will help debug who is responsible for starting a jbd2 transaction. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + include/trace/events/ext4.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ea71aa..7910e61 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -269,6 +269,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) journal_t *journal; handle_t *handle; + trace_ext4_journal_start(sb, nblocks, _RET_IP_); if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 5ce2b2f..6f27a59 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1520,6 +1520,28 @@ TRACE_EVENT(ext4_load_inode, (unsigned long) __entry->ino) ); +TRACE_EVENT(ext4_journal_start, + TP_PROTO(struct super_block *sb, int nblocks, unsigned long IP), + + TP_ARGS(sb, nblocks, IP), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nblocks ) + __field(unsigned long, ip ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nblocks = nblocks; + __entry->ip = IP; + ), + + TP_printk("dev %d,%d nblocks %d caller %pF", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nblocks, (void *)__entry->ip) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.1 From 22f10457432387615fa1ae6e0375d9cacc50819b Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 10 Jul 2011 23:52:37 -0400 Subject: ext4: fix trim length underflow with small trim length In 0f0a25b, we adjust 'len' with s_first_data_block - start, but it could underflow in case blocksize=1K, fstrim_range.len=512 and fstrim_range.start = 0. In this case, when we run the code: len -= first_data_blk - start; len will be underflow to -1ULL. In the end, although we are safe that last_group check later will limit the trim to the whole volume, but that isn't what the user really want. So this patch fix it. It also adds the check for 'start' like ext3 so that we can break immediately if the start is invalid. Cc: Lukas Czerner Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1900ec7..b189cb4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4902,6 +4902,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) return -EINVAL; + if (start + len <= first_data_blk) + goto out; if (start < first_data_blk) { len -= first_data_blk - start; start = first_data_blk; @@ -4950,5 +4952,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } range->len = trimmed * sb->s_blocksize; +out: return ret; } -- cgit v1.1 From 169ddc3ec83b5f732e51d975befb191d50795844 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:00:07 -0400 Subject: ext4: speed up group trim with the right free block count When we trim some free blocks in a group of ext4, we need to calculate the free blocks properly and check whether there are enough freed blocks left for us to trim. Current solution will only calculate free spaces if they are large for a trim which isn't appropriate. Let us see a small example: a group has 1.5M free which are 300k, 300k, 300k, 300k, 300k. And minblocks is 1M. With current solution, we have to iterate the whole group since these 300k will never be subtracted from 1.5M. 
But actually we should exit after we find the first 2 free spaces since the left 3 chunks only sum up to 900K if we subtract the first 600K although they can't be trimed. Reviewed-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b189cb4..4a25725 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4821,7 +4821,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t minblocks) { void *bitmap; - ext4_grpblk_t next, count = 0; + ext4_grpblk_t next, count = 0, free_count = 0; struct ext4_buddy e4b; int ret; @@ -4848,6 +4848,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, next - start, group, &e4b); count += next - start; } + free_count += next - start; start = next + 1; if (fatal_signal_pending(current)) { @@ -4861,7 +4862,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_lock_group(sb, group); } - if ((e4b.bd_info->bb_free - count) < minblocks) + if ((e4b.bd_info->bb_free - free_count) < minblocks) break; } ext4_unlock_group(sb, group); -- cgit v1.1 From b3d4c2b10b68d205d3eb1b5c17dcb4649a502798 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:01:52 -0400 Subject: ext4: Add new ext4 trim tracepoints Add ext4_trim_extent and ext4_trim_all_free. Reviewed-by: Lukas Czerner Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 4 ++++ include/trace/events/ext4.h | 49 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4a25725..7aa4c16 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4780,6 +4780,8 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, { struct ext4_free_extent ex; + trace_ext4_trim_extent(sb, group, start, count); + assert_spin_locked(ext4_group_lock_ptr(sb, group)); ex.fe_start = start; @@ -4825,6 +4827,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, struct ext4_buddy e4b; int ret; + trace_ext4_trim_all_free(sb, group, start, max); + ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { ext4_error(sb, "Error in loading buddy " diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6f27a59..51d8813 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1542,6 +1542,55 @@ TRACE_EVENT(ext4_journal_start, __entry->nblocks, (void *)__entry->ip) ); +DECLARE_EVENT_CLASS(ext4__trim, + TP_PROTO(struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), + + TP_ARGS(sb, group, start, len), + + TP_STRUCT__entry( + __field( int, dev_major ) + __field( int, dev_minor ) + __field( __u32, group ) + __field( int, start ) + __field( int, len ) + ), + + TP_fast_assign( + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); + __entry->group = group; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("dev %d,%d group %u, start %d, len %d", + __entry->dev_major, __entry->dev_minor, + __entry->group, __entry->start, __entry->len) +); + +DEFINE_EVENT(ext4__trim, ext4_trim_extent, + + TP_PROTO(struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), + + TP_ARGS(sb, group, start, len) +); + +DEFINE_EVENT(ext4__trim, ext4_trim_all_free, + + TP_PROTO(struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t len), + + 
TP_ARGS(sb, group, start, len) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.1 From 3d56b8d2c74cc3f375ce332b3ac3519e009d79ee Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:03:38 -0400 Subject: ext4: Speed up FITRIM by recording flags in ext4_group_info In ext4, when FITRIM is called every time, we iterate all the groups and do trim one by one. It is a bit time wasting if the group has been trimmed and there is no change since the last trim. So this patch adds a new flag in ext4_group_info->bb_state to indicate that the group has been trimmed, and it will be cleared if some blocks is freed(in release_blocks_on_commit). Another trim_minlen is added in ext4_sb_info to record the last minlen we use to trim the volume, so that if the caller provide a small one, we will go on the trim regardless of the bb_state. A simple test with my intel x25m ssd: df -h shows: /dev/sdb1 40G 21G 17G 56% /mnt/ext4 Block size: 4096 run the FITRIM with the following parameter: range.start = 0; range.len = UINT64_MAX; range.minlen = 1048576; without the patch: [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.505s user 0m0.000s sys 0m1.224s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.359s user 0m0.000s sys 0m1.178s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.228s user 0m0.000s sys 0m1.151s with the patch: [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m5.625s user 0m0.000s sys 0m1.269s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m0.002s user 0m0.000s sys 0m0.001s [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a real 0m0.002s user 0m0.000s sys 0m0.001s A big improvement for the 2nd and 3rd run. Even after I delete some big image files, it is still much faster than iterating the whole disk. [root@boyu-tm test]# time ./ftrim /mnt/ext4/a real 0m1.217s user 0m0.000s sys 0m0.196s Cc: Lukas Czerner Reviewed-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 13 ++++++++++++- fs/ext4/mballoc.c | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d13f3b5..62cee2b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1215,6 +1215,9 @@ struct ext4_sb_info { /* Kernel thread for multiple mount protection */ struct task_struct *s_mmp_tsk; + + /* record the last minlen when FITRIM is called. */ + atomic_t s_last_trim_minblks; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -2080,11 +2083,19 @@ struct ext4_group_info { * 5 free 8-block regions. 
*/ }; -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 +#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_WAS_TRIMMED(grp) \ + (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_SET_TRIMMED(grp) \ + (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) +#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ + (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) + #define EXT4_MAX_CONTENTION 8 #define EXT4_CONTENTION_THRESHOLD 2 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7aa4c16..73c2540 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2628,6 +2628,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) rb_erase(&entry->node, &(db->bb_free_root)); mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); + /* + * Clear the trimmed flag for the group so that the next + * ext4_trim_fs can trim it. + * If the volume is mounted with -o discard, online discard + * is supported and the free blocks will be trimmed online. + */ + if (!test_opt(sb, DISCARD)) + EXT4_MB_GRP_CLEAR_TRIMMED(db); + if (!db->bb_free_root.rb_node) { /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() @@ -4838,6 +4847,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, bitmap = e4b.bd_bitmap; ext4_lock_group(sb, group); + if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && + minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) + goto out; + start = (e4b.bd_info->bb_first_free > start) ? e4b.bd_info->bb_first_free : start; @@ -4869,6 +4882,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((e4b.bd_info->bb_free - free_count) < minblocks) break; } + + if (!ret) + EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); +out: ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); @@ -4957,6 +4974,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } range->len = trimmed * sb->s_blocksize; + if (!ret) + atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); + out: return ret; } -- cgit v1.1 From 22612283f7da1ce9849d9b3716010b07a0446fd9 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 00:04:34 -0400 Subject: ext4: Change the wrong param comment for ext4_trim_all_free at ext4_trim_all_free() comment, there is no longer an @e4b parameter, instead it is @group. Reported-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 73c2540..04a3d92 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4811,7 +4811,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count, /** * ext4_trim_all_free -- function to trim all free space in alloc. group * @sb: super block for file system - * @e4b: ext4 buddy + * @group: group to be trimmed * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count -- cgit v1.1 From ffb505ff0f7b52318dea46dd139107a8371b4ad7 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Mon, 11 Jul 2011 11:43:59 -0400 Subject: ext4: remove redundant goto in ext4_ext_insert_extent() If eh->eh_entries is smaller than eh->eh_max, the routine will go to the "repeat" and then go to "has_space" directlly , since argument "depth" and "eh" are not even changed. 
Therefore, goto "has_space" directly and remove redundant "repeat" tag. Signed-off-by: Robin Dong --- fs/ext4/extents.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c969ae2..9cbdcb2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1723,7 +1723,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, goto merge; } -repeat: depth = ext_depth(inode); eh = path[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) @@ -1745,7 +1744,7 @@ repeat: ext_debug("next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; - goto repeat; + goto has_space; } ext_debug("next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); -- cgit v1.1 From 598dbdf2433cad55bd44d923f67a053871e3eabf Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Mon, 11 Jul 2011 18:24:01 -0400 Subject: ext4: avoid unneeded ext4_ext_next_leaf_block() while inserting extents Optimize ext4_ext_insert_extent() by avoiding ext4_ext_next_leaf_block() when the result is not used/needed. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9cbdcb2..f1c538e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1730,9 +1730,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* probably next leaf has space for us? */ fex = EXT_LAST_EXTENT(eh); - next = ext4_ext_next_leaf_block(inode, path); - if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) - && next != EXT_MAX_BLOCKS) { + next = EXT_MAX_BLOCKS; + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) + next = ext4_ext_next_leaf_block(inode, path); + if (next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %d\n", next); BUG_ON(npath != NULL); npath = ext4_ext_find_extent(inode, next, NULL); -- cgit v1.1 From 823ba01fc07751200c43e45733925a98b73eac3a Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 18:26:01 -0400 Subject: ext4: fix a race which could leak memory in ext4_groupinfo_create_slab() In ext4_groupinfo_create_slab, we create ext4_groupinfo_caches within ext4_grpinfo_slab_create_mutex, but set it outside the lock, and there does exist some case that we may create it twice and causes a memory leak. So set it before we call mutex_unlock. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 04a3d92..2b9a71b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2404,14 +2404,14 @@ static int ext4_groupinfo_create_slab(size_t size) slab_size, 0, SLAB_RECLAIM_ACCOUNT, NULL); + ext4_groupinfo_caches[cache_index] = cachep; + mutex_unlock(&ext4_grpinfo_slab_create_mutex); if (!cachep) { printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); return -ENOMEM; } - ext4_groupinfo_caches[cache_index] = cachep; - return 0; } -- cgit v1.1 From caaf7a29d31da21bb8d8200d5e42d1c93d3c6e00 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 11 Jul 2011 18:42:42 -0400 Subject: ext4: Fix a double free of sbi->s_group_info in ext4_mb_init_backend If we meet with an error in ext4_mb_add_groupinfo, we kfree sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)], but fail to reset it to NULL. So the caller ext4_mb_init_backend will try to kfree it again and causes a double free. So fix it by resetting it to NULL. 
Some typo in comments of mballoc.c are also changed. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2b9a71b..b97a2d2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -75,8 +75,8 @@ * * The inode preallocation space is used looking at the _logical_ start * block. If only the logical file block falls within the range of prealloc - * space we will consume the particular prealloc space. This make sure that - * that the we have contiguous physical blocks representing the file blocks + * space we will consume the particular prealloc space. This makes sure that + * we have contiguous physical blocks representing the file blocks * * The important thing to be noted in case of inode prealloc space is that * we don't modify the values associated to inode prealloc space except @@ -84,7 +84,7 @@ * * If we are not able to find blocks in the inode prealloc space and if we * have the group allocation flag set then we look at the locality group - * prealloc space. These are per CPU prealloc list repreasented as + * prealloc space. These are per CPU prealloc list represented as * * ext4_sb_info.s_locality_groups[smp_processor_id()] * @@ -152,7 +152,7 @@ * best extent in the found extents. Searching for the blocks starts with * the group specified as the goal value in allocation context via * ac_g_ex. Each group is first checked based on the criteria whether it - * can used for allocation. ext4_mb_good_group explains how the groups are + * can be used for allocation. ext4_mb_good_group explains how the groups are * checked. * * Both the prealloc space are getting populated as above. So for the first @@ -2279,8 +2279,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ - if (group % EXT4_DESC_PER_BLOCK(sb) == 0) + if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); + sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + } exit_meta_group_info: return -ENOMEM; } /* ext4_mb_add_groupinfo */ -- cgit v1.1 From afb86178cb9b6a7329cf8709aa210fb0a245b606 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 11 Jul 2011 18:47:04 -0400 Subject: ext4: remove unnecessary comments in ext4_orphan_add() The comment from Al Viro about possible race in the ext4_orphan_add() is not justified. There is no race possible as we always have either i_mutex locked, or the inode can not be referenced from outside hence the J_ASSERS should not be hit from the reason described in comment. This commit replaces it with notion that we are holding i_mutex so it should not be possible for i_nlink to be changed while waiting for s_orphan_lock. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b754b77..8dde5ab 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1989,18 +1989,11 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; - /* Orphan handling is only valid for files with data blocks - * being truncated, or files being unlinked. */ - - /* @@@ FIXME: Observation from aviro: - * I think I can trigger J_ASSERT in ext4_orphan_add(). 
We block - * here (on s_orphan_lock), so race with ext4_link() which might bump - * ->i_nlink. For, say it, character device. Not a regular file, - * not a directory, not a symlink and ->i_nlink > 0. - * - * tytso, 4/25/2009: I'm not sure how that could happen; - * shouldn't the fs core protect us from these sort of - * unlink()/link() races? + /* + * Orphan handling is only valid for files with data blocks + * being truncated, or files being unlinked. Note that we either + * hold i_mutex, or the inode can not be referenced from outside, + * so i_nlink should not be bumped due to race */ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); -- cgit v1.1 From 4de1ba155bbe9b629b9fb03919c5d905b747e62f Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 6 Jun 2011 13:49:00 -0700 Subject: dma: mv_xor: use resource_size() Signed-off-by: H Hartley Sweeten Cc: Dan Williams (supporter:ASYNCHRONOUS TRAN...) Cc: Vinod Koul (supporter:DMA GENERIC OFFLO...) Signed-off-by: Vinod Koul --- drivers/dma/mv_xor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 954e334..9a353c2 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1305,7 +1305,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) return -ENODEV; msp->xor_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start + 1); + resource_size(res)); if (!msp->xor_base) return -EBUSY; @@ -1314,7 +1314,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev) return -ENODEV; msp->xor_high_base = devm_ioremap(&pdev->dev, res->start, - res->end - res->start + 1); + resource_size(res)); if (!msp->xor_high_base) return -EBUSY; -- cgit v1.1 From 70f18915846f092e0e1c988f1726a532fa3ab3a1 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 22 Jun 2011 17:05:33 +0200 Subject: pch_dma: Fix channel locking Fix for the following INFO message ================================= [ INFO: inconsistent lock state ] 2.6.39+ #89 --------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. rs232/822 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&pd_chan->lock)->rlock){?.....}, at: [] pdc_desc_get+0x16/0xab {HARDIRQ-ON-W} state was registered at: [] mark_irqflags+0xbd/0x11a [] __lock_acquire+0x501/0x6bb [] lock_acquire+0x63/0x7b [] _raw_spin_lock_bh+0x43/0x51 [] pd_alloc_chan_resources+0x92/0x11e [] dma_chan_get+0x9b/0x107 [] __dma_request_channel+0x61/0xdc [] pch_request_dma+0x61/0x19e [] pch_uart_startup+0x16a/0x1a2 [] uart_startup+0x87/0x147 [] uart_open+0x117/0x13e [] tty_open+0x23c/0x34c [] chrdev_open+0x140/0x15f [] __dentry_open.clone.14+0x14a/0x22b [] nameidata_to_filp+0x36/0x40 [] do_last+0x513/0x635 [] path_openat+0x9c/0x2aa [] do_filp_open+0x27/0x69 [] do_sys_open+0xfd/0x184 [] sys_open+0x24/0x2a [] sysenter_do_call+0x12/0x32 irq event stamp: 2522 hardirqs last enabled at (2521): [] _raw_spin_unlock_irqrestore+0x36/0x52 hardirqs last disabled at (2522): [] common_interrupt+0x27/0x34 softirqs last enabled at (2354): [] __do_softirq+0x10a/0x11a softirqs last disabled at (2299): [] do_softirq+0x57/0xa4 other info that might help us debug this: 2 locks held by rs232/822: #0: (&tty->atomic_write_lock){+.+.+.}, at: [] tty_write_lock+0x14/0x3c #1: (&port_lock_key){-.....}, at: [] pch_uart_interrupt+0x17/0x1e9 stack backtrace: Pid: 822, comm: rs232 Not tainted 2.6.39+ #89 Call Trace: [] ? printk+0x19/0x1b [] print_usage_bug+0x184/0x18f [] ? 
print_irq_inversion_bug+0x10e/0x10e [] mark_lock_irq+0xa5/0x1f6 [] mark_lock+0x208/0x2d7 [] mark_irqflags+0x55/0x11a [] __lock_acquire+0x501/0x6bb [] ? dump_trace+0x92/0xb6 [] lock_acquire+0x63/0x7b [] ? pdc_desc_get+0x16/0xab [] _raw_spin_lock+0x3e/0x4c [] ? pdc_desc_get+0x16/0xab [] pdc_desc_get+0x16/0xab [] ? __lock_acquire+0x653/0x6bb [] pd_prep_slave_sg+0x7c/0x1cb [] ? nommu_map_sg+0x6e/0x81 [] dma_handle_tx+0x2cf/0x344 [] ? pch_uart_interrupt+0x17/0x1e9 [] pch_uart_interrupt+0x160/0x1e9 [] handle_irq_event_percpu+0x25/0x127 [] handle_irq_event+0x2c/0x43 [] ? handle_fasteoi_irq+0x84/0x84 [] handle_edge_irq+0xac/0xce [] ? do_IRQ+0x38/0x9d [] ? common_interrupt+0x2e/0x34 [] ? __lock_acquire+0x1f6/0x6bb [] ? _raw_spin_unlock_irqrestore+0x38/0x52 [] ? uart_start+0x2d/0x32 [] ? uart_flush_chars+0x8/0xa [] ? n_tty_write+0x12c/0x1c6 [] ? try_to_wake_up+0x251/0x251 [] ? tty_write+0x169/0x1dc [] ? n_tty_ioctl+0xb7/0xb7 [] ? vfs_write+0x91/0x10d [] ? tty_write_lock+0x3c/0x3c [] ? sys_write+0x3e/0x63 [] ? sysenter_do_call+0x12/0x32 Signed-off-by: Alexander Stein Tested-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 65c32f8..d9d95a4 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -521,11 +521,11 @@ static int pd_alloc_chan_resources(struct dma_chan *chan) list_add_tail(&desc->desc_node, &tmp_list); } - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); list_splice(&tmp_list, &pd_chan->free_list); pd_chan->descs_allocated = i; pd_chan->completed_cookie = chan->cookie = 1; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); pdc_enable_irq(chan, 1); @@ -543,10 +543,10 @@ static void pd_free_chan_resources(struct dma_chan *chan) BUG_ON(!list_empty(&pd_chan->active_list)); BUG_ON(!list_empty(&pd_chan->queue)); - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); list_splice_init(&pd_chan->free_list, &tmp_list); pd_chan->descs_allocated = 0; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &tmp_list, desc_node) pci_pool_free(pd->pool, desc, desc->txd.phys); @@ -562,10 +562,10 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t last_completed; int ret; - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); last_completed = pd_chan->completed_cookie; last_used = chan->cookie; - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); ret = dma_async_is_complete(cookie, last_completed, last_used); @@ -680,7 +680,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, if (cmd != DMA_TERMINATE_ALL) return -ENXIO; - spin_lock_bh(&pd_chan->lock); + spin_lock_irq(&pd_chan->lock); pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE); @@ -690,7 +690,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, list_for_each_entry_safe(desc, _d, &list, desc_node) pdc_chain_complete(pd_chan, desc); - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irq(&pd_chan->lock); return 0; } -- cgit v1.1 From a8f3067bce60b96215f3169d2c71e21f784ef507 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Sun, 26 Jun 2011 23:29:52 +0200 Subject: dmaengine/ste_dma40: add a separate queue for pending requests tx_submit will add descriptors to the pending queue. Issue pending will then move the pending descriptors to the transfer queue. 
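For context, a minimal sketch of the dmaengine client sequence this split serves (illustrative only; assumes chan, sgl and sg_len are already prepared): descriptors handed over via tx_submit() sit on the new pending queue until the client calls dma_async_issue_pending().

	/* needs <linux/dmaengine.h>; illustrative client code, not part of the patch */
	static int example_start_tx(struct dma_chan *chan, struct scatterlist *sgl,
				    unsigned int sg_len)
	{
		struct dma_async_tx_descriptor *desc;

		desc = chan->device->device_prep_slave_sg(chan, sgl, sg_len,
							  DMA_TO_DEVICE,
							  DMA_PREP_INTERRUPT);
		if (!desc)
			return -ENOMEM;

		desc->tx_submit(desc);		/* descriptor lands on pending_queue */
		dma_async_issue_pending(chan);	/* pending work moves to the transfer queue */
		return 0;
	}
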
Signed-off-by: Per Forlin Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8f222d4..91d5ed7 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -199,6 +199,7 @@ struct d40_chan { struct dma_chan chan; struct tasklet_struct tasklet; struct list_head client; + struct list_head pending_queue; struct list_head active; struct list_head queue; struct stedma40_chan_cfg dma_cfg; @@ -644,7 +645,20 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) { - list_add_tail(&desc->node, &d40c->queue); + list_add_tail(&desc->node, &d40c->pending_queue); +} + +static struct d40_desc *d40_first_pending(struct d40_chan *d40c) +{ + struct d40_desc *d; + + if (list_empty(&d40c->pending_queue)) + return NULL; + + d = list_first_entry(&d40c->pending_queue, + struct d40_desc, + node); + return d; } static struct d40_desc *d40_first_queued(struct d40_chan *d40c) @@ -801,6 +815,11 @@ static void d40_term_all(struct d40_chan *d40c) d40_desc_free(d40c, d40d); } + /* Release pending descriptors */ + while ((d40d = d40_first_pending(d40c))) { + d40_desc_remove(d40d); + d40_desc_free(d40c, d40d); + } d40c->pending_tx = 0; d40c->busy = false; @@ -2151,7 +2170,9 @@ static void d40_issue_pending(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); - /* Busy means that pending jobs are already being processed */ + list_splice_tail_init(&d40c->pending_queue, &d40c->queue); + + /* Busy means that queued jobs are already being processed */ if (!d40c->busy) (void) d40_queue_start(d40c); @@ -2340,6 +2361,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, INIT_LIST_HEAD(&d40c->active); INIT_LIST_HEAD(&d40c->queue); + INIT_LIST_HEAD(&d40c->pending_queue); INIT_LIST_HEAD(&d40c->client); tasklet_init(&d40c->tasklet, dma_tasklet, -- cgit v1.1 From 78fdaec3416fcbf2c38927cdf8b2de9f402693f1 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Sun, 26 Jun 2011 23:29:53 +0200 Subject: dmaengine: remove ste_dma40 from issue_pending TODO ste_dma40 now implements issue_pending according to documentation. Submit adds descriptos to a pending queue with are flushed down to the DMAC at issue_pending. Signed-off-by: Per Forlin Signed-off-by: Vinod Koul --- drivers/dma/TODO | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/TODO b/drivers/dma/TODO index a4af858..734ed02 100644 --- a/drivers/dma/TODO +++ b/drivers/dma/TODO @@ -9,6 +9,5 @@ TODO for slave dma - mxs-dma.c - dw_dmac - intel_mid_dma - - ste_dma40 4. Check other subsystems for dma drivers and merge/move to dmaengine 5. Remove dma_slave_config's dma direction. -- cgit v1.1 From ae752bf4cb78520e42f96f904e441c50f2114c7b Mon Sep 17 00:00:00 2001 From: om prakash Date: Mon, 27 Jun 2011 11:33:31 +0200 Subject: dmaengine/ste_dma40: fix missing kernel-doc Missing documentation creates kernel-doc warnings, so add the documenation. Signed-off-by: Om Prakash Reviewed-by: Rabin Vincent Reviewed-by: Jonas Aberg Reviewed-by: Srinidhi Kasagar Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 91d5ed7..e1af76c 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -185,6 +185,8 @@ struct d40_base; * @log_def: Default logical channel settings. 
* @lcla: Space for one dst src pair for logical channel transfers. * @lcpa: Pointer to dst and src lcpa settings. + * @runtime_addr: runtime configured address. + * @runtime_direction: runtime configured direction. * * This struct can either "be" a logical or a physical channel. */ -- cgit v1.1 From 79ca7ec3d1046a79c64f95f0cac0f5fd29829f53 Mon Sep 17 00:00:00 2001 From: Robert Marklund Date: Mon, 27 Jun 2011 11:33:24 +0200 Subject: dmaengine/ste_dma40: make the cyclic alloc NOWAIT This function may be initiated from IRQ context, so the allocation must allocate NOWAIT memory. Signed-off-by: Robert Marklund Reviewed-by: Rabin Vincent Reviewed-by: Philippe Langlais Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index e1af76c..35b078d 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2112,7 +2112,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, struct scatterlist *sg; int i; - sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL); + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT); for (i = 0; i < periods; i++) { sg_dma_address(&sg[i]) = dma_addr; sg_dma_len(&sg[i]) = period_len; -- cgit v1.1 From f4b89764c470230bbf9d18c0a3411887c48bb5a2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Jun 2011 11:33:46 +0200 Subject: dmaengine/ste_dma40: use AMBA PrimeCell helper macros The DMA40 is not a PrimeCell from ARM, but it still use the same ID registers. So let's utilize the existing macros in the PrimeCell header to identify manufacturer and revision of the IP block instead of reinventing the wheel. Cc: Robert Marklund Cc: Per Forlin Cc: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 72 +++++++++++++++++----------------------------- drivers/dma/ste_dma40_ll.h | 3 -- 2 files changed, 26 insertions(+), 49 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 35b078d..2797f64 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -44,9 +45,6 @@ #define D40_ALLOC_PHY (1 << 30) #define D40_ALLOC_LOG_FREE 0 -/* Hardware designer of the block */ -#define D40_HW_DESIGNER 0x8 - /** * enum 40_command - The different commands and/or statuses. * @@ -2525,25 +2523,6 @@ static int __init d40_phy_res_init(struct d40_base *base) static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) { - static const struct d40_reg_val dma_id_regs[] = { - /* Peripheral Id */ - { .reg = D40_DREG_PERIPHID0, .val = 0x0040}, - { .reg = D40_DREG_PERIPHID1, .val = 0x0000}, - /* - * D40_DREG_PERIPHID2 Depends on HW revision: - * DB8500ed has 0x0008, - * ? 
has 0x0018, - * DB8500v1 has 0x0028 - * DB8500v2 has 0x0038 - */ - { .reg = D40_DREG_PERIPHID3, .val = 0x0000}, - - /* PCell Id */ - { .reg = D40_DREG_CELLID0, .val = 0x000d}, - { .reg = D40_DREG_CELLID1, .val = 0x00f0}, - { .reg = D40_DREG_CELLID2, .val = 0x0005}, - { .reg = D40_DREG_CELLID3, .val = 0x00b1} - }; struct stedma40_platform_data *plat_data; struct clk *clk = NULL; void __iomem *virtbase = NULL; @@ -2552,8 +2531,9 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) int num_log_chans = 0; int num_phy_chans; int i; - u32 val; - u32 rev; + u32 pid; + u32 cid; + u8 rev; clk = clk_get(&pdev->dev, NULL); @@ -2577,32 +2557,32 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if (!virtbase) goto failure; - /* HW version check */ - for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { - if (dma_id_regs[i].val != - readl(virtbase + dma_id_regs[i].reg)) { - d40_err(&pdev->dev, - "Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", - dma_id_regs[i].val, - dma_id_regs[i].reg, - readl(virtbase + dma_id_regs[i].reg)); - goto failure; - } - } - - /* Get silicon revision and designer */ - val = readl(virtbase + D40_DREG_PERIPHID2); + /* This is just a regular AMBA PrimeCell ID actually */ + for (pid = 0, i = 0; i < 4; i++) + pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i) + & 255) << (i * 8); + for (cid = 0, i = 0; i < 4; i++) + cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i) + & 255) << (i * 8); - if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != - D40_HW_DESIGNER) { + if (cid != AMBA_CID) { + d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n"); + goto failure; + } + if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) { d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", - val & D40_DREG_PERIPHID2_DESIGNER_MASK, - D40_HW_DESIGNER); + AMBA_MANF_BITS(pid), + AMBA_VENDOR_ST); goto failure; } - - rev = (val & D40_DREG_PERIPHID2_REV_MASK) >> - D40_DREG_PERIPHID2_REV_POS; + /* + * HW revision: + * DB8500ed has revision 0 + * ? has revision 1 + * DB8500v1 has revision 2 + * DB8500v2 has revision 3 + */ + rev = AMBA_REV_BITS(pid); /* The number of physical channels on this HW */ num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 195ee65..b44c4551 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -184,9 +184,6 @@ #define D40_DREG_PERIPHID0 0xFE0 #define D40_DREG_PERIPHID1 0xFE4 #define D40_DREG_PERIPHID2 0xFE8 -#define D40_DREG_PERIPHID2_REV_POS 4 -#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS) -#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf #define D40_DREG_PERIPHID3 0xFEC #define D40_DREG_CELLID0 0xFF0 #define D40_DREG_CELLID1 0xFF4 -- cgit v1.1 From 98ca528916c47ad17f78a07b45e49de3940fba77 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 27 Jun 2011 11:33:38 +0200 Subject: dmaengine/ste_dma40: allow memory buswidth/burst to be configured Currently the runtime config implementation forces the memory side parameters to be the same as the peripheral side. Allow these to be different, and check for misconfiguration. 
Signed-off-by: Rabin Vincent Reviewed-by: Ulf HANSSON Tested-by: Stefan Nilsson Reviewed-by: Per Forlin Reviewed-by: Srinidhi Kasagar Cc: Robert Marklund Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 168 +++++++++++++++++++++++++++++------------------- 1 file changed, 102 insertions(+), 66 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 2797f64..75ba586 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2179,17 +2179,78 @@ static void d40_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&d40c->lock, flags); } +static int +dma40_config_to_halfchannel(struct d40_chan *d40c, + struct stedma40_half_channel_info *info, + enum dma_slave_buswidth width, + u32 maxburst) +{ + enum stedma40_periph_data_width addr_width; + int psize; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + addr_width = STEDMA40_BYTE_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + addr_width = STEDMA40_HALFWORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + addr_width = STEDMA40_WORD_WIDTH; + break; + case DMA_SLAVE_BUSWIDTH_8_BYTES: + addr_width = STEDMA40_DOUBLEWORD_WIDTH; + break; + default: + dev_err(d40c->base->dev, + "illegal peripheral address width " + "requested (%d)\n", + width); + return -EINVAL; + } + + if (chan_is_logical(d40c)) { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_LOG_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_LOG_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_LOG_4; + else + psize = STEDMA40_PSIZE_LOG_1; + } else { + if (maxburst >= 16) + psize = STEDMA40_PSIZE_PHY_16; + else if (maxburst >= 8) + psize = STEDMA40_PSIZE_PHY_8; + else if (maxburst >= 4) + psize = STEDMA40_PSIZE_PHY_4; + else + psize = STEDMA40_PSIZE_PHY_1; + } + + info->data_width = addr_width; + info->psize = psize; + info->flow_ctrl = STEDMA40_NO_FLOW_CTRL; + + return 0; +} + /* Runtime reconfiguration extension */ -static void d40_set_runtime_config(struct dma_chan *chan, - struct dma_slave_config *config) +static int d40_set_runtime_config(struct dma_chan *chan, + struct dma_slave_config *config) { struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; - enum dma_slave_buswidth config_addr_width; + enum dma_slave_buswidth src_addr_width, dst_addr_width; dma_addr_t config_addr; - u32 config_maxburst; - enum stedma40_periph_data_width addr_width; - int psize; + u32 src_maxburst, dst_maxburst; + int ret; + + src_addr_width = config->src_addr_width; + src_maxburst = config->src_maxburst; + dst_addr_width = config->dst_addr_width; + dst_maxburst = config->dst_maxburst; if (config->direction == DMA_FROM_DEVICE) { dma_addr_t dev_addr_rx = @@ -2208,8 +2269,11 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_PERIPH_TO_MEM; - config_addr_width = config->src_addr_width; - config_maxburst = config->src_maxburst; + /* Configure the memory side */ + if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + dst_addr_width = src_addr_width; + if (dst_maxburst == 0) + dst_maxburst = src_maxburst; } else if (config->direction == DMA_TO_DEVICE) { dma_addr_t dev_addr_tx = @@ -2228,68 +2292,39 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dir); cfg->dir = STEDMA40_MEM_TO_PERIPH; - config_addr_width = config->dst_addr_width; - config_maxburst = config->dst_maxburst; - + /* Configure the memory side */ + if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + src_addr_width = dst_addr_width; + if 
(src_maxburst == 0) + src_maxburst = dst_maxburst; } else { dev_err(d40c->base->dev, "unrecognized channel direction %d\n", config->direction); - return; + return -EINVAL; } - switch (config_addr_width) { - case DMA_SLAVE_BUSWIDTH_1_BYTE: - addr_width = STEDMA40_BYTE_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - addr_width = STEDMA40_HALFWORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - addr_width = STEDMA40_WORD_WIDTH; - break; - case DMA_SLAVE_BUSWIDTH_8_BYTES: - addr_width = STEDMA40_DOUBLEWORD_WIDTH; - break; - default: + if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) { dev_err(d40c->base->dev, - "illegal peripheral address width " - "requested (%d)\n", - config->src_addr_width); - return; + "src/dst width/maxburst mismatch: %d*%d != %d*%d\n", + src_maxburst, + src_addr_width, + dst_maxburst, + dst_addr_width); + return -EINVAL; } - if (chan_is_logical(d40c)) { - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_LOG_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_LOG_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_LOG_4; - else - psize = STEDMA40_PSIZE_LOG_1; - } else { - if (config_maxburst >= 16) - psize = STEDMA40_PSIZE_PHY_16; - else if (config_maxburst >= 8) - psize = STEDMA40_PSIZE_PHY_8; - else if (config_maxburst >= 4) - psize = STEDMA40_PSIZE_PHY_4; - else if (config_maxburst >= 2) - psize = STEDMA40_PSIZE_PHY_2; - else - psize = STEDMA40_PSIZE_PHY_1; - } + ret = dma40_config_to_halfchannel(d40c, &cfg->src_info, + src_addr_width, + src_maxburst); + if (ret) + return ret; - /* Set up all the endpoint configs */ - cfg->src_info.data_width = addr_width; - cfg->src_info.psize = psize; - cfg->src_info.big_endian = false; - cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; - cfg->dst_info.data_width = addr_width; - cfg->dst_info.psize = psize; - cfg->dst_info.big_endian = false; - cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; + ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info, + dst_addr_width, + dst_maxburst); + if (ret) + return ret; /* Fill in register values */ if (chan_is_logical(d40c)) @@ -2302,12 +2337,14 @@ static void d40_set_runtime_config(struct dma_chan *chan, d40c->runtime_addr = config_addr; d40c->runtime_direction = config->direction; dev_dbg(d40c->base->dev, - "configured channel %s for %s, data width %d, " - "maxburst %d bytes, LE, no flow control\n", + "configured channel %s for %s, data width %d/%d, " + "maxburst %d/%d elements, LE, no flow control\n", dma_chan_name(chan), (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", - config_addr_width, - config_maxburst); + src_addr_width, dst_addr_width, + src_maxburst, dst_maxburst); + + return 0; } static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, @@ -2328,9 +2365,8 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, case DMA_RESUME: return d40_resume(d40c); case DMA_SLAVE_CONFIG: - d40_set_runtime_config(chan, + return d40_set_runtime_config(chan, (struct dma_slave_config *) arg); - return 0; default: break; } -- cgit v1.1 From b89243dd0e6a1c96a4a346cb3e1ba2c637cdfe98 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 1 Jul 2011 16:47:28 +0200 Subject: dmaengine/coh901318: fix slave submission semantics While testing Per Forlins MMC speed improvements I noticed a semantic bug in the COH901318 driver: it will write to channel registers in the prep_slave_sg() function, instead of deferring it to later, breaking the assumption from the drivers to be able to queue up new jobs while another job is running. 
Fix this by storing up the initial register writes in the job descriptors and writing them to hardware when we process the descriptor instead. Now the stress tests work. Acked-by: Per Forlin Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- drivers/dma/coh901318.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index af8c0b5..a7fca16 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -40,6 +40,8 @@ struct coh901318_desc { struct coh901318_lli *lli; enum dma_data_direction dir; unsigned long flags; + u32 head_config; + u32 head_ctrl; }; struct coh901318_base { @@ -660,6 +662,9 @@ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc) coh901318_desc_submit(cohc, cohd); + /* Program the transaction head */ + coh901318_set_conf(cohc, cohd->head_config); + coh901318_set_ctrl(cohc, cohd->head_ctrl); coh901318_prep_linked_list(cohc, cohd->lli); /* start dma job on this channel */ @@ -1090,8 +1095,6 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } else goto err_direction; - coh901318_set_conf(cohc, config); - /* The dma only supports transmitting packages up to * MAX_DMA_PACKET_SIZE. Calculate to total number of * dma elemts required to send the entire sg list @@ -1128,16 +1131,18 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (ret) goto err_lli_fill; - /* - * Set the default ctrl for the channel to the one from the lli, - * things may have changed due to odd buffer alignment etc. - */ - coh901318_set_ctrl(cohc, lli->control); COH_DBG(coh901318_list_print(cohc, lli)); /* Pick a descriptor to handle this transfer */ cohd = coh901318_desc_get(cohc); + cohd->head_config = config; + /* + * Set the default head ctrl for the channel to the one from the + * lli, things may have changed due to odd buffer alignment + * etc. + */ + cohd->head_ctrl = lli->control; cohd->dir = direction; cohd->flags = flags; cohd->desc.tx_submit = coh901318_tx_submit; -- cgit v1.1 From 95bfea1675c02d83cf1923272e62f91db11cbb8f Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 30 Jun 2011 16:06:33 +0800 Subject: dmaengine: mxs-dma: skip request_irq for NO_IRQ In general, mxs-dma users get a separate irq for each channel, but gpmi is a special one which has only one irq shared by all gpmi channels. This causes the mxs_dma channel allocation function to fail for all gpmi channels except the first one that calls into the function. The patch skips the request_irq call for the NO_IRQ case, and leaves this gpmi-specific quirk for the gpmi driver to sort out. It fixes the above problem if the gpmi driver sets chan_irq to the gpmi irq for only one channel and to NO_IRQ for all the rest of the channels. 
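A rough sketch of that arrangement from the gpmi side (names are hypothetical; the real wiring lives in the gpmi/platform code): only one channel carries the shared interrupt, the rest are marked NO_IRQ so that mxs-dma skips request_irq() for them.

	/* GPMI_DMA_CHANNELS, chan_irqs[] and gpmi_irq are all hypothetical */
	int i;

	for (i = 0; i < GPMI_DMA_CHANNELS; i++)
		chan_irqs[i] = (i == 0) ? gpmi_irq : NO_IRQ;
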
Signed-off-by: Shawn Guo Cc: Vinod Koul Signed-off-by: Vinod Koul --- drivers/dma/mxs-dma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 88aad4f..2870d91 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -327,10 +327,12 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) memset(mxs_chan->ccw, 0, PAGE_SIZE); - ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, - 0, "mxs-dma", mxs_dma); - if (ret) - goto err_irq; + if (mxs_chan->chan_irq != NO_IRQ) { + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; + } ret = clk_enable(mxs_dma->clk); if (ret) -- cgit v1.1 From ca7cc5110a313a609da40ae948978a585352564b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:13 +0800 Subject: ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled erst_dbg module can not work when ERST is disabled. So disable module loading to provide clearer information to user. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/erst-dbg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index a4cfb64..cb04aa4 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = { static __init int erst_dbg_init(void) { + if (erst_disable) { + pr_info(ERST_DBG_PFX "ERST support is disabled.\n"); + return -ENODEV; + } return misc_register(&erst_dbg_dev); } -- cgit v1.1 From d37afc50e618271839f001ea653949eefc728167 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Wed, 13 Jul 2011 13:14:14 +0800 Subject: ACPI, APEI, ERST, Fix erst-dbg long record reading issue When we debug ERST table with erst-dbg, if the error record in ERST table is too long(>4K), it can't be read out. So this patch increases the buffer size to 16K to ensure such error records can be read from ERST table. Signed-off-by: Chen Gong Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/erst-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index cb04aa4..903549d 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -33,7 +33,7 @@ #define ERST_DBG_PFX "ERST DBG: " -#define ERST_DBG_RECORD_LEN_MAX 4096 +#define ERST_DBG_RECORD_LEN_MAX 0x4000 static void *erst_dbg_buf; static unsigned int erst_dbg_buf_len; -- cgit v1.1 From 5588340d46a484da53bbce8136184d9c7fbc259c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:15 +0800 Subject: ACPI, APEI, GHES, Do not ratelimit fatal error printk before panic printk is used by GHES to report hardware errors. Normally, the printk will be ratelimited to avoid too many hardware error reports in kernel log. Because there may be thousands or even millions of corrected hardware errors during system running. That is different for fatal hardware error, because system will go panic as soon as possible, there will be no more than several error records. And these error records are valuable for system fault diagnosis, so they should not be ratelimited. 
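The underlying pattern, as a minimal illustrative sketch (hypothetical helper, not the GHES code): keep the ratelimit on the chatty corrected-error path, but call an unlimited helper directly on the fatal path so nothing is dropped right before the panic.

	/* needs <linux/ratelimit.h> and <linux/printk.h> */
	static DEFINE_RATELIMIT_STATE(corrected_rs, 5 * HZ, 2);	/* 2 per 5 seconds */

	static void example_report(bool fatal)
	{
		if (fatal || __ratelimit(&corrected_rs))
			pr_err("hardware error record would be printed here\n");
	}
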
Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f703b28..f339c0f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -360,11 +360,8 @@ static void ghes_do_proc(struct ghes *ghes) } } -static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void __ghes_print_estatus(const char *pfx, struct ghes *ghes) { - /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); - if (pfx == NULL) { if (ghes_severity(ghes->estatus->error_severity) <= GHES_SEV_CORRECTED) @@ -372,12 +369,18 @@ static void ghes_print_estatus(const char *pfx, struct ghes *ghes) else pfx = KERN_ERR HW_ERR; } - if (__ratelimit(&ratelimit)) { - printk( - "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, ghes->generic->header.source_id); - apei_estatus_print(pfx, ghes->estatus); - } + printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", + pfx, ghes->generic->header.source_id); + apei_estatus_print(pfx, ghes->estatus); +} + +static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +{ + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); + + if (__ratelimit(&ratelimit)) + __ghes_print_estatus(pfx, ghes); } static int ghes_proc(struct ghes *ghes) @@ -476,7 +479,7 @@ static int ghes_notify_nmi(struct notifier_block *this, if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); + __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); /* reboot to log the error! */ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; -- cgit v1.1 From eecf2f7124834dd1cad21807526a8ea031ba8217 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:16 +0800 Subject: ACPI, APEI, Add apei_exec_run_optional Some actions in APEI ERST and EINJ tables are optional, for example, ACPI_EINJ_BEGIN_OPERATION action is used to do some preparation for error injection, and firmware may choose to do nothing here. While some other actions are mandatory, for example, firmware must provide ACPI_EINJ_GET_ERROR_TYPE implementation. Original implementation treats all actions as optional (that is, can have no instructions), that may cause issue if firmware does not provide some mandatory actions. To fix this, this patch adds apei_exec_run_optional, which should be used for optional actions. The original apei_exec_run should be used for mandatory actions. Cc: Thomas Renninger Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 9 +++++---- drivers/acpi/apei/apei-internal.h | 13 ++++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 4a904a4..0714194 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop); * Interpret the specified action. Go through whole action table, * execute all instructions belong to the action. */ -int apei_exec_run(struct apei_exec_context *ctx, u8 action) +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, + bool optional) { - int rc; + int rc = -ENOENT; u32 i, ip; struct acpi_whea_header *entry; apei_exec_ins_func_t run; @@ -198,9 +199,9 @@ rewind: goto rewind; } - return 0; + return !optional && rc < 0 ? 
rc : 0; } -EXPORT_SYMBOL_GPL(apei_exec_run); +EXPORT_SYMBOL_GPL(__apei_exec_run); typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, struct acpi_whea_header *entry, diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index ef0581f..f286cf7 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) return ctx->value; } -int apei_exec_run(struct apei_exec_context *ctx, u8 action); +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional); + +static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 0); +} + +/* It is optional whether the firmware provides the action */ +static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 1); +} /* Common instruction implementation */ -- cgit v1.1 From 392913de7cc7446531922f29c0a4382d8d09626c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:17 +0800 Subject: ACPI, APEI, Use apei_exec_run_optional in APEI EINJ and ERST This patch changes APEI EINJ and ERST to use apei_exec_run for mandatory actions, and apei_exec_run_optional for optional actions. Cc: Thomas Renninger Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/einj.c | 4 ++-- drivers/acpi/apei/erst.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index f74b2ea..ab821f1 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -285,7 +285,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) einj_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION); if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); @@ -323,7 +323,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) rc = __einj_error_trigger(trigger_paddr); if (rc) return rc; - rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); return rc; } diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index e6cef8e..c8ba9da 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id) int rc; 
erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR); if (rc) return rc; apei_exec_ctx_set_input(&ctx, record_id); @@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; -- cgit v1.1 From 86cd47334b00b6aa9b5d0ebf389a6fe76f21c641 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:18 +0800 Subject: ACPI, APEI, GHES, Prevent GHES to be built as module GHES (Generic Hardware Error Source) is used to process hardware error notification in firmware first mode. But because firmware first mode can be turned on but can not be turned off, it is unreasonable to unload the GHES module while firmware first mode is turned on. To avoid confusion, this patch makes it so that GHES can be enabled/disabled at configuration time, but can no longer be built as a module and unloaded at run time. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index f739a70..3f45dde 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -10,7 +10,7 @@ config ACPI_APEI error injection. config ACPI_APEI_GHES - tristate "APEI Generic Hardware Error Source" + bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED help -- cgit v1.1 From b6a9501658530d8b8374e37f1edb549039a8a260 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:19 +0800 Subject: ACPI, APEI, GHES, Support disable GHES at boot time Some machines may have broken firmware, so GHES and firmware first mode should be disabled there. This patch adds support for that. 
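As an illustration of the mechanism (a sketch of the general pattern, not the patch itself): module_param_named() on built-in code exposes the flag as a boot parameter prefixed with the module name, so here presumably ghes.disable=1 on the kernel command line.

	/* generic pattern for a boot-time kill switch in built-in code */
	static int example_disable;
	module_param_named(disable, example_disable, bool, 0);

	static int __init example_init(void)
	{
		if (example_disable)
			return -ENODEV;	/* feature explicitly disabled at boot */
		/* ... normal initialization ... */
		return 0;
	}
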
Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 8 ++++++++ drivers/acpi/apei/hest.c | 17 +++++++++-------- include/acpi/apei.h | 1 + 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f339c0f..b142b94 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -77,6 +77,9 @@ struct ghes { }; }; +int ghes_disable; +module_param_named(disable, ghes_disable, bool, 0); + static int ghes_panic_timeout __read_mostly = 30; /* @@ -665,6 +668,11 @@ static int __init ghes_init(void) return -EINVAL; } + if (ghes_disable) { + pr_info(GHES_PFX "GHES is not enabled!\n"); + return -EINVAL; + } + rc = ghes_ioremap_init(); if (rc) goto err; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 181bc2f..05fee06 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -231,16 +231,17 @@ void __init acpi_hest_init(void) goto err; } - rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); - if (rc) - goto err; - - rc = hest_ghes_dev_register(ghes_count); - if (!rc) { - pr_info(HEST_PFX "Table parsing has been initialized.\n"); - return; + if (!ghes_disable) { + rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); + if (rc) + goto err; + rc = hest_ghes_dev_register(ghes_count); + if (rc) + goto err; } + pr_info(HEST_PFX "Table parsing has been initialized.\n"); + return; err: hest_disable = 1; } diff --git a/include/acpi/apei.h b/include/acpi/apei.h index e67b523..d40bc55 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -18,6 +18,7 @@ extern int hest_disable; extern int erst_disable; +extern int ghes_disable; #ifdef CONFIG_ACPI_APEI void __init acpi_hest_init(void); -- cgit v1.1 From eccddd32ced0df8f9130024157bf8d37df860d76 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:20 +0800 Subject: ACPI, APEI, Add APEI bit support in generic _OSC call In APEI firmware first mode, hardware error is reported by hardware to firmware firstly, then firmware reports the error to Linux in a GHES error record via POLL/SCI/IRQ/NMI etc. This may result in some issues if OS has no full APEI support. So some firmware implementation will work in a back-compatible mode by default. Where firmware will only notify OS in old-fashion, without GHES record. For example, for a fatal hardware error, only NMI is signaled, no GHES record. To gain full APEI power on these machines, APEI bit in generic _OSC call can be specified to tell firmware that Linux has full APEI support. This patch adds the APEI bit support in generic _OSC call. 
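For reference, a minimal sketch of the capability buffer side of this negotiation (illustrative; the handle lookup, context setup and error handling are omitted): the support DWORD advertises APEI, and after acpi_run_osc() the returned support DWORD shows whether firmware acknowledged it.

	/* OSC_* constants come from <linux/acpi.h> */
	static u32 example_capbuf[3] = {
		[OSC_QUERY_TYPE]   = OSC_QUERY_ENABLE,
		[OSC_SUPPORT_TYPE] = OSC_SB_APEI_SUPPORT,	/* "OS has full APEI support" */
		[OSC_CONTROL_TYPE] = 0,
	};
	/* after a successful acpi_run_osc(), check OSC_SB_APEI_SUPPORT in the
	 * returned support word (capbuf_ret[OSC_SUPPORT_TYPE]) */
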
Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/bus.c | 16 ++++++++++++++-- include/linux/acpi.h | 2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index d1e06c1..43ce3b0 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -519,6 +520,7 @@ out_kfree: } EXPORT_SYMBOL(acpi_run_osc); +bool osc_sb_apei_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_support(void) { @@ -541,11 +543,21 @@ static void acpi_bus_osc_support(void) #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; #endif + +#ifdef CONFIG_ACPI_APEI_GHES + if (!ghes_disable) + capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT; +#endif if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; - if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) + if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { + u32 *capbuf_ret = context.ret.pointer; + if (context.ret.length > OSC_SUPPORT_TYPE) + osc_sb_apei_support_acked = + capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT; kfree(context.ret.pointer); - /* do we need to check the returned cap? Sounds no */ + } + /* do we need to check other returned cap? Sounds no */ } /* -------------------------------------------------------------------------- diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1deb2a7..e19527d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -280,6 +280,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPUHP_OST_SUPPORT 8 #define OSC_SB_APEI_SUPPORT 16 +extern bool osc_sb_apei_support_acked; + /* PCI defined _OSC bits */ /* _OSC DW1 Definition (OS Support Fields) */ #define OSC_EXT_PCI_CONFIG_SUPPORT 1 -- cgit v1.1 From 9fb0bfe1408d5506b7b83d13d1eed573fd71d67d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:21 +0800 Subject: ACPI, APEI, Add WHEA _OSC support APEI firmware first mode must be turned on explicitly on some machines, otherwise there may be no GHES hardware error record for hardware error notification. APEI bit in generic _OSC call can be used to do that, but on some machine, a special WHEA _OSC call must be used. This patch adds the support to that WHEA _OSC call. 
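The WHEA _OSC call is an ordinary _OSC evaluation against \_SB, just with the WHEA UUID and an otherwise empty capability buffer; a condensed sketch of apei_osc_setup() from the diff below (ghes_init() then reports which of the two mechanisms, the APEI bit and/or the WHEA _OSC, ended up enabling firmware first mode):

    int apei_osc_setup(void)
    {
            static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
            acpi_handle handle;
            u32 capbuf[3];
            struct acpi_osc_context context = {
                    .uuid_str    = whea_uuid_str,
                    .rev         = 1,
                    .cap.length  = sizeof(capbuf),
                    .cap.pointer = capbuf,
            };

            capbuf[OSC_QUERY_TYPE]   = OSC_QUERY_ENABLE;
            capbuf[OSC_SUPPORT_TYPE] = 0;
            capbuf[OSC_CONTROL_TYPE] = 0;

            if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) ||
                ACPI_FAILURE(acpi_run_osc(handle, &context)))
                    return -EIO;

            kfree(context.ret.pointer);
            return 0;
    }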
Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 26 ++++++++++++++++++++++++++ drivers/acpi/apei/apei-internal.h | 2 ++ drivers/acpi/apei/ghes.c | 10 ++++++++++ 3 files changed, 38 insertions(+) diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 0714194..8041248 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -604,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void) return dapei; } EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); + +int apei_osc_setup(void) +{ + static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; + acpi_handle handle; + u32 capbuf[3]; + struct acpi_osc_context context = { + .uuid_str = whea_uuid_str, + .rev = 1, + .cap.length = sizeof(capbuf), + .cap.pointer = capbuf, + }; + + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = 0; + capbuf[OSC_CONTROL_TYPE] = 0; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) + || ACPI_FAILURE(acpi_run_osc(handle, &context))) + return -EIO; + else { + kfree(context.ret.pointer); + return 0; + } +} +EXPORT_SYMBOL_GPL(apei_osc_setup); diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index f286cf7..f57050e 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -124,4 +124,6 @@ void apei_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus); int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); int apei_estatus_check(const struct acpi_hest_generic_status *estatus); + +int apei_osc_setup(void); #endif diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b142b94..b1390a6 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -681,6 +681,16 @@ static int __init ghes_init(void) if (rc) goto err_ioremap_exit; + rc = apei_osc_setup(); + if (rc == 0 && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); + else if (rc == 0 && !osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); + else if (rc && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); + else + pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); + return 0; err_ioremap_exit: ghes_ioremap_exit(); -- cgit v1.1 From c8cefe307d79c57b32ca1d4c9ebff528f8dd914c Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 14 Jun 2011 10:42:53 +0800 Subject: ACPICA: Load operator: re-instate most restrictions on incoming table signature Now, only allow "SSDT" "OEM", and a null signature. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/tbinstal.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 48db094..62365f6 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) } /* - * Originally, we checked the table signature for "SSDT" or "PSDT" here. - * Next, we added support for OEMx tables, signature "OEM". - * Valid tables were encountered with a null signature, so we've just - * given up on validating the signature, since it seems to be a waste - * of code. The original code was removed (05/2008). 
+ * Validate the incoming table signature. + * + * 1) Originally, we checked the table signature for "SSDT" or "PSDT". + * 2) We added support for OEMx tables, signature "OEM". + * 3) Valid tables were encountered with a null signature, so we just + * gave up on validating the signature, (05/2008). + * 4) We encountered non-AML tables such as the MADT, which caused + * interpreter errors and kernel faults. So now, we once again allow + * only "SSDT", "OEMx", and now, also a null signature. (05/2011). */ + if ((table_desc->pointer->signature[0] != 0x00) && + (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT)) + && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) { + ACPI_ERROR((AE_INFO, + "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx", + acpi_ut_valid_acpi_name(*(u32 *)table_desc-> + pointer-> + signature) ? table_desc-> + pointer->signature : "????", + *(u32 *)table_desc->pointer->signature)); + + return_ACPI_STATUS(AE_BAD_SIGNATURE); + } (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); -- cgit v1.1 From f631f9cafbc47bbfe5eb2aa831cd323175298448 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 14 Jun 2011 10:44:51 +0800 Subject: ACPICA: Add missing _TDL to list of predefined names Affects both iASL and core ACPICA. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acpredef.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 94e73c9..c445cca 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] = {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, {{"_TC1", 0, ACPI_RTYPE_INTEGER}}, {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, + {{"_TDL", 0, ACPI_RTYPE_INTEGER}}, {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, -- cgit v1.1 From 80f40ce0f10e87d9a27f1e29325c6f39245fbcb1 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 14 Jun 2011 10:45:57 +0800 Subject: ACPICA: Update to version 20110527 Version 20110527. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2ed0a84..06853f7 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110413 +#define ACPI_CA_VERSION 0x20110527 #include "actypes.h" #include "actbl.h" -- cgit v1.1 From d57b23ad0ca7a46931e4d98eb6b4b73b112f0969 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 4 Jul 2011 08:24:03 +0000 Subject: ACPICA: Add option to disable method return value validation and repair Runtime option can be used to disable return value repair if this is causing a problem on a particular machine. 
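The option is exposed as a new ACPICA global, acpi_gbl_disable_auto_repair, declared in include/acpi/acpixf.h. This patch does not add a command-line hook for it; a hypothetical caller would simply set the global before control methods are evaluated, for example:

    extern u8 acpi_gbl_disable_auto_repair;   /* added to <acpi/acpixf.h> */

    /* illustration only: turn off return value validation and repair */
    acpi_gbl_disable_auto_repair = TRUE;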
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 6 ++++++ drivers/acpi/acpica/nspredef.c | 18 ++++++++++++------ include/acpi/acpixf.h | 1 + 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 73863d86..76dc02f 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE); */ u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); +/* + * Disable runtime checking and repair of values returned by control methods. + * Use only if the repair is causing a problem on a particular machine. + */ +u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE); + /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ struct acpi_table_fadt acpi_gbl_FADT; diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index 9fb03fa..607a9a0 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, } /* - * 1) We have a return value, but if one wasn't expected, just exit, this is - * not a problem. For example, if the "Implicit Return" feature is - * enabled, methods will always return a value. + * Return value validation and possible repair. * - * 2) If the return value can be of any type, then we cannot perform any - * validation, exit. + * 1) Don't perform return value validation/repair if this feature + * has been disabled via a global option. + * + * 2) We have a return value, but if one wasn't expected, just exit, + * this is not a problem. For example, if the "Implicit Return" + * feature is enabled, methods will always return a value. + * + * 3) If the return value can be of any type, then we cannot perform + * any validation, just exit. */ - if ((!predefined->info.expected_btypes) || + if (acpi_gbl_disable_auto_repair || + (!predefined->info.expected_btypes) || (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) { goto cleanup; } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 06853f7..d42d7ce 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags; extern u32 acpi_gbl_enable_aml_debug_object; extern u8 acpi_gbl_copy_dsdt_locally; extern u8 acpi_gbl_truncate_io_addresses; +extern u8 acpi_gbl_disable_auto_repair; extern u32 acpi_current_gpe_count; extern struct acpi_table_fadt acpi_gbl_FADT; -- cgit v1.1 From 8f9c91273e36e5762c617c23e4fd48d5172e0dac Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 4 Jul 2011 08:36:16 +0000 Subject: ACPICA: Do not repair _TSS return package if _PSS is present We can only sort the _TSS return package if there is no _PSS in the same scope. This is because if _PSS is present, the ACPI specification dictates that the _TSS Power Dissipation field is to be ignored, and therefore some BIOSs leave garbage values in the _TSS Power field(s). In this case, it is best to just return the _TSS package as-is. 
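Condensed from the nsrepair2.c change below, the guard is a relative namespace lookup from the node being validated (which is why a "node" field is added to struct acpi_predefined_data):

    struct acpi_namespace_node *node;

    /* If a _PSS exists in the same scope, return the _TSS as-is */
    status = acpi_ns_get_node(data->node, "^_PSS",
                              ACPI_NS_NO_UPSEARCH, &node);
    if (ACPI_SUCCESS(status))
            return (AE_OK);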
Reported-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/aclocal.h | 1 + drivers/acpi/acpica/nspredef.c | 1 + drivers/acpi/acpica/nsrepair2.c | 15 +++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index c7f743c..5552125 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -357,6 +357,7 @@ struct acpi_predefined_data { char *pathname; const union acpi_predefined_info *predefined; union acpi_operand_object *parent_package; + struct acpi_namespace_node *node; u32 flags; u8 node_flags; }; diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index 607a9a0..c845c80 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -218,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, goto cleanup; } data->predefined = predefined; + data->node = node; data->node_flags = node->flags; data->pathname = pathname; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 973883b..024c4f2 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data, { union acpi_operand_object *return_object = *return_object_ptr; acpi_status status; + struct acpi_namespace_node *node; + + /* + * We can only sort the _TSS return package if there is no _PSS in the + * same scope. This is because if _PSS is present, the ACPI specification + * dictates that the _TSS Power Dissipation field is to be ignored, and + * therefore some BIOSs leave garbage values in the _TSS Power field(s). + * In this case, it is best to just return the _TSS package as-is. + * (May, 2011) + */ + status = + acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node); + if (ACPI_SUCCESS(status)) { + return (AE_OK); + } status = acpi_ns_check_sorted_list(data, return_object, 5, 1, ACPI_SORT_DESCENDING, -- cgit v1.1 From 3eb208f0a36cf7a86953afa7a4eb6776294e6768 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 4 Jul 2011 08:38:51 +0000 Subject: ACPICA: Update to version 20110623 Version 20110623. 
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index d42d7ce..f554a93 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110527 +#define ACPI_CA_VERSION 0x20110623 #include "actypes.h" #include "actbl.h" -- cgit v1.1 From 1dd5c715e5b7524da8c1030f5cf1ea903e45c457 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Fri, 1 Jul 2011 16:03:15 +0800 Subject: ACPI / SBS: Add getting state operation in the acpi_sbs_battery_get_property() https://bugzilla.kernel.org/show_bug.cgi?id=24492 Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/sbs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 51ae379..01dad1b 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -130,6 +130,9 @@ struct acpi_sbs { #define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) +static int acpi_sbs_remove(struct acpi_device *device, int type); +static int acpi_battery_get_state(struct acpi_battery *battery); + static inline int battery_scale(int log) { int scale = 1; @@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) return -ENODEV; + + acpi_battery_get_state(battery); switch (psp) { case POWER_SUPPLY_PROP_STATUS: if (battery->rate_now < 0) @@ -903,8 +908,6 @@ static void acpi_sbs_callback(void *context) } } -static int acpi_sbs_remove(struct acpi_device *device, int type); - static int acpi_sbs_add(struct acpi_device *device) { struct acpi_sbs *sbs; -- cgit v1.1 From e4108292cc5b5ca07abc83af31a78338362810ca Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Fri, 1 Jul 2011 16:03:39 +0800 Subject: ACPI / SBS: Correct the value of power_now and power_avg in the sysfs https://bugzilla.kernel.org/show_bug.cgi?id=24492 Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/sbs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 01dad1b..c517ec8 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -230,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_POWER_NOW: val->intval = abs(battery->rate_now) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_POWER_AVG: val->intval = abs(battery->rate_avg) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery->state_of_charge; -- cgit v1.1 From ae6f61870490c10a0b0436e5afffa00c9dacffef Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 30 Jun 2011 11:32:40 +0800 Subject: ACPI / Battery: Add the power unit macro This patch is cosmetic only, and makes no functional change. 
Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index fcc13ac..611434f 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -55,6 +55,9 @@ #define ACPI_BATTERY_NOTIFY_INFO 0x81 #define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 +/* Battery power unit: 0 means mW, 1 means mA */ +#define ACPI_BATTERY_POWER_UNIT_MA 1 + #define _COMPONENT ACPI_BATTERY_COMPONENT ACPI_MODULE_NAME("battery"); @@ -301,7 +304,8 @@ static enum power_supply_property energy_battery_props[] = { #ifdef CONFIG_ACPI_PROCFS_POWER inline char *acpi_battery_units(struct acpi_battery *battery) { - return (battery->power_unit)?"mA":"mW"; + return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ? + "mA" : "mW"; } #endif @@ -544,7 +548,7 @@ static int sysfs_add_battery(struct acpi_battery *battery) { int result; - if (battery->power_unit) { + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { battery->bat.properties = charge_battery_props; battery->bat.num_properties = ARRAY_SIZE(charge_battery_props); @@ -575,7 +579,8 @@ static void sysfs_remove_battery(struct acpi_battery *battery) static void acpi_battery_quirks(struct acpi_battery *battery) { - if (dmi_name_in_vendors("Acer") && battery->power_unit) { + if (dmi_name_in_vendors("Acer") && + battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags); } } -- cgit v1.1 From 55003b2105a4578736f3e868fbaa889bb1ff3ce0 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 30 Jun 2011 11:33:12 +0800 Subject: ACPI / Battery: Change 16-bit signed negative battery current into correct value This patch is for some machines which report the battery current as a 16-bit signed negative when it is charging. This is caused by DSDT bug. The commit bc76f90b8a5cf4aceedf210d08d5e8292f820cec has resolved the problem for Acer laptops. But some other machines also have such problem. https://bugzilla.kernel.org/show_bug.cgi?id=33722 Since it is improper that the current is above 32A on laptops whether on AC or on battery, this patch is to check the current and take its absolute value as current and producing a message when it is negative in s16. Remove Acer quirk, as this workaround handles Acer too. Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 611434f..ff1ff70 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -94,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids); enum { ACPI_BATTERY_ALARM_PRESENT, ACPI_BATTERY_XINFO_PRESENT, - /* For buggy DSDTs that report negative 16-bit values for either - * charging or discharging current and/or report 0 as 65536 - * due to bad math. - */ - ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, }; @@ -465,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery) battery->update_time = jiffies; kfree(buffer.pointer); - if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && - battery->rate_now != -1) + /* For buggy DSDTs that report negative 16-bit values for either + * charging or discharging current and/or report 0 as 65536 + * due to bad math. 
+ */ + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA && + battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN && + (s16)(battery->rate_now) < 0) { battery->rate_now = abs((s16)battery->rate_now); + printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate" + " invalid.\n"); + } if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) && battery->capacity_now >= 0 && battery->capacity_now <= 100) @@ -577,14 +580,6 @@ static void sysfs_remove_battery(struct acpi_battery *battery) battery->bat.dev = NULL; } -static void acpi_battery_quirks(struct acpi_battery *battery) -{ - if (dmi_name_in_vendors("Acer") && - battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { - set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags); - } -} - /* * According to the ACPI spec, some kinds of primary batteries can * report percentage battery remaining capacity directly to OS. @@ -628,7 +623,6 @@ static int acpi_battery_update(struct acpi_battery *battery) result = acpi_battery_get_info(battery); if (result) return result; - acpi_battery_quirks(battery); acpi_battery_init_alarm(battery); } if (!battery->bat.dev) -- cgit v1.1 From 7b78622d0f9df9f274a319eea1494240119d3734 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 30 Jun 2011 11:33:27 +0800 Subject: ACPI / Battery: Rename acpi_battery_quirks2 with acpi_battery_quirks This patch is cosmetic only, and makes no functional change. Since the acpi_battery_quirks has been deleted, rename acpi_battery_quirks2 with acpi_battery_quirks to clean the code. Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ff1ff70..ba7926fc 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -592,7 +592,7 @@ static void sysfs_remove_battery(struct acpi_battery *battery) * * Handle this correctly so that they won't break userspace. */ -static void acpi_battery_quirks2(struct acpi_battery *battery) +static void acpi_battery_quirks(struct acpi_battery *battery) { if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) return ; @@ -628,7 +628,7 @@ static int acpi_battery_update(struct acpi_battery *battery) if (!battery->bat.dev) sysfs_add_battery(battery); result = acpi_battery_get_state(battery); - acpi_battery_quirks2(battery); + acpi_battery_quirks(battery); return result; } -- cgit v1.1 From d5a5911b3278bad6515a9958f7318f74d534ef64 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 30 Jun 2011 11:33:40 +0800 Subject: ACPI / Battery: Add the hibernation process in the battery_notify() The Commit 25be58215 has added a PM notifier to refresh the sys in order to deal with the unit change of the Battery Present Rate. But it just consided the suspend situation. The problem also will happen during the hibernation according the bug 28192. https://bugzilla.kernel.org/show_bug.cgi?id=28192 This patch adds the hibernation process and fix the bug. 
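Condensed from the diff below, the PM notifier now refreshes the battery sysfs files on resume from either transition:

    switch (mode) {
    case PM_POST_HIBERNATION:
    case PM_POST_SUSPEND:
            sysfs_remove_battery(battery);
            sysfs_add_battery(battery);
            break;
    }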
Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ba7926fc..2fe7cfd 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -947,6 +947,7 @@ static int battery_notify(struct notifier_block *nb, struct acpi_battery *battery = container_of(nb, struct acpi_battery, pm_nb); switch (mode) { + case PM_POST_HIBERNATION: case PM_POST_SUSPEND: sysfs_remove_battery(battery); sysfs_add_battery(battery); -- cgit v1.1 From 6e17fb6aa1a67afa1827ae317c3594040f055730 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 30 Jun 2011 11:33:58 +0800 Subject: ACPI / Battery: Add the check before refresh sysfs in the battery_notify() In the commit 25be5821, add the refresh sysfs when system resumes from suspending. But it didn't check that the battery exists. This will cause battery sysfs files added when the battery doesn't exist. This patch add the check before refreshing. https://bugzilla.kernel.org/show_bug.cgi?id=35642 Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 2fe7cfd..4ba339d 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -949,8 +949,10 @@ static int battery_notify(struct notifier_block *nb, switch (mode) { case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - sysfs_remove_battery(battery); - sysfs_add_battery(battery); + if (battery->bat.dev) { + sysfs_remove_battery(battery); + sysfs_add_battery(battery); + } break; } -- cgit v1.1 From 9c921c22a7f33397a6774d7fa076db9b6a0fd669 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 30 Jun 2011 11:34:12 +0800 Subject: ACPI / Battery: Resolve the race condition in the sysfs_remove_battery() Use battery->lock in sysfs_remove_battery() to make checking, removing, and clearing bat.dev atomic. This is necessary because sysfs_remove_battery() may be invoked concurrently from different paths. 
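Condensed from the diff below: both the check and the teardown now run under battery->lock, so of two racing callers one unregisters the power supply and clears bat.dev, while the other sees NULL and returns:

    mutex_lock(&battery->lock);
    if (!battery->bat.dev) {
            mutex_unlock(&battery->lock);
            return;
    }
    device_remove_file(battery->bat.dev, &alarm_attr);
    power_supply_unregister(&battery->bat);
    battery->bat.dev = NULL;
    mutex_unlock(&battery->lock);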
https://bugzilla.kernel.org/show_bug.cgi?id=35642 Signed-off-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 4ba339d..40bf01d 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -573,11 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery) static void sysfs_remove_battery(struct acpi_battery *battery) { - if (!battery->bat.dev) + mutex_lock(&battery->lock); + if (!battery->bat.dev) { + mutex_unlock(&battery->lock); return; + } + device_remove_file(battery->bat.dev, &alarm_attr); power_supply_unregister(&battery->bat); battery->bat.dev = NULL; + mutex_unlock(&battery->lock); } /* -- cgit v1.1 From e545b55a1e980cbb6a158886286106bbf39722b1 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sun, 19 Jun 2011 18:51:37 -0500 Subject: ACPI: fix 80 char overflow Trivial fix for 80 char line overflow in drivers/acpi/pci_root.c Signed-off-by: Jon Mason Signed-off-by: Len Brown --- drivers/acpi/pci_root.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d06078d..2672c79 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) root->secondary.end = 0xFF; printk(KERN_WARNING FW_BUG PREFIX "no secondary bus range in _CRS\n"); - status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); + status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, + NULL, &bus); if (ACPI_SUCCESS(status)) root->secondary.start = bus; else if (status == AE_NOT_FOUND) -- cgit v1.1 From d8926bb3badd36670fecf2de4a062c78bc37430b Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 13 Jul 2011 10:38:47 -0700 Subject: btrfs: don't BUG_ON btrfs_alloc_path() errors This patch fixes many callers of btrfs_alloc_path() which BUG_ON allocation failure. All the sites that are fixed in this patch were checked by me to be fairly trivial to fix because of at least one of two criteria: - Callers of the function catch errors from it already so bubbling the error up will be handled. 
- Callers of the function might BUG_ON any nonzero return code in which case there is no behavior changed (but we still got to remove a BUG_ON) The following functions were updated: btrfs_lookup_extent, alloc_reserved_tree_block, btrfs_remove_block_group, btrfs_lookup_csums_range, btrfs_csum_file_blocks, btrfs_mark_extent_written, btrfs_inode_by_name, btrfs_new_inode, btrfs_symlink, insert_reserved_file_extent, and run_delalloc_nocow Signed-off-by: Mark Fasheh --- fs/btrfs/extent-tree.c | 12 +++++++++--- fs/btrfs/file-item.c | 7 +++++-- fs/btrfs/file.c | 3 ++- fs/btrfs/inode.c | 18 +++++++++++++----- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456..aa91773 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -667,7 +667,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) struct btrfs_path *path; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + key.objectid = start; key.offset = len; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -5494,7 +5496,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, @@ -7162,7 +7165,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cluster->refill_lock); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto out; + } inode = lookup_free_space_inode(root, block_group, path); if (!IS_ERR(inode)) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90d4ee5..f92ff0e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -282,7 +282,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; if (search_commit) { path->skip_locking = 1; @@ -672,7 +673,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, btrfs_super_csum_size(&root->fs_info->super_copy); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + sector_sum = sums->sums; again: next_offset = (u64)-1; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fa4ef18..23d1d81 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -855,7 +855,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; again: recow = 0; split = start; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3601f0a..8be7d7a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1070,7 +1070,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; nolock = is_free_space_inode(root, inode); @@ -1644,7 +1645,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, int ret; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; @@ -3713,7 +3715,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, int ret = 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; di = btrfs_lookup_dir_item(NULL, root, path, 
btrfs_ino(dir), name, namelen, 0); @@ -4438,7 +4441,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int owner; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return ERR_PTR(-ENOMEM); inode = new_inode(root->fs_info->sb); if (!inode) { @@ -7194,7 +7198,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + drop_inode = 1; + goto out_unlock; + } key.objectid = btrfs_ino(inode); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); -- cgit v1.1 From 1e5063d093b5f024ae35bf835ca07463de2c1a1f Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 12 Jul 2011 10:46:06 -0700 Subject: btrfs: Don't BUG_ON alloc_path errors in replay_one_buffer() The two ->process_func call sites in tree-log.c which were ignoring a return code have also been updated to gracefully exit as well. Signed-off-by: Mark Fasheh --- fs/btrfs/tree-log.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f..f3cacc0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { @@ -1723,7 +1724,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return -ENOMEM; if (*level == 1) { - wc->process_func(root, next, wc, ptr_gen); + ret = wc->process_func(root, next, wc, ptr_gen); + if (ret) + return ret; path->slots[*level]++; if (wc->free) { @@ -1788,8 +1791,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level + 1]; root_owner = btrfs_header_owner(parent); - wc->process_func(root, path->nodes[*level], wc, + ret = wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); + if (ret) + return ret; + if (wc->free) { struct extent_buffer *next; -- cgit v1.1 From 0eb0e19cde6f01397ef8c0e094e44beb75c62a1e Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 12 Jul 2011 16:44:10 -0700 Subject: btrfs: Don't BUG_ON alloc_path errors in btrfs_truncate_inode_items I moved the path allocation up a few lines to the top of the function so that we couldn't get into the state where we've dropped delayed items and the extent cache but fail due to -ENOMEM. 
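Condensed from the diff below, the allocation now sits at the top of btrfs_truncate_inode_items(), so an allocation failure returns -ENOMEM before the extent cache or the delayed items have been dropped:

    path = btrfs_alloc_path();
    if (!path)
            return -ENOMEM;
    path->reada = -1;

    if (root->ref_cows || root == root->fs_info->tree_root)
            btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);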
Signed-off-by: Mark Fasheh --- fs/btrfs/inode.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8be7d7a..a0faf7d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3172,6 +3172,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = -1; + if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); @@ -3184,10 +3189,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (min_type == 0 && root == BTRFS_I(inode)->root) btrfs_kill_delayed_inode_items(inode); - path = btrfs_alloc_path(); - BUG_ON(!path); - path->reada = -1; - key.objectid = ino; key.offset = (u64)-1; key.type = (u8)-1; -- cgit v1.1 From 1748f843a0190ef4332d03a64263f383af72682b Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 12 Jul 2011 11:25:31 -0700 Subject: btrfs: Don't BUG_ON alloc_path errors in btrfs_read_locked_inode btrfs_iget() also needed an update so that errors from btrfs_locked_inode() are caught and bubbled back up. Signed-off-by: Mark Fasheh --- fs/btrfs/inode.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0faf7d..8882999 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2518,7 +2518,9 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + goto make_bad; + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -3973,6 +3975,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *new) { struct inode *inode; + int bad_inode = 0; inode = btrfs_iget_locked(s, location->objectid, root); if (!inode) @@ -3982,10 +3985,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); - inode_tree_add(inode); - unlock_new_inode(inode); - if (new) - *new = 1; + if (!is_bad_inode(inode)) { + inode_tree_add(inode); + unlock_new_inode(inode); + if (new) + *new = 1; + } else { + bad_inode = 1; + } + } + + if (bad_inode) { + iput(inode); + inode = ERR_PTR(-ESTALE); } return inode; -- cgit v1.1 From 17e9f796bd92cddec17d781c459376f82340fa44 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 12 Jul 2011 11:10:23 -0700 Subject: btrfs: Don't BUG_ON alloc_path errors in btrfs_balance() Dealing with this seems trivial - the only caller of btrfs_balance() is btrfs_ioctl() which passes the error code directly back to userspace. There also isn't much state to unwind (if I'm wrong about this point, we can always safely move the allocation to the top of btrfs_balance() anyway). 
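Condensed from the diff below, the failure simply takes the existing error path, and the -ENOMEM propagates back through btrfs_ioctl() to userspace:

    path = btrfs_alloc_path();
    if (!path) {
            ret = -ENOMEM;
            goto error;
    }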
Signed-off-by: Mark Fasheh --- fs/btrfs/volumes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19450bc..530a2fc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2061,8 +2061,10 @@ int btrfs_balance(struct btrfs_root *dev_root) /* step two, relocate all the chunks */ path = btrfs_alloc_path(); - BUG_ON(!path); - + if (!path) { + ret = -ENOMEM; + goto error; + } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; -- cgit v1.1 From e4c0d0e22ce5cf84b3993b5af6c2dac08d52f06b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 15 Jul 2011 23:39:00 -0400 Subject: tools/power x86_energy_perf_policy: fix print of uninitialized string Looks like I was going to stick the brand string in the verbose ouput, but didn't get around to it. Signed-off-by: Len Brown --- tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 2618ef2..33c5c7e 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -137,7 +137,6 @@ void cmdline(int argc, char **argv) void validate_cpuid(void) { unsigned int eax, ebx, ecx, edx, max_level; - char brand[16]; unsigned int fms, family, model, stepping; eax = ebx = ecx = edx = 0; @@ -160,8 +159,8 @@ void validate_cpuid(void) model += ((fms >> 16) & 0xf) << 4; if (verbose > 1) - printf("CPUID %s %d levels family:model:stepping " - "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, + printf("CPUID %d levels family:model:stepping " + "0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); if (!(edx & (1 << 5))) { -- cgit v1.1 From d7f6169a0d32002657886fee561c641acddb9a75 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Fri, 15 Jul 2011 14:52:30 +0200 Subject: ACPI: fix CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS The following was observed by Steve Rostedt on 3.0.0-rc5 Backtrace: irq 16: nobody cared (try booting with the "irqpoll" option) Pid: 65, comm: irq/16-uhci_hcd Not tainted 3.0.0-rc5-test+ #94 Call Trace: [] __report_bad_irq+0x37/0xc1 [] note_interrupt+0x14e/0x1c9 [] ? irq_thread_fn+0x3c/0x3c [] irq_thread+0xf6/0x1b1 [] ? irq_finalize_oneshot+0xb3/0xb3 [] kthread+0x9f/0xa7 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x7b/0xc0 [] ? retint_restore_args+0x13/0x13 [] ? __init_kthread_worker+0x5a/0x5a [] ? gs_change+0x13/0x13 handlers: [] irq_default_primary_handler threaded [] usb_hcd_irq [] irq_default_primary_handler threaded [] usb_hcd_irq Disabling IRQ #16 The problem being that a device triggers boot interrupts (due to threaded interrupt handling and masking of the IO-APIC), which are forwarded to the PIRQ line of the device. These interrupts are not handled on the PIRQ line because the interrupt handler is not present there. This should have already been fixed by CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS. However some parts of the quirk got lost in the ACPI merge. This is a resent of the patch proposed in 2009. 
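Concretely, the new acpi_reroute_boot_interrupt() below maps the affected _PRT entry onto the legacy IRQ range 16-19:

    /* Remap per the 6700PXH INTx routing table */
    entry->index = (entry->index % 4) + 16;   /* e.g. index 6 -> IRQ 18 */

so the device's interrupt handler ends up registered on the legacy line that actually receives the boot interrupts, instead of leaving them unhandled.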
See http://lkml.org/lkml/2009/9/7/192 Signed-off-by: Stefan Assmann Tested-by: Steven Rostedt Signed-off-by: Len Brown --- drivers/acpi/pci_irq.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index f907cfb..7f9eba9 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus) /* -------------------------------------------------------------------------- PCI Interrupt Routing Support -------------------------------------------------------------------------- */ +#ifdef CONFIG_X86_IO_APIC +extern int noioapicquirk; +extern int noioapicreroute; + +static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) +{ + struct pci_bus *bus_it; + + for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { + if (!bus_it->self) + return 0; + if (bus_it->self->irq_reroute_variant) + return bus_it->self->irq_reroute_variant; + } + return 0; +} + +/* + * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ + * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does + * during interrupt handling). When this INTx generation cannot be disabled, + * we reroute these interrupts to their legacy equivalent to get rid of + * spurious interrupts. + */ +static int acpi_reroute_boot_interrupt(struct pci_dev *dev, + struct acpi_prt_entry *entry) +{ + if (noioapicquirk || noioapicreroute) { + return 0; + } else { + switch (bridge_has_boot_interrupt_variant(dev->bus)) { + case 0: + /* no rerouting necessary */ + return 0; + case INTEL_IRQ_REROUTE_VARIANT: + /* + * Remap according to INTx routing table in 6700PXH + * specs, intel order number 302628-002, section + * 2.15.2. Other chipsets (80332, ...) have the same + * mapping and are handled here as well. + */ + dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy " + "IRQ %d\n", entry->index, + (entry->index % 4) + 16); + entry->index = (entry->index % 4) + 16; + return 1; + default: + dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy " + "IRQ: unknown mapping\n", entry->index); + return -1; + } + } +} +#endif /* CONFIG_X86_IO_APIC */ + static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry; @@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) entry = acpi_pci_irq_find_prt_entry(dev, pin); if (entry) { +#ifdef CONFIG_X86_IO_APIC + acpi_reroute_boot_interrupt(dev, entry); +#endif /* CONFIG_X86_IO_APIC */ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", pci_name(dev), pin_name(pin))); return entry; -- cgit v1.1 From 9c8b04be443b33939f374a811c82abeebe0a61d1 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sat, 25 Jun 2011 21:07:52 +0400 Subject: ACPI: constify ops structs Structs battery_file, acpi_dock_ops, file_operations, thermal_cooling_device_ops, thermal_zone_device_ops, kernel_param_ops are not changed in runtime. It is safe to make them const. register_hotplug_dock_device() was altered to take const "ops" argument to respect acpi_dock_ops' const notion. 
Signed-off-by: Vasiliy Kulikov Acked-by: Jeff Garzik Signed-off-by: Len Brown --- drivers/acpi/battery.c | 2 +- drivers/acpi/dock.c | 4 ++-- drivers/acpi/ec_sys.c | 2 +- drivers/acpi/fan.c | 2 +- drivers/acpi/processor_thermal.c | 2 +- drivers/acpi/sysfs.c | 4 ++-- drivers/acpi/thermal.c | 2 +- drivers/acpi/video.c | 2 +- drivers/ata/libata-acpi.c | 4 ++-- drivers/pci/hotplug/acpiphp_glue.c | 2 +- include/acpi/acpi_drivers.h | 2 +- include/acpi/processor.h | 2 +- 12 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index fcc13ac..746b475 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -863,7 +863,7 @@ DECLARE_FILE_FUNCTIONS(alarm); }, \ } -static struct battery_file { +static const struct battery_file { struct file_operations ops; mode_t mode; const char *name; diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 1864ad3..19a6113 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -77,7 +77,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - struct acpi_dock_ops *ops; + const struct acpi_dock_ops *ops; void *context; }; @@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. */ int -register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, +register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c index 05b4420..22f918b 100644 --- a/drivers/acpi/ec_sys.c +++ b/drivers/acpi/ec_sys.c @@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf, return count; } -static struct file_operations acpi_ec_io_ops = { +static const struct file_operations acpi_ec_io_ops = { .owner = THIS_MODULE, .open = acpi_ec_open_io, .read = acpi_ec_read_io, diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 467479f..0f0356c 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) return result; } -static struct thermal_cooling_device_ops fan_cooling_ops = { +static const struct thermal_cooling_device_ops fan_cooling_ops = { .get_max_state = fan_get_max_state, .get_cur_state = fan_get_cur_state, .set_cur_state = fan_set_cur_state, diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 79cb653..870550d 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, return result; } -struct thermal_cooling_device_ops processor_cooling_ops = { +const struct thermal_cooling_device_ops processor_cooling_ops = { .get_max_state = processor_get_max_state, .get_cur_state = processor_get_cur_state, .set_cur_state = processor_set_cur_state, diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 77255f2..c538d0e 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp) return result; } -static struct kernel_param_ops param_ops_debug_layer = { +static const struct kernel_param_ops param_ops_debug_layer = { .set = param_set_uint, .get = param_get_debug_layer, }; -static struct kernel_param_ops param_ops_debug_level = { +static const struct kernel_param_ops param_ops_debug_level = { .set = param_set_uint, 
.get = param_get_debug_level, }; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 2607e17..48fbc64 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal, thermal_zone_unbind_cooling_device); } -static struct thermal_zone_device_ops acpi_thermal_zone_ops = { +static const struct thermal_zone_device_ops acpi_thermal_zone_ops = { .bind = acpi_thermal_bind_cooling_device, .unbind = acpi_thermal_unbind_cooling_device, .get_temp = thermal_get_temp, diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index db39e9e..c6f9ef8 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -308,7 +308,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st return acpi_video_device_lcd_set_level(video, level); } -static struct thermal_cooling_device_ops video_cooling_ops = { +static const struct thermal_cooling_device_ops video_cooling_ops = { .get_max_state = video_get_max_state, .get_cur_state = video_get_cur_state, .set_cur_state = video_set_cur_state, diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index a791b8ce..993d406 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) ata_acpi_uevent(dev->link->ap, dev, event); } -static struct acpi_dock_ops ata_acpi_dev_dock_ops = { +static const struct acpi_dock_ops ata_acpi_dev_dock_ops = { .handler = ata_acpi_dev_notify_dock, .uevent = ata_acpi_dev_uevent, }; -static struct acpi_dock_ops ata_acpi_ap_dock_ops = { +static const struct acpi_dock_ops ata_acpi_ap_dock_ops = { .handler = ata_acpi_ap_notify_dock, .uevent = ata_acpi_ap_uevent, }; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index a70fa89..2202857 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, } -static struct acpi_dock_ops acpiphp_dock_ops = { +static const struct acpi_dock_ops acpiphp_dock_ops = { .handler = handle_hotplug_event_func, }; diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 3090471..e49c36d 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle); extern int register_dock_notifier(struct notifier_block *nb); extern void unregister_dock_notifier(struct notifier_block *nb); extern int register_hotplug_dock_device(acpi_handle handle, - struct acpi_dock_ops *ops, + const struct acpi_dock_ops *ops, void *context); extern void unregister_hotplug_dock_device(acpi_handle handle); #else diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ba4928c..67055f1 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver; /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); -extern struct thermal_cooling_device_ops processor_cooling_ops; +extern const struct thermal_cooling_device_ops processor_cooling_ops; #ifdef CONFIG_CPU_FREQ void acpi_thermal_cpufreq_init(void); void acpi_thermal_cpufreq_exit(void); -- cgit v1.1 From 4996c02306a25def1d352ec8e8f48895bbc7dea9 Mon Sep 17 00:00:00 2001 From: Takao Indoh Date: Thu, 14 Jul 2011 18:05:21 -0400 Subject: ACPI: introduce "acpi_rsdp=" parameter for kdump There 
is a problem with putting the first kernel in EFI virtual mode, it is that when the second kernel comes up it tries to initialize the EFI again and once we have put EFI in virtual mode we can not really do that. Actually, EFI is not necessary for kdump, we can boot the second kernel with "noefi" parameter, but the boot will mostly fail because 2nd kernel cannot find RSDP. In this situation, we introduced "acpi_rsdp=" kernel parameter, so that kexec-tools can pass the "noefi acpi_rsdp=X" to the second kernel to make kdump works. The physical address of the RSDP can be got from sysfs(/sys/firmware/efi/systab). Signed-off-by: Takao Indoh Reviewed-by: WANG Cong Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 +++++ drivers/acpi/osl.c | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be7..37713bc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/power/pm.txt, pci=noacpi + acpi_rsdp= [ACPI,EFI,KEXEC] + Pass the RSDP address to the kernel, mostly used + on machines running EFI runtime service to boot the + second kernel for kdump. + acpi_apic_instance= [ACPI, IOAPIC] Format: 2: use 2nd APIC table, if available diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 52ca964..27cd140 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args) #endif } +#ifdef CONFIG_KEXEC +static unsigned long acpi_rsdp; +static int __init setup_acpi_rsdp(char *arg) +{ + acpi_rsdp = simple_strtoul(arg, NULL, 16); + return 0; +} +early_param("acpi_rsdp", setup_acpi_rsdp); +#endif + acpi_physical_address __init acpi_os_get_root_pointer(void) { +#ifdef CONFIG_KEXEC + if (acpi_rsdp) + return acpi_rsdp; +#endif + if (efi_enabled) { if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) return efi.acpi20; -- cgit v1.1 From e80bba4b5108c6479379740201b0a5d9da5ffbac Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 12 Jul 2011 09:03:28 +0100 Subject: ACPI / Battery: avoid acpi_battery_add() use-after-free When acpi_battery_add_fs() fails the error handling code does not clean up completely. Moreover, it does not return resulting in a use-after-free. Signed-off-by: Stefan Hajnoczi Signed-off-by: Len Brown --- drivers/acpi/battery.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 40bf01d..c771768 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -986,21 +986,27 @@ static int acpi_battery_add(struct acpi_device *device) #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif - if (!result) { - printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", - ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), - device->status.battery_present ? "present" : "absent"); - } else { + if (result) { #ifdef CONFIG_ACPI_PROCFS_POWER acpi_battery_remove_fs(device); #endif - kfree(battery); + goto fail; } + printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", + ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), + device->status.battery_present ? 
"present" : "absent"); + battery->pm_nb.notifier_call = battery_notify; register_pm_notifier(&battery->pm_nb); return result; + +fail: + sysfs_remove_battery(battery); + mutex_destroy(&battery->lock); + kfree(battery); + return result; } static int acpi_battery_remove(struct acpi_device *device, int type) -- cgit v1.1 From eb03cb02b74df6dd0b653d5f6d976f16a434dfaf Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 12 Jul 2011 09:03:29 +0100 Subject: ACPI / Battery: propagate sysfs error in acpi_battery_add() Make sure the error return from sysfs_add_battery() is checked and propagated out from acpi_battery_add(). Signed-off-by: Stefan Hajnoczi Signed-off-by: Len Brown --- drivers/acpi/battery.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index c771768..ffce2f0 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -630,8 +630,11 @@ static int acpi_battery_update(struct acpi_battery *battery) return result; acpi_battery_init_alarm(battery); } - if (!battery->bat.dev) - sysfs_add_battery(battery); + if (!battery->bat.dev) { + result = sysfs_add_battery(battery); + if (result) + return result; + } result = acpi_battery_get_state(battery); acpi_battery_quirks(battery); return result; @@ -982,7 +985,9 @@ static int acpi_battery_add(struct acpi_device *device) if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, "_BIX", &handle))) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); - acpi_battery_update(battery); + result = acpi_battery_update(battery); + if (result) + goto fail; #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif -- cgit v1.1 From 265c6a0f9290c8f470b839257dc6af3c46b24da1 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Sat, 16 Jul 2011 19:41:23 -0400 Subject: ext4: fix compilation with -DDX_DEBUG Compilation of ext4/namei.c brought up an error and warning messages when compiled with -DDX_DEBUG Signed-off-by: Bernd Schubert Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8dde5ab..aaf3131 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -289,7 +289,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent while (len--) printk("%c", *name++); ext4fs_dirhash(de->name, de->name_len, &h); printk(":%x.%u ", h.hash, - ((char *) de - base)); + (unsigned) ((char *) de - base)); } space += EXT4_DIR_REC_LEN(de->name_len); names++; @@ -1013,7 +1013,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q *err = -ENOENT; errout: - dxtrace(printk(KERN_DEBUG "%s not found\n", name)); + dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); dx_release (frames); return NULL; } -- cgit v1.1 From d7a1fee135771e6e5185642bdc17df19bbdbcc48 Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Sun, 17 Jul 2011 21:11:30 -0400 Subject: ext4: make the preallocation size be a multiple of stripe size Previously, if a stripe width was provided, then it would be used as the preallocation granularity, with no santiy checking and no way to override this. Now, mb_prealloc_size defaults to the smallest multiple of stripe size that is greater than or equal to the old default mb_prealloc_size, and this can be overridden with the sysfs interface. 
Signed-off-by: Dan Ehrenberg Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b97a2d2..037f680 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -128,12 +128,13 @@ * we are doing a group prealloc we try to normalize the request to * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is * 512 blocks. This can be tuned via - * /sys/fs/ext4//mb_group_prealloc. The value is represented in * terms of number of blocks. If we have mounted the file system with -O * stripe= option the group prealloc request is normalized to the - * stripe value (sbi->s_stripe) + * the smallest multiple of the stripe value (sbi->s_stripe) which is + * greater than the default mb_group_prealloc. * - * The regular allocator(using the buddy cache) supports few tunables. + * The regular allocator (using the buddy cache) supports a few tunables. * * /sys/fs/ext4//mb_min_to_scan * /sys/fs/ext4//mb_max_to_scan @@ -2474,6 +2475,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; + /* + * If there is a s_stripe > 1, then we set the s_mb_group_prealloc + * to the lowest multiple of s_stripe which is bigger than + * the s_mb_group_prealloc as determined above. We want + * the preallocation size to be an exact multiple of the + * RAID stripe size so that preallocations don't fragment + * the stripes. + */ + if (sbi->s_stripe > 1) { + sbi->s_mb_group_prealloc = roundup( + sbi->s_mb_group_prealloc, sbi->s_stripe); + } sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { @@ -2841,8 +2854,9 @@ out_err: /* * here we normalize request for locality group - * Group request are normalized to s_strip size if we set the same via mount - * option. If not we set it to s_mb_group_prealloc which can be configured via + * Group request are normalized to s_mb_group_prealloc, which goes to + * s_strip if we set the same via mount option. + * s_mb_group_prealloc can be configured via * /sys/fs/ext4//mb_group_prealloc * * XXX: should we try to preallocate more than the group has now? @@ -2853,10 +2867,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) struct ext4_locality_group *lg = ac->ac_lg; BUG_ON(lg == NULL); - if (EXT4_SB(sb)->s_stripe) - ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; - else - ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; + ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; mb_debug(1, "#%u: goal %u blocks for locality group\n", current->pid, ac->ac_g_ex.fe_len); } -- cgit v1.1 From 3eb08658431abd65c0fe6855d1860859c2d416f7 Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Sun, 17 Jul 2011 21:18:51 -0400 Subject: ext4: ignore a stripe width of 1 If the stripe width was set to 1, then this patch will ignore that stripe width and ext4 will act as if the stripe width were 0 with respect to optimizing allocations. 
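The selection logic can be restated as a small standalone function. This is a userspace rendering of the patched decision chain with abbreviated parameter names; the values passed in main() are made up for illustration.

#include <stdio.h>

/*
 * Same ordering as ext4_get_stripe_size() after the patch: the mount option
 * wins, then the superblock stripe width, then the stride, and a result of
 * 1 (or less) is treated as "no striping".
 */
static unsigned long pick_stripe(unsigned long s_stripe,
                                 unsigned long stripe_width,
                                 unsigned long stride,
                                 unsigned long blocks_per_group)
{
        unsigned long ret;

        if (s_stripe && s_stripe <= blocks_per_group)
                ret = s_stripe;
        else if (stripe_width <= blocks_per_group)
                ret = stripe_width;
        else if (stride <= blocks_per_group)
                ret = stride;
        else
                ret = 0;

        return ret <= 1 ? 0 : ret;      /* a width of 1 carries no alignment information */
}

int main(void)
{
        printf("%lu\n", pick_stripe(0, 1, 0, 32768));   /* 0: width 1 is ignored */
        printf("%lu\n", pick_stripe(0, 128, 0, 32768)); /* 128 */
        return 0;
}

A width of 1 carries no alignment information, so it is folded into the "no striping" case, as the comment added in super.c explains.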
Signed-off-by: Dan Ehrenberg Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7910e61..143d763 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2384,17 +2384,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width); + int ret; if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) - return sbi->s_stripe; - - if (stripe_width <= sbi->s_blocks_per_group) - return stripe_width; + ret = sbi->s_stripe; + else if (stripe_width <= sbi->s_blocks_per_group) + ret = stripe_width; + else if (stride <= sbi->s_blocks_per_group) + ret = stride; + else + ret = 0; - if (stride <= sbi->s_blocks_per_group) - return stride; + /* + * If the stripe width is 1, this makes no sense and + * we set it to 0 to turn off stripe handling code. + */ + if (ret <= 1) + ret = 0; - return 0; + return ret; } /* sysfs supprt */ -- cgit v1.1 From f7d0d3797fac6cad24ad9f86dd9baf65c586b434 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sun, 17 Jul 2011 23:17:02 -0400 Subject: ext4: punch hole optimizations: skip un-needed extent lookup This patch optimizes the punch hole operation by skipping the tree walking code that is used by truncate. Since punch hole is done through map blocks, the path to the extent is already known in this function, so we do not need to look it up again. Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1c538e..06b30b6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3461,8 +3461,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_ext_mark_uninitialized(ex); - err = ext4_ext_remove_space(inode, map->m_lblk, - map->m_lblk + punched_out); + ext4_ext_invalidate_cache(inode); + + err = ext4_ext_rm_leaf(handle, inode, path, + map->m_lblk, map->m_lblk + punched_out); + + if (!err && path->p_hdr->eh_entries == 0) { + /* + * Punch hole freed all of this sub tree, + * so we need to correct eh_depth + */ + err = ext4_ext_get_access(handle, inode, path); + if (err == 0) { + ext_inode_hdr(inode)->eh_depth = 0; + ext_inode_hdr(inode)->eh_max = + cpu_to_le16(ext4_ext_space_root( + inode, 0)); + + err = ext4_ext_dirty( + handle, inode, path); + } + } goto out2; } -- cgit v1.1 From c6a0371cbefade85376bbc326d18451860632dce Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Sun, 17 Jul 2011 23:21:03 -0400 Subject: ext4: remove unneeded parameter to ext4_ext_remove_space() This patch removes the extra parameter in ext4_ext_remove_space() which is no longer needed. 
Signed-off-by: Allison Henderson Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 06b30b6..3d8c5f5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2500,8 +2500,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, - ext4_lblk_t end) +static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); @@ -2541,7 +2540,7 @@ again: if (i == depth) { /* this is leaf block */ err = ext4_ext_rm_leaf(handle, inode, path, - start, end); + start, EXT_MAX_BLOCKS - 1); /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; @@ -3683,7 +3682,7 @@ void ext4_ext_truncate(struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); - err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + err = ext4_ext_remove_space(inode, last_block); /* In a multi-transaction truncate, we only make the final * transaction synchronous. -- cgit v1.1 From 015861badd0db43d025bbb538f8fc62dfaf3f18d Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sun, 17 Jul 2011 23:27:43 -0400 Subject: ext4: avoid wasted extent cache lookup if !PUNCH_OUT_EXT This patch avoids an extraneous lookup of the extent cache in ext4_ext_map_blocks() when the flag EXT4_GET_BLOCKS_PUNCH_OUT_EXT is absent. The existing logic was performing the lookup but not making use of the result. The patch simply reverses the order of evaluation in the condition. Since ext4_ext_in_cache() does not initialize newex on misses, bypassing its invocation does not introduce any new issue in this regard. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" Reviewed-by: Lukas Czerner Reviewed-by: Eric Gouriou --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3d8c5f5..b8acfab 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3320,8 +3320,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ - if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && - ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { + if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && + ext4_ext_in_cache(inode, map->m_lblk, &newex)) { if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* -- cgit v1.1 From d46203159ed376fdbe2b05aa57e58207bf27a8f9 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sun, 17 Jul 2011 23:43:42 -0400 Subject: ext4: avoid eh_entries overflow before insert extent_idx If eh_entries is equal to (or greater than) eh_max, the operation of inserting new extent_idx will make number of entries overflow. So check eh_entries before inserting the new extent_idx. Although there is no bug case according the code (function ext4_ext_insert_index is called by ext4_ext_split and ext4_ext_split is called only if the index block has free space), the right logic should be "lookup the capacity before insertion". 
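The "lookup the capacity before insertion" rule is easiest to see on a toy fixed-size table; the structure below is invented for the example and is not the ext4 extent index.

#include <stdio.h>

/* toy fixed-capacity index -- not the ext4 structures */
struct toy_index {
        int entries;
        int max;
        int slot[4];
};

static int toy_insert(struct toy_index *idx, int val)
{
        if (idx->entries >= idx->max)   /* reject while the array is still intact */
                return -1;
        idx->slot[idx->entries++] = val;
        return 0;
}

int main(void)
{
        struct toy_index idx = { .entries = 0, .max = 4 };
        int i;

        for (i = 0; i < 6; i++)
                if (toy_insert(&idx, i) < 0)
                        printf("insert %d rejected: index full\n", i);
        return 0;
}

Checking only after the store, as the old code did, flags the overflow after the entry count has already been pushed past the limit.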
Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b8acfab..9bec432 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -741,6 +741,16 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, logical, le32_to_cpu(curp->p_idx->ei_block)); return -EIO; } + + if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) + >= le16_to_cpu(curp->p_hdr->eh_max))) { + EXT4_ERROR_INODE(inode, + "eh_entries %d >= eh_max %d!", + le16_to_cpu(curp->p_hdr->eh_entries), + le16_to_cpu(curp->p_hdr->eh_max)); + return -EIO; + } + len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ @@ -770,14 +780,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ext4_idx_store_pblock(ix, ptr); le16_add_cpu(&curp->p_hdr->eh_entries, 1); - if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max))) { - EXT4_ERROR_INODE(inode, - "eh_entries %d > eh_max %d!", - le16_to_cpu(curp->p_hdr->eh_entries), - le16_to_cpu(curp->p_hdr->eh_max)); - return -EIO; - } if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); return -EIO; -- cgit v1.1 From d9ba5fe76d604514444b1ea0a19f38c6196a46e3 Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Fri, 8 Jul 2011 17:27:00 -0400 Subject: [PARISC] Fix futex support Implements futex op support and makes futex cmpxchg atomic. Tested on 64-bit SMP kernel running on 2 x PA8700s. [jejb: checkpatch fixes] Signed-off-by: Carlos O'Donell Tested-by: John David Anglin Cc: stable@kernel.org Signed-off-by: James Bottomley --- arch/parisc/include/asm/futex.h | 66 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 67a33cc..2388bdb 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -5,11 +5,14 @@ #include #include +#include #include static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { + unsigned long int flags; + u32 val; int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; @@ -18,21 +21,58 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr))) return -EFAULT; pagefault_disable(); + _atomic_spin_lock_irqsave(uaddr, flags); + switch (op) { case FUTEX_OP_SET: + /* *(int *)UADDR2 = OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) + ret = put_user(oparg, uaddr); + break; case FUTEX_OP_ADD: + /* *(int *)UADDR2 += OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval + oparg; + ret = put_user(val, uaddr); + } + break; case FUTEX_OP_OR: + /* *(int *)UADDR2 |= OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval | oparg; + ret = put_user(val, uaddr); + } + break; case FUTEX_OP_ANDN: + /* *(int *)UADDR2 &= ~OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval & ~oparg; + ret = put_user(val, uaddr); + } + break; case FUTEX_OP_XOR: + /* *(int *)UADDR2 ^= OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval ^ oparg; + ret = put_user(val, uaddr); + } + break; default: ret = -ENOSYS; } + _atomic_spin_unlock_irqrestore(uaddr, flags); + pagefault_enable(); if (!ret) { @@ -54,7 +94,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + int ret; u32 val; + unsigned long flags; /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is * our gateway page, and causes no end of trouble... @@ -65,12 +107,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - if (get_user(val, uaddr)) - return -EFAULT; - if (val == oldval && put_user(newval, uaddr)) - return -EFAULT; + /* HPPA has no cmpxchg in hardware and therefore the + * best we can do here is use an array of locks. The + * lock selected is based on a hash of the userspace + * address. This should scale to a couple of CPUs. + */ + + _atomic_spin_lock_irqsave(uaddr, flags); + + ret = get_user(val, uaddr); + + if (!ret && val == oldval) + ret = put_user(newval, uaddr); + *uval = val; - return 0; + + _atomic_spin_unlock_irqrestore(uaddr, flags); + + return ret; } #endif /*__KERNEL__*/ -- cgit v1.1 From 7676e345824f162191b1fe2058ad948a6cf91c20 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 19 May 2011 21:32:57 +0900 Subject: IPVS: Free resources on module removal This resolves a panic on module removal. Reported-by: Dave Jones Acked-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 699c79a..a178cb3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3771,6 +3771,7 @@ err_sock: void ip_vs_control_cleanup(void) { EnterFunction(2); + unregister_netdevice_notifier(&ip_vs_dst_notifier); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); -- cgit v1.1 From a5071c2fd91f320bc40952df59517053a7346fa9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 19 Jul 2011 15:38:56 -0700 Subject: drm/i915: provide more error output when mode sets fail If a mode set fails we may get a message from drm_crtc_helper if we're lucky, but it won't tell us anything about *why* we failed to set a mode. So add a few DRM_ERRORs for the cases that shouldn't happen so we can debug things more easily. 
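The change is a logging-pattern change rather than a functional one: complain at the exact failure site before propagating the error code, so the caller's generic failure message is no longer the only clue. A minimal stand-alone illustration -- log_err() stands in for DRM_ERROR(), and both functions here are invented for the example:

#include <errno.h>
#include <stdio.h>

/* log at the failure site, then propagate */
#define log_err(...) fprintf(stderr, "error: " __VA_ARGS__)

static int pin_framebuffer(int have_fb)
{
        if (!have_fb) {
                log_err("no framebuffer bound\n");
                return -EINVAL;
        }
        return 0;
}

static int set_mode(int have_fb)
{
        int ret = pin_framebuffer(have_fb);

        if (ret) {
                log_err("mode set failed: %d\n", ret);  /* the caller adds its own context */
                return ret;
        }
        return 0;
}

int main(void)
{
        return set_mode(0) ? 1 : 0;
}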
Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b5b15bd..261ffe4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2096,7 +2096,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, /* no fb bound */ if (!crtc->fb) { - DRM_DEBUG_KMS("No FB bound\n"); + DRM_ERROR("No FB bound\n"); return 0; } @@ -2105,6 +2105,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, case 1: break; default: + DRM_ERROR("no plane for crtc\n"); return -EINVAL; } @@ -2114,6 +2115,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, NULL); if (ret != 0) { mutex_unlock(&dev->struct_mutex); + DRM_ERROR("pin & fence failed\n"); return ret; } @@ -2142,6 +2144,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, if (ret) { i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); mutex_unlock(&dev->struct_mutex); + DRM_ERROR("failed to update base address\n"); return ret; } -- cgit v1.1 From f0b69efc29b024747a88ce020dada425e3193d5a Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 19 Jul 2011 16:21:40 -0700 Subject: drm/i915: Skip GPU wait for scanout pin while wedged Failing to pin a scanout buffer will most likely lead to a black screen, so if the GPU is wedged, then just let the pin happen and hope that things work out OK. v2: Just ignore any error from i915_gem_object_wait_rendering, as suggested by Chris Wilson Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e9d1d5c..e46f273 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3113,7 +3113,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (pipelined != obj->ring) { ret = i915_gem_object_wait_rendering(obj); - if (ret) + if (ret == -ERESTARTSYS) return ret; } -- cgit v1.1 From af8b244f733383656c8b4c0c6e94e210e7bbc596 Mon Sep 17 00:00:00 2001 From: Ambresh K Date: Sat, 9 Jul 2011 19:02:21 -0700 Subject: regulator: TWL: Remove entry of RES_ID for 6030 macros RES_ID is only used in 4030, to send PBM singular message to control the state of dedicated resources. In 6030, we don't have concept of PBM, hence removing the definition of RES_ID (num) from macros. 
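The macro change itself is mechanical; a toy version of the same refactor (struct, macro and regulator names are invented) shows that dropping the argument only removes information the 6030-style entries never consumed:

#include <stdio.h>

struct ldo_info { const char *name; int base; int id; };

/* old form: every user had to supply an id this path never reads */
#define ADJ_LDO_OLD(label, offset, num) { .name = #label, .base = offset, .id = num }
/* new form: the unused argument is gone; .id is left zeroed */
#define ADJ_LDO_NEW(label, offset)      { .name = #label, .base = offset }

static const struct ldo_info regs[] = {
        ADJ_LDO_OLD(VAUX1, 0x54, 1),
        ADJ_LDO_NEW(VAUX2, 0x58),
};

int main(void)
{
        unsigned int i;

        for (i = 0; i < sizeof(regs) / sizeof(regs[0]); i++)
                printf("%s base=0x%02x id=%d\n", regs[i].name, regs[i].base, regs[i].id);
        return 0;
}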
Signed-off-by: Ambresh K Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/twl-regulator.c | 64 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 503c2bc..ee8747f 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -835,8 +835,8 @@ static struct regulator_ops twlsmps_ops = { remap_conf) \ TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ remap_conf, TWL4030, twl4030fixed_ops) -#define TWL6030_FIXED_LDO(label, offset, mVolts, num, turnon_delay) \ - TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ +#define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \ + TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \ 0x0, TWL6030, twl6030fixed_ops) #define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \ @@ -856,9 +856,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ +#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ .base = offset, \ - .id = num, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ .desc = { \ @@ -871,9 +870,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ +#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ .base = offset, \ - .id = num, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ .desc = { \ @@ -903,9 +901,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_FIXED_RESOURCE(label, offset, num, turnon_delay) { \ +#define TWL6030_FIXED_RESOURCE(label, offset, turnon_delay) { \ .base = offset, \ - .id = num, \ .delay = turnon_delay, \ .desc = { \ .name = #label, \ @@ -916,9 +913,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6025_ADJUSTABLE_SMPS(label, offset, num) { \ +#define TWL6025_ADJUSTABLE_SMPS(label, offset) { \ .base = offset, \ - .id = num, \ .min_mV = 600, \ .max_mV = 2100, \ .desc = { \ @@ -961,32 +957,32 @@ static struct twlreg_info twl_regs[] = { /* 6030 REG with base as PMC Slave Misc : 0x0030 */ /* Turnon-delay and remap configuration values for 6030 are not verified since the specification is not public */ - TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300, 1), - TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300, 2), - TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300, 3), - TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300, 4), - TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300, 5), - TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300, 7), - TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0), - TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0), - TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0), - TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0), - TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 48, 0), + TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300), + TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300), + TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0), + TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0), + TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0), + TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0), + TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0), /* 6025 are renamed compared to 6030 versions */ - TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 
1000, 3300, 1), - TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300, 2), - TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300, 3), - TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300, 4), - TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300, 5), - TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300, 7), - TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300, 16), - TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300, 17), - TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300, 18), - - TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34, 1), - TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10, 2), - TWL6025_ADJUSTABLE_SMPS(VIO, 0x16, 3), + TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300), + TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300), + + TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34), + TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10), + TWL6025_ADJUSTABLE_SMPS(VIO, 0x16), }; static u8 twl_get_smps_offset(void) -- cgit v1.1 From a3ee13ee77feea001597415f3a231a8bd4d3c6bf Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 10 Jul 2011 18:52:07 +0800 Subject: regulator: tps65910: Fix a memory leak in tps65910_probe error path Fix a memory leak if chip id is not matched. Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 55dd4e6..b07a664 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -903,6 +903,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev) info = tps65911_regs; default: pr_err("Invalid tps chip version\n"); + kfree(pmic); return -ENODEV; } -- cgit v1.1 From d04156bca629740a661fd0738cd69ba1f08b2b20 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 10 Jul 2011 21:44:09 +0800 Subject: regulator: tps65910: Add missing breaks in switch/case Also add a default case in tps65910_list_voltage_dcdc to silence 'volt' may be used uninitialized in this function warning. 
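The bug class is ordinary switch fall-through. A self-contained toy lookup -- register ids and voltage steps here are invented, not the TPS65910 values -- shows why both the missing break and the missing default matter:

#include <stdio.h>

/* toy selector-to-voltage lookup */
static int list_voltage(int reg, int selector)
{
        int volt;

        switch (reg) {
        case 0:                         /* adjustable rail */
                volt = 600 + selector * 25;
                break;                  /* without this, control falls into case 1 */
        case 1:                         /* fixed-step rail */
                volt = 1000 + selector * 50;
                break;
        default:
                return -1;              /* unknown regulator: 'volt' is never read uninitialized */
        }
        return volt;
}

int main(void)
{
        printf("%d %d %d\n", list_voltage(0, 4), list_voltage(1, 4), list_voltage(9, 0));
        return 0;
}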
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index b07a664..8e0edab7 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -759,8 +759,13 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev, mult = (selector / VDD1_2_NUM_VOLTS) + 1; volt = VDD1_2_MIN_VOLT + (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; + break; case TPS65911_REG_VDDCTRL: volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET); + break; + default: + BUG(); + return -EINVAL; } return volt * 100 * mult; @@ -898,9 +903,11 @@ static __devinit int tps65910_probe(struct platform_device *pdev) case TPS65910: pmic->get_ctrl_reg = &tps65910_get_ctrl_register; info = tps65910_regs; + break; case TPS65911: pmic->get_ctrl_reg = &tps65911_get_ctrl_register; info = tps65911_regs; + break; default: pr_err("Invalid tps chip version\n"); kfree(pmic); -- cgit v1.1 From 39aa9b6e3cb1b2a564d3422eedb7f725179162d3 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Jul 2011 09:57:43 +0800 Subject: regulator: tps65910: Fix array access out of bounds bug For tps65910, the number of regulator is 13. ( ARRAY_SIZE(tps65910_regs) is 13) For tps65911, the number of regulator is 12. ( ARRAY_SIZE(tps65911_regs) is 12) If we are using this driver for tps65911, we hit array access out of bounds bug in tps65910_probe() because current implementation always assume the number of regulator is 13 and thus it will access tps65911_regs[12]. Fix it by setting correct num_regulators for both chips in tps65910_probe(), and allocated neccessay memory accordingly. 
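The shape of the fix, compressed into userspace C: pick the table for the detected chip, take its length with ARRAY_SIZE(), and size every per-regulator allocation from that count. The two tables below are abbreviated stand-ins, not the real 13- and 12-entry lists:

#include <stdio.h>
#include <stdlib.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct reg_info { const char *name; };

/* abbreviated stand-ins for the two per-chip tables */
static const struct reg_info tps65910_regs[] = { { "VRTC" }, { "VIO" }, { "VDD1" } };
static const struct reg_info tps65911_regs[] = { { "VRTC" }, { "VIO" } };

int main(void)
{
        int is_tps65911 = 1;    /* pretend the probe matched the smaller chip */
        const struct reg_info *info = is_tps65911 ? tps65911_regs : tps65910_regs;
        size_t n = is_tps65911 ? ARRAY_SIZE(tps65911_regs) : ARRAY_SIZE(tps65910_regs);
        size_t i;

        /* allocate exactly n slots instead of assuming the larger chip's count */
        const struct reg_info **rdev = calloc(n, sizeof(*rdev));
        if (!rdev)
                return 1;
        for (i = 0; i < n; i++)
                rdev[i] = &info[i];
        printf("registered %zu regulators\n", n);
        free(rdev);
        return 0;
}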
Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/tps65910-regulator.c | 55 ++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 8e0edab7..66d2d60 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -49,7 +49,6 @@ #define TPS65911_REG_LDO7 11 #define TPS65911_REG_LDO8 12 -#define TPS65910_NUM_REGULATOR 13 #define TPS65910_SUPPLY_STATE_ENABLED 0x1 /* supported VIO voltages in milivolts */ @@ -264,11 +263,12 @@ static struct tps_info tps65911_regs[] = { }; struct tps65910_reg { - struct regulator_desc desc[TPS65910_NUM_REGULATOR]; + struct regulator_desc *desc; struct tps65910 *mfd; - struct regulator_dev *rdev[TPS65910_NUM_REGULATOR]; - struct tps_info *info[TPS65910_NUM_REGULATOR]; + struct regulator_dev **rdev; + struct tps_info **info; struct mutex mutex; + int num_regulators; int mode; int (*get_ctrl_reg)(int); }; @@ -902,10 +902,12 @@ static __devinit int tps65910_probe(struct platform_device *pdev) switch(tps65910_chip_id(tps65910)) { case TPS65910: pmic->get_ctrl_reg = &tps65910_get_ctrl_register; + pmic->num_regulators = ARRAY_SIZE(tps65910_regs); info = tps65910_regs; break; case TPS65911: pmic->get_ctrl_reg = &tps65911_get_ctrl_register; + pmic->num_regulators = ARRAY_SIZE(tps65911_regs); info = tps65911_regs; break; default: @@ -914,7 +916,28 @@ static __devinit int tps65910_probe(struct platform_device *pdev) return -ENODEV; } - for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) { + pmic->desc = kcalloc(pmic->num_regulators, + sizeof(struct regulator_desc), GFP_KERNEL); + if (!pmic->desc) { + err = -ENOMEM; + goto err_free_pmic; + } + + pmic->info = kcalloc(pmic->num_regulators, + sizeof(struct tps_info *), GFP_KERNEL); + if (!pmic->info) { + err = -ENOMEM; + goto err_free_desc; + } + + pmic->rdev = kcalloc(pmic->num_regulators, + sizeof(struct regulator_dev *), GFP_KERNEL); + if (!pmic->rdev) { + err = -ENOMEM; + goto err_free_info; + } + + for (i = 0; i < pmic->num_regulators; i++, info++, reg_data++) { /* Register the regulators */ pmic->info[i] = info; @@ -946,7 +969,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev) "failed to register %s regulator\n", pdev->name); err = PTR_ERR(rdev); - goto err; + goto err_unregister_regulator; } /* Save regulator for cleanup */ @@ -954,23 +977,31 @@ static __devinit int tps65910_probe(struct platform_device *pdev) } return 0; -err: +err_unregister_regulator: while (--i >= 0) regulator_unregister(pmic->rdev[i]); - + kfree(pmic->rdev); +err_free_info: + kfree(pmic->info); +err_free_desc: + kfree(pmic->desc); +err_free_pmic: kfree(pmic); return err; } static int __devexit tps65910_remove(struct platform_device *pdev) { - struct tps65910_reg *tps65910_reg = platform_get_drvdata(pdev); + struct tps65910_reg *pmic = platform_get_drvdata(pdev); int i; - for (i = 0; i < TPS65910_NUM_REGULATOR; i++) - regulator_unregister(tps65910_reg->rdev[i]); + for (i = 0; i < pmic->num_regulators; i++) + regulator_unregister(pmic->rdev[i]); - kfree(tps65910_reg); + kfree(pmic->rdev); + kfree(pmic->info); + kfree(pmic->desc); + kfree(pmic); return 0; } -- cgit v1.1 From 89f425ed5bf3d4fd97e840296dccd75b8e0fe4c9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 12 Jul 2011 11:20:37 +0900 Subject: regulator: Make core more chatty about some errors Prevent some head scratching by making the core log about 
some rare but possible errors with invalid voltage ranges and modes being set. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f59821f..3700d09 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -151,8 +151,11 @@ static int regulator_check_voltage(struct regulator_dev *rdev, if (*min_uV < rdev->constraints->min_uV) *min_uV = rdev->constraints->min_uV; - if (*min_uV > *max_uV) + if (*min_uV > *max_uV) { + rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", + min_uV, max_uV); return -EINVAL; + } return 0; } @@ -205,8 +208,11 @@ static int regulator_check_current_limit(struct regulator_dev *rdev, if (*min_uA < rdev->constraints->min_uA) *min_uA = rdev->constraints->min_uA; - if (*min_uA > *max_uA) + if (*min_uA > *max_uA) { + rdev_err(rdev, "unsupportable current range: %d-%duA\n", + min_uA, max_uA); return -EINVAL; + } return 0; } @@ -221,6 +227,7 @@ static int regulator_mode_constrain(struct regulator_dev *rdev, int *mode) case REGULATOR_MODE_STANDBY: break; default: + rdev_err(rdev, "invalid mode %x specified\n", *mode); return -EINVAL; } -- cgit v1.1 From 1a6958e79f9e191c89fe0c13f7452b0bd8097050 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 15 Jul 2011 10:50:43 +0800 Subject: regulator: Fix memory leak in set_machine_constraints() error paths Properly kfree rdev->constraints in all set_machine_constraints() error paths. Also properly kfree rdev->constraints in regulator_register() error paths. Signed-off-by: Axel Lin Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 3700d09..a019544 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -794,7 +794,6 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, if (ret < 0) { rdev_err(rdev, "failed to apply %duV constraint\n", rdev->constraints->min_uV); - rdev->constraints = NULL; return ret; } } @@ -897,7 +896,6 @@ static int set_machine_constraints(struct regulator_dev *rdev, ret = suspend_prepare(rdev, rdev->constraints->initial_state); if (ret < 0) { rdev_err(rdev, "failed to set suspend state\n"); - rdev->constraints = NULL; goto out; } } @@ -924,13 +922,15 @@ static int set_machine_constraints(struct regulator_dev *rdev, ret = ops->enable(rdev); if (ret < 0) { rdev_err(rdev, "failed to enable\n"); - rdev->constraints = NULL; goto out; } } print_constraints(rdev); + return 0; out: + kfree(rdev->constraints); + rdev->constraints = NULL; return ret; } @@ -2701,6 +2701,7 @@ unset_supplies: unset_regulator_supplies(rdev); scrub: + kfree(rdev->constraints); device_unregister(&rdev->dev); /* device core frees rdev */ rdev = ERR_PTR(ret); -- cgit v1.1 From 54abd335fda86d305845f9e62b4bc0997386eb66 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 21 Jul 2011 15:07:37 +0100 Subject: regulator: Fix argument format type errors in error prints We need to dereference the pointers to print their values. 
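The underlying C mistake is passing int pointers where a %d conversion expects values. A short illustration of the corrected form; report_range() is invented for the example:

#include <stdio.h>

static void report_range(const int *min_uV, const int *max_uV)
{
        /*
         * Passing min_uV/max_uV themselves would hand pointers to a %d
         * conversion; the values must be dereferenced first.
         */
        printf("unsupportable voltage range: %d-%duV\n", *min_uV, *max_uV);
}

int main(void)
{
        int lo = 1800000, hi = 1200000;

        report_range(&lo, &hi);
        return 0;
}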
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a019544..d8e6a42 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -153,7 +153,7 @@ static int regulator_check_voltage(struct regulator_dev *rdev, if (*min_uV > *max_uV) { rdev_err(rdev, "unsupportable voltage range: %d-%duV\n", - min_uV, max_uV); + *min_uV, *max_uV); return -EINVAL; } @@ -210,7 +210,7 @@ static int regulator_check_current_limit(struct regulator_dev *rdev, if (*min_uA > *max_uA) { rdev_err(rdev, "unsupportable current range: %d-%duA\n", - min_uA, max_uA); + *min_uA, *max_uA); return -EINVAL; } -- cgit v1.1 From 842d452985300f4ec14c68cb86046e8a1a3b7251 Mon Sep 17 00:00:00 2001 From: Ole Henrik Jahren Date: Fri, 22 Jul 2011 15:56:01 +0200 Subject: drm/i915: Fix typo in DRM_I915_OVERLAY_PUT_IMAGE ioctl define Because of a typo, calling ioctl with DRM_IOCTL_I915_OVERLAY_PUT_IMAGE is broken if the macro is used directly. When using libdrm the bug is not hit, since libdrm handles the ioctl encoding internally. The typo also leads to the .cmd and .cmd_drv fields of the drm_ioctl structure for DRM_I915_OVERLAY_PUT_IMAGE having inconsistent content. Signed-off-by: Ole Henrik Jahren Acked-by: Daniel Vetter Cc: stable@kernel.org Signed-off-by: Keith Packard --- include/drm/i915_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index c4d6dbf..28c0d11 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -237,7 +237,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id) #define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) -#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image) +#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image) #define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) /* Allow drivers to submit batchbuffers directly to hardware, relying -- cgit v1.1 From f3234706a77bd6e1592ae71fb3268e04cb030dba Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 22 Jul 2011 10:44:39 -0700 Subject: drm/i915: Initialize RCS ring status page address in intel_render_ring_init_dri Physically-addressed hardware status pages are initialized early in the driver load process by i915_init_phys_hws. For UMS environments, the ring structure is not initialized until the X server starts. At that point, the entire ring structure is re-initialized with all new values. Any values set in the ring structure (including ring->status_page.page_addr) will be lost when the ring is re-initialized. This patch moves the initialization of the status_page.page_addr value to intel_render_ring_init_dri. 
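The failure mode is an initialization-order problem: anything stored in the ring structure before the structure is wholesale re-initialized is lost. A toy sketch -- the struct, ring_init() and page_buf are invented -- of why the assignment has to live inside the (re)initialization path:

#include <stdio.h>
#include <string.h>

struct ring { void *status_page; int head, tail; };

static char page_buf[64];               /* stand-in for the DMA'd status page */
static void *dma_page = page_buf;

/* re-initialization wipes the whole structure... */
static void ring_init(struct ring *r)
{
        memset(r, 0, sizeof(*r));
        r->status_page = dma_page;      /* ...so the page address must be (re)assigned here */
}

int main(void)
{
        struct ring r;

        r.status_page = dma_page;       /* early assignment, as the pre-fix code did */
        ring_init(&r);                  /* re-init would have left this NULL if ring_init() did not set it */
        printf("status page %s\n", r.status_page ? "preserved" : "lost");
        return 0;
}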
Signed-off-by: Keith Packard Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_dma.c | 6 ++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 296fbd6..7eef6e1 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -61,7 +61,6 @@ static void i915_write_hws_pga(struct drm_device *dev) static int i915_init_phys_hws(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring = LP_RING(dev_priv); /* Program Hardware Status Page */ dev_priv->status_page_dmah = @@ -71,10 +70,9 @@ static int i915_init_phys_hws(struct drm_device *dev) DRM_ERROR("Can not allocate hardware status page\n"); return -ENOMEM; } - ring->status_page.page_addr = - (void __force __iomem *)dev_priv->status_page_dmah->vaddr; - memset_io(ring->status_page.page_addr, 0, PAGE_SIZE); + memset_io((void __force __iomem *)dev_priv->status_page_dmah->vaddr, + 0, PAGE_SIZE); i915_write_hws_pga(dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 95c4b14..1f61fc7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1319,6 +1319,9 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) ring->get_seqno = pc_render_get_seqno; } + if (!I915_NEED_GFX_HWS(dev)) + ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr; + ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); -- cgit v1.1 From 9c54c0dd948d715ccfd79e97d852f80eeb53254a Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 15 Jun 2011 23:32:33 +0200 Subject: drm/i915: load the LUT before pipe enable on ILK+ Per the specs and to address https://bugs.freedesktop.org/show_bug.cgi?id=36888. Signed-off-by: Jesse Barnes Cc: stable@kernel.org Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0f1c799..5609c06 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2699,14 +2699,18 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size); } + /* + * On ILK+ LUT must be loaded before the pipe is running but with + * clocks enabled + */ + intel_crtc_load_lut(crtc); + intel_enable_pipe(dev_priv, pipe, is_pch_port); intel_enable_plane(dev_priv, plane, pipe); if (is_pch_port) ironlake_pch_enable(crtc); - intel_crtc_load_lut(crtc); - mutex_lock(&dev->struct_mutex); intel_update_fbc(dev); mutex_unlock(&dev->struct_mutex); -- cgit v1.1 From 3baef940f289f08e4aada1fd481ab9ee3f070144 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 22 Jul 2011 14:04:56 -0700 Subject: ioat: Adding PCI IDs for IOAT devices on SandyBridge platforms Adding to pci_id.h and the device table for ioat. 
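Functionally the change just extends a vendor/device match table. A minimal userspace analogue, sentinel-terminated like the kernel's pci_device_id tables, using two of the new SandyBridge device IDs from the patch (0x8086 is the Intel vendor ID; the lookup helper is invented):

#include <stdio.h>

struct pci_id { unsigned short vendor, device; };

#define VENDOR_INTEL 0x8086

/* abbreviated stand-in for the driver's match table */
static const struct pci_id ioat_tbl[] = {
        { VENDOR_INTEL, 0x3c20 },       /* IOAT_SNB0 */
        { VENDOR_INTEL, 0x3c21 },       /* IOAT_SNB1 */
        { 0, 0 }                        /* sentinel */
};

static int table_matches(unsigned short vendor, unsigned short device)
{
        const struct pci_id *id;

        for (id = ioat_tbl; id->vendor; id++)
                if (id->vendor == vendor && id->device == device)
                        return 1;
        return 0;
}

int main(void)
{
        printf("0x3c21 claimed: %d\n", table_matches(VENDOR_INTEL, 0x3c21));
        printf("0x9999 claimed: %d\n", table_matches(VENDOR_INTEL, 0x9999));
        return 0;
}

Without the new rows the lookup falls off the sentinel and the device is never claimed by the driver.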
Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 11 +++++++++++ include/linux/pci_ids.h | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index fab37d1..5e3a40f 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -72,6 +72,17 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f8910e1..a8571b3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2699,6 +2699,16 @@ #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f +#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e +#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -- cgit v1.1 From 9b487ced8a903f1028fcfaef1e6406acc91fe0fa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 7 Jun 2011 15:26:33 -0700 Subject: Avoid section type conflict in dma/ioat/dma_v3.c const __read_mostly is not legal and causes section type conflicts. That's because the read.mostly section is not read only. Simply drop the __read_mostly designation. 
Signed-off-by: Andi Kleen [drop __read_mostly instead of const] Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index d845dc4..fec8664 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -73,10 +73,10 @@ /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor */ -static const u8 xor_idx_to_desc __read_mostly = 0xd0; -static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc __read_mostly = 0xf8; -static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 xor_idx_to_desc = 0xd0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) { -- cgit v1.1 From 638c1fd3033c76778e6d9975ad8a4a9cdd5b96d9 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:52 -0400 Subject: pstore: Extend API for more flexibility in new backends Some pstore implementations may not have a static context, so extend the API to pass the pstore_info struct to all calls and allow for a context pointer. Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 18 +++++++++++++----- fs/pstore/inode.c | 10 +++++----- fs/pstore/internal.h | 2 +- fs/pstore/platform.c | 13 +++++++------ include/linux/pstore.h | 8 +++++--- 5 files changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index e6cef8e..de3ae92 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -932,8 +932,10 @@ static int erst_check_table(struct acpi_table_erst *erst_tab) static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, - struct timespec *time); -static u64 erst_writer(enum pstore_type_id type, size_t size); + struct timespec *time, struct pstore_info *psi); +static u64 erst_writer(enum pstore_type_id type, size_t size, + struct pstore_info *psi); +static int erst_clearer(u64 id, struct pstore_info *psi); static struct pstore_info erst_info = { .owner = THIS_MODULE, @@ -942,7 +944,7 @@ static struct pstore_info erst_info = { .close = erst_close_pstore, .read = erst_reader, .write = erst_writer, - .erase = erst_clear + .erase = erst_clearer }; #define CPER_CREATOR_PSTORE \ @@ -983,7 +985,7 @@ static int erst_close_pstore(struct pstore_info *psi) } static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, - struct timespec *time) + struct timespec *time, struct pstore_info *psi) { int rc; ssize_t len = 0; @@ -1037,7 +1039,8 @@ out: return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, size_t size) +static u64 erst_writer(enum pstore_type_id type, size_t size, + struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); @@ -1080,6 +1083,11 @@ static u64 erst_writer(enum pstore_type_id type, size_t size) return rcd->hdr.record_id; } +static int erst_clearer(u64 id, struct pstore_info *psi) +{ + return erst_clear(id); +} + static int __init erst_init(void) { int rc = 0; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 977ed27..b19884a 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -40,7 +40,7 @@ struct pstore_private { u64 id; - int (*erase)(u64); + struct pstore_info *psi; ssize_t size; char data[]; }; @@ -73,7 +73,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; - p->erase(p->id); + p->psi->erase(p->id, p->psi); return simple_unlink(dir, dentry); } @@ -175,8 +175,8 @@ int pstore_is_mounted(void) * Set the mtime & ctime to the date that this record was originally stored. */ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, - char *data, size_t size, - struct timespec time, int (*erase)(u64)) + char *data, size_t size, struct timespec time, + struct pstore_info *psi) { struct dentry *root = pstore_sb->s_root; struct dentry *dentry; @@ -193,7 +193,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, if (!private) goto fail_alloc; private->id = id; - private->erase = erase; + private->psi = psi; switch (type) { case PSTORE_TYPE_DMESG: diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 8c9f23e..611c1b3 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -2,5 +2,5 @@ extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(void); extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, char *data, size_t size, - struct timespec time, int (*erase)(u64)); + struct timespec time, struct pstore_info *psi); extern int pstore_is_mounted(void); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index f2c3ff2..221c04e 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -94,11 +94,12 @@ static void pstore_dump(struct kmsg_dumper *dumper, memcpy(dst, s1 + s1_start, l1_cpy); memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); + id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy, + psinfo); if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, hsize + l1_cpy + l2_cpy, - CURRENT_TIME, psinfo->erase); + CURRENT_TIME, psinfo); l1 -= l1_cpy; l2 -= l2_cpy; total += l1_cpy + l2_cpy; @@ -166,9 +167,9 @@ void pstore_get_records(void) if (rc) goto out; - while ((size = psi->read(&id, &type, &time)) > 0) { + while ((size = psi->read(&id, &type, &time, psi)) > 0) { if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, - time, psi->erase)) + time, psi)) failed++; } psi->close(psi); @@ -196,10 +197,10 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) mutex_lock(&psinfo->buf_mutex); memcpy(psinfo->buf, buf, size); - id = psinfo->write(type, size); + id = psinfo->write(type, size, psinfo); if (pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, - size, CURRENT_TIME, psinfo->erase); + size, CURRENT_TIME, psinfo); mutex_unlock(&psinfo->buf_mutex); return 0; diff --git 
a/include/linux/pstore.h b/include/linux/pstore.h index 2455ef2..b2f1d97 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -38,9 +38,11 @@ struct pstore_info { int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, - struct timespec *time); - u64 (*write)(enum pstore_type_id type, size_t size); - int (*erase)(u64 id); + struct timespec *time, struct pstore_info *psi); + u64 (*write)(enum pstore_type_id type, size_t size, + struct pstore_info *psi); + int (*erase)(u64 id, struct pstore_info *psi); + void *data; }; #ifdef CONFIG_PSTORE -- cgit v1.1 From 56280682ceeef74b692b3e21d1872049eea7c887 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:53 -0400 Subject: pstore: Add extra context for writes and erases EFI only provides small amounts of individual storage, and conventionally puts metadata in the storage variable name. Rather than add a metadata header to the (already limited) variable storage, it's easier for us to modify pstore to pass all the information we need to construct a unique variable name to the appropriate functions. Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 10 ++++++---- fs/pstore/inode.c | 6 ++++-- fs/pstore/platform.c | 9 +++++---- include/linux/pstore.h | 5 +++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index de3ae92..d842ac4 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -933,9 +933,10 @@ static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); -static u64 erst_writer(enum pstore_type_id type, size_t size, +static u64 erst_writer(enum pstore_type_id type, int part, size_t size, struct pstore_info *psi); -static int erst_clearer(u64 id, struct pstore_info *psi); +static int erst_clearer(enum pstore_type_id type, u64 id, + struct pstore_info *psi); static struct pstore_info erst_info = { .owner = THIS_MODULE, @@ -1039,7 +1040,7 @@ out: return (rc < 0) ? 
rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, size_t size, +static u64 erst_writer(enum pstore_type_id type, int part, size_t size, struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) @@ -1083,7 +1084,8 @@ static u64 erst_writer(enum pstore_type_id type, size_t size, return rcd->hdr.record_id; } -static int erst_clearer(u64 id, struct pstore_info *psi) +static int erst_clearer(enum pstore_type_id type, u64 id, + struct pstore_info *psi) { return erst_clear(id); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b19884a..893b961 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -39,8 +39,9 @@ #define PSTORE_NAMELEN 64 struct pstore_private { - u64 id; struct pstore_info *psi; + enum pstore_type_id type; + u64 id; ssize_t size; char data[]; }; @@ -73,7 +74,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; - p->psi->erase(p->id, p->psi); + p->psi->erase(p->type, p->id, p->psi); return simple_unlink(dir, dentry); } @@ -192,6 +193,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, private = kmalloc(sizeof *private + size, GFP_KERNEL); if (!private) goto fail_alloc; + private->type = type; private->id = id; private->psi = psi; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 221c04e..163bb40 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -78,7 +78,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, oopscount++; while (total < kmsg_bytes) { dst = psinfo->buf; - hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++); + hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part); size = psinfo->bufsize - hsize; dst += hsize; @@ -94,8 +94,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, memcpy(dst, s1 + s1_start, l1_cpy); memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy, - psinfo); + id = psinfo->write(PSTORE_TYPE_DMESG, part, + hsize + l1_cpy + l2_cpy, psinfo); if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, hsize + l1_cpy + l2_cpy, @@ -103,6 +103,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, l1 -= l1_cpy; l2 -= l2_cpy; total += l1_cpy + l2_cpy; + part++; } mutex_unlock(&psinfo->buf_mutex); } @@ -197,7 +198,7 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) mutex_lock(&psinfo->buf_mutex); memcpy(psinfo->buf, buf, size); - id = psinfo->write(type, size, psinfo); + id = psinfo->write(type, 0, size, psinfo); if (pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, size, CURRENT_TIME, psinfo); diff --git a/include/linux/pstore.h b/include/linux/pstore.h index b2f1d97..12be8f1 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,9 +39,10 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); - u64 (*write)(enum pstore_type_id type, size_t size, + u64 (*write)(enum pstore_type_id type, int part, + size_t size, struct pstore_info *psi); + int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); - int (*erase)(u64 id, struct pstore_info *psi); void *data; }; -- cgit v1.1 From b94fdd077eef5e6cab56836bf62695b497946716 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:54 -0400 Subject: pstore: Make "part" unsigned 
We'll never have a negative part, so just make this an unsigned int. Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/acpi/apei/erst.c | 8 ++++---- fs/pstore/platform.c | 3 ++- include/linux/pstore.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index d842ac4..6053f47 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -933,8 +933,8 @@ static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); -static u64 erst_writer(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi); +static u64 erst_writer(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi); static int erst_clearer(enum pstore_type_id type, u64 id, struct pstore_info *psi); @@ -1040,8 +1040,8 @@ out: return (rc < 0) ? rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) +static u64 erst_writer(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 163bb40..49ff1de 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -67,7 +67,8 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long size, total = 0; char *dst, *why; u64 id; - int hsize, part = 1; + int hsize; + unsigned int part = 1; if (reason < ARRAY_SIZE(reason_str)) why = reason_str[reason]; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 12be8f1..cc03bbf 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,7 +39,7 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); - u64 (*write)(enum pstore_type_id type, int part, + u64 (*write)(enum pstore_type_id type, unsigned int part, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); -- cgit v1.1 From dee28e72b619b48ec80a9e5509db458dbe66f71f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:55 -0400 Subject: pstore: Allow the user to explicitly choose a backend pstore only allows one backend to be registered at present, but the system may provide several. Add a parameter to allow the user to choose which backend will be used rather than just relying on load order. Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- Documentation/ABI/testing/pstore | 6 ++++++ Documentation/kernel-parameters.txt | 2 ++ fs/pstore/platform.c | 11 +++++++++++ 3 files changed, 19 insertions(+) diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore index ddf451e..ff1df4e 100644 --- a/Documentation/ABI/testing/pstore +++ b/Documentation/ABI/testing/pstore @@ -39,3 +39,9 @@ Description: Generic interface to platform dependent persistent storage. multiple) files based on the record size of the underlying persistent storage until at least this amount is reached. Default is 10 Kbytes. + + Pstore only supports one backend at a time. If multiple + backends are available, the preferred backend may be + set by passing the pstore.backend= argument to the kernel at + boot time. 
+ diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be7..8789d0c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2151,6 +2151,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [HW,MOUSE] Controls Logitech smartscroll autorepeat. 0 = disabled, 1 = enabled (default). + pstore.backend= Specify the name of the pstore backend to use + pt. [PARIDE] See Documentation/blockdev/paride.txt. diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 49ff1de..c5300ec 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -37,6 +37,8 @@ static DEFINE_SPINLOCK(pstore_lock); static struct pstore_info *psinfo; +static char *backend; + /* How much of the console log to snapshot */ static unsigned long kmsg_bytes = 10240; @@ -131,6 +133,12 @@ int pstore_register(struct pstore_info *psi) spin_unlock(&pstore_lock); return -EBUSY; } + + if (backend && strcmp(backend, psi->name)) { + spin_unlock(&pstore_lock); + return -EINVAL; + } + psinfo = psi; spin_unlock(&pstore_lock); @@ -208,3 +216,6 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) return 0; } EXPORT_SYMBOL_GPL(pstore_write); + +module_param(backend, charp, 0444); +MODULE_PARM_DESC(backend, "Pstore backend to use"); -- cgit v1.1 From 5ee9c198a4208d7760275d48e4c4f6c89dcd2ef0 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 21 Jul 2011 16:57:56 -0400 Subject: efi: Add support for using efivars as a pstore backend EFI provides an area of nonvolatile storage managed by the firmware. We can use this as a pstore backend to maintain copies of oopses, aiding diagnosis. Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 191 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/efi.h | 6 ++ 2 files changed, 195 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 5f29aaf..2bbb226 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -78,6 +78,7 @@ #include #include #include +#include #include @@ -89,6 +90,8 @@ MODULE_DESCRIPTION("sysfs interface to EFI Variables"); MODULE_LICENSE("GPL"); MODULE_VERSION(EFIVARS_VERSION); +#define DUMP_NAME_LEN 52 + /* * The maximum size of VariableName + Data = 1024 * Therefore, it's reasonable to save that much @@ -161,18 +164,28 @@ utf8_strsize(efi_char16_t *data, unsigned long maxlength) } static efi_status_t -get_var_data(struct efivars *efivars, struct efi_variable *var) +get_var_data_locked(struct efivars *efivars, struct efi_variable *var) { efi_status_t status; - spin_lock(&efivars->lock); var->DataSize = 1024; status = efivars->ops->get_variable(var->VariableName, &var->VendorGuid, &var->Attributes, &var->DataSize, var->Data); + return status; +} + +static efi_status_t +get_var_data(struct efivars *efivars, struct efi_variable *var) +{ + efi_status_t status; + + spin_lock(&efivars->lock); + status = get_var_data_locked(efivars, var); spin_unlock(&efivars->lock); + if (status != EFI_SUCCESS) { printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n", status); @@ -387,12 +400,176 @@ static struct kobj_type efivar_ktype = { .default_attrs = def_attrs, }; +static struct pstore_info efi_pstore_info; + static inline void efivar_unregister(struct efivar_entry *var) { kobject_put(&var->kobj); } +#ifdef CONFIG_PSTORE + +static int efi_pstore_open(struct pstore_info *psi) +{ + struct efivars *efivars = psi->data; + + spin_lock(&efivars->lock); + 
efivars->walk_entry = list_first_entry(&efivars->list, + struct efivar_entry, list); + return 0; +} + +static int efi_pstore_close(struct pstore_info *psi) +{ + struct efivars *efivars = psi->data; + + spin_unlock(&efivars->lock); + return 0; +} + +static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, + struct timespec *timespec, struct pstore_info *psi) +{ + efi_guid_t vendor = LINUX_EFI_CRASH_GUID; + struct efivars *efivars = psi->data; + char name[DUMP_NAME_LEN]; + int i; + unsigned int part, size; + unsigned long time; + + while (&efivars->walk_entry->list != &efivars->list) { + if (!efi_guidcmp(efivars->walk_entry->var.VendorGuid, + vendor)) { + for (i = 0; i < DUMP_NAME_LEN; i++) { + name[i] = efivars->walk_entry->var.VariableName[i]; + } + if (sscanf(name, "dump-type%u-%u-%lu", type, &part, &time) == 3) { + *id = part; + timespec->tv_sec = time; + timespec->tv_nsec = 0; + get_var_data_locked(efivars, &efivars->walk_entry->var); + size = efivars->walk_entry->var.DataSize; + memcpy(psi->buf, efivars->walk_entry->var.Data, size); + efivars->walk_entry = list_entry(efivars->walk_entry->list.next, + struct efivar_entry, list); + return size; + } + } + efivars->walk_entry = list_entry(efivars->walk_entry->list.next, + struct efivar_entry, list); + } + return 0; +} + +static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) +{ + char name[DUMP_NAME_LEN]; + char stub_name[DUMP_NAME_LEN]; + efi_char16_t efi_name[DUMP_NAME_LEN]; + efi_guid_t vendor = LINUX_EFI_CRASH_GUID; + struct efivars *efivars = psi->data; + struct efivar_entry *entry, *found = NULL; + int i; + + sprintf(stub_name, "dump-type%u-%u-", type, part); + sprintf(name, "%s%lu", stub_name, get_seconds()); + + spin_lock(&efivars->lock); + + for (i = 0; i < DUMP_NAME_LEN; i++) + efi_name[i] = stub_name[i]; + + /* + * Clean up any entries with the same name + */ + + list_for_each_entry(entry, &efivars->list, list) { + get_var_data_locked(efivars, &entry->var); + + for (i = 0; i < DUMP_NAME_LEN; i++) { + if (efi_name[i] == 0) { + found = entry; + efivars->ops->set_variable(entry->var.VariableName, + &entry->var.VendorGuid, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); + break; + } else if (efi_name[i] != entry->var.VariableName[i]) { + break; + } + } + } + + if (found) + list_del(&found->list); + + for (i = 0; i < DUMP_NAME_LEN; i++) + efi_name[i] = name[i]; + + efivars->ops->set_variable(efi_name, &vendor, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + size, psi->buf); + + spin_unlock(&efivars->lock); + + if (found) + efivar_unregister(found); + + if (size) + efivar_create_sysfs_entry(efivars, utf8_strsize(efi_name, DUMP_NAME_LEN * 2), + efi_name, &vendor); + + return part; +}; + +static int efi_pstore_erase(enum pstore_type_id type, u64 id, + struct pstore_info *psi) +{ + efi_pstore_write(type, id, 0, psi); + + return 0; +} +#else +static int efi_pstore_open(struct pstore_info *psi) +{ + return 0; +} + +static int efi_pstore_close(struct pstore_info *psi) +{ + return 0; +} + +static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, + struct timespec *time, struct pstore_info *psi) +{ + return -1; +} + +static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, + struct pstore_info *psi) +{ + return 0; +} + +static int efi_pstore_erase(enum pstore_type_id type, u64 id, + struct pstore_info *psi) +{ + return 0; +} +#endif + +static 
struct pstore_info efi_pstore_info = { + .owner = THIS_MODULE, + .name = "efi", + .open = efi_pstore_open, + .close = efi_pstore_close, + .read = efi_pstore_read, + .write = efi_pstore_write, + .erase = efi_pstore_erase, +}; static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -763,6 +940,16 @@ int register_efivars(struct efivars *efivars, if (error) unregister_efivars(efivars); + efivars->efi_pstore_info = efi_pstore_info; + + efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); + if (efivars->efi_pstore_info.buf) { + efivars->efi_pstore_info.bufsize = 1024; + efivars->efi_pstore_info.data = efivars; + mutex_init(&efivars->efi_pstore_info.buf_mutex); + pstore_register(&efivars->efi_pstore_info); + } + out: kfree(variable_name); diff --git a/include/linux/efi.h b/include/linux/efi.h index e376270..c1f5107 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -211,6 +212,9 @@ typedef efi_status_t efi_set_virtual_address_map_t (unsigned long memory_map_siz #define UV_SYSTEM_TABLE_GUID \ EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) +#define LINUX_EFI_CRASH_GUID \ + EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 ) + typedef struct { efi_guid_t guid; unsigned long table; @@ -426,6 +430,8 @@ struct efivars { struct kset *kset; struct bin_attribute *new_var, *del_var; const struct efivar_operations *ops; + struct efivar_entry *walk_entry; + struct pstore_info efi_pstore_info; }; int register_efivars(struct efivars *efivars, -- cgit v1.1 From a2940908391f3cee72e38769b30e829b22742b5b Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:57:57 -0400 Subject: efivars: String functions Fix the string functions in the efivars driver to be called utf16_* instead of utf8_* as the encoding is utf16, not utf8. As well, rename utf16_strlen to utf16_strnlen as it takes a maxlength argument and the name should be consistent with the standard C function names. utf16_strlen is still provided for convenience in a subsequent patch. 
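For reference, the convenience form introduced in the diff below is simply the unbounded variant of the bounded helper:

    utf16_strlen(s) == utf16_strnlen(s, ~0UL)   /* same result for any NUL-terminated s */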
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 2bbb226..4202a31 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -144,23 +144,29 @@ efivar_create_sysfs_entry(struct efivars *efivars, /* Return the number of unicode characters in data */ static unsigned long -utf8_strlen(efi_char16_t *data, unsigned long maxlength) +utf16_strnlen(efi_char16_t *s, size_t maxlength) { unsigned long length = 0; - while (*data++ != 0 && length < maxlength) + while (*s++ != 0 && length < maxlength) length++; return length; } +static unsigned long +utf16_strlen(efi_char16_t *s) +{ + return utf16_strnlen(s, ~0UL); +} + /* * Return the number of bytes is the length of this string * Note: this is NOT the same as the number of unicode characters */ static inline unsigned long -utf8_strsize(efi_char16_t *data, unsigned long maxlength) +utf16_strsize(efi_char16_t *data, unsigned long maxlength) { - return utf8_strlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); + return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); } static efi_status_t @@ -518,7 +524,9 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, efivar_unregister(found); if (size) - efivar_create_sysfs_entry(efivars, utf8_strsize(efi_name, DUMP_NAME_LEN * 2), + efivar_create_sysfs_entry(efivars, + utf16_strsize(efi_name, + DUMP_NAME_LEN * 2), efi_name, &vendor); return part; @@ -591,8 +599,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf8_strsize(new_var->VariableName, 1024); + strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); + strsize2 = utf16_strsize(new_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), new_var->VariableName, strsize1) && @@ -624,8 +632,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, /* Create the entry in sysfs. Locking is not required here */ status = efivar_create_sysfs_entry(efivars, - utf8_strsize(new_var->VariableName, - 1024), + utf16_strsize(new_var->VariableName, + 1024), new_var->VariableName, &new_var->VendorGuid); if (status) { @@ -654,8 +662,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, * Does this variable already exist? */ list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { - strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); - strsize2 = utf8_strsize(del_var->VariableName, 1024); + strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024); + strsize2 = utf16_strsize(del_var->VariableName, 1024); if (strsize1 == strsize2 && !memcmp(&(search_efivar->var.VariableName), del_var->VariableName, strsize1) && -- cgit v1.1 From 828aa1f00ec3508a4d813bd60d210de82929ac97 Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:57:58 -0400 Subject: efivars: introduce utf16_strncmp Introduce utf16_strncmp which is used in the next patch. Semantics should be the same as the strncmp C function. 
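As a usage sketch (name and prefix are placeholder efi_char16_t strings; this mirrors how the following patch uses it in efi_pstore_write()):

    /* a zero return over the prefix length means 'name' starts with 'prefix' */
    if (!utf16_strncmp(name, prefix, utf16_strlen(prefix)))
            /* prefix matched */;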
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 4202a31..15b9a01 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -169,6 +169,24 @@ utf16_strsize(efi_char16_t *data, unsigned long maxlength) return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); } +static inline int +utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len) +{ + while (1) { + if (len == 0) + return 0; + if (*a < *b) + return -1; + if (*a > *b) + return 1; + if (*a == 0) /* implies *b == 0 */ + return 0; + a++; + b++; + len--; + } +} + static efi_status_t get_var_data_locked(struct efivars *efivars, struct efi_variable *var) { -- cgit v1.1 From c475594d838c5c872e734f693a700df8c01b39d4 Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:57:59 -0400 Subject: efivars: Use string functions in pstore_write Instead of open-coding the string operations for comparing the prefix of the variable names, use the provided utf16_* string functions. This patch also changes the calls to efi.set_variable to efivars->ops->set_variable so that the right function gets called in the case of gsmi (which doesn't have a valid efi structure). As well, make sure that we only consider variables with the right vendor string. Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 15b9a01..563492e 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -512,18 +512,20 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, list_for_each_entry(entry, &efivars->list, list) { get_var_data_locked(efivars, &entry->var); - for (i = 0; i < DUMP_NAME_LEN; i++) { - if (efi_name[i] == 0) { - found = entry; - efivars->ops->set_variable(entry->var.VariableName, - &entry->var.VendorGuid, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, - 0, NULL); - break; - } else if (efi_name[i] != entry->var.VariableName[i]) { - break; - } - } + if (efi_guidcmp(entry->var.VendorGuid, vendor)) + continue; + if (utf16_strncmp(entry->var.VariableName, efi_name, + utf16_strlen(efi_name))) + continue; + /* Needs to be a prefix */ + if (entry->var.VariableName[utf16_strlen(efi_name)] == 0) + continue; + + /* found */ + found = entry; + efivars->ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, + EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); } if (found) -- cgit v1.1 From 7644c16c7e7431fa398e834109dbb76dc1b51617 Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Thu, 21 Jul 2011 16:58:00 -0400 Subject: efivars: Introduce PSTORE_EFI_ATTRIBUTES Consolidate the attributes listed for pstore operations in one place, PSTORE_EFI_ATTRIBUTES. 
Signed-off-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 563492e..eacb05e 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -122,6 +122,10 @@ struct efivar_attribute { ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count); }; +#define PSTORE_EFI_ATTRIBUTES \ + (EFI_VARIABLE_NON_VOLATILE | \ + EFI_VARIABLE_BOOTSERVICE_ACCESS | \ + EFI_VARIABLE_RUNTIME_ACCESS) #define EFIVAR_ATTR(_name, _mode, _show, _store) \ struct efivar_attribute efivar_attr_##_name = { \ @@ -523,8 +527,9 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, /* found */ found = entry; - efivars->ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + efivars->ops->set_variable(entry->var.VariableName, + &entry->var.VendorGuid, + PSTORE_EFI_ATTRIBUTES, 0, NULL); } @@ -534,8 +539,7 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, for (i = 0; i < DUMP_NAME_LEN; i++) efi_name[i] = name[i]; - efivars->ops->set_variable(efi_name, &vendor, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, + efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES, size, psi->buf); spin_unlock(&efivars->lock); -- cgit v1.1 From 529da704ad1ead755d9e40721f29446cb278e099 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 23 Jul 2011 16:07:26 -0400 Subject: ext4: remove unnecessary ext4_get_group_info in ext4_mb_load_buddy ext4_mb_load_buddy() calls ext4_get_group_info() to set both "grp" and "e4b->bd_info", but it could simply do "e4b->bd_info = grp". Reported-by: Andreas Dilger Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 037f680..447c0f3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1126,7 +1126,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, grp = ext4_get_group_info(sb, group); e4b->bd_blkbits = sb->s_blocksize_bits; - e4b->bd_info = ext4_get_group_info(sb, group); + e4b->bd_info = grp; e4b->bd_sb = sb; e4b->bd_group = group; e4b->bd_buddy_page = NULL; -- cgit v1.1 From ced156e464e49b6b4153ede9aaa04d9a4ad24e0c Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 23 Jul 2011 16:18:05 -0400 Subject: ext4: don't increment s_mb_buddies_generated in ext4_mb_release In ext4_mb_release, we use s_mb_buddies_generated++. Although the output is OK, we don't need this extra ++. 
Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 447c0f3..e165830 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2581,7 +2581,7 @@ int ext4_mb_release(struct super_block *sb) atomic_read(&sbi->s_mb_lost_chunks)); printk(KERN_INFO "EXT4-fs: mballoc: %lu generated and it took %Lu\n", - sbi->s_mb_buddies_generated++, + sbi->s_mb_buddies_generated, sbi->s_mb_generation_time); printk(KERN_INFO "EXT4-fs: mballoc: %u preallocated, %u discarded\n", -- cgit v1.1 From 6a0fe49308dcac10ab510b3c45e00eb8d5ef440e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 23 Jul 2011 16:18:55 -0400 Subject: ext4: remove ac_repeats from ext4_allocation_context ac_repeats isn't referenced in the mballoc code. So remove it. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 20b5e7b..9d4a636 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -187,7 +187,6 @@ struct ext4_allocation_context { __u16 ac_flags; /* allocation hints */ __u8 ac_status; __u8 ac_criteria; - __u8 ac_repeats; __u8 ac_2order; /* if request is to allocate 2^N blocks and * N > 0, the field stores N, otherwise 0 */ __u8 ac_op; /* operation, for history only */ -- cgit v1.1 From 5718789da5b94bd4148cb7ea0f457089c26bc1c3 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 23 Jul 2011 21:49:07 -0400 Subject: ext4: remove unused argument in ext4_ext_next_leaf_block The argument "inode" in function ext4_ext_next_leaf_block is unused, so remove it. Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9bec432..33bbe84 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1414,8 +1414,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) * ext4_ext_next_leaf_block: * returns first allocated block from next leaf or EXT_MAX_BLOCKS */ -static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode, - struct ext4_ext_path *path) +static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path) { int depth; @@ -1734,7 +1733,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, fex = EXT_LAST_EXTENT(eh); next = EXT_MAX_BLOCKS; if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) - next = ext4_ext_next_leaf_block(inode, path); + next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %d\n", next); BUG_ON(npath != NULL); -- cgit v1.1 From 0737964bc98202776f4d10bc8e108f45b3115037 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 23 Jul 2011 21:51:07 -0400 Subject: ext4: correct the debug message in ext4_ext_insert_extent The debug message printed by ext4_ext_insert_extent before moving extents is incorrect (the "from xx to xx" part). 
Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 33bbe84..a637d83 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1806,7 +1806,7 @@ has_space: ext4_ext_pblock(newext), ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext), - nearex, len, nearex + 1, nearex + 2); + nearex, len, nearex, nearex + 1); memmove(nearex + 1, nearex, len); path[depth].p_ext = nearex; } -- cgit v1.1 From b7ca1e8ec53259359db5313f923a0a20fa04bdb6 Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Sat, 23 Jul 2011 21:53:25 -0400 Subject: ext4: correct comment for ext4_ext_check_cache The comment for ext4_ext_check_cache still uses the function's old name, ext4_ext_in_cache(). Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a637d83..4d73e11 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2019,7 +2019,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } /* - * ext4_ext_in_cache() + * ext4_ext_check_cache() * Checks to see if the given block is in the cache. * If it is, the cached extent is stored in the given * cache extent pointer. If the cached extent is a hole, -- cgit v1.1 From 0b052f4a088ddc47a5da23dd733522241314cfb4 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Thu, 14 Jul 2011 09:52:38 +0900 Subject: pch_dma: Fix CTL register access issue Currently, the Mode-Control register is accessed by read-modify-write. The DMA hardware datasheet prohibits this method, because the register is reset to 0 by the DMA hardware once a transfer completes, so the current read-modify-write processing can cause unexpected behavior. The datasheet says that when writing the Mode-Control register, the value should be set only for the target channel and the fields of all other channels must be set to '11b'. e.g. Set DMA0=01b, DMA11=10b: CTL0=33333331h CTL2=00002333h NOTE: CTL0 holds the Mode-Control fields for DMA0~7, CTL2 for DMA8~11. This patch fixes the issue. 
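Condensed from the pdc_set_mode() hunk below (channels 0-7; the ch8-11 path uses CTL3 and DMA_MASK_CTL2_MODE), the new write pattern is roughly:

    mask_ctl = DMA_MASK_CTL0_MODE &            /* '11b' in every field...      */
               ~(DMA_CTL0_MODE_MASK_BITS <<    /* ...except the target channel */
                 (DMA_CTL0_BITS_PER_CH * chan->chan_id));
    mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
                     DMA_CTL0_DIR_SHIFT_BITS);
    val = dma_readl(pd, CTL0);
    val &= mask_dir;                           /* keep only this channel's direction bit */
    val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
    val |= mask_ctl;                           /* every other channel is written as '11b' */
    dma_writel(pd, CTL0, val);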
Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index d9d95a4..1ac8d4b 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -62,6 +62,9 @@ #define MAX_CHAN_NR 8 +#define DMA_MASK_CTL0_MODE 0x33333333 +#define DMA_MASK_CTL2_MODE 0x00003333 + static unsigned int init_nr_desc_per_channel = 64; module_param(init_nr_desc_per_channel, uint, 0644); MODULE_PARM_DESC(init_nr_desc_per_channel, @@ -210,10 +213,17 @@ static void pdc_set_dir(struct dma_chan *chan) struct pch_dma_chan *pd_chan = to_pd_chan(chan); struct pch_dma *pd = to_pd(chan->device); u32 val; + u32 mask_mode; + u32 mask_ctl; if (chan->chan_id < 8) { val = dma_readl(pd, CTL0); + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id); + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS); @@ -221,18 +231,24 @@ static void pdc_set_dir(struct dma_chan *chan) val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + DMA_CTL0_DIR_SHIFT_BITS)); + val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ val = dma_readl(pd, CTL3); + mask_mode = DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch); + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_mode; if (pd_chan->dir == DMA_TO_DEVICE) val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS); else val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + DMA_CTL0_DIR_SHIFT_BITS)); - + val |= mask_ctl; dma_writel(pd, CTL3, val); } @@ -244,26 +260,30 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) { struct pch_dma *pd = to_pd(chan->device); u32 val; + u32 mask_ctl; + u32 mask_dir; if (chan->chan_id < 8) { + mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\ + DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL0); - - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val &= mask_dir; val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); - + val |= mask_ctl; dma_writel(pd, CTL0, val); } else { int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... 
ch11->3 */ - + mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\ + DMA_CTL0_DIR_SHIFT_BITS); val = dma_readl(pd, CTL3); - - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * ch)); + val &= mask_dir; val |= mode << (DMA_CTL0_BITS_PER_CH * ch); - + val |= mask_ctl; dma_writel(pd, CTL3, val); - } dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", -- cgit v1.1 From 1c1d9547536480626c1be1fb062b81663fb2b88e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Jul 2011 21:00:13 +0800 Subject: dmaengine: imx-sdma: return proper error if kzalloc fails Signed-off-by: Axel Lin Acked-by: Sascha Hauer Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index b6d1455..ec53980 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1281,8 +1281,10 @@ static int __init sdma_probe(struct platform_device *pdev) goto err_request_irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) + if (!sdma->script_addrs) { + ret = -ENOMEM; goto err_alloc; + } sdma->version = pdata->sdma_version; -- cgit v1.1 From 97cdd7101079adc3c626d159c62d43de949516c8 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 12 Jul 2011 17:38:00 -0400 Subject: drm/i915/dp: Zero the DPCD data before connection probe Signed-off-by: Adam Jackson Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e2aced6..de24f31 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1673,6 +1673,7 @@ intel_dp_detect(struct drm_connector *connector, bool force) struct edid *edid = NULL; intel_dp->has_audio = false; + memset(intel_dp->dpcd, 0, sizeof(intel_dp->dpcd)); if (HAS_PCH_SPLIT(dev)) status = ironlake_dp_detect(intel_dp); -- cgit v1.1 From 1b9be9d09d85b3697418dc444db30d069203ff7d Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 12 Jul 2011 17:38:01 -0400 Subject: drm/i915/dp: Move DPCD dump to common code instead of PCH-only No reason not to see this on g4x, after all. 
Signed-off-by: Adam Jackson Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index de24f31..0be85a0 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1615,8 +1615,6 @@ ironlake_dp_detect(struct intel_dp *intel_dp) sizeof (intel_dp->dpcd)); if (ret && intel_dp->dpcd[DP_DPCD_REV] != 0) status = connector_status_connected; - DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx\n", intel_dp->dpcd[0], - intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3]); return status; } @@ -1679,6 +1677,10 @@ intel_dp_detect(struct drm_connector *connector, bool force) status = ironlake_dp_detect(intel_dp); else status = g4x_dp_detect(intel_dp); + + DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx\n", intel_dp->dpcd[0], + intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3]); + if (status != connector_status_connected) return status; -- cgit v1.1 From 9de88e6e89a2222061af8e1448f6f346e3413fc8 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 12 Jul 2011 17:38:02 -0400 Subject: drm/i915/dp: Read more DPCD registers on connection probe For parity with radeon and nouveau, and also because I suspect we're going to need it to get format-conversion dongles right. Signed-off-by: Adam Jackson Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0be85a0..2f0566b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -52,7 +52,7 @@ struct intel_dp { uint32_t color_range; uint8_t link_bw; uint8_t lane_count; - uint8_t dpcd[4]; + uint8_t dpcd[8]; struct i2c_adapter adapter; struct i2c_algo_dp_aux_data algo; bool is_pch_edp; @@ -1678,8 +1678,10 @@ intel_dp_detect(struct drm_connector *connector, bool force) else status = g4x_dp_detect(intel_dp); - DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx\n", intel_dp->dpcd[0], - intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3]); + DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx%hx%hx%hx%hx\n", intel_dp->dpcd[0], + intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3], + intel_dp->dpcd[4], intel_dp->dpcd[5], intel_dp->dpcd[6], + intel_dp->dpcd[7]); if (status != connector_status_connected) return status; -- cgit v1.1 From ac66ae8346fff704301e24ac55da1d76020660b2 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 12 Jul 2011 17:38:03 -0400 Subject: drm/i915/dp: Better hexdump of DPCD %hx alone prints 0 as "0", not "00". 
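For example, with printf-style formatting:

    printk("%hx vs %02hx\n", 0, 0);    /* logs "0 vs 00" */

so the fixed-width form keeps the DPCD bytes aligned in the log.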
Signed-off-by: Adam Jackson Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2f0566b..13bddd3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1678,10 +1678,10 @@ intel_dp_detect(struct drm_connector *connector, bool force) else status = g4x_dp_detect(intel_dp); - DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx%hx%hx%hx%hx\n", intel_dp->dpcd[0], - intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3], - intel_dp->dpcd[4], intel_dp->dpcd[5], intel_dp->dpcd[6], - intel_dp->dpcd[7]); + DRM_DEBUG_KMS("DPCD: %02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx\n", + intel_dp->dpcd[0], intel_dp->dpcd[1], intel_dp->dpcd[2], + intel_dp->dpcd[3], intel_dp->dpcd[4], intel_dp->dpcd[5], + intel_dp->dpcd[6], intel_dp->dpcd[7]); if (status != connector_status_connected) return status; -- cgit v1.1 From 71ba9000e673d6171a52f2a8b14e0419087f7199 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 12 Jul 2011 17:38:04 -0400 Subject: drm/i915/dp: Retry DPCD fetch on G4X too Signed-off-by: Adam Jackson Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 13bddd3..9a0c3ca 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1596,10 +1596,22 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) } static enum drm_connector_status +i915_dp_detect_common(struct intel_dp *intel_dp) +{ + enum drm_connector_status status = connector_status_disconnected; + + if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd, + sizeof (intel_dp->dpcd)) && + (intel_dp->dpcd[DP_DPCD_REV] != 0)) + status = connector_status_connected; + + return status; +} + +static enum drm_connector_status ironlake_dp_detect(struct intel_dp *intel_dp) { enum drm_connector_status status; - bool ret; /* Can't disconnect eDP, but you can close the lid... 
*/ if (is_edp(intel_dp)) { @@ -1609,13 +1621,7 @@ ironlake_dp_detect(struct intel_dp *intel_dp) return status; } - status = connector_status_disconnected; - ret = intel_dp_aux_native_read_retry(intel_dp, - 0x000, intel_dp->dpcd, - sizeof (intel_dp->dpcd)); - if (ret && intel_dp->dpcd[DP_DPCD_REV] != 0) - status = connector_status_connected; - return status; + return i915_dp_detect_common(intel_dp); } static enum drm_connector_status @@ -1623,7 +1629,6 @@ g4x_dp_detect(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - enum drm_connector_status status; uint32_t temp, bit; switch (intel_dp->output_reg) { @@ -1645,15 +1650,7 @@ g4x_dp_detect(struct intel_dp *intel_dp) if ((temp & bit) == 0) return connector_status_disconnected; - status = connector_status_disconnected; - if (intel_dp_aux_native_read(intel_dp, 0x000, intel_dp->dpcd, - sizeof (intel_dp->dpcd)) == sizeof (intel_dp->dpcd)) - { - if (intel_dp->dpcd[DP_DPCD_REV] != 0) - status = connector_status_connected; - } - - return status; + return i915_dp_detect_common(intel_dp); } /** -- cgit v1.1 From a2cab1b24a4ea75a68fa21bfb7d5b1a45121583c Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 12 Jul 2011 17:38:05 -0400 Subject: drm/i915/dp: Explicitly request 8/10 channel coding It's not clear what a sink would do if you wrote zero to this register - which I guess would mean "I don't support any channel encodings, good luck" - but let's not find out. Signed-off-by: Adam Jackson Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9a0c3ca..1c3a36f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -769,6 +769,7 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE); intel_dp->link_configuration[0] = intel_dp->link_bw; intel_dp->link_configuration[1] = intel_dp->lane_count; + intel_dp->link_configuration[8] = DP_SET_ANSI_8B10B; /* * Check for DPCD version > 1.1 and enhanced framing support -- cgit v1.1 From 92b8e897f6e7ba4aa10037ebd8186f85d39330d0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 12 Jul 2011 10:57:59 -0700 Subject: btrfs: Don't BUG_ON alloc_path errors in find_next_chunk I also removed the BUG_ON from error return of find_next_chunk in init_first_rw_device(). It turns out that the only caller of init_first_rw_device() also BUGS on any nonzero return so no actual behavior change has occurred here. do_chunk_alloc() also needed an update since it calls btrfs_alloc_chunk() which can now return -ENOMEM. Instead of setting space_info->full on any error from btrfs_alloc_chunk() I catch and return every error value _except_ -ENOSPC. Thanks goes to Tsutomu Itoh for pointing that issue out. 
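In other words, condensed from the do_chunk_alloc() hunk below, -ENOSPC keeps its existing meaning (mark the space_info full) while any other error is propagated:

    ret = btrfs_alloc_chunk(trans, extent_root, flags);
    if (ret < 0 && ret != -ENOSPC)
            goto out;               /* real failure, e.g. -ENOMEM */
    spin_lock(&space_info->lock);
    if (ret)
            space_info->full = 1;   /* -ENOSPC: just remember we're full */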
Signed-off-by: Mark Fasheh --- fs/btrfs/extent-tree.c | 4 ++++ fs/btrfs/volumes.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aa91773..f6af423 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3277,6 +3277,9 @@ again: } ret = btrfs_alloc_chunk(trans, extent_root, flags); + if (ret < 0 && ret != -ENOSPC) + goto out; + spin_lock(&space_info->lock); if (ret) space_info->full = 1; @@ -3286,6 +3289,7 @@ again: space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); +out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 530a2fc..90d956c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root, struct btrfs_key found_key; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; key.objectid = objectid; key.offset = (u64)-1; @@ -2663,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, ret = find_next_chunk(fs_info->chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); - BUG_ON(ret); + if (ret) + return ret; alloc_profile = BTRFS_BLOCK_GROUP_METADATA | (fs_info->metadata_alloc_profile & -- cgit v1.1 From 38a1a919535742af677303271eb4ff731547b706 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 13 Jul 2011 10:59:59 -0700 Subject: btrfs: don't BUG_ON allocation errors in btrfs_drop_snapshot In addition to properly handling allocation failure from btrfs_alloc_path, I also fixed up the kzalloc error handling code immediately below it. Signed-off-by: Mark Fasheh --- fs/btrfs/extent-tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f6af423..6bce721 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6272,10 +6272,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; wc = kzalloc(sizeof(*wc), GFP_NOFS); - BUG_ON(!wc); + if (!wc) { + btrfs_free_path(path); + return -ENOMEM; + } trans = btrfs_start_transaction(tree_root, 0); BUG_ON(IS_ERR(trans)); -- cgit v1.1 From a65e34c79c88895766ab1f8a5afa451eed26622b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 25 Jul 2011 10:04:56 -0700 Subject: drm/i915: Hold mode_config->mutex during hotplug processing Hotplug detection is a mode setting operation and must hold the struct_mutex or risk colliding with other mode setting operations. In particular, the display port hotplug function attempts to re-train the link if the monitor is supposed to be running when plugged back in. If that happens while mode setting is underway, the link will get scrambled, leaving it in an inconsistent state. This is a special case -- usually the driver mode setting entry points are covered by the upper level DRM code, but in this case the function is invoked as a work function not under the control of DRM. 
Signed-off-by: Keith Packard Cc: stable@kernel.org Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3b03f85..9da2a2c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -306,6 +306,7 @@ static void i915_hotplug_work_func(struct work_struct *work) struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; + mutex_lock(&mode_config->mutex); DRM_DEBUG_KMS("running encoder hotplug functions\n"); list_for_each_entry(encoder, &mode_config->encoder_list, base.head) @@ -314,6 +315,8 @@ static void i915_hotplug_work_func(struct work_struct *work) /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); + + mutex_unlock(&mode_config->mutex); } static void i915_handle_rps_change(struct drm_device *dev) -- cgit v1.1 From 302983e9059e9ef5de3ca7671918eeb237c5971e Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Wed, 13 Jul 2011 16:32:32 -0400 Subject: drm/i915/pch: Fix integer math bugs in panel fitting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider a 1600x900 panel, upscaling a 1360x768 mode, full-aspect. The old math would give you: scaled_width = 1600 * 768; /* 1228800 */ scaled_height = 1360 * 900; /* 1224000 */ if (scaled_width > scaled_height) { /* pillarbox, and true */ width = 1224000 / 768; /* int(1593.75) = 1593 */ x = (1600 - 1593 + 1) / 2; /* 4 */ y = 0; height = 768; } /* ... */ This is broken. The total width of scanout would then be 1593 + 4 + 4, or 1601, which is wider than the panel itself. The hardware very dutifully implements this, and you end up with a black 45° diagonal from the top-left corner to the bottom edge of the screen. It's a cool effect and all, but not what you wanted. Similar things happen for the letterbox case. The problem is that you have an integer number of pixels, which means it's usually impossible to upscale equally on both axes. 1360/768 is 1.7708, 1600/900 is 1.7777. Since we're constrained on the one axis, the other one wants to come out as an even number of pixels (the panel is almost certainly even on both axes, and the x/y offsets will be applied on both sides). In the math above, if 'width' comes out even, rounding down is correct; if it's odd, you'd rather round up. So just increment width/height in those cases. Tested on a Lenovo T500 (Ironlake). 
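With the rounding fix below, the same example comes out right:

    width = 1224000 / 768;      /* int(1593.75) = 1593, odd */
    width++;                    /* 1594 */
    x = (1600 - 1594 + 1) / 2;  /* 3 */
    /* total scanout width: 3 + 1594 + 3 = 1600, exactly the panel width */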
Signed-off-by: Adam Jackson Tested-By: Daniel Manrique Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38851 Reviewed-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_panel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index a06ff07..05f500c 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -83,11 +83,15 @@ intel_pch_panel_fitting(struct drm_device *dev, u32 scaled_height = mode->hdisplay * adjusted_mode->vdisplay; if (scaled_width > scaled_height) { /* pillar */ width = scaled_height / mode->vdisplay; + if (width & 1) + width++; x = (adjusted_mode->hdisplay - width + 1) / 2; y = 0; height = adjusted_mode->vdisplay; } else if (scaled_width < scaled_height) { /* letter */ height = scaled_width / mode->hdisplay; + if (height & 1) + height++; y = (adjusted_mode->vdisplay - height + 1) / 2; x = 0; width = adjusted_mode->hdisplay; -- cgit v1.1 From 81055854d096959898fdc17ed11729eb019eff07 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Thu, 21 Jul 2011 17:48:37 -0400 Subject: drm/i915/dp: Explicitly disable symbol scrambling while training The DP spec says training patterns 1 and 2 are to be sent non-scrambled, and the GPU docs claim that happens (or at least, there's no explicit scrambling control). But the sink may be confused if we don't explicitly tell it what we're doing, so play it safe. Signed-off-by: Adam Jackson Reviewed-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1c3a36f..8aecb07 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1370,7 +1370,8 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) reg = DP | DP_LINK_TRAIN_PAT_1; if (!intel_dp_set_link_train(intel_dp, reg, - DP_TRAINING_PATTERN_1)) + DP_TRAINING_PATTERN_1 | + DP_LINK_SCRAMBLING_DISABLE)) break; /* Set training pattern 1 */ @@ -1445,7 +1446,8 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) /* channel eq pattern */ if (!intel_dp_set_link_train(intel_dp, reg, - DP_TRAINING_PATTERN_2)) + DP_TRAINING_PATTERN_2 | + DP_LINK_SCRAMBLING_DISABLE)) break; udelay(400); -- cgit v1.1 From e85194641bec56179dcf5e1704ce5c6bf30340c6 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Thu, 21 Jul 2011 17:48:38 -0400 Subject: drm/i915/dp: Don't turn CPT DP ports on too early The docs say the port has to come on in training pattern 1; at this point, though, ->DP is in normal mode. The intent here is to wait until the port is in fact sending data, but that doesn't happen since we've broken the sequence the hardware expects, and the vblank wait will time out and kvetch in the log. 
Signed-off-by: Adam Jackson Reviewed-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8aecb07..dcc7ae6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1334,10 +1334,16 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) u32 reg; uint32_t DP = intel_dp->DP; - /* Enable output, wait for it to become active */ - I915_WRITE(intel_dp->output_reg, intel_dp->DP); - POSTING_READ(intel_dp->output_reg); - intel_wait_for_vblank(dev, intel_crtc->pipe); + /* + * On CPT we have to enable the port in training pattern 1, which + * will happen below in intel_dp_set_link_train. Otherwise, enable + * the port and wait for it to become active. + */ + if (!HAS_PCH_CPT(dev)) { + I915_WRITE(intel_dp->output_reg, intel_dp->DP); + POSTING_READ(intel_dp->output_reg); + intel_wait_for_vblank(dev, intel_crtc->pipe); + } /* Write the link configuration data */ intel_dp_aux_native_write(intel_dp, DP_LINK_BW_SET, -- cgit v1.1 From 4e0e6109a1cc18cc5e4143f828c36b6a3e8be6ad Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Jul 2011 16:05:04 -0500 Subject: dmaengine: pl330: make platform data optional The pl330 needs platform data for describing peripheral connections, but some platforms may only support memory to memory dma channels. In this case, we can probe for how many channels there are and don't need the platform data. As memcpy requests don't need channel private data to hold peripheral info, allow private data to be NULL in this case. Signed-off-by: Rob Herring Cc: Jassi Brar Cc: Vinod Koul Cc: Dan Williams Acked-by: Jassi Brar Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 64 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 6abe1ec..00eee59 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -82,7 +82,7 @@ struct dma_pl330_dmac { spinlock_t pool_lock; /* Peripheral channels connected to this DMAC */ - struct dma_pl330_chan peripherals[0]; /* keep at end */ + struct dma_pl330_chan *peripherals; /* keep at end */ }; struct dma_pl330_desc { @@ -451,8 +451,13 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) desc->txd.cookie = 0; async_tx_ack(&desc->txd); - desc->req.rqtype = peri->rqtype; - desc->req.peri = peri->peri_id; + if (peri) { + desc->req.rqtype = peri->rqtype; + desc->req.peri = peri->peri_id; + } else { + desc->req.rqtype = MEMTOMEM; + desc->req.peri = 0; + } dma_async_tx_descriptor_init(&desc->txd, &pch->chan); @@ -529,10 +534,10 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, struct pl330_info *pi; int burst; - if (unlikely(!pch || !len || !peri)) + if (unlikely(!pch || !len)) return NULL; - if (peri->rqtype != MEMTOMEM) + if (peri && peri->rqtype != MEMTOMEM) return NULL; pi = &pch->dmac->pif; @@ -577,7 +582,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, int i, burst_size; dma_addr_t addr; - if (unlikely(!pch || !sgl || !sg_len)) + if (unlikely(!pch || !sgl || !sg_len || !peri)) return NULL; /* Make sure the direction is consistent */ @@ -666,17 +671,12 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) struct dma_device *pd; struct resource *res; int i, ret, irq; + int num_chan; pdat = adev->dev.platform_data; - if (!pdat || !pdat->nr_valid_peri) { - 
dev_err(&adev->dev, "platform data missing\n"); - return -ENODEV; - } - /* Allocate a new DMAC and its Channels */ - pdmac = kzalloc(pdat->nr_valid_peri * sizeof(*pch) - + sizeof(*pdmac), GFP_KERNEL); + pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL); if (!pdmac) { dev_err(&adev->dev, "unable to allocate mem\n"); return -ENOMEM; @@ -685,7 +685,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pi = &pdmac->pif; pi->dev = &adev->dev; pi->pl330_data = NULL; - pi->mcbufsz = pdat->mcbuf_sz; + pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0; res = &adev->res; request_mem_region(res->start, resource_size(res), "dma-pl330"); @@ -717,27 +717,35 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) INIT_LIST_HEAD(&pd->channels); /* Initialize channel parameters */ - for (i = 0; i < pdat->nr_valid_peri; i++) { - struct dma_pl330_peri *peri = &pdat->peri[i]; - pch = &pdmac->peripherals[i]; + num_chan = max(pdat ? pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan); + pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); - switch (peri->rqtype) { - case MEMTOMEM: + for (i = 0; i < num_chan; i++) { + pch = &pdmac->peripherals[i]; + if (pdat) { + struct dma_pl330_peri *peri = &pdat->peri[i]; + + switch (peri->rqtype) { + case MEMTOMEM: + dma_cap_set(DMA_MEMCPY, pd->cap_mask); + break; + case MEMTODEV: + case DEVTOMEM: + dma_cap_set(DMA_SLAVE, pd->cap_mask); + break; + default: + dev_err(&adev->dev, "DEVTODEV Not Supported\n"); + continue; + } + pch->chan.private = peri; + } else { dma_cap_set(DMA_MEMCPY, pd->cap_mask); - break; - case MEMTODEV: - case DEVTOMEM: - dma_cap_set(DMA_SLAVE, pd->cap_mask); - break; - default: - dev_err(&adev->dev, "DEVTODEV Not Supported\n"); - continue; + pch->chan.private = NULL; } INIT_LIST_HEAD(&pch->work_list); spin_lock_init(&pch->lock); pch->pl330_chid = NULL; - pch->chan.private = peri; pch->chan.device = pd; pch->chan.chan_id = i; pch->dmac = pdmac; -- cgit v1.1 From 2a9778ed83b142e88cb38acc496a573a3472d27f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Jul 2011 18:53:52 +0800 Subject: dma: mxs-dma: fix unterminated platform_device_id table Signed-off-by: Axel Lin Signed-off-by: Vinod Koul --- drivers/dma/mxs-dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 2870d91..f22a237 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -709,6 +709,8 @@ static struct platform_device_id mxs_dma_type[] = { }, { .name = "mxs-dma-apbx", .driver_data = MXS_DMA_APBX, + }, { + /* end of list */ } }; -- cgit v1.1 From add56ba711627c223e9e356d9398642abf7fa32d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 19 Jul 2011 14:48:17 +0800 Subject: dma: intel_mid_dma: remove redundant pci_set_drvdata calls Call pci_set_drvdata() once in intel_mid_dma_probe() is enough. Remove redundant pci_set_drvdata() calls in dma_suspend() and dma_resume(). 
Signed-off-by: Axel Lin Signed-off-by: Vinod Koul --- drivers/dma/intel_mid_dma.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index f653517..8a3fdd8 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1351,7 +1351,6 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state) return -EAGAIN; } device->state = SUSPENDED; - pci_set_drvdata(pci, device); pci_save_state(pci); pci_disable_device(pci); pci_set_power_state(pci, PCI_D3hot); @@ -1380,7 +1379,6 @@ int dma_resume(struct pci_dev *pci) } device->state = RUNNING; iowrite32(REG_BIT0, device->dma_base + DMA_CFG); - pci_set_drvdata(pci, device); return 0; } -- cgit v1.1 From a62bae98a93e6c4d53b1e6c20715e94b4a5aca3c Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 19 Jul 2011 12:09:56 +0800 Subject: ARM: mxs-dma: reset after disable channel We met some channels in abnormal state after disable. Reset it to get a clean state. Signed-off-by: Dong Aisheng Cc: Vinod Koul Cc: Shawn Guo Signed-off-by: Vinod Koul --- drivers/dma/mxs-dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index f22a237..be641cb 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -537,6 +537,7 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, switch (cmd) { case DMA_TERMINATE_ALL: mxs_dma_disable_chan(mxs_chan); + mxs_dma_reset_chan(mxs_chan); break; case DMA_PAUSE: mxs_dma_pause_chan(mxs_chan); -- cgit v1.1 From f44bd191404841e44a914b2760a16ad328f406a8 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:11:26 +0100 Subject: DMA: PL08x: remove unused constants PL08X_WQ_PERIODMIN and PL08X_MAX_ALLOCS are not used, remove them. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index e6d7228..90db51f 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -156,14 +156,10 @@ struct pl08x_driver_data { #define PL08X_BOUNDARY_SHIFT (10) /* 1KB 0x400 */ #define PL08X_BOUNDARY_SIZE (1 << PL08X_BOUNDARY_SHIFT) -/* Minimum period between work queue runs */ -#define PL08X_WQ_PERIODMIN 20 - /* Size (bytes) of each LLI buffer allocated for one transfer */ # define PL08X_LLI_TSFR_SIZE 0x2000 /* Maximum times we call dma_pool_alloc on this pool without freeing */ -#define PL08X_MAX_ALLOCS 0x40 #define MAX_NUM_TSFR_LLIS (PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli)) #define PL08X_ALIGN 8 -- cgit v1.1 From 25c94f7fcf70d94e12401b9c957ddf1d303061a3 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:11:46 +0100 Subject: DMA: PL08x: select LLI bus only once per LLI setup Avoid re-selecting the LLI bus each time we create an LLI. Move it out of the LLI setup loops. 
Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 90db51f..6808f7d 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -491,10 +491,10 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, struct pl08x_lli_build_data { struct pl08x_txd *txd; - struct pl08x_driver_data *pl08x; struct pl08x_bus_data srcbus; struct pl08x_bus_data dstbus; size_t remainder; + u32 lli_bus; }; /* @@ -547,8 +547,7 @@ static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd, llis_va[num_llis].src = bd->srcbus.addr; llis_va[num_llis].dst = bd->dstbus.addr; llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli); - if (bd->pl08x->lli_buses & PL08X_AHB2) - llis_va[num_llis].lli |= PL080_LLI_LM_AHB2; + llis_va[num_llis].lli |= bd->lli_bus; if (cctl & PL080_CONTROL_SRC_INCR) bd->srcbus.addr += len; @@ -601,9 +600,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, cctl = txd->cctl; bd.txd = txd; - bd.pl08x = pl08x; bd.srcbus.addr = txd->src_addr; bd.dstbus.addr = txd->dst_addr; + bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0; /* Find maximum width of the source bus */ bd.srcbus.maxwidth = -- cgit v1.1 From fc74eb791590e624ca6915ae76a04808e03bffb0 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:12:06 +0100 Subject: DMA: PL08x: clean up LLI debugging Clean up debugging when setting up the LLI list. This reduces the amount of output while preserving the information, and makes it easier to read. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 6808f7d..1c641bf 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -617,25 +617,15 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, /* Set up the bus widths to the maximum */ bd.srcbus.buswidth = bd.srcbus.maxwidth; bd.dstbus.buswidth = bd.dstbus.maxwidth; - dev_vdbg(&pl08x->adev->dev, - "%s source bus is %d bytes wide, dest bus is %d bytes wide\n", - __func__, bd.srcbus.buswidth, bd.dstbus.buswidth); - /* * Bytes transferred == tsize * MIN(buswidths), not max(buswidths) */ max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) * PL080_CONTROL_TRANSFER_SIZE_MASK; - dev_vdbg(&pl08x->adev->dev, - "%s max bytes per lli = %zu\n", - __func__, max_bytes_per_lli); /* We need to count this down to zero */ bd.remainder = txd->len; - dev_vdbg(&pl08x->adev->dev, - "%s remainder = %zu\n", - __func__, bd.remainder); /* * Choose bus to align to @@ -644,6 +634,16 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, */ pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); + dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n", + bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "", + bd.srcbus.buswidth, + bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "", + bd.dstbus.buswidth, + bd.remainder, max_bytes_per_lli); + dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n", + mbus == &bd.srcbus ? "src" : "dst", + sbus == &bd.srcbus ? 
"src" : "dst"); + if (txd->len < mbus->buswidth) { /* Less than a bus width available - send as single bytes */ while (bd.remainder) { @@ -835,15 +835,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, { int i; + dev_vdbg(&pl08x->adev->dev, + "%-3s %-9s %-10s %-10s %-10s %s\n", + "lli", "", "csrc", "cdst", "clli", "cctl"); for (i = 0; i < num_llis; i++) { dev_vdbg(&pl08x->adev->dev, - "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n", - i, - &llis_va[i], - llis_va[i].src, - llis_va[i].dst, - llis_va[i].cctl, - llis_va[i].lli + "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, &llis_va[i], llis_va[i].src, + llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl ); } } -- cgit v1.1 From b207b4d02beb06059478339bbe4672ba715605d6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:12:27 +0100 Subject: DMA: PL08x: separately store source/destination slave address Store the source/destination slave address separately into the channel structure. This moves us towards being able to avoid a configuration call each time we use the channel. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 21 +++++++++------------ include/linux/amba/pl08x.h | 3 ++- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 1c641bf..077ddee 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1102,7 +1102,6 @@ static int dma_set_runtime_config(struct dma_chan *chan, struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; - dma_addr_t addr; u32 maxburst; u32 cctl = 0; int i; @@ -1113,11 +1112,9 @@ static int dma_set_runtime_config(struct dma_chan *chan, /* Transfer direction */ plchan->runtime_direction = config->direction; if (config->direction == DMA_TO_DEVICE) { - addr = config->dst_addr; addr_width = config->dst_addr_width; maxburst = config->dst_maxburst; } else if (config->direction == DMA_FROM_DEVICE) { - addr = config->src_addr; addr_width = config->src_addr_width; maxburst = config->src_maxburst; } else { @@ -1161,7 +1158,11 @@ static int dma_set_runtime_config(struct dma_chan *chan, cctl |= burst_sizes[i].reg; } - plchan->runtime_addr = addr; + if (plchan->runtime_direction == DMA_FROM_DEVICE) { + plchan->src_addr = config->src_addr; + } else { + plchan->dst_addr = config->dst_addr; + } /* Modify the default channel data to fit PrimeCell request */ cd->cctl = cctl; @@ -1396,19 +1397,13 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; txd->cctl |= PL080_CONTROL_SRC_INCR; txd->src_addr = sgl->dma_address; - if (plchan->runtime_addr) - txd->dst_addr = plchan->runtime_addr; - else - txd->dst_addr = plchan->cd->addr; + txd->dst_addr = plchan->dst_addr; src_buses = pl08x->mem_buses; dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; txd->cctl |= PL080_CONTROL_DST_INCR; - if (plchan->runtime_addr) - txd->src_addr = plchan->runtime_addr; - else - txd->src_addr = plchan->cd->addr; + txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; src_buses = plchan->cd->periph_buses; dst_buses = pl08x->mem_buses; @@ -1704,6 +1699,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->slave = true; chan->name = 
pl08x->pd->slave_channels[i].bus_id; chan->cd = &pl08x->pd->slave_channels[i]; + chan->src_addr = chan->cd->addr; + chan->dst_addr = chan->cd->addr; } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 3111385..072ab28 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -173,7 +173,8 @@ struct pl08x_dma_chan { struct tasklet_struct tasklet; char *name; struct pl08x_channel_data *cd; - dma_addr_t runtime_addr; + dma_addr_t src_addr; + dma_addr_t dst_addr; enum dma_data_direction runtime_direction; dma_cookie_t lc; struct list_head pend_list; -- cgit v1.1 From f14c426c723634d223344ad820997d92a3e355b6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:12:47 +0100 Subject: DMA: PL08x: separately store source/destination cctl Store the source/destination cctl values into the channel structure. This moves us towards being able to avoid a configuration call each time we use the channel. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 30 ++++++++++++++++-------------- include/linux/amba/pl08x.h | 2 ++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 077ddee..ba617e3f 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1095,12 +1095,21 @@ static const struct burst_table burst_sizes[] = { }, }; +static u32 pl08x_cctl(u32 cctl) +{ + cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | + PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | + PL080_CONTROL_PROT_MASK); + + /* Access the cell in privileged mode, non-bufferable, non-cacheable */ + return cctl | PL080_CONTROL_PROT_SYS; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; - struct pl08x_channel_data *cd = plchan->cd; enum dma_slave_buswidth addr_width; u32 maxburst; u32 cctl = 0; @@ -1160,13 +1169,12 @@ static int dma_set_runtime_config(struct dma_chan *chan, if (plchan->runtime_direction == DMA_FROM_DEVICE) { plchan->src_addr = config->src_addr; + plchan->src_cctl = pl08x_cctl(cctl); } else { plchan->dst_addr = config->dst_addr; + plchan->dst_cctl = pl08x_cctl(cctl); } - /* Modify the default channel data to fit PrimeCell request */ - cd->cctl = cctl; - dev_dbg(&pl08x->adev->dev, "configured channel %s (%s) for %s, data width %d, " "maxburst %d words, LE, CCTL=0x%08x\n", @@ -1385,24 +1393,16 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( txd->direction = direction; txd->len = sgl->length; - txd->cctl = plchan->cd->cctl & - ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | - PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR | - PL080_CONTROL_PROT_MASK); - - /* Access the cell in privileged mode, non-bufferable, non-cacheable */ - txd->cctl |= PL080_CONTROL_PROT_SYS; - if (direction == DMA_TO_DEVICE) { txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl |= PL080_CONTROL_SRC_INCR; + txd->cctl = plchan->dst_cctl | PL080_CONTROL_SRC_INCR; txd->src_addr = sgl->dma_address; txd->dst_addr = plchan->dst_addr; src_buses = pl08x->mem_buses; dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl |= PL080_CONTROL_DST_INCR; + txd->cctl 
= plchan->src_cctl | PL080_CONTROL_DST_INCR; txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; src_buses = plchan->cd->periph_buses; @@ -1701,6 +1701,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->cd = &pl08x->pd->slave_channels[i]; chan->src_addr = chan->cd->addr; chan->dst_addr = chan->cd->addr; + chan->src_cctl = pl08x_cctl(chan->cd->cctl); + chan->dst_cctl = pl08x_cctl(chan->cd->cctl); } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 072ab28..47cfe31 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -175,6 +175,8 @@ struct pl08x_dma_chan { struct pl08x_channel_data *cd; dma_addr_t src_addr; dma_addr_t dst_addr; + u32 src_cctl; + u32 dst_cctl; enum dma_data_direction runtime_direction; dma_cookie_t lc; struct list_head pend_list; -- cgit v1.1 From fa020e7d046436cb6642b23dc95012a3064d77e2 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:13:07 +0100 Subject: DMA: PL08x: constify plchan->cd and plat->slave_channels We no longer write to the channel data structure, so we can make it const throughout the driver. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- include/linux/amba/pl08x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index 47cfe31..e6e28f3 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -172,7 +172,7 @@ struct pl08x_dma_chan { int phychan_hold; struct tasklet_struct tasklet; char *name; - struct pl08x_channel_data *cd; + const struct pl08x_channel_data *cd; dma_addr_t src_addr; dma_addr_t dst_addr; u32 src_cctl; @@ -205,7 +205,7 @@ struct pl08x_dma_chan { * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 */ struct pl08x_platform_data { - struct pl08x_channel_data *slave_channels; + const struct pl08x_channel_data *slave_channels; unsigned int num_slave_channels; struct pl08x_channel_data memcpy_channel; int (*get_signal)(struct pl08x_dma_chan *); -- cgit v1.1 From aa88cdaa149e1c1cfc935ff73e50f3f9f3b2e3a1 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:13:28 +0100 Subject: DMA: PL08x: cleanup selection of buswidth Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index ba617e3f..2dd37ff 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1105,13 +1105,26 @@ static u32 pl08x_cctl(u32 cctl) return cctl | PL080_CONTROL_PROT_SYS; } +static u32 pl08x_width(enum dma_slave_buswidth width) +{ + switch (width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + return PL080_WIDTH_8BIT; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + return PL080_WIDTH_16BIT; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + return PL080_WIDTH_32BIT; + } + return ~0; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; enum dma_slave_buswidth addr_width; - u32 maxburst; + u32 width, maxburst; u32 cctl = 0; int i; @@ -1132,25 +1145,16 @@ static int dma_set_runtime_config(struct dma_chan *chan, return -EINVAL; } - switch (addr_width) { 
- case DMA_SLAVE_BUSWIDTH_1_BYTE: - cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - case DMA_SLAVE_BUSWIDTH_2_BYTES: - cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - case DMA_SLAVE_BUSWIDTH_4_BYTES: - cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) | - (PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT); - break; - default: + width = pl08x_width(addr_width); + if (width == ~0) { dev_err(&pl08x->adev->dev, "bad runtime_config: alien address width\n"); return -EINVAL; } + cctl |= width << PL080_CONTROL_SWIDTH_SHIFT; + cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; + /* * Now decide on a maxburst: * If this channel will only request single transfers, set this -- cgit v1.1 From 121c8476a3c39a483326c33526e72a07661df1fc Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:13:48 +0100 Subject: DMA: PL08x: avoid recalculating cctl at each prepare Now that we have separate cctl values for M>P and P>M transfers, we can avoid calculating the cctl value each time we prepare a transaction. Move the bus selection and increment setting to the slave configuration and initialization functions. Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 78 ++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 2dd37ff..a84db8b 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1095,6 +1095,23 @@ static const struct burst_table burst_sizes[] = { }, }; +/* + * Given the source and destination available bus masks, select which + * will be routed to each port. We try to have source and destination + * on separate ports, but always respect the allowable settings. + */ +static u32 pl08x_select_bus(u8 src, u8 dst) +{ + u32 cctl = 0; + + if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) + cctl |= PL080_CONTROL_DST_AHB2; + if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) + cctl |= PL080_CONTROL_SRC_AHB2; + + return cctl; +} + static u32 pl08x_cctl(u32 cctl) { cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 | @@ -1173,10 +1190,14 @@ static int dma_set_runtime_config(struct dma_chan *chan, if (plchan->runtime_direction == DMA_FROM_DEVICE) { plchan->src_addr = config->src_addr; - plchan->src_cctl = pl08x_cctl(cctl); + plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR | + pl08x_select_bus(plchan->cd->periph_buses, + pl08x->mem_buses); } else { plchan->dst_addr = config->dst_addr; - plchan->dst_cctl = pl08x_cctl(cctl); + plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(pl08x->mem_buses, + plchan->cd->periph_buses); } dev_dbg(&pl08x->adev->dev, @@ -1277,23 +1298,6 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan, return 0; } -/* - * Given the source and destination available bus masks, select which - * will be routed to each port. We try to have source and destination - * on separate ports, but always respect the allowable settings. 
- */ -static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst) -{ - u32 cctl = 0; - - if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) - cctl |= PL080_CONTROL_DST_AHB2; - if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) - cctl |= PL080_CONTROL_SRC_AHB2; - - return cctl; -} - static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan, unsigned long flags) { @@ -1345,8 +1349,8 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; if (pl08x->vd->dualmaster) - txd->cctl |= pl08x_select_bus(pl08x, - pl08x->mem_buses, pl08x->mem_buses); + txd->cctl |= pl08x_select_bus(pl08x->mem_buses, + pl08x->mem_buses); ret = pl08x_prep_channel_resources(plchan, txd); if (ret) @@ -1363,7 +1367,6 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; struct pl08x_txd *txd; - u8 src_buses, dst_buses; int ret; /* @@ -1399,26 +1402,20 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( if (direction == DMA_TO_DEVICE) { txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = plchan->dst_cctl | PL080_CONTROL_SRC_INCR; + txd->cctl = plchan->dst_cctl; txd->src_addr = sgl->dma_address; txd->dst_addr = plchan->dst_addr; - src_buses = pl08x->mem_buses; - dst_buses = plchan->cd->periph_buses; } else if (direction == DMA_FROM_DEVICE) { txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = plchan->src_cctl | PL080_CONTROL_DST_INCR; + txd->cctl = plchan->src_cctl; txd->src_addr = plchan->src_addr; txd->dst_addr = sgl->dma_address; - src_buses = plchan->cd->periph_buses; - dst_buses = pl08x->mem_buses; } else { dev_err(&pl08x->adev->dev, "%s direction unsupported\n", __func__); return NULL; } - txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses); - ret = pl08x_prep_channel_resources(plchan, txd); if (ret) return NULL; @@ -1669,6 +1666,20 @@ static irqreturn_t pl08x_irq(int irq, void *dev) return mask ? IRQ_HANDLED : IRQ_NONE; } +static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan) +{ + u32 cctl = pl08x_cctl(chan->cd->cctl); + + chan->slave = true; + chan->name = chan->cd->bus_id; + chan->src_addr = chan->cd->addr; + chan->dst_addr = chan->cd->addr; + chan->src_cctl = cctl | PL080_CONTROL_DST_INCR | + pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses); + chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR | + pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses); +} + /* * Initialise the DMAC memcpy/slave channels. 
* Make a local wrapper to hold required data @@ -1700,13 +1711,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->state = PL08X_CHAN_IDLE; if (slave) { - chan->slave = true; - chan->name = pl08x->pd->slave_channels[i].bus_id; chan->cd = &pl08x->pd->slave_channels[i]; - chan->src_addr = chan->cd->addr; - chan->dst_addr = chan->cd->addr; - chan->src_cctl = pl08x_cctl(chan->cd->cctl); - chan->dst_cctl = pl08x_cctl(chan->cd->cctl); + pl08x_dma_slave_init(chan); } else { chan->cd = &pl08x->pd->memcpy_channel; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); -- cgit v1.1 From 760596c6b986e6345a28392cf40ee344bfd209a6 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 21 Jul 2011 17:14:08 +0100 Subject: DMA: PL08x: cleanup selection of burst size Acked-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 58 +++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index a84db8b..9aa2bd4 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1048,50 +1048,42 @@ pl08x_dma_tx_status(struct dma_chan *chan, /* PrimeCell DMA extension */ struct burst_table { - int burstwords; + u32 burstwords; u32 reg; }; static const struct burst_table burst_sizes[] = { { .burstwords = 256, - .reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_256, }, { .burstwords = 128, - .reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_128, }, { .burstwords = 64, - .reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_64, }, { .burstwords = 32, - .reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_32, }, { .burstwords = 16, - .reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_16, }, { .burstwords = 8, - .reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_8, }, { .burstwords = 4, - .reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT), + .reg = PL080_BSIZE_4, }, { - .burstwords = 1, - .reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT), + .burstwords = 0, + .reg = PL080_BSIZE_1, }, }; @@ -1135,15 +1127,25 @@ static u32 pl08x_width(enum dma_slave_buswidth width) return ~0; } +static u32 pl08x_burst(u32 maxburst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) + if (burst_sizes[i].burstwords <= maxburst) + break; + + return burst_sizes[i].reg; +} + static int dma_set_runtime_config(struct dma_chan *chan, struct dma_slave_config *config) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; enum dma_slave_buswidth addr_width; - u32 width, maxburst; + u32 width, burst, maxburst; u32 cctl = 0; - int i; if (!plchan->slave) return -EINVAL; @@ -1173,20 +1175,16 @@ static int dma_set_runtime_config(struct dma_chan *chan, cctl |= width << PL080_CONTROL_DWIDTH_SHIFT; /* - * Now decide on a maxburst: * If this channel will only request single transfers, set this * down to ONE element. 
Also select one element if no maxburst * is specified. */ - if (plchan->cd->single || maxburst == 0) { - cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) | - (PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT); - } else { - for (i = 0; i < ARRAY_SIZE(burst_sizes); i++) - if (burst_sizes[i].burstwords <= maxburst) - break; - cctl |= burst_sizes[i].reg; - } + if (plchan->cd->single) + maxburst = 1; + + burst = pl08x_burst(maxburst); + cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT; + cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT; if (plchan->runtime_direction == DMA_FROM_DEVICE) { plchan->src_addr = config->src_addr; -- cgit v1.1 From f32807f1ff7fbfd2d4ec708b1ac8cb75cb92bfef Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 25 Jul 2011 19:22:01 +0530 Subject: dmaengine: pl08x: handle the rest of enums in pl08x_width pl08x_width function does not handle rest of enums for DMA_SLAVE_BUSWIDTH_xxxx which causes gcc to emit below warining drivers/dma/amba-pl08x.c: In function 'pl08x_width': drivers/dma/amba-pl08x.c:1119: warning: enumeration value 'DMA_SLAVE_BUSWIDTH_UNDEFINED' not handled in switch drivers/dma/amba-pl08x.c:1119: warning: enumeration value 'DMA_SLAVE_BUSWIDTH_8_BYTES' not handled in switch this patch adds a default case which returns error Signed-off-by: Vinod Koul --- drivers/dma/amba-pl08x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 9aa2bd4..196a737 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1123,8 +1123,9 @@ static u32 pl08x_width(enum dma_slave_buswidth width) return PL080_WIDTH_16BIT; case DMA_SLAVE_BUSWIDTH_4_BYTES: return PL080_WIDTH_32BIT; + default: + return ~0; } - return ~0; } static u32 pl08x_burst(u32 maxburst) -- cgit v1.1 From 2d859db3e4a82a365572592d57624a5f996ed0ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 26 Jul 2011 09:07:11 -0400 Subject: ext4: fix data corruption in inodes with journalled data When journalling data for an inode (either because it is a symlink or because the filesystem is mounted in data=journal mode), ext4_evict_inode() can discard unwritten data by calling truncate_inode_pages(). This is because we don't mark the buffer / page dirty when journalling data but only add the buffer to the running transaction and thus mm does not know there are still unwritten data. Fix the problem by carefully tracking transaction containing inode's data, committing this transaction, and writing uncheckpointed buffers when inode should be reaped. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index de50b16..43e4abd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -121,6 +121,33 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); if (inode->i_nlink) { + /* + * When journalling data dirty buffers are tracked only in the + * journal. So although mm thinks everything is clean and + * ready for reaping the inode might still have some pages to + * write in the running transaction or waiting to be + * checkpointed. Thus calling jbd2_journal_invalidatepage() + * (via truncate_inode_pages()) to discard these buffers can + * cause data loss. Also even if we did not discard these + * buffers, we would have no way to find them after the inode + * is reaped and thus user could see stale data if he tries to + * read them before the transaction is checkpointed. 
So be + * careful and force everything to disk here... We use + * ei->i_datasync_tid to store the newest transaction + * containing inode's data. + * + * Note that directories do not have this problem because they + * don't use page cache. + */ + if (ext4_should_journal_data(inode) && + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; + + jbd2_log_start_commit(journal, commit_tid); + jbd2_log_wait_commit(journal, commit_tid); + filemap_write_and_wait(&inode->i_data); + } truncate_inode_pages(&inode->i_data, 0); goto no_delete; } @@ -970,6 +997,7 @@ static int ext4_journalled_write_end(struct file *file, if (new_i_size > inode->i_size) i_size_write(inode, pos+copied); ext4_set_inode_state(inode, EXT4_STATE_JDATA); + EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; if (new_i_size > EXT4_I(inode)->i_disksize) { ext4_update_i_disksize(inode, new_i_size); ret2 = ext4_mark_inode_dirty(handle, inode); @@ -1678,6 +1706,7 @@ static int __ext4_journalled_writepage(struct page *page, write_end_fn); if (ret == 0) ret = err; + EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; err = ext4_journal_stop(handle); if (!ret) ret = err; -- cgit v1.1 From 9ea71503a8ed9184d2d0b8ccc4d269d05f7940ae Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Thu, 30 Jun 2011 11:21:32 -0500 Subject: futex: Fix regression with read only mappings commit 7485d0d3758e8e6491a5c9468114e74dc050785d (futexes: Remove rw parameter from get_futex_key()) in 2.6.33 fixed two problems: First, It prevented a loop when encountering a ZERO_PAGE. Second, it fixed RW MAP_PRIVATE futex operations by forcing the COW to occur by unconditionally performing a write access get_user_pages_fast() to get the page. The commit also introduced a user-mode regression in that it broke futex operations on read-only memory maps. For example, this breaks workloads that have one or more reader processes doing a FUTEX_WAIT on a futex within a read only shared file mapping, and a writer processes that has a writable mapping issuing the FUTEX_WAKE. This fixes the regression for valid futex operations on RO mappings by trying a RO get_user_pages_fast() when the RW get_user_pages_fast() fails. This change makes it necessary to also check for invalid use cases, such as anonymous RO mappings (which can never change) and the ZERO_PAGE which the commit referenced above was written to address. This patch does restore the original behavior with RO MAP_PRIVATE mappings, which have inherent user-mode usage problems and don't really make sense. With this patch performing a FUTEX_WAIT within a RO MAP_PRIVATE mapping will be successfully woken provided another process updates the region of the underlying mapped file. However, the mmap() man page states that for a MAP_PRIVATE mapping: It is unspecified whether changes made to the file after the mmap() call are visible in the mapped region. So user-mode users attempting to use futex operations on RO MAP_PRIVATE mappings are depending on unspecified behavior. Additionally a RO MAP_PRIVATE mapping could fail to wake up in the following case. Thread-A: call futex(FUTEX_WAIT, memory-region-A). get_futex_key() return inode based key. sleep on the key Thread-B: call mprotect(PROT_READ|PROT_WRITE, memory-region-A) Thread-B: write memory-region-A. COW happen. This process's memory-region-A become related to new COWed private (ie PageAnon=1) page. Thread-B: call futex(FUETX_WAKE, memory-region-A). 
get_futex_key() return mm based key. IOW, we fail to wake up Thread-A. Once again doing something like this is just silly and users who do something like this get what they deserve. While RO MAP_PRIVATE mappings are nonsensical, checking for a private mapping requires walking the vmas and was deemed too costly to avoid a userspace hang. This Patch is based on Peter Zijlstra's initial patch with modifications to only allow RO mappings for futex operations that need VERIFY_READ access. Reported-by: David Oliver Signed-off-by: Shawn Bohrer Acked-by: Peter Zijlstra Signed-off-by: Darren Hart Cc: KOSAKI Motohiro Cc: peterz@infradead.org Cc: eric.dumazet@gmail.com Cc: zvonler@rgmadvisors.com Cc: hughd@google.com Link: http://lkml.kernel.org/r/1309450892-30676-1-git-send-email-sbohrer@rgmadvisors.com Cc: stable@kernel.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index fe28dc2..70bb54b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -218,6 +218,8 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, + * VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -229,12 +231,12 @@ static void drop_futex_key_refs(union futex_key *key) * lock_page() might sleep, the caller should not hold a spinlock. */ static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; - int err; + int err, ro = 0; /* * The futex address must be "naturally" aligned. @@ -262,8 +264,18 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) again: err = get_user_pages_fast(address, 1, 1, &page); + /* + * If write access is not required (eg. FUTEX_WAIT), try + * and get read-only access. + */ + if (err == -EFAULT && rw == VERIFY_READ) { + err = get_user_pages_fast(address, 1, 0, &page); + ro = 1; + } if (err < 0) return err; + else + err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE page_head = page; @@ -305,6 +317,13 @@ again: if (!page_head->mapping) { unlock_page(page_head); put_page(page_head); + /* + * ZERO_PAGE pages don't have a mapping. Avoid a busy loop + * trying to find one. RW mapping would have COW'd (and thus + * have a mapping) so this page is RO and won't ever change. + */ + if ((page_head == ZERO_PAGE(address))) + return -EFAULT; goto again; } @@ -316,6 +335,15 @@ again: * the object not the particular process. */ if (PageAnon(page_head)) { + /* + * A RO anonymous page will never change and thus doesn't make + * sense for futex operations. 
+ */ + if (ro) { + err = -EFAULT; + goto out; + } + key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; @@ -327,9 +355,10 @@ again: get_futex_key_refs(key); +out: unlock_page(page_head); put_page(page_head); - return 0; + return err; } static inline void put_futex_key(union futex_key *key) @@ -940,7 +969,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -986,10 +1015,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; @@ -1243,10 +1272,11 @@ retry: pi_state = NULL; } - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, + requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -1790,7 +1820,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); if (unlikely(ret != 0)) return ret; @@ -1941,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, } retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2060,7 +2090,7 @@ retry: if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2249,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; -- cgit v1.1 From 8f82f840ec6ab873f520364d443ff6fa1b3f8e22 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:35:44 -0400 Subject: ext4: prevent parallel resizers by atomic bit ops Before this patch, parallel resizers were allowed and protected by a mutex lock; actually, there is no need to support parallel resizing, so this patch prevents parallel resizers by atomic bit ops, like lock_page() and unlock_page() do. To do this, the patch removed the mutex lock s_resize_lock from struct ext4_sb_info and added an unsigned long field named s_resize_flags which indicates if there is a resizer.
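
The open-coded bit lock follows the same acquire/release pattern as lock_page()/unlock_page(). As a minimal, generic sketch of that pattern (a hypothetical my_flags word and MY_BUSY bit are used here, not the actual ext4 code, which appears in the diff below):

        static unsigned long my_flags;          /* bit 0 doubles as a lock */
        #define MY_BUSY 0

        static int my_begin(void)
        {
                /* acquire semantics; fail instead of blocking if already held */
                if (test_and_set_bit_lock(MY_BUSY, &my_flags))
                        return -EBUSY;
                return 0;
        }

        static void my_end(void)
        {
                /* release semantics, pairs with test_and_set_bit_lock() above */
                clear_bit_unlock(MY_BUSY, &my_flags);
                smp_mb__after_clear_bit();
        }
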
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 7 ++++++- fs/ext4/ioctl.c | 12 ++++++++---- fs/ext4/resize.c | 55 +++++++++++++++++++++---------------------------------- fs/ext4/super.c | 2 +- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 62cee2b..bb0f776 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1127,7 +1127,8 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; struct mutex s_orphan_lock; - struct mutex s_resize_lock; + unsigned long s_resize_flags; /* Flags indicating if there + is a resizer */ unsigned long s_commit_interval; u32 s_max_batch_time; u32 s_min_batch_time; @@ -2269,6 +2270,10 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; +#define EXT4_RESIZING 0 +extern int ext4_resize_begin(struct super_block *sb); +extern void ext4_resize_end(struct super_block *sb); + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 808c554..f18bfe3 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -202,8 +202,9 @@ setversion_out: struct super_block *sb = inode->i_sb; int err, err2=0; - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; + err = ext4_resize_begin(sb); + if (err) + return err; if (get_user(n_blocks_count, (__u32 __user *)arg)) return -EFAULT; @@ -221,6 +222,7 @@ setversion_out: if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); + ext4_resize_end(sb); return err; } @@ -271,8 +273,9 @@ mext_out: struct super_block *sb = inode->i_sb; int err, err2=0; - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; + err = ext4_resize_begin(sb); + if (err) + return err; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) @@ -291,6 +294,7 @@ mext_out: if (err == 0) err = err2; mnt_drop_write(filp->f_path.mnt); + ext4_resize_end(sb); return err; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80bbc9c..0213f63 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -16,6 +16,25 @@ #include "ext4_jbd2.h" +int ext4_resize_begin(struct super_block *sb) +{ + int ret = 0; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) + ret = -EBUSY; + + return ret; +} + +void ext4_resize_end(struct super_block *sb) +{ + clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); + smp_mb__after_clear_bit(); +} + #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -181,11 +200,7 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - mutex_lock(&sbi->s_resize_lock); - if (input->group != sbi->s_groups_count) { - err = -EBUSY; - goto exit_journal; - } + BUG_ON(input->group != sbi->s_groups_count); if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { err = PTR_ERR(bh); @@ -285,7 +300,6 @@ exit_bh: brelse(bh); exit_journal: - mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -799,13 +813,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) goto exit_put; } - mutex_lock(&sbi->s_resize_lock); - if (input->group != sbi->s_groups_count) { - ext4_warning(sb, "multiple resizers run on filesystem!"); - err = -EBUSY; - goto exit_journal; - } - if ((err = 
ext4_journal_get_write_access(handle, sbi->s_sbh))) goto exit_journal; @@ -829,7 +836,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* * OK, now we've set up the new group. Time to make it active. * - * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -886,13 +892,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * * The precise rules we use are: * - * * Writers of s_groups_count *must* hold s_resize_lock - * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * - * * Readers must hold s_resize_lock over the access - * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. * @@ -937,7 +939,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_handle_dirty_super(handle, sb); exit_journal: - mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; if (!err) { @@ -972,9 +973,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, int err; ext4_group_t group; - /* We don't need to worry about locking wrt other resizers just - * yet: we're going to revalidate es->s_blocks_count after - * taking the s_resize_lock below. */ o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) @@ -995,7 +993,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if (n_blocks_count < o_blocks_count) { ext4_warning(sb, "can't shrink FS - resize aborted"); - return -EBUSY; + return -EINVAL; } /* Handle the remaining blocks in the last group only. */ @@ -1038,24 +1036,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } - mutex_lock(&EXT4_SB(sb)->s_resize_lock); - if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, "multiple resizers run on filesystem!"); - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); - ext4_journal_stop(handle); - err = -EBUSY; - goto exit_put; - } - if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { ext4_warning(sb, "error %d on journal write access", err); - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); - mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 143d763..cfe9f39 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3500,7 +3500,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); - mutex_init(&sbi->s_resize_lock); + sbi->s_resize_flags = 0; sb->s_root = NULL; -- cgit v1.1 From ce723c31b58f54fb865036805475ee7a8c5dc173 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:39:09 -0400 Subject: ext4: prevent a fs with errors from being resized A filesystem with errors is not allowed to being resized, otherwise, it is easy to destroy the filesystem. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0213f63..53d9795 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -23,6 +23,16 @@ int ext4_resize_begin(struct super_block *sb) if (!capable(CAP_SYS_RESOURCE)) return -EPERM; + /* + * We are not allowed to do online-resizing on a filesystem mounted + * with error, because it can destroy the filesystem easily. + */ + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { + ext4_warning(sb, "There are errors in the filesystem, " + "so online resizing is not allowed\n"); + return -EPERM; + } + if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) ret = -EBUSY; -- cgit v1.1 From 0529155e8a4bcb77dfc9ceaea19c6501487e452b Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:43:56 -0400 Subject: ext4: rename ext4_add_groupblocks() to ext4_group_add_blocks() Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 4 ++-- fs/ext4/resize.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bb0f776..bbe81db 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1799,7 +1799,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); -extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, +extern void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e165830..93035ea 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4667,7 +4667,7 @@ error_return: } /** - * ext4_add_groupblocks() -- Add given blocks to an existing group + * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block * @block: start physcial block to add to the block group @@ -4675,7 +4675,7 @@ error_return: * * This marks the blocks as free in the bitmap and buddy. */ -void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, +void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 53d9795..d241ecb 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1056,7 +1056,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ - ext4_add_groupblocks(handle, sb, o_blocks_count, add); + ext4_group_add_blocks(handle, sb, o_blocks_count, add); ext4_handle_dirty_super(handle, sb); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); -- cgit v1.1 From cc7365dfe48cb2191f1572bf69e30d3e58716313 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:46:07 -0400 Subject: ext4: let ext4_group_add_blocks() return an error code This patch lets ext4_group_add_blocks() return an error code if it fails, so that upper functions can handle error correctly. 
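
The caller in ext4_group_extend() can then propagate a failure while still stopping the journal handle; roughly (a condensed sketch of the resize.c hunk below, not a verbatim copy):

        err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
        ext4_handle_dirty_super(handle, sb);

        /* stop the handle in any case, but do not let err2 hide err */
        err2 = ext4_journal_stop(handle);
        if (!err && err2)
                err = err2;
        if (err)
                goto exit_put;
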
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 20 +++++++++++++++----- fs/ext4/resize.c | 10 +++++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bbe81db..da7ab48 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1799,7 +1799,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); -extern void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, +extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 93035ea..dbe4295 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4675,7 +4675,7 @@ error_return: * * This marks the blocks as free in the bitmap and buddy. */ -void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, +int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; @@ -4696,15 +4696,24 @@ void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, * Check to see if we are freeing blocks across a group * boundary. */ - if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { + ext4_warning(sb, "too much blocks added to group %u\n", + block_group); + err = -EINVAL; goto error_return; + } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) + if (!bitmap_bh) { + err = -EIO; goto error_return; + } + desc = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!desc) + if (!desc) { + err = -EIO; goto error_return; + } if (in_range(ext4_block_bitmap(sb, desc), block, count) || in_range(ext4_inode_bitmap(sb, desc), block, count) || @@ -4714,6 +4723,7 @@ void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); + err = -EINVAL; goto error_return; } @@ -4782,7 +4792,7 @@ void ext4_group_add_blocks(handle_t *handle, struct super_block *sb, error_return: brelse(bitmap_bh); ext4_std_error(sb, err); - return; + return err; } /** diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index d241ecb..4c041e3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -980,7 +980,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_grpblk_t add; struct buffer_head *bh; handle_t *handle; - int err; + int err, err2; ext4_group_t group; o_blocks_count = ext4_blocks_count(es); @@ -1056,11 +1056,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ - ext4_group_add_blocks(handle, sb, o_blocks_count, add); + err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); ext4_handle_dirty_super(handle, sb); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); - if ((err = ext4_journal_stop(handle))) + err2 = ext4_journal_stop(handle); + if (!err && err2) + err = err2; + + if (err) goto exit_put; if (test_opt(sb, DEBUG)) -- cgit v1.1 From 4740b830ed5720ade6c780dbf3fdfe9089b3552d Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: 
Tue, 26 Jul 2011 21:51:08 -0400 Subject: ext4: let ext4_group_add_blocks() handle 0 blocks quickly If ext4_group_add_blocks() is called with 0 block, make it return 0 without doing any extra work. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dbe4295..b6ef4da39 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4691,6 +4691,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); + if (count == 0) + return 0; + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); /* * Check to see if we are freeing blocks across a group -- cgit v1.1 From 2b79b09d13e35577151bd13ba08809911baccd1c Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:53:35 -0400 Subject: ext4: fix a typo in ext4_group_extend() Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4c041e3..5f0aefd 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_blocks_count = ext4_blocks_count(es); if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", + printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) -- cgit v1.1 From c3e94d1df9bdd9e2c4ba7e8f534f7925f1756f97 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 22:05:53 -0400 Subject: ext4: let setup_new_group_blocks() set multiple bits at a time Rename mb_set_bits() to ext4_set_bits() and make it a global function so that setup_new_group_blocks() can use it. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ fs/ext4/mballoc.c | 15 ++++++++------- fs/ext4/resize.c | 18 +++++++----------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index da7ab48..ba2009b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -940,6 +940,8 @@ struct ext4_inode_info { #define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_bit find_next_bit_le +extern void ext4_set_bits(void *bm, int cur, int len); + /* * Maximal mount counts between two filesystem checks */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b6ef4da39..fa716c9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1282,7 +1282,7 @@ static void mb_clear_bits(void *bm, int cur, int len) } } -static void mb_set_bits(void *bm, int cur, int len) +void ext4_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1511,7 +1511,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) } mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); - mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); + ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -2795,8 +2795,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, * We leak some of the blocks here. 
*/ ext4_lock_group(sb, ac->ac_b_ex.fe_group); - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); + ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) @@ -2814,7 +2814,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); + ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_blks_set(sb, gdp, @@ -3284,7 +3285,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, node); - mb_set_bits(bitmap, entry->start_blk, entry->count); + ext4_set_bits(bitmap, entry->start_blk, entry->count); n = rb_next(n); } return; @@ -3326,7 +3327,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - mb_set_bits(bitmap, start, len); + ext4_set_bits(bitmap, start, len); preallocated += len; count++; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5f0aefd..178fb2f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -217,11 +217,6 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_journal; } - if (ext4_bg_has_super(sb, input->group)) { - ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bit(0, bh->b_data); - } - /* Copy all of the GDT blocks into the backup in this group */ for (i = 0, bit = 1, block = start + 1; i < gdblocks; i++, block++, bit++) { @@ -250,7 +245,6 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(gdb); goto exit_bh; } - ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -261,8 +255,11 @@ static int setup_new_group_blocks(struct super_block *sb, GFP_NOFS); if (err) goto exit_bh; - for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) - ext4_set_bit(bit, bh->b_data); + + if (ext4_bg_has_super(sb, input->group)) { + ext4_debug("mark backup group tables %#04llx (+0)\n", start); + ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); + } ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, input->block_bitmap - start); @@ -278,9 +275,8 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); if (err) goto exit_bh; - for (i = 0, bit = input->inode_table - start; - i < sbi->s_itb_per_group; i++, bit++) - ext4_set_bit(bit, bh->b_data); + ext4_set_bits(bh->b_data, input->inode_table - start, + sbi->s_itb_per_group); if ((err = extend_or_restart_transaction(handle, 2, bh))) goto exit_bh; -- cgit v1.1 From 6d40bc5a7e8fc71795d131e835f38f161ed7e1b1 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 22:24:41 -0400 Subject: ext4: simplify journal handling in setup_new_group_blocks() This patch simplifies journal handling in setup_new_group_blocks(). In previous code, block bitmap is modified everywhere in setup_new_group_blocks(), ext4_get_write_access() in extend_or_restart_transaction() is used to guarantee that the block bitmap stays in the new handle, this makes things complicated. The previous commit changed things so that the modifications on the block bitmap are batched and done by ext4_set_bits() at the end of the for loop. 
This allows us to simplify things. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 178fb2f..5b423f8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -161,8 +161,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, * If that fails, restart the transaction & regain write access for the * buffer head which is used for block_bitmap modifications. */ -static int extend_or_restart_transaction(handle_t *handle, int thresh, - struct buffer_head *bh) +static int extend_or_restart_transaction(handle_t *handle, int thresh) { int err; @@ -173,9 +172,8 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, if (err < 0) return err; if (err) { - if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) - return err; - if ((err = ext4_journal_get_write_access(handle, bh))) + err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA); + if (err) return err; } @@ -212,29 +210,24 @@ static int setup_new_group_blocks(struct super_block *sb, BUG_ON(input->group != sbi->s_groups_count); - if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { - err = PTR_ERR(bh); - goto exit_journal; - } - /* Copy all of the GDT blocks into the backup in this group */ for (i = 0, bit = 1, block = start + 1; i < gdblocks; i++, block++, bit++) { struct buffer_head *gdb; ext4_debug("update backup group %#04llx (+%d)\n", block, bit); - - if ((err = extend_or_restart_transaction(handle, 1, bh))) - goto exit_bh; + err = extend_or_restart_transaction(handle, 1); + if (err) + goto exit_journal; gdb = sb_getblk(sb, block); if (!gdb) { err = -EIO; - goto exit_bh; + goto exit_journal; } if ((err = ext4_journal_get_write_access(handle, gdb))) { brelse(gdb); - goto exit_bh; + goto exit_journal; } lock_buffer(gdb); memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); @@ -243,7 +236,7 @@ static int setup_new_group_blocks(struct super_block *sb, err = ext4_handle_dirty_metadata(handle, NULL, gdb); if (unlikely(err)) { brelse(gdb); - goto exit_bh; + goto exit_journal; } brelse(gdb); } @@ -254,7 +247,17 @@ static int setup_new_group_blocks(struct super_block *sb, err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, GFP_NOFS); if (err) - goto exit_bh; + goto exit_journal; + + err = extend_or_restart_transaction(handle, 2); + if (err) + goto exit_journal; + + bh = bclean(handle, sb, input->block_bitmap); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + goto exit_journal; + } if (ext4_bg_has_super(sb, input->group)) { ext4_debug("mark backup group tables %#04llx (+0)\n", start); @@ -278,8 +281,6 @@ static int setup_new_group_blocks(struct super_block *sb, ext4_set_bits(bh->b_data, input->inode_table - start, sbi->s_itb_per_group); - if ((err = extend_or_restart_transaction(handle, 2, bh))) - goto exit_bh; ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); -- cgit v1.1 From 5a42fb93e6a33224774786691027ef2d9795c245 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Tue, 26 Jul 2011 14:25:10 +0100 Subject: Improve slave/cyclic DMA engine documentation Improve the documentation for the slave and cyclic DMA engine support reformatting it for easier reading, adding further APIs, splitting it into five steps, and including references to the documentation in dmaengine.h. 
Signed-off-by: Russell King [Fixed the index title to reflect new changes] Signed-off-by: Vinod Koul --- Documentation/dmaengine.txt | 234 +++++++++++++++++++++++++++++++------------- 1 file changed, 164 insertions(+), 70 deletions(-) diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 5a0cb1e..94b7e0f 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -10,87 +10,181 @@ NOTE: For DMA Engine usage in async_tx please see: Below is a guide to device driver writers on how to use the Slave-DMA API of the DMA Engine. This is applicable only for slave DMA usage only. -The slave DMA usage consists of following steps +The slave DMA usage consists of following steps: 1. Allocate a DMA slave channel 2. Set slave and controller specific parameters 3. Get a descriptor for transaction -4. Submit the transaction and wait for callback notification +4. Submit the transaction +5. Issue pending requests and wait for callback notification 1. Allocate a DMA slave channel -Channel allocation is slightly different in the slave DMA context, client -drivers typically need a channel from a particular DMA controller only and even -in some cases a specific channel is desired. To request a channel -dma_request_channel() API is used. - -Interface: -struct dma_chan *dma_request_channel(dma_cap_mask_t mask, - dma_filter_fn filter_fn, - void *filter_param); -where dma_filter_fn is defined as: -typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); - -When the optional 'filter_fn' parameter is set to NULL dma_request_channel -simply returns the first channel that satisfies the capability mask. Otherwise, -when the mask parameter is insufficient for specifying the necessary channel, -the filter_fn routine can be used to disposition the available channels in the -system. The filter_fn routine is called once for each free channel in the -system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags -that channel to be the return value from dma_request_channel. A channel -allocated via this interface is exclusive to the caller, until -dma_release_channel() is called. + + Channel allocation is slightly different in the slave DMA context, + client drivers typically need a channel from a particular DMA + controller only and even in some cases a specific channel is desired. + To request a channel dma_request_channel() API is used. + + Interface: + struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); + where dma_filter_fn is defined as: + typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + + The 'filter_fn' parameter is optional, but highly recommended for + slave and cyclic channels as they typically need to obtain a specific + DMA channel. + + When the optional 'filter_fn' parameter is NULL, dma_request_channel() + simply returns the first channel that satisfies the capability mask. + + Otherwise, the 'filter_fn' routine will be called once for each free + channel which has a capability in 'mask'. 'filter_fn' is expected to + return 'true' when the desired DMA channel is found. + + A channel allocated via this interface is exclusive to the caller, + until dma_release_channel() is called. 2. Set slave and controller specific parameters -Next step is always to pass some specific information to the DMA driver. Most of -the generic information which a slave DMA can use is in struct dma_slave_config. 
-It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA -burst lengths etc. If some DMA controllers have more parameters to be sent then -they should try to embed struct dma_slave_config in their controller specific -structure. That gives flexibility to client to pass more parameters, if -required. - -Interface: -int dmaengine_slave_config(struct dma_chan *chan, - struct dma_slave_config *config) + + Next step is always to pass some specific information to the DMA + driver. Most of the generic information which a slave DMA can use + is in struct dma_slave_config. This allows the clients to specify + DMA direction, DMA addresses, bus widths, DMA burst lengths etc + for the peripheral. + + If some DMA controllers have more parameters to be sent then they + should try to embed struct dma_slave_config in their controller + specific structure. That gives flexibility to client to pass more + parameters, if required. + + Interface: + int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + + Please see the dma_slave_config structure definition in dmaengine.h + for a detailed explaination of the struct members. Please note + that the 'direction' member will be going away as it duplicates the + direction given in the prepare call. 3. Get a descriptor for transaction -For slave usage the various modes of slave transfers supported by the -DMA-engine are: -slave_sg - DMA a list of scatter gather buffers from/to a peripheral -dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the + + For slave usage the various modes of slave transfers supported by the + DMA-engine are: + + slave_sg - DMA a list of scatter gather buffers from/to a peripheral + dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the operation is explicitly stopped. -The non NULL return of this transfer API represents a "descriptor" for the given -transaction. - -Interface: -struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, + + A non-NULL return of this transfer API represents a "descriptor" for + the given transaction. + + Interface: + struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, unsigned long flags); -struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( + + struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_data_direction direction); -4. Submit the transaction and wait for callback notification -To schedule the transaction to be scheduled by dma device, the "descriptor" -returned in above (3) needs to be submitted. -To tell the dma driver that a transaction is ready to be serviced, the -descriptor->submit() callback needs to be invoked. This chains the descriptor to -the pending queue. -The transactions in the pending queue can be activated by calling the -issue_pending API. If channel is idle then the first transaction in queue is -started and subsequent ones queued up. -On completion of the DMA operation the next in queue is submitted and a tasklet -triggered. The tasklet would then call the client driver completion callback -routine for notification, if set. 
-Interface: -void dma_async_issue_pending(struct dma_chan *chan); - -============================================================================== - -Additional usage notes for dma driver writers -1/ Although DMA engine specifies that completion callback routines cannot submit -any new operations, but typically for slave DMA subsequent transaction may not -be available for submit prior to callback routine being called. This requirement -is not a requirement for DMA-slave devices. But they should take care to drop -the spin-lock they might be holding before calling the callback routine + The peripheral driver is expected to have mapped the scatterlist for + the DMA operation prior to calling device_prep_slave_sg, and must + keep the scatterlist mapped until the DMA operation has completed. + The scatterlist must be mapped using the DMA struct device. So, + normal setup should look like this: + + nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len); + if (nr_sg == 0) + /* error */ + + desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg, + direction, flags); + + Once a descriptor has been obtained, the callback information can be + added and the descriptor must then be submitted. Some DMA engine + drivers may hold a spinlock between a successful preparation and + submission so it is important that these two operations are closely + paired. + + Note: + Although the async_tx API specifies that completion callback + routines cannot submit any new operations, this is not the + case for slave/cyclic DMA. + + For slave DMA, the subsequent transaction may not be available + for submission prior to callback function being invoked, so + slave DMA callbacks are permitted to prepare and submit a new + transaction. + + For cyclic DMA, a callback function may wish to terminate the + DMA via dmaengine_terminate_all(). + + Therefore, it is important that DMA engine drivers drop any + locks before calling the callback function which may cause a + deadlock. + + Note that callbacks will always be invoked from the DMA + engines tasklet, never from interrupt context. + +4. Submit the transaction + + Once the descriptor has been prepared and the callback information + added, it must be placed on the DMA engine drivers pending queue. + + Interface: + dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc) + + This returns a cookie can be used to check the progress of DMA engine + activity via other DMA engine calls not covered in this document. + + dmaengine_submit() will not start the DMA operation, it merely adds + it to the pending queue. For this, see step 5, dma_async_issue_pending. + +5. Issue pending DMA requests and wait for callback notification + + The transactions in the pending queue can be activated by calling the + issue_pending API. If channel is idle then the first transaction in + queue is started and subsequent ones queued up. + + On completion of each DMA operation, the next in queue is started and + a tasklet triggered. The tasklet will then call the client driver + completion callback routine for notification, if set. + + Interface: + void dma_async_issue_pending(struct dma_chan *chan); + +Further APIs: + +1. int dmaengine_terminate_all(struct dma_chan *chan) + + This causes all activity for the DMA channel to be stopped, and may + discard data in the DMA FIFO which hasn't been fully transferred. + No callback functions will be called for any incomplete transfers. + +2. int dmaengine_pause(struct dma_chan *chan) + + This pauses activity on the DMA channel without data loss. 
+ +3. int dmaengine_resume(struct dma_chan *chan) + + Resume a previously paused DMA channel. It is invalid to resume a + channel which is not currently paused. + +4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) + + This can be used to check the status of the channel. Please see + the documentation in include/linux/dmaengine.h for a more complete + description of this API. + + This can be used in conjunction with dma_async_is_complete() and + the cookie returned from 'descriptor->submit()' to check for + completion of a specific DMA transaction. + + Note: + Not all DMA engine drivers can return reliable information for + a running DMA channel. It is recommended that DMA engine users + pause or stop (via dmaengine_terminate_all) the channel before + using this API. -- cgit v1.1 From 4a28a3fa8c37193b105807193b3695048f0050a4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Jul 2011 12:58:17 +0300 Subject: iscsi-target: Fix NULL dereference on allocation failure This patch fixes a bug in iscsi_target_init_negotiation() where the "goto out" path dereferences "login" which is NULL upon a memory allocation failure. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_nego.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 713a4d2..4d087ac 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation( pr_err("Unable to allocate memory for struct iscsi_login.\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - goto out; + return NULL; } login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); -- cgit v1.1 From 8f50c7f5d63b0e4a29f6f96e8cfaca782e0c458d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Jul 2011 14:11:43 +0300 Subject: iscsi-target: strlen() doesn't count the terminator This patch fixes an off by one check in iscsit_add_tiqn() because the NULL terminator isn't taken into consideration. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 14c81c4..396c79d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) struct iscsi_tiqn *tiqn = NULL; int ret; - if (strlen(buf) > ISCSI_IQN_LEN) { + if (strlen(buf) >= ISCSI_IQN_LEN) { pr_err("Target IQN exceeds %d bytes\n", ISCSI_IQN_LEN); return ERR_PTR(-EINVAL); -- cgit v1.1 From 76f1928e8831961a692459924cfac0ca1b0d303a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 27 Jul 2011 12:16:22 -0700 Subject: iscsi-target: Fix uninitialized usage of cmd->pad_bytes This patch fixes an uninitialized usage of cmd->pad_bytes inside of iscsit_handle_text_cmd() introduced during a v4.1 change to use cmd members instead of local pad_bytes variables. 
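As background for the hunks below: iSCSI pads the data segment of a text
request up to a 4-byte boundary, and the pad is received into a small,
zero-initialised scratch word that is also fed into the data digest.  An
illustrative example of the arithmetic used here:

    u32 payload_length = 13;                 /* example payload length */
    u32 padding = ((-payload_length) & 3);   /* 3: pads 13 bytes up to 16 */
    u32 pad_bytes = 0;                       /* local scratch the pad is
                                              * received into and digested */

A payload that is already a multiple of four (e.g. 16) gives padding == 0,
so no pad iovec is set up at all.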
Reported-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 396c79d..3ab8abc 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd( char *text_ptr, *text_in; int cmdsn_ret, niov = 0, rx_got, rx_size; u32 checksum = 0, data_crc = 0, payload_length; - u32 padding = 0, text_length = 0; + u32 padding = 0, pad_bytes = 0, text_length = 0; struct iscsi_cmd *cmd; struct kvec iov[3]; struct iscsi_text *hdr; @@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd( padding = ((-payload_length) & 3); if (padding != 0) { - iov[niov].iov_base = cmd->pad_bytes; + iov[niov].iov_base = &pad_bytes; iov[niov++].iov_len = padding; rx_size += padding; pr_debug("Receiving %u additional bytes" @@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd( if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, text_in, text_length, - padding, cmd->pad_bytes, + padding, (u8 *)&pad_bytes, (u8 *)&data_crc); if (checksum != data_crc) { -- cgit v1.1 From 7bbb654e9c77a8692425dbd2c704ad245b86ec8a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 27 Jul 2011 12:37:03 -0700 Subject: iscsi-target: Fix snprintf usage with MAX_PORTAL_LEN This patch makes lio_target_call_addnptotpg() use sprintf() with MAX_PORTAL_LEN + 1 to address the following smatch warning: drivers/target/iscsi/iscsi_target_configfs.c +184 lio_target_call_addnptotpg(21) error: snprintf() chops off the last chars of 'name': 257 vs 256 Reported-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 32bb92c..f095e65 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EOVERFLOW); } memset(buf, 0, MAX_PORTAL_LEN + 1); - snprintf(buf, MAX_PORTAL_LEN, "%s", name); + snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); -- cgit v1.1 From aadcec010127a20599500c7007fbe9998744be41 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 27 Jul 2011 20:13:22 +0000 Subject: iscsi-target: Fix CONFIG_SMP=n and CONFIG_MODULES=n build failure This patch fixes the following CONFIG_SMP=n and CONFIG_MODULES=n build failure, because iscsit_thread_get_cpumask() is defined as a macro in iscsi_target.c, but needed by iscsi_target_login.c drivers/built-in.o: In function `iscsi_post_login_handler': iscsi_target_login.c:(.text+0x13a315): undefined reference to `iscsit_thread_get_cpumask' iscsi_target_login.c:(.text+0x13a4b4): undefined reference to `iscsit_thread_get_cpumask' make: *** [.tmp_vmlinux1] Error 1 Reported-by: Randy Dunlap Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3ab8abc..c24fb10 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask( } #else -#define 
iscsit_thread_get_cpumask(X) ({}) + +void iscsit_thread_get_cpumask(struct iscsi_conn *conn) +{ + return; +} + #define iscsit_thread_check_cpumask(X, Y, Z) ({}) #endif /* CONFIG_SMP */ -- cgit v1.1 From b9f5edc25036779bbff1272f8f94860d2d7a483b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 27 Jul 2011 20:21:15 +0000 Subject: MAINTAINERS: Add target-devel list for drivers/target/ Reported-by: Randy Dunlap Signed-off-by: Nicholas Bellinger --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 41ec646..fcd29e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6109,6 +6109,7 @@ F: include/linux/sysv_fs.h TARGET SUBSYSTEM M: Nicholas A. Bellinger L: linux-scsi@vger.kernel.org +L: target-devel@vger.kernel.org L: http://groups.google.com/group/linux-iscsi-target-dev W: http://www.linux-iscsi.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master -- cgit v1.1 From e6075e984d100c12bb79267639c3f661d9788a67 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 27 Jul 2011 20:40:18 -0400 Subject: ext4: remove lock_buffer in bclean() and setup_new_group_blocks() There is no need to lock the buffers since no one else should be touching these buffers besides the file system. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5b423f8..65e5cb6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -147,10 +147,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, brelse(bh); bh = ERR_PTR(err); } else { - lock_buffer(bh); memset(bh->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bh); - unlock_buffer(bh); } return bh; @@ -229,10 +227,8 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(gdb); goto exit_journal; } - lock_buffer(gdb); memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); - unlock_buffer(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); if (unlikely(err)) { brelse(gdb); -- cgit v1.1 From 2f919710143cb2025157c3c193ee22de86f3ed73 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 27 Jul 2011 21:16:33 -0400 Subject: ext4: simplify parameters of add_new_gdb() add_new_gdb() only needs the block group number; there is no need to pass a pointer to struct ext4_new_group_data to add_new_gdb(). Instead of filling in a pointer the struct buffer_head in add_new_gdb(), it's simpler to have the caller fetch it from the s_group_desc[] array. [Fixed error path to handle the case where struct buffer_head *primary hasn't been set yet. -- Ted] Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 65e5cb6..9e45355 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -394,15 +394,15 @@ static int verify_reserved_gdb(struct super_block *sb, * fail once we start modifying the data on disk, because JBD has no rollback. 
*/ static int add_new_gdb(handle_t *handle, struct inode *inode, - struct ext4_new_group_data *input, - struct buffer_head **primary) + ext4_group_t group) { struct super_block *sb = inode->i_sb; struct ext4_super_block *es = EXT4_SB(sb)->s_es; - unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); + unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc; struct buffer_head *dind; + struct buffer_head *gdb_bh; int gdbackups; struct ext4_iloc iloc; __le32 *data; @@ -425,11 +425,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return -EPERM; } - *primary = sb_bread(sb, gdblock); - if (!*primary) + gdb_bh = sb_bread(sb, gdblock); + if (!gdb_bh) return -EIO; - if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { + gdbackups = verify_reserved_gdb(sb, gdb_bh); + if (gdbackups < 0) { err = gdbackups; goto exit_bh; } @@ -444,7 +445,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { ext4_warning(sb, "new group %u GDT block %llu not reserved", - input->group, gdblock); + group, gdblock); err = -EINVAL; goto exit_dind; } @@ -453,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (unlikely(err)) goto exit_dind; - err = ext4_journal_get_write_access(handle, *primary); + err = ext4_journal_get_write_access(handle, gdb_bh); if (unlikely(err)) goto exit_sbh; @@ -492,8 +493,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ext4_mark_iloc_dirty(handle, inode, &iloc); - memset((*primary)->b_data, 0, sb->s_blocksize); - err = ext4_handle_dirty_metadata(handle, NULL, *primary); + memset(gdb_bh->b_data, 0, sb->s_blocksize); + err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); goto exit_inode; @@ -503,7 +504,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, o_group_desc = EXT4_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); - n_group_desc[gdb_num] = *primary; + n_group_desc[gdb_num] = gdb_bh; EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kfree(o_group_desc); @@ -525,7 +526,7 @@ exit_sbh: exit_dind: brelse(dind); exit_bh: - brelse(*primary); + brelse(gdb_bh); ext4_debug("leaving with error %d\n", err); return err; @@ -833,8 +834,16 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && (err = reserve_backup_gdb(handle, inode, input))) goto exit_journal; - } else if ((err = add_new_gdb(handle, inode, input, &primary))) - goto exit_journal; + } else { + /* + * Note that we can access new group descriptor block safely + * only if add_new_gdb() succeeds. + */ + err = add_new_gdb(handle, inode, input->group); + if (err) + goto exit_journal; + primary = sbi->s_group_desc[gdb_num]; + } /* * OK, now we've set up the new group. Time to make it active. 
@@ -944,7 +953,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) exit_journal: if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; - if (!err) { + if (!err && primary) { update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); update_backups(sb, primary->b_blocknr, primary->b_data, -- cgit v1.1 From 668f4dc5593327fadc95b33189c375f7178ef88e Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Wed, 27 Jul 2011 21:23:13 -0400 Subject: ext4: simplify parameters of reserve_backup_gdb() The reserve_backup_gdb() function only needs the block group number; there's no need to pass a pointer to struct ext4_new_group_data to it. Signed-off-by: Yongqiang Yang --- fs/ext4/resize.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9e45355..6e3327d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -546,7 +546,7 @@ exit_bh: * backup GDT blocks are stored in their reserved primary GDT block. */ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - struct ext4_new_group_data *input) + ext4_group_t group) { struct super_block *sb = inode->i_sb; int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); @@ -617,7 +617,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, * Finally we can add each of the reserved backup GDT blocks from * the new group to its reserved primary GDT block. */ - blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); + blk = group * EXT4_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2; data = (__le32 *)primary[i]->b_data; @@ -831,9 +831,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if ((err = ext4_journal_get_write_access(handle, primary))) goto exit_journal; - if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && - (err = reserve_backup_gdb(handle, inode, input))) - goto exit_journal; + if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { + err = reserve_backup_gdb(handle, inode, input->group); + if (err) + goto exit_journal; + } } else { /* * Note that we can access new group descriptor block safely -- cgit v1.1 From 0e1147b001793593624e80b3c0a1790822b6baca Mon Sep 17 00:00:00 2001 From: Robin Dong Date: Wed, 27 Jul 2011 21:29:33 -0400 Subject: ext4: add action of moving index in ext4_ext_rm_idx for Punch Hole The old function ext4_ext_rm_idx is used only for truncate case because it just remove last index in extent-index-block. When punching hole, it usually needed to remove "middle" index, therefore we must move indexes which after it forward. (I create a file with 1 depth extent tree and punch hole in the middle of it, the last index in index-block strangly gone, so I find out this bug) Signed-off-by: Robin Dong Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4d73e11..a25bbdc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2101,8 +2101,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, /* * ext4_ext_rm_idx: * removes index from the index block. - * It's used in truncate case only, thus all requests are for - * last index in the block only. 
*/ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) @@ -2120,6 +2118,13 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path); if (err) return err; + + if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { + int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; + len *= sizeof(struct ext4_extent_idx); + memmove(path->p_idx, path->p_idx + 1, len); + } + le16_add_cpu(&path->p_hdr->eh_entries, -1); err = ext4_ext_dirty(handle, inode, path); if (err) -- cgit v1.1 From 29ae07b702cb77dbc24b0843f15ee8cf8a642311 Mon Sep 17 00:00:00 2001 From: Utako Kusaka Date: Wed, 27 Jul 2011 22:11:20 -0400 Subject: ext4: Fix overflow caused by missing cast in ext4_fallocate() The logical block number in map.l_blk is a __u32, and so before we shift it left, by the block size, we neeed cast it to a 64-bit size. Otherwise i_size can be corrupted on an ENOSPC. # df -T /mnt/mp1 Filesystem Type 1K-blocks Used Available Use% Mounted on /dev/sda6 ext4 9843276 153056 9190200 2% /mnt/mp1 # fallocate -o 0 -l 2199023251456 /mnt/mp1/testfile fallocate: /mnt/mp1/testfile: fallocate failed: No space left on device # stat /mnt/mp1/testfile File: `/mnt/mp1/testfile' Size: 4293656576 Blocks: 19380440 IO Block: 4096 regular file Device: 806h/2054d Inode: 12 Links: 1 Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2011-07-25 13:01:31.414490496 +0900 Modify: 2011-07-25 13:01:31.414490496 +0900 Change: 2011-07-25 13:01:31.454490495 +0900 Signed-off-by: Utako Kusaka Signed-off-by: "Theodore Ts'o" -- fs/ext4/extents.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a25bbdc..57cf568 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3824,7 +3824,7 @@ retry: blkbits) >> blkbits)) new_size = offset + len; else - new_size = (map.m_lblk + ret) << blkbits; + new_size = ((loff_t) map.m_lblk + ret) << blkbits; ext4_falloc_update_inode(inode, mode, new_size, (map.m_flags & EXT4_MAP_NEW)); -- cgit v1.1 From d6b722aa383a467a43d09ee38e866981abba08ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jul 2011 22:21:58 -0400 Subject: hppfs: missing include Signed-off-by: Al Viro --- fs/hppfs/hppfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 8635be5..970ea98 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "os.h" -- cgit v1.1 From 9d108d25487bf958f8093409a4c0bee6169edba6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jul 2011 22:27:33 -0400 Subject: devtmpfs: missing initialialization in never-hit case create_path() on something without a single / in it will return err without initializing it. It actually can't happen (we call that thing only if create on the same path returns -ENOENT, which won't happen happen for single-component path), but in this case initializing err to 0 is more than making compiler to STFU - would be the right thing to return on such paths; the function creates a parent directory of given pathname and in that case it has no work to do... 
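To make the failure mode concrete, the pattern is a loop that may run zero
times; a minimal sketch of the shape of the problem (this is not the actual
create_path() body, and make_one_dir() is a placeholder for the mkdir step):

    static int create_parents(char *path)
    {
            char *s = path;
            int err = 0;    /* without this, a single-component path
                             * returns uninitialised stack garbage */

            while ((s = strchr(s, '/')) != NULL) {
                    *s = '\0';
                    err = make_one_dir(path);
                    if (err && err != -EEXIST)
                            break;
                    *s++ = '/';
            }
            return err;
    }

As the commit message notes, 0 is also the right value to return for such
paths, since a single-component name has no parent directories to create.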
Signed-off-by: Al Viro --- drivers/base/devtmpfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index b89fffc..33e1bed 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -166,7 +166,7 @@ static int create_path(const char *nodepath) { char *path; char *s; - int err; + int err = 0; /* parent directories do not exist, create them */ path = kstrdup(nodepath, GFP_KERNEL); -- cgit v1.1 From 5db0753ba5aabcd9fa298029f03b32b3c96b5a39 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 27 Jul 2011 22:18:52 -0700 Subject: target: Fix WRITE_SAME_16 lba assignment breakage This patch fixes a bug in WRITE_SAME_16 LBA assignment where get_unaligned_be16() is incorrectly being used instead of get_unaligned_be64() for a 64-bit LBA. This was introduced with: commit a1d8b49abd60ba5d09e7c968731abcb0f8f1cbf6 Author: Andy Grover Date: Mon May 2 17:12:10 2011 -0700 target: Updates from AGrover and HCH (round 3) (target: inline struct se_transport_task into struct se_cmd) Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index c75a01a..ff7fcf8 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3324,7 +3324,7 @@ static int transport_generic_cmd_sequencer( goto out_invalid_cdb_field; } - cmd->t_task_lba = get_unaligned_be16(&cdb[2]); + cmd->t_task_lba = get_unaligned_be64(&cdb[2]); passthrough = (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV); /* -- cgit v1.1 From 137a6354305455d585fe99fe5e9949acd895b045 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:29 +0100 Subject: regulator: Fix WM831x regulator ID lookups for multiple WM831xs With multiple wm831x devices the device IDs used for the regulators will not always be contiguous so simply taking the modulus is not sufficient to look up the ID, we need to reverse the way the ID is generated. 
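A worked example of the new lookup, assuming the id base scheme visible in
the hunks below (base = wm831x_num * 10 + 1 when a device number has been
assigned, 0 otherwise):

    /* recover the local regulator index from the platform device id */
    int base = (pdata && pdata->wm831x_num) ?
                       (pdata->wm831x_num * 10) + 1 : 0;
    int id = pdev->id - base;

    /*
     * e.g. the second WM831x (wm831x_num == 2) has base 21, so its third
     * DCDC is registered with pdev->id == 23 and resolves to id == 2,
     * i.e. pdata->dcdc[2], reported as "DCDC3".  The old
     * "pdev->id % ARRAY_SIZE(pdata->dcdc)" arithmetic only gave the right
     * index when the ids happened to start at 0.
     */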
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 16 ++++++++++++++-- drivers/regulator/wm831x-ldo.c | 25 ++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index a0982e8..0c7a9d5 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -504,11 +504,17 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->dcdc); + int id; struct wm831x_dcdc *dcdc; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); if (pdata == NULL || pdata->dcdc[id] == NULL) @@ -709,11 +715,17 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->dcdc); + int id; struct wm831x_dcdc *dcdc; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); if (pdata == NULL || pdata->dcdc[id] == NULL) diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c index 2220cf8d..6709710 100644 --- a/drivers/regulator/wm831x-ldo.c +++ b/drivers/regulator/wm831x-ldo.c @@ -310,11 +310,17 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->ldo); + int id; struct wm831x_ldo *ldo; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) @@ -574,11 +580,17 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->ldo); + int id; struct wm831x_ldo *ldo; struct resource *res; int ret, irq; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) @@ -764,11 +776,18 @@ static __devinit int wm831x_alive_ldo_probe(struct platform_device *pdev) { struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); struct wm831x_pdata *pdata = wm831x->dev->platform_data; - int id = pdev->id % ARRAY_SIZE(pdata->ldo); + int id; struct wm831x_ldo *ldo; struct resource *res; int ret; + if (pdata && pdata->wm831x_num) + id = (pdata->wm831x_num * 10) + 1; + else + id = 0; + id = pdev->id - id; + + dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); if (pdata == NULL || pdata->ldo[id] == NULL) -- cgit v1.1 From a1b81dd3ff2c622d0f4e3954bf9b5dd47a0f13a0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:30 +0100 Subject: regulator: Fix WM831x DCDC DVS VSEL bootstrapping Read our initial VSEL from the correct register. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 0c7a9d5..8a4fdc3 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -551,7 +551,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev) } dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK; - ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG); + ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL); if (ret < 0) { dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret); goto err; -- cgit v1.1 From 24b4315051ef2b9155d23ccbad528daab3b65eb6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:31 +0100 Subject: regulator: Add EPEs to the MODULE_ALIAS() for wm831x-dcdc Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 8a4fdc3..2ee4823 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -1058,3 +1058,4 @@ MODULE_DESCRIPTION("WM831x DC-DC convertor driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:wm831x-buckv"); MODULE_ALIAS("platform:wm831x-buckp"); +MODULE_ALIAS("platform:wm831x-epe"); -- cgit v1.1 From b47ba9fdd336b318a6a6431e6a4556df99272277 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:32 +0100 Subject: regulator: Set up GPIO for WM831x VSEL before enabling VSEL mode If the VSEL is not in use prior to us starting up then we need to make sure we initialise the GPIO before we push the DVS control to being done by the GPIO. 
Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 2ee4823..95249f7 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -456,27 +456,6 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, if (!pdata || !pdata->dvs_gpio) return; - switch (pdata->dvs_control_src) { - case 1: - ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; - break; - case 2: - ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; - break; - default: - dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", - pdata->dvs_control_src, dcdc->name); - return; - } - - ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, - WM831X_DC1_DVS_SRC_MASK, ctrl); - if (ret < 0) { - dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n", - dcdc->name, ret); - return; - } - ret = gpio_request(pdata->dvs_gpio, "DCDC DVS"); if (ret < 0) { dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n", @@ -498,6 +477,26 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, } dcdc->dvs_gpio = pdata->dvs_gpio; + + switch (pdata->dvs_control_src) { + case 1: + ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT; + break; + case 2: + ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT; + break; + default: + dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n", + pdata->dvs_control_src, dcdc->name); + return; + } + + ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, + WM831X_DC1_DVS_SRC_MASK, ctrl); + if (ret < 0) { + dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n", + dcdc->name, ret); + } } static __devinit int wm831x_buckv_probe(struct platform_device *pdev) -- cgit v1.1 From c439b8f46ee79147139e124621dbc9e1e7804655 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:33 +0100 Subject: regulator: Bootstrap wm831x DVS VSEL value from ON VSEL if not already set If we don't have a DVS VSEL value already set when we start up then start it off with the value currently being used for ON. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 95249f7..2c5d54b 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -491,6 +491,20 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc, return; } + /* If DVS_VSEL is set to the minimum value then raise it to ON_VSEL + * to make bootstrapping a bit smoother. + */ + if (!dcdc->dvs_vsel) { + ret = wm831x_set_bits(wm831x, + dcdc->base + WM831X_DCDC_DVS_CONTROL, + WM831X_DC1_DVS_VSEL_MASK, dcdc->on_vsel); + if (ret == 0) + dcdc->dvs_vsel = dcdc->on_vsel; + else + dev_warn(wm831x->dev, "Failed to set DVS_VSEL: %d\n", + ret); + } + ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL, WM831X_DC1_DVS_SRC_MASK, ctrl); if (ret < 0) { -- cgit v1.1 From 88cda60e512373ca18a663ee66dc2550800223eb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Jul 2011 22:20:34 +0100 Subject: regulator: Improve WM831x DVS VSEL selection algorithm Rather than using the maximum voltage we get passed to select the DVS voltage to use remember the highest voltage we've ever seen. 
This improves how the driver works when the consumer permits higher voltages than it will ever selects in order to support the widest possible voltage range. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/wm831x-dcdc.c | 52 ++++++++++++----------------------------- 1 file changed, 15 insertions(+), 37 deletions(-) diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 2c5d54b..bd3531d 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -267,23 +267,6 @@ static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev, return vsel; } -static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev, - int min_uV, int max_uV) -{ - u16 vsel; - - if (max_uV < 600000 || max_uV > 1800000) - return -EINVAL; - - vsel = ((max_uV - 600000) / 12500) + 8; - - if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV || - wm831x_buckv_list_voltage(rdev, vsel) < max_uV) - return -EINVAL; - - return vsel; -} - static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state) { struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); @@ -338,28 +321,23 @@ static int wm831x_buckv_set_voltage(struct regulator_dev *rdev, if (ret < 0) return ret; - /* Set the high voltage as the DVS voltage. This is optimised - * for CPUfreq usage, most processors will keep the maximum - * voltage constant and lower the minimum with the frequency. */ - vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV); - if (vsel < 0) { - /* This should never happen - at worst the same vsel - * should be chosen */ - WARN_ON(vsel < 0); - return 0; + /* + * If this VSEL is higher than the last one we've seen then + * remember it as the DVS VSEL. This is optimised for CPUfreq + * usage where we want to get to the highest voltage very + * quickly. + */ + if (vsel > dcdc->dvs_vsel) { + ret = wm831x_set_bits(wm831x, dvs_reg, + WM831X_DC1_DVS_VSEL_MASK, + dcdc->dvs_vsel); + if (ret == 0) + dcdc->dvs_vsel = vsel; + else + dev_warn(wm831x->dev, + "Failed to set DCDC DVS VSEL: %d\n", ret); } - /* Don't bother if it's the same VSEL we're already using */ - if (vsel == dcdc->on_vsel) - return 0; - - ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel); - if (ret == 0) - dcdc->dvs_vsel = vsel; - else - dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n", - ret); - return 0; } -- cgit v1.1 From ca9380fd68514c7bc952282c1b4fc70607e9fe43 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 Jul 2011 14:46:05 +0200 Subject: ALSA: sound/core/pcm_compat.c: adjust array index Convert array index from the loop bound to the loop index. A simplified version of the semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e1,e2,ar; @@ for(e1 = 0; e1 < e2; e1++) { <... 
ar[ - e2 + e1 ] ...> } // Signed-off-by: Julia Lawall Cc: Signed-off-by: Takashi Iwai --- sound/core/pcm_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 5fb2e28..91cdf94 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -342,7 +342,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, kfree(bufs); return -EFAULT; } - bufs[ch] = compat_ptr(ptr); + bufs[i] = compat_ptr(ptr); bufptr++; } if (dir == SNDRV_PCM_STREAM_PLAYBACK) -- cgit v1.1 From d52a10d003694e48d28cf0218db16372ed61f1bd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Jul 2011 09:48:19 -0700 Subject: target: iscsi_target depends on NET iscsi target code uses lots on network interface functions, so it should depend on NET. Fixes many build errors when NET is not enabled: ERROR: "kernel_sendmsg" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "in_aton" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "sock_release" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "kernel_listen" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "kernel_setsockopt" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "kernel_recvmsg" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "kernel_accept" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "sock_create" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "kernel_bind" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! ERROR: "in6_pton" [drivers/target/iscsi/iscsi_target_mod.ko] undefined! Signed-off-by: Randy Dunlap Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 564ff4e..8345fb4 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -1,5 +1,6 @@ config ISCSI_TARGET tristate "Linux-iSCSI.org iSCSI Target Mode Stack" + depends on NET select CRYPTO select CRYPTO_CRC32C select CRYPTO_CRC32C_INTEL if X86 -- cgit v1.1 From 40ee3381dd1010432acc13e907329029096c5bfc Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 28 Jul 2011 15:31:19 -0700 Subject: drm/i915: Fixup for 'Hold mode_config->mutex during hotplug' drm_helper_hpd_irq_event queues another work proc to go and deliver the user-space event, and that function also wants to hold the config mutex, so we shouldn't hold the mutex across the drm_helper_hpd_irq_event call. 
Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9da2a2c..9b1d669 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -313,10 +313,10 @@ static void i915_hotplug_work_func(struct work_struct *work) if (encoder->hot_plug) encoder->hot_plug(encoder); + mutex_unlock(&mode_config->mutex); + /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); - - mutex_unlock(&mode_config->mutex); } static void i915_handle_rps_change(struct drm_device *dev) -- cgit v1.1 From 92fd8fd13b7570f6a8ba519c4e8ec98f10a86ce9 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 25 Jul 2011 19:50:10 -0700 Subject: drm/i915: Use dp_detect_common in hotplug helper function This uses the common dpcd reading routine, i915_dp_detect_common, instead of open-coding a call to intel_dp_aux_native_read. Besides reducing duplicated code, this also gains the read retries which may be necessary when a cable is first plugged back in and the link needs to be retrained. Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes Reviewed-by: Adam Jackson --- drivers/gpu/drm/i915/intel_dp.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index dcc7ae6..45db8106 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1567,6 +1567,20 @@ intel_dp_link_down(struct intel_dp *intel_dp) POSTING_READ(intel_dp->output_reg); } +static enum drm_connector_status +i915_dp_detect_common(struct intel_dp *intel_dp) +{ + enum drm_connector_status status = connector_status_disconnected; + + if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd, + sizeof (intel_dp->dpcd)) && + (intel_dp->dpcd[DP_DPCD_REV] != 0)) { + status = connector_status_connected; + } + + return status; +} + /* * According to DP spec * 5.1.2: @@ -1579,45 +1593,30 @@ intel_dp_link_down(struct intel_dp *intel_dp) static void intel_dp_check_link_status(struct intel_dp *intel_dp) { - int ret; - if (!intel_dp->base.base.crtc) return; + /* Try to read receiver status if the link appears to be up */ if (!intel_dp_get_link_status(intel_dp)) { intel_dp_link_down(intel_dp); return; } - /* Try to read receiver status if the link appears to be up */ - ret = intel_dp_aux_native_read(intel_dp, - 0x000, intel_dp->dpcd, - sizeof (intel_dp->dpcd)); - if (ret != sizeof(intel_dp->dpcd)) { + /* Now read the DPCD to see if it's actually running */ + if (i915_dp_detect_common(intel_dp) != connector_status_connected) { intel_dp_link_down(intel_dp); return; } if (!intel_channel_eq_ok(intel_dp)) { + DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", + drm_get_encoder_name(&intel_dp->base.base)); intel_dp_start_link_train(intel_dp); intel_dp_complete_link_train(intel_dp); } } static enum drm_connector_status -i915_dp_detect_common(struct intel_dp *intel_dp) -{ - enum drm_connector_status status = connector_status_disconnected; - - if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd, - sizeof (intel_dp->dpcd)) && - (intel_dp->dpcd[DP_DPCD_REV] != 0)) - status = connector_status_connected; - - return status; -} - -static enum drm_connector_status ironlake_dp_detect(struct intel_dp *intel_dp) { enum drm_connector_status status; -- cgit v1.1 From 26d61aad7a46115628341e9eb95433f30efef21a Mon Sep 17 
00:00:00 2001 From: Keith Packard Date: Mon, 25 Jul 2011 20:01:09 -0700 Subject: drm/i915: Rename i915_dp_detect_common to intel_dp_get_dpcd This describes the function better, allowing it to be used where the DPCD value is relevant. Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes Reviewed-by: Adam Jackson --- drivers/gpu/drm/i915/intel_dp.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 45db8106..41674e1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1567,18 +1567,16 @@ intel_dp_link_down(struct intel_dp *intel_dp) POSTING_READ(intel_dp->output_reg); } -static enum drm_connector_status -i915_dp_detect_common(struct intel_dp *intel_dp) +static bool +intel_dp_get_dpcd(struct intel_dp *intel_dp) { - enum drm_connector_status status = connector_status_disconnected; - if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd, sizeof (intel_dp->dpcd)) && (intel_dp->dpcd[DP_DPCD_REV] != 0)) { - status = connector_status_connected; + return true; } - return status; + return false; } /* @@ -1603,7 +1601,7 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) } /* Now read the DPCD to see if it's actually running */ - if (i915_dp_detect_common(intel_dp) != connector_status_connected) { + if (!intel_dp_get_dpcd(intel_dp)) { intel_dp_link_down(intel_dp); return; } @@ -1617,6 +1615,14 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) } static enum drm_connector_status +intel_dp_detect_dpcd(struct intel_dp *intel_dp) +{ + if (intel_dp_get_dpcd(intel_dp)) + return connector_status_connected; + return connector_status_disconnected; +} + +static enum drm_connector_status ironlake_dp_detect(struct intel_dp *intel_dp) { enum drm_connector_status status; @@ -1629,7 +1635,7 @@ ironlake_dp_detect(struct intel_dp *intel_dp) return status; } - return i915_dp_detect_common(intel_dp); + return intel_dp_detect_dpcd(intel_dp); } static enum drm_connector_status @@ -1658,7 +1664,7 @@ g4x_dp_detect(struct intel_dp *intel_dp) if ((temp & bit) == 0) return connector_status_disconnected; - return i915_dp_detect_common(intel_dp); + return intel_dp_detect_dpcd(intel_dp); } /** -- cgit v1.1 From 59f3e272d788305e16098f0b18309919c9216d67 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 25 Jul 2011 20:01:56 -0700 Subject: drm/i915: In intel_dp_init, replace read of DPCD with intel_dp_get_dpcd Eliminates an open-coded read and also gains the retry behaviour of intel_dp_get_dpcd, which seems like a good idea. 
Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes Reviewed-by: Adam Jackson --- drivers/gpu/drm/i915/intel_dp.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 41674e1..9f134d2 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2015,7 +2015,7 @@ intel_dp_init(struct drm_device *dev, int output_reg) /* Cache some DPCD data in the eDP case */ if (is_edp(intel_dp)) { - int ret; + bool ret; u32 pp_on, pp_div; pp_on = I915_READ(PCH_PP_ON_DELAYS); @@ -2028,11 +2028,9 @@ intel_dp_init(struct drm_device *dev, int output_reg) dev_priv->panel_t12 *= 100; /* t12 in 100ms units */ ironlake_edp_panel_vdd_on(intel_dp); - ret = intel_dp_aux_native_read(intel_dp, DP_DPCD_REV, - intel_dp->dpcd, - sizeof(intel_dp->dpcd)); + ret = intel_dp_get_dpcd(intel_dp); ironlake_edp_panel_vdd_off(intel_dp); - if (ret == sizeof(intel_dp->dpcd)) { + if (ret) { if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & -- cgit v1.1 From f0575e92974d328e8816ed89704c985a7d7d90ac Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 25 Jul 2011 22:12:43 -0700 Subject: drm/i915: DP_PIPE_ENABLED must check transcoder on CPT Display port pipe selection on CPT is not done with a bit in the output register, rather it is controlled by a couple of bits in the separate transcoder register which indicate which display port output is connected to the transcoder. This patch replaces the simplistic macro DP_PIPE_ENABLED with the rather more complicated function dp_pipe_enabled which checks the output register to see if that is enabled, and then goes on to either check the output register pipe selection bit (on non-CPT) or the transcoder DP selection bits (on CPT). 
Before this patch, any time the mode of pipe A was changed, any display port outputs on pipe B would get disabled as intel_disable_pch_ports would ensure that the mode setting operation could occur on pipe A without interference from other outputs connected to that pch port Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes Reviewed-by: Adam Jackson --- drivers/gpu/drm/i915/i915_reg.h | 3 --- drivers/gpu/drm/i915/intel_display.c | 45 +++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5d5def7..f731565 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2083,9 +2083,6 @@ #define DP_PIPEB_SELECT (1 << 30) #define DP_PIPE_MASK (1 << 30) -#define DP_PIPE_ENABLED(V, P) \ - (((V) & (DP_PIPE_MASK | DP_PORT_EN)) == ((P) << 30 | DP_PORT_EN)) - /* Link training mode - select a suitable mode for each stage */ #define DP_LINK_TRAIN_PAT_1 (0 << 28) #define DP_LINK_TRAIN_PAT_2 (1 << 28) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5609c06..fc26cb4c9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -979,11 +979,29 @@ static void assert_transcoder_disabled(struct drm_i915_private *dev_priv, pipe_name(pipe)); } +static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, + int reg, u32 port_sel, u32 val) +{ + if ((val & DP_PORT_EN) == 0) + return false; + + if (HAS_PCH_CPT(dev_priv->dev)) { + u32 trans_dp_ctl_reg = TRANS_DP_CTL(pipe); + u32 trans_dp_ctl = I915_READ(trans_dp_ctl_reg); + if ((trans_dp_ctl & TRANS_DP_PORT_SEL_MASK) != port_sel) + return false; + } else { + if ((val & DP_PIPE_MASK) != (pipe << 30)) + return false; + } + return true; +} + static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe, int reg) + enum pipe pipe, int reg, u32 port_sel) { u32 val = I915_READ(reg); - WARN(DP_PIPE_ENABLED(val, pipe), + WARN(dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val), "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", reg, pipe_name(pipe)); } @@ -1003,9 +1021,9 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv, int reg; u32 val; - assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_B); - assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_C); - assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_D); + assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_B, TRANS_DP_PORT_SEL_B); + assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_C, TRANS_DP_PORT_SEL_C); + assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_D, TRANS_DP_PORT_SEL_D); reg = PCH_ADPA; val = I915_READ(reg); @@ -1334,19 +1352,24 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv, } static void disable_pch_dp(struct drm_i915_private *dev_priv, - enum pipe pipe, int reg) + enum pipe pipe, int reg, u32 port_sel) { u32 val = I915_READ(reg); - if (DP_PIPE_ENABLED(val, pipe)) + if (dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val)) { + DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe); I915_WRITE(reg, val & ~DP_PORT_EN); + } } static void disable_pch_hdmi(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) { u32 val = I915_READ(reg); - if (HDMI_PIPE_ENABLED(val, pipe)) + if (HDMI_PIPE_ENABLED(val, pipe)) { + DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n", + reg, pipe); I915_WRITE(reg, val & ~PORT_ENABLE); + } } /* Disable any ports connected to this transcoder */ @@ -1358,9 +1381,9 @@ static void 
intel_disable_pch_ports(struct drm_i915_private *dev_priv, val = I915_READ(PCH_PP_CONTROL); I915_WRITE(PCH_PP_CONTROL, val | PANEL_UNLOCK_REGS); - disable_pch_dp(dev_priv, pipe, PCH_DP_B); - disable_pch_dp(dev_priv, pipe, PCH_DP_C); - disable_pch_dp(dev_priv, pipe, PCH_DP_D); + disable_pch_dp(dev_priv, pipe, PCH_DP_B, TRANS_DP_PORT_SEL_B); + disable_pch_dp(dev_priv, pipe, PCH_DP_C, TRANS_DP_PORT_SEL_C); + disable_pch_dp(dev_priv, pipe, PCH_DP_D, TRANS_DP_PORT_SEL_D); reg = PCH_ADPA; val = I915_READ(reg); -- cgit v1.1 From d2b996ac698aebb28557355857927b8b934bb4f9 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 25 Jul 2011 22:37:51 -0700 Subject: Revert and fix "drm/i915/dp: remove DPMS mode tracking from DP" This reverts commit 885a50147f00a8a80108904bf58a18af357717f3. We actually *do* need to track DPMS state so that on hotplug, we don't retrain the link until DPMS is disabled. However, that code had avery small bug -- it wouldn't set the dpms_mode at mode set time, and so link retraining would not actually occur on monitor hotplug until the monitor had gone through a DPMS off/DPMS on cycle. Signed-off-by: Keith Packard Tested-by: Andrew Lutomirski --- drivers/gpu/drm/i915/intel_dp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9f134d2..4493641 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -50,6 +50,7 @@ struct intel_dp { bool has_audio; int force_audio; uint32_t color_range; + int dpms_mode; uint8_t link_bw; uint8_t lane_count; uint8_t dpcd[8]; @@ -1011,6 +1012,8 @@ static void intel_dp_commit(struct drm_encoder *encoder) if (is_edp(intel_dp)) ironlake_edp_backlight_on(dev); + + intel_dp->dpms_mode = DRM_MODE_DPMS_ON; } static void @@ -1045,6 +1048,7 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode) if (is_edp(intel_dp)) ironlake_edp_backlight_on(dev); } + intel_dp->dpms_mode = mode; } /* @@ -1591,6 +1595,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) static void intel_dp_check_link_status(struct intel_dp *intel_dp) { + if (intel_dp->dpms_mode != DRM_MODE_DPMS_ON) + return; + if (!intel_dp->base.base.crtc) return; @@ -1939,6 +1946,7 @@ intel_dp_init(struct drm_device *dev, int output_reg) return; intel_dp->output_reg = output_reg; + intel_dp->dpms_mode = -1; intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL); if (!intel_connector) { -- cgit v1.1 From 120eced9efe7fdb5123db4ea47e9adee9b66284e Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 27 Jul 2011 01:21:40 -0700 Subject: drm/i915: Set crtc DPMS mode to ON in intel_crtc_mode_set This corrects the DPMS mode tracking so that the DPMS code will actually turn the CRTC off the next time the screen saves. 
Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fc26cb4c9..d9b8c15 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5273,6 +5273,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, drm_vblank_post_modeset(dev, pipe); + intel_crtc->dpms_mode = DRM_MODE_DPMS_ON; + return ret; } -- cgit v1.1 From 3bcf603f6d5d18bd9d076dc280de71f48add4101 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 27 Jul 2011 11:51:40 -0700 Subject: drm/i915: apply timing generator bug workaround on CPT and PPT On CougarPoint and PantherPoint PCH chips, the timing generator may fail to start after DP training completes. This is due to a bug in the FDI autotraining detect logic (which will stall the timing generator and re-enable it once training completes), so disable it to avoid silent DP mode setting failures. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_display.c | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f731565..00bd510 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3072,6 +3072,11 @@ #define TRANS_6BPC (2<<5) #define TRANS_12BPC (3<<5) +#define _TRANSA_CHICKEN2 0xf0064 +#define _TRANSB_CHICKEN2 0xf1064 +#define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2) +#define TRANS_AUTOTRAIN_GEN_STALL_DIS (1<<31) + #define SOUTH_CHICKEN2 0xc2004 #define DPLS_EDP_PPS_FIX_DIS (1<<0) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d9b8c15..502efc3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7524,6 +7524,7 @@ static void ibx_init_clock_gating(struct drm_device *dev) static void cpt_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; /* * On Ibex Peak and Cougar Point, we need to disable clock @@ -7533,6 +7534,9 @@ static void cpt_init_clock_gating(struct drm_device *dev) I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | DPLS_EDP_PPS_FIX_DIS); + /* Without this, mode sets may fail silently on FDI */ + for_each_pipe(pipe) + I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_AUTOTRAIN_GEN_STALL_DIS); } static void ironlake_teardown_rc6(struct drm_device *dev) -- cgit v1.1 From 2704cf5fbd248871a745d210733c6319959d2b0c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 28 Jul 2011 11:52:45 -0700 Subject: drm/i915: flush plane control changes on ILK+ as well After writing to the plane control reg we need to write to the surface reg to trigger the double buffered register latch. On previous chipsets, writing to DSPADDR was enough, but on ILK+ DSPSURF is the reg that triggers the double buffer latch. 
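
To make the latching behaviour concrete, here is a rough user-space model of a double-buffered display register (invented names, not the real register interface): writes to the control register only reach a pending copy, and the surface-address write is what latches them, which is why the flush helper in the diffs below rewrites DSPADDR/DSPSURF with their current values.

#include <stdint.h>
#include <stdio.h>

struct dbuf_reg {
	uint32_t pending;	/* what software wrote */
	uint32_t live;		/* what the scanout engine uses */
};

struct plane_model {
	struct dbuf_reg cntr;	/* plane control ("DSPCNTR"-like) */
	uint32_t surf;		/* surface address register */
};

static void write_cntr(struct plane_model *p, uint32_t val)
{
	p->cntr.pending = val;		/* buffered, not yet visible */
}

static void write_surf(struct plane_model *p, uint32_t addr)
{
	p->surf = addr;
	p->cntr.live = p->cntr.pending;	/* this write triggers the latch */
}

int main(void)
{
	struct plane_model p = { { 0, 0 }, 0 };

	write_cntr(&p, 0x80000000);	/* "enable" bit */
	printf("before flush: live=%#x\n", (unsigned)p.cntr.live);	/* still 0 */
	write_surf(&p, p.surf);		/* rewriting the same address = flush */
	printf("after flush:  live=%#x\n", (unsigned)p.cntr.live);
	return 0;
}
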
v2: write DSPADDR too to cover pre-965 chipsets v3: use flush_display_plane instead, that's what it's for v4: send the right patch Signed-off-by: Jesse Barnes Tested-by: Keith Packard Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 502efc3..a7a7b67 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1323,8 +1323,8 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv, static void intel_flush_display_plane(struct drm_i915_private *dev_priv, enum plane plane) { - u32 reg = DSPADDR(plane); - I915_WRITE(reg, I915_READ(reg)); + I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane))); + I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane))); } /** -- cgit v1.1 From d74362c9e45689d8d7e3d4bcf6681c4358ef4f2e Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 28 Jul 2011 14:47:14 -0700 Subject: drm/i915: Flush other plane register writes Writes to the plane control register are buffered in the chip until a write to the DSPADDR (pre-965) or DSPSURF (post-965) register occurs. This patch adds flushes in: intel_enable_plane gen6_init_clock_gating ivybridge_init_clock_gating Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a7a7b67..8f7ed73 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1290,6 +1290,17 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv, intel_wait_for_pipe_off(dev_priv->dev, pipe); } +/* + * Plane regs are double buffered, going from enabled->disabled needs a + * trigger in order to latch. The display address reg provides this. + */ +static void intel_flush_display_plane(struct drm_i915_private *dev_priv, + enum plane plane) +{ + I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane))); + I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane))); +} + /** * intel_enable_plane - enable a display plane on a given pipe * @dev_priv: i915 private structure @@ -1313,20 +1324,10 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv, return; I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); + intel_flush_display_plane(dev_priv, plane); intel_wait_for_vblank(dev_priv->dev, pipe); } -/* - * Plane regs are double buffered, going from enabled->disabled needs a - * trigger in order to latch. The display address reg provides this. 
- */ -static void intel_flush_display_plane(struct drm_i915_private *dev_priv, - enum plane plane) -{ - I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane))); - I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane))); -} - /** * intel_disable_plane - disable a display plane * @dev_priv: i915 private structure @@ -7418,10 +7419,12 @@ static void gen6_init_clock_gating(struct drm_device *dev) ILK_DPARB_CLK_GATE | ILK_DPFD_CLK_GATE); - for_each_pipe(pipe) + for_each_pipe(pipe) { I915_WRITE(DSPCNTR(pipe), I915_READ(DSPCNTR(pipe)) | DISPPLANE_TRICKLE_FEED_DISABLE); + intel_flush_display_plane(dev_priv, pipe); + } } static void ivybridge_init_clock_gating(struct drm_device *dev) @@ -7438,10 +7441,12 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); - for_each_pipe(pipe) + for_each_pipe(pipe) { I915_WRITE(DSPCNTR(pipe), I915_READ(DSPCNTR(pipe)) | DISPPLANE_TRICKLE_FEED_DISABLE); + intel_flush_display_plane(dev_priv, pipe); + } } static void g4x_init_clock_gating(struct drm_device *dev) -- cgit v1.1 From cb0e093162d7b6589c2217a00e2abfef686b32d6 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 28 Jul 2011 14:50:30 -0700 Subject: drm/i915: fix CB tuning check for ILK+ CB tuning is needed to handle potential process variations that might cause clock jitter for certain PLL settings. However, we were setting it incorrectly since we were using the wrong M value as a check (M1 when we needed to use the whole M value). Fix it up, making my HDMI attached display a little prettier (used to have occasional dots crawl across the display). Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f7ed73..32c8c95 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4994,7 +4994,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, } else if (is_sdvo && is_tv) factor = 20; - if (clock.m1 < factor * clock.n) + if (clock.m < factor * clock.n) fp |= FP_CB_TUNE; dpll = 0; -- cgit v1.1 From 4203223a1aed862b4445fdcd260d6139603a51d9 Mon Sep 17 00:00:00 2001 From: Tushar Gohad Date: Thu, 28 Jul 2011 10:36:20 +0000 Subject: xfrm: Fix key lengths for rfc3686(ctr(aes)) Fix the min and max bit lengths for AES-CTR (RFC3686) keys. The number of bits in key spec is the key length (128/256) plus 32 bits of nonce. This change takes care of the "Invalid key length" errors reported by setkey when specifying 288 bit keys for aes-ctr. Signed-off-by: Tushar Gohad Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_algo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 58064d9..791ab2e 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -462,8 +462,8 @@ static struct xfrm_algo_desc ealg_list[] = { .desc = { .sadb_alg_id = SADB_X_EALG_AESCTR, .sadb_alg_ivlen = 8, - .sadb_alg_minbits = 128, - .sadb_alg_maxbits = 256 + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 288 } }, }; -- cgit v1.1 From fe66101f14813b77d84f6450d51772a2af2b81a1 Mon Sep 17 00:00:00 2001 From: Klement Fish Date: Thu, 28 Jul 2011 06:03:22 +0000 Subject: sis190: Rx filter init is needed for MAC address change. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=34552 Signed-off-by: Klement Fish Acked-by: Francois Romieu Signed-off-by: David S. 
Miller --- drivers/net/sis190.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 8ad7bfb..3c0f131 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -1825,6 +1825,16 @@ static int sis190_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) generic_mii_ioctl(&tp->mii_if, if_mii(ifr), cmd, NULL); } +static int sis190_mac_addr(struct net_device *dev, void *p) +{ + int rc; + + rc = eth_mac_addr(dev, p); + if (!rc) + sis190_init_rxfilter(dev); + return rc; +} + static const struct net_device_ops sis190_netdev_ops = { .ndo_open = sis190_open, .ndo_stop = sis190_close, @@ -1833,7 +1843,7 @@ static const struct net_device_ops sis190_netdev_ops = { .ndo_tx_timeout = sis190_tx_timeout, .ndo_set_multicast_list = sis190_set_rx_mode, .ndo_change_mtu = eth_change_mtu, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = sis190_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = sis190_netpoll, -- cgit v1.1 From c173bfac2428bd6c5da5d7cc8942b8b85d0819d4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 28 Jul 2011 10:54:23 +0000 Subject: MAINTAINERS: orphan FrameRelay DLCI Mike McLagan hasn't contributed in many years and his email bounces. Signed-off-by: Joe Perches Cc: David Miller Cc: Stephen Hemminger Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1d2e79d..a99fed1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2637,9 +2637,8 @@ S: Maintained F: arch/x86/math-emu/ FRAME RELAY DLCI/FRAD (Sangoma drivers too) -M: Mike McLagan L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/wan/dlci.c F: drivers/net/wan/sdla.c -- cgit v1.1 From 93a3aa25933461d76141179fc94aa32d5f9d954a Mon Sep 17 00:00:00 2001 From: Lennart Sorensen Date: Thu, 28 Jul 2011 13:18:11 +0000 Subject: r8169: Add support for D-Link 530T rev C1 (Kernel Bug 38862) The D-Link DGE-530T rev C1 is a re-badged Realtek 8169 named DLG10028C, unlike the previous revisions which were skge based. It is probably the same as the discontinued DGE-528T (0x4300) other than the PCI ID. The PCI ID is 0x1186:0x4302. Adding it to r8169.c where 0x1186:0x4300 is already found makes the card be detected and work. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=38862 Signed-off-by: Len Sorensen Signed-off-by: David S. Miller --- drivers/net/r8169.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 7d9c650..c77286e 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -239,6 +239,7 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, RTL_CFG_0 }, + { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4302), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_AT, 0xc107), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(0x16ec, 0x0116), 0, 0, RTL_CFG_0 }, { PCI_VENDOR_ID_LINKSYS, 0x1032, -- cgit v1.1 From 177c27bf05d0ea508e65afdbe4b6998c81e46af5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Jul 2011 06:54:36 +0000 Subject: net: fix new sunrpc kernel-doc warning Fix new kernel-doc warning in sunrpc: Warning(net/sunrpc/xprt.c:196): No description found for parameter 'xprt' Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- net/sunrpc/xprt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9b6a4d1..f4385e4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport + * @xprt: pointer to the target transport * * This prevents mixing the payload of separate requests, and prevents * transport connects from colliding with writes. No congestion control -- cgit v1.1 From 9e8f90dfe58eb19140bc66655170c7aef9eadbf2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 28 Jul 2011 18:26:32 -0700 Subject: proc_fork_connector: a lockless ->real_parent usage is not safe proc_fork_connector() uses ->real_parent lockless. This is not safe if copy_process() was called with CLONE_THREAD or CLONE_PARENT, in this case the parent != current can go away at any moment. Signed-off-by: Oleg Nesterov Cc: Vladimir Zapolskiy Cc: "David S. Miller" Cc: Evgeniy Polyakov Cc: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/connector/cn_proc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 3ee1fdb..e55814b 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -57,6 +57,7 @@ void proc_fork_connector(struct task_struct *task) struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE]; struct timespec ts; + struct task_struct *parent; if (atomic_read(&proc_event_num_listeners) < 1) return; @@ -67,8 +68,11 @@ void proc_fork_connector(struct task_struct *task) ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); ev->what = PROC_EVENT_FORK; - ev->event_data.fork.parent_pid = task->real_parent->pid; - ev->event_data.fork.parent_tgid = task->real_parent->tgid; + rcu_read_lock(); + parent = rcu_dereference(task->real_parent); + ev->event_data.fork.parent_pid = parent->pid; + ev->event_data.fork.parent_tgid = parent->tgid; + rcu_read_unlock(); ev->event_data.fork.child_pid = task->pid; ev->event_data.fork.child_tgid = task->tgid; -- cgit v1.1 From 700d1ef33ff1d9a582b4a1dc23a130049f239942 Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Fri, 29 Jul 2011 03:11:02 +0200 Subject: ALSA: hdspm - Provide MADI speed mode selector on RME MADI and MADIface When running in slave mode (no clock master), there is no way to determine the real wirespeed on the MADI link (single/double/quad speed). Like physical gear, simply provide the user with a tristate switch to select the appropriate format. 
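
A rough sketch of how the selected speed mode relates to the effective sample rate (illustration only, not driver code; the 1/2/4 multipliers follow from the single/double/quad speed description above and the rate-detection patch further below):

#include <stdio.h>

enum madi_speed { MADI_SINGLE, MADI_DOUBLE, MADI_QUAD };

/* Only the 32..48 kHz base frequency can be detected on the link in
 * slave mode, so the user-selected mode supplies the multiplier.
 */
static int effective_rate(int detected_base, enum madi_speed mode)
{
	static const int mult[] = { 1, 2, 4 };

	if (detected_base > 48000)	/* already a true DS/QS reading */
		return detected_base;
	return detected_base * mult[mode];
}

int main(void)
{
	printf("%d\n", effective_rate(48000, MADI_DOUBLE));	/* 96000 */
	printf("%d\n", effective_rate(44100, MADI_QUAD));	/* 176400 */
	return 0;
}
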
Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 91 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index af130ee..d219649 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -3415,6 +3415,91 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol, return change; } +#define HDSPM_MADI_SPEEDMODE(xname, xindex) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = xname, \ + .index = xindex, \ + .info = snd_hdspm_info_madi_speedmode, \ + .get = snd_hdspm_get_madi_speedmode, \ + .put = snd_hdspm_put_madi_speedmode \ +} + +static int hdspm_madi_speedmode(struct hdspm *hdspm) +{ + if (hdspm->control_register & HDSPM_QuadSpeed) + return 2; + if (hdspm->control_register & HDSPM_DoubleSpeed) + return 1; + return 0; +} + +static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode) +{ + hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed); + switch (mode) { + case 0: + break; + case 1: + hdspm->control_register |= HDSPM_DoubleSpeed; + break; + case 2: + hdspm->control_register |= HDSPM_QuadSpeed; + break; + } + hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register); + + return 0; +} + +static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + static char *texts[] = { "Single", "Double", "Quad" }; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 3; + + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = + uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, + texts[uinfo->value.enumerated.item]); + + return 0; +} + +static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + + spin_lock_irq(&hdspm->lock); + ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm); + spin_unlock_irq(&hdspm->lock); + return 0; +} + +static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + int change; + int val; + + if (!snd_hdspm_use_is_exclusive(hdspm)) + return -EBUSY; + val = ucontrol->value.integer.value[0]; + if (val < 0) + val = 0; + if (val > 2) + val = 2; + spin_lock_irq(&hdspm->lock); + change = val != hdspm_madi_speedmode(hdspm); + hdspm_set_madi_speedmode(hdspm, val); + spin_unlock_irq(&hdspm->lock); + return change; +} #define HDSPM_MIXER(xname, xindex) \ { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ @@ -4289,7 +4374,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = { HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_INPUT_SELECT("Input Select", 0) + HDSPM_INPUT_SELECT("Input Select", 0), + HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) }; @@ -4302,7 +4388,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = { HDSPM_SYNC_CHECK("MADI SyncCheck", 0), HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), - HDSPM_SAFE_MODE("Safe Mode", 0) + HDSPM_SAFE_MODE("Safe Mode", 0), + HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) }; static struct snd_kcontrol_new snd_hdspm_controls_aio[] = { -- cgit v1.1 From d12c51d8299667464e31d545acc4ebb7031d024c Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Fri, 
29 Jul 2011 03:11:03 +0200 Subject: ALSA: hdspm - Fix reported external sample rate on RME MADI and MADIface In slave mode, the card can only detect the base frequency (32..48kHz) on the MADI link (exception: 96k frames), so the real external sample rate is this base frequency multiplied by 1, 2 or 4 depending on the speed mode. This patch enables 64..192kHz sample rates in clock slave mode, which failed before due to an alleged sample rate mismatch between the MADI link (e.g., 48kHz) and the application in DS/QS mode (e.g., 96kHz, 192kHz). Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index d219649..88ae274 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1217,6 +1217,22 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) rate = 0; break; } + + /* QS and DS rates normally can not be detected + * automatically by the card. Only exception is MADI + * in 96k frame mode. + * + * So if we read SS values (32 .. 48k), check for + * user-provided DS/QS bits in the control register + * and multiply the base frequency accordingly. + */ + if (rate <= 48000) { + if (hdspm->control_register & HDSPM_QuadSpeed) + rate *= 4; + else if (hdspm->control_register & + HDSPM_DoubleSpeed) + rate *= 2; + } } break; } -- cgit v1.1 From 5f8b4d53d7efe00ecb7d96c8cc0b4a44e130f174 Mon Sep 17 00:00:00 2001 From: Adrian Knoth Date: Fri, 29 Jul 2011 03:11:04 +0200 Subject: ALSA: hdspm - Add firmware revision 0xcc for RME MADI Apparently, there are multiple old firmware revisions in the wild for the PCI RME MADI cards. Just add them to the list of supported devices and treat them like their modern counterparts. Signed-off-by: Adrian Knoth Signed-off-by: Takashi Iwai --- sound/pci/rme9652/hdspm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 88ae274..6edc67c 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -521,6 +521,7 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024) /* revisions >= 230 indicate AES32 card */ +#define HDSPM_MADI_ANCIENT_REV 204 #define HDSPM_MADI_OLD_REV 207 #define HDSPM_MADI_REV 210 #define HDSPM_RAYDAT_REV 211 @@ -6484,6 +6485,7 @@ static int __devinit snd_hdspm_create(struct snd_card *card, switch (hdspm->firmware_rev) { case HDSPM_MADI_REV: case HDSPM_MADI_OLD_REV: + case HDSPM_MADI_ANCIENT_REV: hdspm->io_type = MADI; hdspm->card_name = "RME MADI"; hdspm->midiPorts = 3; -- cgit v1.1 From 06132fdf63c7d9acc19f136623bf2201f49e73b5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 28 Jul 2011 12:26:16 +0100 Subject: ASoC: Fix txx9aclc.c build 552d1ef6b5a98d7b95959d5b139071e3c90cebf1 [ASoC: core - Optimise and refactor pcm_new() to pass only rtd] breaks compilation of txx9aclc.c: CC [M] sound/soc/txx9/txx9aclc.o /home/ralf/src/linux/linux-mips/sound/soc/txx9/txx9aclc.c: In function 'txx9aclc_pcm_new': /home/ralf/src/linux/linux-mips/sound/soc/txx9/txx9aclc.c:318:3: error: 'card' undeclared (first use in this function) /home/ralf/src/linux/linux-mips/sound/soc/txx9/txx9aclc.c:318:3: note: each undeclared identifier is reported only once for each function it appears in make[5]: *** [sound/soc/txx9/txx9aclc.o] Error 1 Fixed by providing a definition for card. 
Signed-off-by: Ralf Baechle Signed-off-by: Takashi Iwai --- sound/soc/txx9/txx9aclc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c index 34aa972..3de99af 100644 --- a/sound/soc/txx9/txx9aclc.c +++ b/sound/soc/txx9/txx9aclc.c @@ -290,6 +290,7 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm) static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; struct snd_soc_dai *dai = rtd->cpu_dai; struct snd_pcm *pcm = rtd->pcm; struct platform_device *pdev = to_platform_device(dai->platform->dev); -- cgit v1.1 From ac85fe8b21248054851e05bfaa352562e5b06dd3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jul 2011 23:31:26 -0700 Subject: sparc: Sanitize cpu feature detection and reporting. Instead of evaluating the cpu features for ELF_HWCAP every exec, calculate it once at boot time. Add AV_SPARC_* capability flag bits, compatible with what Solaris reports to applications. Report these capabilities once in the kernel log, and also via /proc/cpuinfo in a new "cpucaps" entry. If available, fetch the cpu features from the machine description 'hwcap-list' property of the 'cpu' node. Signed-off-by: David S. Miller --- arch/sparc/include/asm/elf_64.h | 65 ++++++++---------- arch/sparc/kernel/cpu.c | 1 + arch/sparc/kernel/kernel.h | 6 ++ arch/sparc/kernel/setup_64.c | 149 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 36 deletions(-) diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 64f7a00..7df8b7f 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -59,15 +59,33 @@ #define R_SPARC_6 45 /* Bits present in AT_HWCAP, primarily for Sparc32. */ - -#define HWCAP_SPARC_FLUSH 1 /* CPU supports flush instruction. */ -#define HWCAP_SPARC_STBAR 2 -#define HWCAP_SPARC_SWAP 4 -#define HWCAP_SPARC_MULDIV 8 -#define HWCAP_SPARC_V9 16 -#define HWCAP_SPARC_ULTRA3 32 -#define HWCAP_SPARC_BLKINIT 64 -#define HWCAP_SPARC_N2 128 +#define HWCAP_SPARC_FLUSH 0x00000001 +#define HWCAP_SPARC_STBAR 0x00000002 +#define HWCAP_SPARC_SWAP 0x00000004 +#define HWCAP_SPARC_MULDIV 0x00000008 +#define HWCAP_SPARC_V9 0x00000010 +#define HWCAP_SPARC_ULTRA3 0x00000020 +#define HWCAP_SPARC_BLKINIT 0x00000040 +#define HWCAP_SPARC_N2 0x00000080 + +/* Solaris compatible AT_HWCAP bits. 
*/ +#define AV_SPARC_MUL32 0x00000100 /* 32x32 multiply is efficient */ +#define AV_SPARC_DIV32 0x00000200 /* 32x32 divide is efficient */ +#define AV_SPARC_FSMULD 0x00000400 /* 'fsmuld' is efficient */ +#define AV_SPARC_V8PLUS 0x00000800 /* v9 insn available to 32bit */ +#define AV_SPARC_POPC 0x00001000 /* 'popc' is efficient */ +#define AV_SPARC_VIS 0x00002000 /* VIS insns available */ +#define AV_SPARC_VIS2 0x00004000 /* VIS2 insns available */ +#define AV_SPARC_ASI_BLK_INIT 0x00008000 /* block init ASIs available */ +#define AV_SPARC_FMAF 0x00010000 /* fused multiply-add */ +#define AV_SPARC_VIS3 0x00020000 /* VIS3 insns available */ +#define AV_SPARC_HPC 0x00040000 /* HPC insns available */ +#define AV_SPARC_RANDOM 0x00080000 /* 'random' insn available */ +#define AV_SPARC_TRANS 0x00100000 /* transaction insns available */ +#define AV_SPARC_FJFMAU 0x00200000 /* unfused multiply-add */ +#define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ +#define AV_SPARC_ASI_CACHE_SPARING \ + 0x00800000 /* cache sparing ASIs available */ #define CORE_DUMP_USE_REGSET @@ -162,33 +180,8 @@ typedef struct { #define ELF_ET_DYN_BASE 0x0000010000000000UL #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL - -/* This yields a mask that user programs can use to figure out what - instruction set this cpu supports. */ - -/* On Ultra, we support all of the v8 capabilities. */ -static inline unsigned int sparc64_elf_hwcap(void) -{ - unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | - HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | - HWCAP_SPARC_V9); - - if (tlb_type == cheetah || tlb_type == cheetah_plus) - cap |= HWCAP_SPARC_ULTRA3; - else if (tlb_type == hypervisor) { - if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) - cap |= HWCAP_SPARC_BLKINIT; - if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA3) - cap |= HWCAP_SPARC_N2; - } - - return cap; -} - -#define ELF_HWCAP sparc64_elf_hwcap() +extern unsigned long sparc64_elf_hwcap; +#define ELF_HWCAP sparc64_elf_hwcap /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 17cf290..9810fd8 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -396,6 +396,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) , cpu_data(0).clock_tick #endif ); + cpucap_info(m); #ifdef CONFIG_SMP smp_bogo(m); #endif diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 6f6544c..8325d77 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -10,6 +10,12 @@ extern const char *sparc_pmu_type; extern unsigned int fsr_storage; extern int ncpus_probed; +#ifdef CONFIG_SPARC64 +/* setup_64.c */ +struct seq_file; +extern void cpucap_info(struct seq_file *); +#endif + #ifdef CONFIG_SPARC32 /* cpu.c */ extern void cpu_probe(void); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c4dd099..242dbb3 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,8 @@ #include #include #include +#include +#include #ifdef CONFIG_IP_PNP #include @@ -278,6 +281,151 @@ void __init boot_cpu_id_too_large(int cpu) } #endif +/* On Ultra, we support all of the v8 capabilities. 
*/ +unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | + HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV | + HWCAP_SPARC_V9); +EXPORT_SYMBOL(sparc64_elf_hwcap); + +static const char *hwcaps[] = { + "flush", "stbar", "swap", "muldiv", "v9", + "ultra3", "blkinit", "n2", + + /* These strings are as they appear in the machine description + * 'hwcap-list' property for cpu nodes. + */ + "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", + "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", + "ima", "cspare", +}; + +void cpucap_info(struct seq_file *m) +{ + unsigned long caps = sparc64_elf_hwcap; + int i, printed = 0; + + seq_puts(m, "cpucaps\t\t: "); + for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { + unsigned long bit = 1UL << i; + if (caps & bit) { + seq_printf(m, "%s%s", + printed ? "," : "", hwcaps[i]); + printed++; + } + } + seq_putc(m, '\n'); +} + +static void __init report_hwcaps(unsigned long caps) +{ + int i, printed = 0; + + printk(KERN_INFO "CPU CAPS: ["); + for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { + unsigned long bit = 1UL << i; + if (caps & bit) { + printk(KERN_CONT "%s%s", + printed ? "," : "", hwcaps[i]); + if (++printed == 8) { + printk(KERN_CONT "]\n"); + printk(KERN_INFO "CPU CAPS: ["); + printed = 0; + } + } + } + printk(KERN_CONT "]\n"); +} + +static unsigned long __init mdesc_cpu_hwcap_list(void) +{ + struct mdesc_handle *hp; + unsigned long caps = 0; + const char *prop; + int len; + u64 pn; + + hp = mdesc_grab(); + if (!hp) + return 0; + + pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu"); + if (pn == MDESC_NODE_NULL) + goto out; + + prop = mdesc_get_property(hp, pn, "hwcap-list", &len); + if (!prop) + goto out; + + while (len) { + int i, plen; + + for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { + unsigned long bit = 1UL << i; + + if (!strcmp(prop, hwcaps[i])) { + caps |= bit; + break; + } + } + + plen = strlen(prop) + 1; + prop += plen; + len -= plen; + } + +out: + mdesc_release(hp); + return caps; +} + +/* This yields a mask that user programs can use to figure out what + * instruction set this cpu supports. + */ +static void __init init_sparc64_elf_hwcap(void) +{ + unsigned long cap = sparc64_elf_hwcap; + unsigned long mdesc_caps; + + if (tlb_type == cheetah || tlb_type == cheetah_plus) + cap |= HWCAP_SPARC_ULTRA3; + else if (tlb_type == hypervisor) { + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + cap |= HWCAP_SPARC_BLKINIT; + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + cap |= HWCAP_SPARC_N2; + } + + cap |= (AV_SPARC_MUL32 | AV_SPARC_DIV32 | AV_SPARC_V8PLUS); + + mdesc_caps = mdesc_cpu_hwcap_list(); + if (!mdesc_caps) { + if (tlb_type == spitfire) + cap |= AV_SPARC_VIS; + if (tlb_type == cheetah || tlb_type == cheetah_plus) + cap |= AV_SPARC_VIS | AV_SPARC_VIS2; + if (tlb_type == cheetah_plus) + cap |= AV_SPARC_POPC; + if (tlb_type == hypervisor) { + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1) + cap |= AV_SPARC_ASI_BLK_INIT; + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | + AV_SPARC_ASI_BLK_INIT | + AV_SPARC_POPC); + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3) + cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | + AV_SPARC_FMAF); + } + } + sparc64_elf_hwcap = cap | mdesc_caps; + + report_hwcaps(sparc64_elf_hwcap); +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. 
*/ @@ -337,6 +485,7 @@ void __init setup_arch(char **cmdline_p) init_cur_cpu_trap(current_thread_info()); paging_init(); + init_sparc64_elf_hwcap(); } extern int stop_a_enabled; -- cgit v1.1 From 548c210fbffdb008a80fa41ff0cb3965f185583d Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 11 Jun 2011 14:42:06 -0400 Subject: [PARISC] fix return type of __atomic64_add_return The return type of __atomic64_add_return of should be s64 or long, not int. This fixes the atomic64 test failure that I previously reported. Signed-off-by: John David Anglin Cc: stable@kernel.org Signed-off-by: James Bottomley --- arch/parisc/include/asm/atomic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index f819559..26fd114 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -259,10 +259,10 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) -static __inline__ int +static __inline__ s64 __atomic64_add_return(s64 i, atomic64_t *v) { - int ret; + s64 ret; unsigned long flags; _atomic_spin_lock_irqsave(v, flags); -- cgit v1.1 From b055c8f3ef9f7bc6ba415d900f298d7801a9d1d4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 8 Jul 2011 11:31:57 -0700 Subject: drm/i915/hdmi: send AVI info frames on ILK+ as well On Ironlake and above, we have per-transcoder DIP registers, so use them for sending DIPs like AVI infoframes on ILK and above. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_reg.h | 14 ++++++++++++++ drivers/gpu/drm/i915/intel_hdmi.c | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 00bd510..30d8aae 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3020,6 +3020,20 @@ #define _TRANSA_DP_LINK_M2 0xe0048 #define _TRANSA_DP_LINK_N2 0xe004c +/* Per-transcoder DIP controls */ + +#define _VIDEO_DIP_CTL_A 0xe0200 +#define _VIDEO_DIP_DATA_A 0xe0208 +#define _VIDEO_DIP_GCP_A 0xe0210 + +#define _VIDEO_DIP_CTL_B 0xe1200 +#define _VIDEO_DIP_DATA_B 0xe1208 +#define _VIDEO_DIP_GCP_B 0xe1210 + +#define TVIDEO_DIP_CTL(pipe) _PIPE(pipe, _VIDEO_DIP_CTL_A, _VIDEO_DIP_CTL_B) +#define TVIDEO_DIP_DATA(pipe) _PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B) +#define TVIDEO_DIP_GCP(pipe) _PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B) + #define _TRANS_HTOTAL_B 0xe1000 #define _TRANS_HBLANK_B 0xe1004 #define _TRANS_HSYNC_B 0xe1008 diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index aa0a8e8..c220255 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -112,6 +112,40 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder) VIDEO_DIP_ENABLE_AVI); } +static void intel_ironlake_hdmi_set_avi_infoframe(struct drm_encoder *encoder) +{ + struct dip_infoframe avi_if = { + .type = DIP_TYPE_AVI, + .ver = DIP_VERSION_AVI, + .len = DIP_LEN_AVI, + }; + uint32_t *data = (uint32_t *)&avi_if; + struct drm_device *dev = encoder->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + struct drm_crtc *crtc = encoder->crtc; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + unsigned i; + + if (!intel_hdmi->has_hdmi_sink) + return; + + 
intel_wait_for_vblank(dev, intel_crtc->pipe); + + I915_WRITE(reg, VIDEO_DIP_SELECT_AVI); + + intel_dip_infoframe_csum(&avi_if); + for (i = 0; i < sizeof(avi_if); i += 4) { + I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data); + data++; + } + + I915_WRITE(reg, VIDEO_DIP_ENABLE | VIDEO_DIP_SELECT_AVI | + VIDEO_DIP_FREQ_VSYNC | (DIP_LEN_AVI << 8) | + VIDEO_DIP_ENABLE_AVI); +} + static void intel_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -149,7 +183,10 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder, I915_WRITE(intel_hdmi->sdvox_reg, sdvox); POSTING_READ(intel_hdmi->sdvox_reg); - intel_hdmi_set_avi_infoframe(encoder); + if (HAS_PCH_SPLIT(dev)) + intel_ironlake_hdmi_set_avi_infoframe(encoder); + else + intel_hdmi_set_avi_infoframe(encoder); } static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) -- cgit v1.1 From e0e3fb482105c65ce6f5480a86092e966a29ed79 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 29 Jul 2011 14:45:21 -0700 Subject: drm/i915: Ignore GPU wedged errors while pinning scanout buffers Failing to pin a scanout buffer will most likely lead to a black screen, so if the GPU is wedged, then just let the pin happen and hope that things work out OK. Signed-off-by: Keith Packard Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a087e1b..d5c7c7b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3019,7 +3019,7 @@ i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj, /* Currently, we are always called from an non-interruptible context. */ if (pipelined != obj->ring) { ret = i915_gem_object_wait_rendering(obj); - if (ret) + if (ret == -ERESTARTSYS) return ret; } -- cgit v1.1 From b066254fee2b0b4d1323295f8ae34c9442222165 Mon Sep 17 00:00:00 2001 From: Pieterjan Camerlynck Date: Tue, 26 Jul 2011 16:23:54 +0200 Subject: i915: add Dell OptiPlex FX170 to intel_no_lvds The Dell OptiPlex FX170 claims to have LVDS, but doesn't. Signed-off-by: Pieterjan Camerlynck Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b28f7bd..2e8ddfc 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -690,6 +690,14 @@ static const struct dmi_system_id intel_no_lvds[] = { }, { .callback = intel_no_lvds_dmi_callback, + .ident = "Dell OptiPlex FX170", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex FX170"), + }, + }, + { + .callback = intel_no_lvds_dmi_callback, .ident = "AOpen Mini PC", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "AOpen"), -- cgit v1.1 From 358733e9047cafcc185ca19b8c369c659ac0c4cf Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 27 Jul 2011 11:53:01 -0700 Subject: drm/i915: add GPU max frequency control file Mainly for use in debugging and benchmarking, this file allows the user to control the max frequency used by the GPU. Frequency may still vary based on workload (if the frequency is set to higher than the minimum) but won't go over the newly set value. 
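
One possible way to drive the new file from user space (a sketch only: the path assumes debugfs is mounted at /sys/kernel/debug and the card is DRM minor 0; per the write handler in the diff below, the value is in MHz and is divided by 50 internally, so it is effectively rounded down to a multiple of 50):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/dri/0/i915_max_freq";
	const char *mhz = "900\n";	/* cap the GPU at 900 MHz */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, mhz, strlen(mhz)) < 0)
		perror("write");
	close(fd);
	return 0;
}
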
Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_debugfs.c | 90 +++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0a893f7..782b781 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1300,6 +1300,76 @@ static const struct file_operations i915_wedged_fops = { .llseek = default_llseek, }; +static int +i915_max_freq_open(struct inode *inode, + struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +static ssize_t +i915_max_freq_read(struct file *filp, + char __user *ubuf, + size_t max, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + drm_i915_private_t *dev_priv = dev->dev_private; + char buf[80]; + int len; + + len = snprintf(buf, sizeof (buf), + "max freq: %d\n", dev_priv->max_delay * 50); + + if (len > sizeof (buf)) + len = sizeof (buf); + + return simple_read_from_buffer(ubuf, max, ppos, buf, len); +} + +static ssize_t +i915_max_freq_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + struct drm_i915_private *dev_priv = dev->dev_private; + char buf[20]; + int val = 1; + + if (cnt > 0) { + if (cnt > sizeof (buf) - 1) + return -EINVAL; + + if (copy_from_user(buf, ubuf, cnt)) + return -EFAULT; + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 0); + } + + DRM_DEBUG_DRIVER("Manually setting max freq to %d\n", val); + + /* + * Turbo will still be enabled, but won't go above the set value. + */ + dev_priv->max_delay = val / 50; + + gen6_set_rps(dev, val / 50); + + return cnt; +} + +static const struct file_operations i915_max_freq_fops = { + .owner = THIS_MODULE, + .open = i915_max_freq_open, + .read = i915_max_freq_read, + .write = i915_max_freq_write, + .llseek = default_llseek, +}; + /* As the drm_debugfs_init() routines are called before dev->dev_private is * allocated we need to hook into the minor for release. 
*/ static int @@ -1399,6 +1469,21 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor) return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops); } +static int i915_max_freq_create(struct dentry *root, struct drm_minor *minor) +{ + struct drm_device *dev = minor->dev; + struct dentry *ent; + + ent = debugfs_create_file("i915_max_freq", + S_IRUGO | S_IWUSR, + root, dev, + &i915_max_freq_fops); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + return drm_add_fake_info_node(minor, ent, &i915_max_freq_fops); +} + static struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -1451,6 +1536,9 @@ int i915_debugfs_init(struct drm_minor *minor) ret = i915_forcewake_create(minor->debugfs_root, minor); if (ret) return ret; + ret = i915_max_freq_create(minor->debugfs_root, minor); + if (ret) + return ret; return drm_debugfs_create_files(i915_debugfs_list, I915_DEBUGFS_ENTRIES, @@ -1465,6 +1553,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor) 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops, 1, minor); + drm_debugfs_remove_files((struct drm_info_list *) &i915_max_freq_fops, + 1, minor); } #endif /* CONFIG_DEBUG_FS */ -- cgit v1.1 From 013a41ec541d5daa0c9f2b5126d2e820902c052d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 19 Jul 2011 15:38:56 -0700 Subject: drm/i915: provide more error output when mode sets fail If a mode set fails we may get a message from drm_crtc_helper if we're lucky, but it won't tell us anything about *why* we failed to set a mode. So add a few DRM_ERRORs for the cases that shouldn't happen so we can debug things more easily. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 32c8c95..d2fb8cf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1958,7 +1958,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, /* no fb bound */ if (!crtc->fb) { - DRM_DEBUG_KMS("No FB bound\n"); + DRM_ERROR("No FB bound\n"); return 0; } @@ -1967,6 +1967,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, case 1: break; default: + DRM_ERROR("no plane for crtc\n"); return -EINVAL; } @@ -1976,6 +1977,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, NULL); if (ret != 0) { mutex_unlock(&dev->struct_mutex); + DRM_ERROR("pin & fence failed\n"); return ret; } @@ -2004,6 +2006,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, if (ret) { i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); mutex_unlock(&dev->struct_mutex); + DRM_ERROR("failed to update base address\n"); return ret; } -- cgit v1.1 From 070d329ae52e2fde341771d753a5b728145881f4 Mon Sep 17 00:00:00 2001 From: Michel Alexandre Salim Date: Thu, 28 Jul 2011 18:52:06 +0200 Subject: drm/i915: Add quirk to disable SSC on Sony Vaio Y2 Using the new quirk added to support disabling SSC on Lenovo U160 (#36656, commit 435793dfb8aec7b2e19f72d5bce8a22fd0b57839), also register the Vaio as a special case and disable SSC for it. 
This patch fixes #34437 on fdo bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34437 Signed-off-by: Michel Alexandre Salim Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d2fb8cf..3d2900c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7892,6 +7892,9 @@ struct intel_quirk intel_quirks[] = { /* Lenovo U160 cannot use SSC on LVDS */ { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, + + /* Sony Vaio Y cannot use SSC on LVDS */ + { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable }, }; static void intel_init_quirks(struct drm_device *dev) -- cgit v1.1 From 291427f5fdadec6e4be2924172e83588880e1539 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 29 Jul 2011 12:42:37 -0700 Subject: drm/i915: apply phase pointer override on SNB+ too These bits moved around on SNB and above. v2: again with the git send-email fail v3: add macros for getting per-pipe override & enable bits v4: enable phase sync pointer on SNB and IVB configs as well Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_display.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 30d8aae..a7f7a34 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3091,6 +3091,11 @@ #define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2) #define TRANS_AUTOTRAIN_GEN_STALL_DIS (1<<31) +#define SOUTH_CHICKEN1 0xc2000 +#define FDIA_PHASE_SYNC_SHIFT_OVR 19 +#define FDIA_PHASE_SYNC_SHIFT_EN 18 +#define FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2))) +#define FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2))) #define SOUTH_CHICKEN2 0xc2004 #define DPLS_EDP_PPS_FIX_DIS (1<<0) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3d2900c..5316460 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2113,6 +2113,18 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) FDI_FE_ERRC_ENABLE); } +static void cpt_phase_pointer_enable(struct drm_device *dev, int pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 flags = I915_READ(SOUTH_CHICKEN1); + + flags |= FDI_PHASE_SYNC_OVR(pipe); + I915_WRITE(SOUTH_CHICKEN1, flags); /* once to unlock... */ + flags |= FDI_PHASE_SYNC_EN(pipe); + I915_WRITE(SOUTH_CHICKEN1, flags); /* then again to enable */ + POSTING_READ(SOUTH_CHICKEN1); +} + /* The FDI link training functions for ILK/Ibexpeak. 
*/ static void ironlake_fdi_link_train(struct drm_crtc *crtc) { @@ -2263,6 +2275,9 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) POSTING_READ(reg); udelay(150); + if (HAS_PCH_CPT(dev)) + cpt_phase_pointer_enable(dev, pipe); + for (i = 0; i < 4; i++ ) { reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); @@ -2379,6 +2394,9 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) POSTING_READ(reg); udelay(150); + if (HAS_PCH_CPT(dev)) + cpt_phase_pointer_enable(dev, pipe); + for (i = 0; i < 4; i++ ) { reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); @@ -2488,6 +2506,17 @@ static void ironlake_fdi_pll_enable(struct drm_crtc *crtc) } } +static void cpt_phase_pointer_disable(struct drm_device *dev, int pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 flags = I915_READ(SOUTH_CHICKEN1); + + flags &= ~(FDI_PHASE_SYNC_EN(pipe)); + I915_WRITE(SOUTH_CHICKEN1, flags); /* once to disable... */ + flags &= ~(FDI_PHASE_SYNC_OVR(pipe)); + I915_WRITE(SOUTH_CHICKEN1, flags); /* then again to lock */ + POSTING_READ(SOUTH_CHICKEN1); +} static void ironlake_fdi_disable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -2517,6 +2546,8 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc) I915_WRITE(FDI_RX_CHICKEN(pipe), I915_READ(FDI_RX_CHICKEN(pipe) & ~FDI_RX_PHASE_SYNC_POINTER_EN)); + } else if (HAS_PCH_CPT(dev)) { + cpt_phase_pointer_disable(dev, pipe); } /* still set train pattern 1 */ -- cgit v1.1 From cda2bb78c24de7674eafa3210314dc75bed344a6 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 26 Jul 2011 16:53:06 -0400 Subject: drm/i915/pch: Save/restore PCH_PORT_HOTPLUG across suspend At least on a Lenovo X220 the HPD bits of this are enabled at boot but cleared after resume, which means plug interrupts stop working. This also happens to fix DP displays re-lighting on resume. I'm quite certain that's an accident: the first DP link train inevitably fails on that machine, and it's only serendipity that we're getting multiple plug interrupts and the second train works. But I shall take my victories where I get them. 
Signed-off-by: Adam Jackson Tested-by: Keith Packard Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_suspend.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce7914c..e0d0e27 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -541,6 +541,7 @@ typedef struct drm_i915_private { u32 savePIPEB_LINK_M1; u32 savePIPEB_LINK_N1; u32 saveMCHBAR_RENDER_STANDBY; + u32 savePCH_PORT_HOTPLUG; struct { /** Bridge to intel-gtt-ko */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5257cfc..27693c0 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -814,6 +814,7 @@ int i915_save_state(struct drm_device *dev) dev_priv->saveFDI_RXB_IMR = I915_READ(_FDI_RXB_IMR); dev_priv->saveMCHBAR_RENDER_STANDBY = I915_READ(RSTDBYCTL); + dev_priv->savePCH_PORT_HOTPLUG = I915_READ(PCH_PORT_HOTPLUG); } else { dev_priv->saveIER = I915_READ(IER); dev_priv->saveIMR = I915_READ(IMR); @@ -865,6 +866,7 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(GTIMR, dev_priv->saveGTIMR); I915_WRITE(_FDI_RXA_IMR, dev_priv->saveFDI_RXA_IMR); I915_WRITE(_FDI_RXB_IMR, dev_priv->saveFDI_RXB_IMR); + I915_WRITE(PCH_PORT_HOTPLUG, dev_priv->savePCH_PORT_HOTPLUG); } else { I915_WRITE(IER, dev_priv->saveIER); I915_WRITE(IMR, dev_priv->saveIMR); -- cgit v1.1 From bb0c5ed6ec523199e34e81dcef8e987507553b63 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 30 Jul 2011 01:40:48 -0400 Subject: ACPI: DMI workaround for Asus A8N-SLI Premium and Asus A8N-SLI DELUX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DMI workaround for A8N-SLI Premium and A8N-SLI DELUXE to enable the s3 suspend old ordering. http://bugzilla.kernel.org/show_bug.cgi?id=9528 Tested-by: Heiko Ettelbrück Tested-by: Brian Beardall Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- drivers/acpi/sleep.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6c94960..3ed80b2 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), }, }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI DELUXE", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"), + }, + }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI Premium", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ -- cgit v1.1 From ec2cf68e024b4f66df11683147d63e07db6ee875 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Jul 2011 21:14:12 -0700 Subject: ALSA: rtctimer.c needs module.h rtctimer.c uses interfaces from linux/module.h, so it should include that file. This fixes build errors. 
Signed-off-by: Randy Dunlap Signed-off-by: Takashi Iwai --- sound/core/rtctimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c index 0851cd1..e85e72b 100644 --- a/sound/core/rtctimer.c +++ b/sound/core/rtctimer.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.1 From dd8ae59d48790d5c25f47ebbe502c8ca379fdde0 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 30 Jul 2011 05:03:58 -0700 Subject: target: Fix bug for transport_generic_wait_for_tasks with direct operation This patch fixes a bug in transport_handle_cdb_direct() usage with target_core where transport_generic_wait_for_tasks() was bypassing active I/O + usage of cmd->t_transport_stop_comp because cmd->t_transport_active=1 was not being set before dispatching with transport_generic_new_cmd(). The fix follows existing usage in transport_generic_handle_cdb*() -> transport_add_cmd_to_queue() and set these directly, as well as handle transport_generic_new_cmd() exceptions for QUEUE_FULL and CHECK_CONDITION instead of propigating up to RX context fabric code. The bug was manifesting itself with the following SLUB poison overwritten warnings with iscsi-target v4.1 LUNs using the new process context direct operation during session reinstatement with active I/O exception handling: [885410.498267] ============================================================================= [885410.621622] BUG lio_cmd_cache: Poison overwritten [885410.621791] ----------------------------------------------------------------------------- [885410.621792] [885410.623420] INFO: 0xffff880000cf3750-0xffff880000cf378d. First byte 0x6a instead of 0x6b [885410.626332] INFO: Allocated in iscsit_allocate_cmd+0x1c/0xd4 [iscsi_target_mod] age=345 cpu=1 pid=22554 [885411.855189] INFO: Freed in iscsit_release_cmd+0x208/0x217 [iscsi_target_mod] age=1410 cpu=1 pid=22554 [885411.856048] INFO: Slab 0xffffea000002d480 objects=22 used=0 fp=0xffff880000cf7300 flags=0x4080 [885411.856368] INFO: Object 0xffff880000cf33c0 @offset=13248 fp=0xffff880000cf6780 [885411.955678] Pid: 22554, comm: iscsi_trx Not tainted 3.0.0-rc7+ #30 [885411.956040] Call Trace: [885411.957029] [] print_trailer+0x12e/0x137 [885412.752879] [] check_bytes_and_report+0xb9/0xfd [885412.754933] [] check_object+0xb5/0x192 [885412.755099] [] __free_slab+0x96/0x13a [885412.757008] [] discard_slab+0x41/0x43 [885412.758171] [] __slab_free+0xf3/0xfe [885412.761027] [] ? iscsit_release_cmd+0x208/0x217 [iscsi_target_mod] [885412.761354] [] kmem_cache_free+0x6f/0xac [885412.761536] [] iscsit_release_cmd+0x208/0x217 [iscsi_target_mod] [885412.762056] [] ? iblock_free_task+0x34/0x39 [target_core_iblock] [885412.762368] [] lio_release_cmd+0x10/0x12 [iscsi_target_mod] [885412.764129] [] transport_release_cmd+0x2f/0x33 [target_core_mod] [885412.805024] [] transport_generic_remove+0xb6/0xc3 [target_core_mod] [885412.806424] [] ? try_to_wake_up+0x1bd/0x1bd [885412.809033] [] transport_generic_free_cmd+0x75/0x7d [target_core_mod] [885412.810066] [] transport_generic_wait_for_tasks+0x21c/0x22b [target_core_mod] [885412.811056] [] ? mutex_lock+0x11/0x32 [885412.813059] [] ? mutex_lock+0x11/0x32 [885412.813200] [] iscsit_close_connection+0x1d5/0x63a [iscsi_target_mod] [885412.813517] [] iscsit_take_action_for_connection_exit+0xdb/0xe0 [iscsi_target_mod] [885412.813851] [] iscsi_target_rx_thread+0x11f6/0x1221 [iscsi_target_mod] [885412.829024] [] ? pick_next_task_fair+0xbe/0x10e [885412.831010] [] ? 
iscsit_handle_scsi_cmd+0x91d/0x91d [iscsi_target_mod] [885412.833011] [] ? iscsit_handle_scsi_cmd+0x91d/0x91d [iscsi_target_mod] [885412.835010] [] kthread+0x7d/0x85 [885412.837022] [] kernel_thread_helper+0x4/0x10 [885412.838008] [] ? kthread_worker_fn+0x145/0x145 [885412.840047] [] ? gs_change+0x13/0x13 [885412.842007] FIX lio_cmd_cache: Restoring 0xffff880000cf3750-0xffff880000cf378d=0x6 Cc: Christoph Hellwig Cc: Andy Grover Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ff7fcf8..8976032 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb( } EXPORT_SYMBOL(transport_generic_handle_cdb); +static void transport_generic_request_failure(struct se_cmd *, + struct se_device *, int, int); /* * Used by fabric module frontends to queue tasks directly. * Many only be used from process context only @@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb); int transport_handle_cdb_direct( struct se_cmd *cmd) { + int ret; + if (!cmd->se_lun) { dump_stack(); pr_err("cmd->se_lun is NULL\n"); @@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct( " from interrupt context\n"); return -EINVAL; } - - return transport_generic_new_cmd(cmd); + /* + * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following + * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue() + * in existing usage to ensure that outstanding descriptors are handled + * correctly during shutdown via transport_generic_wait_for_tasks() + * + * Also, we don't take cmd->t_state_lock here as we only expect + * this to be called for initial descriptor submission. + */ + cmd->t_state = TRANSPORT_NEW_CMD; + atomic_set(&cmd->t_transport_active, 1); + /* + * transport_generic_new_cmd() is already handling QUEUE_FULL, + * so follow TRANSPORT_NEW_CMD processing thread context usage + * and call transport_generic_request_failure() if necessary.. + */ + ret = transport_generic_new_cmd(cmd); + if (ret == -EAGAIN) + return 0; + else if (ret < 0) { + cmd->transport_error_status = ret; + transport_generic_request_failure(cmd, NULL, 0, + (cmd->data_direction != DMA_TO_DEVICE)); + } + return 0; } EXPORT_SYMBOL(transport_handle_cdb_direct); -- cgit v1.1 From d59729f4e794f814b25ccd2aebfbe606242c4544 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 30 Jul 2011 12:34:19 -0400 Subject: ext4: fix races in ext4_sync_parent() Fix problems if fsync() races against a rename of a parent directory as pointed out by Al Viro in his own inimitable way: >While we are at it, could somebody please explain what the hell is ext4 >doing in >static int ext4_sync_parent(struct inode *inode) >{ > struct writeback_control wbc; > struct dentry *dentry = NULL; > int ret = 0; > > while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { > ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); > dentry = list_entry(inode->i_dentry.next, > struct dentry, d_alias); > if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) > break; > inode = dentry->d_parent->d_inode; > ret = sync_mapping_buffers(inode->i_mapping); > ... >Note that dentry obviously can't be NULL there. dentry->d_parent is never >NULL. And dentry->d_parent would better not be negative, for crying out >loud! 
What's worse, there's no guarantees that dentry->d_parent will >remain our parent over that sync_mapping_buffers() *and* that inode won't >just be freed under us (after rename() and memory pressure leading to >eviction of what used to be our dentry->d_parent)...... Reported-by: Al Viro Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index ce66d2f..f9dbe33 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -129,15 +129,30 @@ static int ext4_sync_parent(struct inode *inode) { struct writeback_control wbc; struct dentry *dentry = NULL; + struct inode *next; int ret = 0; - while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { + if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) + return 0; + inode = igrab(inode); + while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); - dentry = list_entry(inode->i_dentry.next, - struct dentry, d_alias); - if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) + dentry = NULL; + spin_lock(&inode->i_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = list_first_entry(&inode->i_dentry, + struct dentry, d_alias); + dget(dentry); + } + spin_unlock(&inode->i_lock); + if (!dentry) + break; + next = igrab(dentry->d_parent->d_inode); + dput(dentry); + if (!next) break; - inode = dentry->d_parent->d_inode; + iput(inode); + inode = next; ret = sync_mapping_buffers(inode->i_mapping); if (ret) break; @@ -148,6 +163,7 @@ static int ext4_sync_parent(struct inode *inode) if (ret) break; } + iput(inode); return ret; } -- cgit v1.1 From 59be8e7280c10fd8f078ba6dc2bcdc2b1453b6ab Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 30 Jul 2011 12:38:46 -0400 Subject: ext4: change umode_t in tracepoint headers to be an explicit __u16 As requested by Al Viro, since umode_t may be changing to a u32 for some architectures. 
Signed-off-by: "Theodore Ts'o" Cc: Al Viro --- include/trace/events/ext4.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 51d8813..bf5518d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -23,7 +23,7 @@ TRACE_EVENT(ext4_free_inode, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( uid_t, uid ) __field( gid_t, gid ) __field( __u64, blocks ) @@ -52,7 +52,7 @@ TRACE_EVENT(ext4_request_inode, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, dir ) - __field( umode_t, mode ) + __field( __u16, mode ) ), TP_fast_assign( @@ -75,7 +75,7 @@ TRACE_EVENT(ext4_allocate_inode, __field( dev_t, dev ) __field( ino_t, ino ) __field( ino_t, dir ) - __field( umode_t, mode ) + __field( __u16, mode ) ), TP_fast_assign( @@ -727,7 +727,7 @@ TRACE_EVENT(ext4_free_blocks, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, block ) __field( unsigned long, count ) __field( int, flags ) @@ -1014,7 +1014,7 @@ TRACE_EVENT(ext4_forget, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( int, is_metadata ) __field( __u64, block ) ), @@ -1041,7 +1041,7 @@ TRACE_EVENT(ext4_da_update_reserve_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, i_blocks ) __field( int, used_blocks ) __field( int, reserved_data_blocks ) @@ -1078,7 +1078,7 @@ TRACE_EVENT(ext4_da_reserve_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, i_blocks ) __field( int, md_needed ) __field( int, reserved_data_blocks ) @@ -1112,7 +1112,7 @@ TRACE_EVENT(ext4_da_release_space, TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( umode_t, mode ) + __field( __u16, mode ) __field( __u64, i_blocks ) __field( int, freed_blocks ) __field( int, reserved_data_blocks ) -- cgit v1.1 From c49bafa3842751b8955a962859f42d307673d75d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Jul 2011 12:58:41 -0400 Subject: ext4: add missing kfree() on error return path in add_new_gdb() We added some more error handling in b40971426a "ext4: add error checking to calls to ext4_handle_dirty_metadata()". But we need to call kfree() as well to avoid a memory leak. Signed-off-by: Dan Carpenter Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6e3327d..71085df 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -517,6 +517,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: + kfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: -- cgit v1.1 From ab3d0abe2e4c1f164af7a6cc3694fcb8c24a57ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Jul 2011 11:53:47 -0700 Subject: Input: psmouse - hgpk.c needs module.h hgpk.c uses interfaces from linux/module.h, so it should include that file. This fixes build errors. 
Signed-off-by: Randy Dunlap Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/hgpk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/hgpk.c b/drivers/input/mouse/hgpk.c index 95577c1..4d17d9f 100644 --- a/drivers/input/mouse/hgpk.c +++ b/drivers/input/mouse/hgpk.c @@ -32,6 +32,7 @@ #define DEBUG #include #include +#include #include #include #include -- cgit v1.1 From 2501ec97663d84cfbc8fd8848c382f89c3bf8d1d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sat, 30 Jul 2011 11:55:38 -0700 Subject: Input: kxtj9 - explicitly include module.h We need to explicitly include module.h since some of its facilities are used. Signed-off-by: Stephen Rothwell Signed-off-by: Dmitry Torokhov --- drivers/input/misc/kxtj9.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c index c456f63..783597a 100644 --- a/drivers/input/misc/kxtj9.c +++ b/drivers/input/misc/kxtj9.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From 3f27757a1182f5e3feff7425b1c3e43f3e466724 Mon Sep 17 00:00:00 2001 From: Rakesh Iyer Date: Sat, 30 Jul 2011 12:01:48 -0700 Subject: Input: tegra-kbc - fix computation of polling time Fix a constant definition and computation of polling time. [dtor@mail.ru: switched to using DIV_ROUND_UP as was suggested by Thierry Reding ] Signed-off-by: Rakesh Iyer Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tegra-kbc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index da3828f..f270447 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -19,6 +19,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include #include #include @@ -37,7 +38,7 @@ #define KBC_ROW_SCAN_DLY 5 /* KBC uses a 32KHz clock so a cycle = 1/32Khz */ -#define KBC_CYCLE_USEC 32 +#define KBC_CYCLE_MS 32 /* KBC Registers */ @@ -647,7 +648,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev) debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows; kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt; - kbc->repoll_dly = ((kbc->repoll_dly * KBC_CYCLE_USEC) + 999) / 1000; + kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS); input_dev->name = pdev->name; input_dev->id.bustype = BUS_HOST; -- cgit v1.1 From 52db9819ac96b0c5a4e075d836cf21dc529cbce4 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 30 Jul 2011 12:05:13 -0700 Subject: Input: lm8323 - add missing device_remove_file for dev_attr_time Add missing device_remove_file() for dev_attr_time in lm8323_remove(). Also calling device_remove_file() in lm8323_probe() error path to remove sysfs attribute file. 
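The rule being applied: the dev_attr_time attribute created for each enabled PWM channel must be removed wherever its LED class device is unregistered. A minimal sketch of the pairing (illustrative only, not part of the patch; it assumes the per-channel cdev layout used by the driver):

	/* probe: attribute created only for channels exposing a PWM LED */
	if (lm->pwm[i].enabled) {
		err = device_create_file(lm->pwm[i].cdev.dev, &dev_attr_time);
		if (err)
			goto fail;
	}

	/* remove() and the probe error path must mirror it */
	if (lm->pwm[i].enabled) {
		device_remove_file(lm->pwm[i].cdev.dev, &dev_attr_time);
		led_classdev_unregister(&lm->pwm[i].cdev);
	}
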
Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/lm8323.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index ab0acaf..756348a 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -754,8 +754,11 @@ fail3: device_remove_file(&client->dev, &dev_attr_disable_kp); fail2: while (--pwm >= 0) - if (lm->pwm[pwm].enabled) + if (lm->pwm[pwm].enabled) { + device_remove_file(lm->pwm[pwm].cdev.dev, + &dev_attr_time); led_classdev_unregister(&lm->pwm[pwm].cdev); + } fail1: input_free_device(idev); kfree(lm); @@ -775,8 +778,10 @@ static int __devexit lm8323_remove(struct i2c_client *client) device_remove_file(&lm->client->dev, &dev_attr_disable_kp); for (i = 0; i < 3; i++) - if (lm->pwm[i].enabled) + if (lm->pwm[i].enabled) { + device_remove_file(lm->pwm[i].cdev.dev, &dev_attr_time); led_classdev_unregister(&lm->pwm[i].cdev); + } kfree(lm); -- cgit v1.1 From 1f4bb066433322f6f189b084ceebdfb4add77292 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sat, 30 Jul 2011 12:08:10 -0700 Subject: Input: gpio_keys - return proper error code if memory allocation fails Return -ENOMEM if kzalloc fails in gpio_keys_get_devtree_pdata(). Signed-off-by: Tobias Klauser Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index ce281d1..67df91a 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -483,7 +483,7 @@ static int gpio_keys_get_devtree_pdata(struct device *dev, buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL); if (!buttons) - return -ENODEV; + return -ENOMEM; pp = NULL; i = 0; -- cgit v1.1 From 205e9a2106b934ea39049bab28f0896c17a2cb30 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 29 Jul 2011 18:37:02 +0400 Subject: [PARISC] wire up sendmmsg syscall Cc: stable@kernel.org Signed-off-by: James Bottomley --- arch/parisc/include/asm/unistd.h | 3 ++- arch/parisc/kernel/syscall_table.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 3392de3..d61de64 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -821,8 +821,9 @@ #define __NR_open_by_handle_at (__NR_Linux + 326) #define __NR_syncfs (__NR_Linux + 327) #define __NR_setns (__NR_Linux + 328) +#define __NR_sendmmsg (__NR_Linux + 329) -#define __NR_Linux_syscalls (__NR_setns + 1) +#define __NR_Linux_syscalls (__NR_sendmmsg + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 34a4f5a..e66366f 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -427,6 +427,7 @@ ENTRY_COMP(open_by_handle_at) ENTRY_SAME(syncfs) ENTRY_SAME(setns) + ENTRY_COMP(sendmmsg) /* Nothing yet */ -- cgit v1.1 From 71ff069c3d6a2b23eaf516bc714a20ce0cdc3609 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 31 Jul 2011 19:56:10 -0700 Subject: Input: mma8450 - add device tree probe support It adds device tree probe support for mma8450 driver. 
Signed-off-by: Shawn Guo Acked-by: Eric Miao Acked-by: Grant Likely Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/fsl-mma8450.txt | 11 +++++++++++ drivers/input/misc/mma8450.c | 8 ++++++++ 2 files changed, 19 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/fsl-mma8450.txt diff --git a/Documentation/devicetree/bindings/input/fsl-mma8450.txt b/Documentation/devicetree/bindings/input/fsl-mma8450.txt new file mode 100644 index 0000000..a00c94c --- /dev/null +++ b/Documentation/devicetree/bindings/input/fsl-mma8450.txt @@ -0,0 +1,11 @@ +* Freescale MMA8450 3-Axis Accelerometer + +Required properties: +- compatible : "fsl,mma8450". + +Example: + +accelerometer: mma8450@1c { + compatible = "fsl,mma8450"; + reg = <0x1c>; +}; diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c index 20f8f92..6c76cf7 100644 --- a/drivers/input/misc/mma8450.c +++ b/drivers/input/misc/mma8450.c @@ -24,6 +24,7 @@ #include #include #include +#include #define MMA8450_DRV_NAME "mma8450" @@ -229,10 +230,17 @@ static const struct i2c_device_id mma8450_id[] = { }; MODULE_DEVICE_TABLE(i2c, mma8450_id); +static const struct of_device_id mma8450_dt_ids[] = { + { .compatible = "fsl,mma8450", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids); + static struct i2c_driver mma8450_driver = { .driver = { .name = MMA8450_DRV_NAME, .owner = THIS_MODULE, + .of_match_table = mma8450_dt_ids, }, .probe = mma8450_probe, .remove = __devexit_p(mma8450_remove), -- cgit v1.1 From b4aff1f874f679320c03e3d97b60fc7babfd4623 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 28 Jun 2011 16:18:59 -0400 Subject: Btrfs: load the key from the dir item in readdir into a fake dentry In btrfs we have 2 indexes for inodes. One is for readdir, it's in this nice sequential order and works out brilliantly for readdir. However if you use ls, it usually stat's each file it gets from readdir. This is where the second index comes in, which is based on a hash of the name of the file. So then the lookup has to lookup this index, and then lookup the inode. The index lookup is going to be in random order (since its based on the name hash), which gives us less than stellar performance. Since we know the inode location from the readdir index, I create a dummy dentry and copy the location key into dentry->d_fsdata. Then on lookup if we have d_fsdata we use that location to lookup the inode, avoiding looking up the other directory index. 
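Condensed, the pattern is: stash the location key while readdir already has it in hand, and let lookup consume it instead of going through the name-hash index. A minimal sketch (illustrative only; the hunks below also handle allocation failure and dentry refcounting):

	/* readdir side: remember where the inode lives */
	newkey = kzalloc(sizeof(struct btrfs_key), GFP_NOFS);
	if (newkey) {
		btrfs_dir_item_key_to_cpu(leaf, di, newkey);
		tmp->d_fsdata = newkey;
		tmp->d_flags |= DCACHE_NEED_LOOKUP;	/* ->lookup() still runs */
		d_rehash(tmp);
	}

	/* lookup side: consume the cached key, skip the name-hash index */
	if (d_need_lookup(dentry)) {
		memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
		kfree(dentry->d_fsdata);
		dentry->d_fsdata = NULL;
		d_clear_need_lookup(dentry);
	}
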
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index caa26ab..540e3b4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4016,12 +4016,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *sub_root = root; struct btrfs_key location; int index; - int ret; + int ret = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location); + if (unlikely(d_need_lookup(dentry))) { + memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); + kfree(dentry->d_fsdata); + dentry->d_fsdata = NULL; + d_clear_need_lookup(dentry); + } else { + ret = btrfs_inode_by_name(dir, dentry, &location); + } if (ret < 0) return ERR_PTR(ret); @@ -4076,6 +4083,12 @@ static int btrfs_dentry_delete(const struct dentry *dentry) return 0; } +static void btrfs_dentry_release(struct dentry *dentry) +{ + if (dentry->d_fsdata) + kfree(dentry->d_fsdata); +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -4098,6 +4111,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_path *path; struct list_head ins_list; struct list_head del_list; + struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4187,6 +4201,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4207,6 +4222,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + q.name = name_ptr; + q.len = name_len; + q.hash = full_name_hash(q.name, q.len); + tmp = d_lookup(filp->f_dentry, &q); + if (!tmp) { + struct btrfs_key *newkey; + + newkey = kzalloc(sizeof(struct btrfs_key), + GFP_NOFS); + if (!newkey) + goto no_dentry; + tmp = d_alloc(filp->f_dentry, &q); + if (!tmp) { + kfree(newkey); + dput(tmp); + goto no_dentry; + } + memcpy(newkey, &location, + sizeof(struct btrfs_key)); + tmp->d_fsdata = newkey; + tmp->d_flags |= DCACHE_NEED_LOOKUP; + d_rehash(tmp); + dput(tmp); + } else { + dput(tmp); + } +no_dentry: /* is this a reference to our own snapshot? If so * skip it */ @@ -7452,4 +7494,5 @@ static const struct inode_operations btrfs_symlink_inode_operations = { const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, + .d_release = btrfs_dentry_release, }; -- cgit v1.1 From 5a30d8a2b8ddd5102c440c7e5a7c8e1fd729c818 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Jul 2011 14:20:57 +0100 Subject: VFS: Fix automount for negative autofs dentries Autofs may set the DCACHE_NEED_AUTOMOUNT flag on negative dentries. These need attention from the automounter daemon regardless of the LOOKUP_FOLLOW flag. 
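Stated as code, the rule for a no-FOLLOW lookup becomes "refuse to mount only for a plain stat of something that already exists"; a negative dentry always falls through to the daemon. Condensed restatement (illustrative; the full hunk follows):

	if (!(flags & LOOKUP_FOLLOW) &&
	    !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
		       LOOKUP_OPEN | LOOKUP_CREATE)) &&
	    path->dentry->d_inode)	/* negative: let the daemon instantiate it */
		return -EISDIR;
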
Signed-off-by: David Howells Acked-by: Ian Kent Signed-off-by: Al Viro --- fs/namei.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f8c69d3..445fd5d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -716,19 +716,25 @@ static int follow_automount(struct path *path, unsigned flags, if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) return -EISDIR; /* we actually want to stop here */ - /* We want to mount if someone is trying to open/create a file of any - * type under the mountpoint, wants to traverse through the mountpoint - * or wants to open the mounted directory. - * + /* * We don't want to mount if someone's just doing a stat and they've * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and * appended a '/' to the name. */ - if (!(flags & LOOKUP_FOLLOW) && - !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE))) - return -EISDIR; - + if (!(flags & LOOKUP_FOLLOW)) { + /* We do, however, want to mount if someone wants to open or + * create a file of any type under the mountpoint, wants to + * traverse through the mountpoint or wants to open the mounted + * directory. + * Also, autofs may mark negative dentries as being automount + * points. These will need the attentions of the daemon to + * instantiate them before they can be used. + */ + if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE)) && + path->dentry->d_inode) + return -EISDIR; + } current->total_link_count++; if (current->total_link_count >= 40) return -ELOOP; -- cgit v1.1 From b12362bdb61a230a67daa77bcd2a11e59b2802e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jul 2011 06:11:47 +0200 Subject: vfs: conditionally call inode_wb_list_del() Some inodes (pipes, sockets, ...) are not in bdi writeback list. evict() can avoid calling inode_wb_list_del() and its expensive spinlock by checking inode i_wb_list being empty or not. At this point, no other cpu/user can concurrently manipulate this inode i_wb_list Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index d0c72ff..9dab13a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -454,7 +454,9 @@ static void evict(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - inode_wb_list_del(inode); + if (!list_empty(&inode->i_wb_list)) + inode_wb_list_del(inode); + inode_sb_list_del(inode); if (op->evict_inode) { -- cgit v1.1 From f2ee7abf4c40c8e6bffced923a7c01ea2d1f6c97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jul 2011 06:41:09 +0200 Subject: vfs: avoid taking inode_hash_lock on pipes and sockets Some inodes (pipes, sockets, ...) are not hashed, no need to take contended inode_hash_lock at dismantle time. nice speedup on SMP machines on socket intensive workloads. Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- fs/inode.c | 6 +++--- include/linux/fs.h | 9 ++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 9dab13a..e445be2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -399,12 +399,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) EXPORT_SYMBOL(__insert_inode_hash); /** - * remove_inode_hash - remove an inode from the hash + * __remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. 
*/ -void remove_inode_hash(struct inode *inode) +void __remove_inode_hash(struct inode *inode) { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); @@ -412,7 +412,7 @@ void remove_inode_hash(struct inode *inode) spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } -EXPORT_SYMBOL(remove_inode_hash); +EXPORT_SYMBOL(__remove_inode_hash); void end_writeback(struct inode *inode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index f23bcb7..786b3b1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2317,11 +2317,18 @@ extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); -extern void remove_inode_hash(struct inode *); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } + +extern void __remove_inode_hash(struct inode *); +static inline void remove_inode_hash(struct inode *inode) +{ + if (!inode_unhashed(inode)) + __remove_inode_hash(inode); +} + extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -- cgit v1.1 From c4ae0c65455c1bb30d1b71c6dd9a1a62aadde8ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jul 2011 06:55:13 +0200 Subject: vfs: avoid call to inode_lru_list_del() if possible inode_lru_list_del() is expensive because of per superblock lru locking, while some inodes are not in lru list. Adding a check in iput_final() can speedup pipe/sockets workloads on SMP. Signed-off-by: Eric Dumazet Signed-off-by: Al Viro --- fs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index e445be2..5aab80d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1330,7 +1330,8 @@ static void iput_final(struct inode *inode) } inode->i_state |= I_FREEING; - inode_lru_list_del(inode); + if (!list_empty(&inode->i_lru)) + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); -- cgit v1.1 From 782b94cdf577b4df1feb376f372dccc28e66a771 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Jun 2011 11:01:45 +1000 Subject: block: initialise bd_super in bdget() bd_super is currently reset to NULL in kill_block_super() so we rely on previous users of the block_device object to initialise this value for the next user. This quirk was exposed on RHEL5 when a third party filesystem did not always use kill_block_super() and therefore bd_super wasn't being reset when a block_device object was recycled within the cache. This may not be a problem upstream but makes sense to be defensive. 
Signed-off-by: Lachlan McIlroy Reviewed-by: Eric Sandeen Signed-off-by: Al Viro --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index f55aad4..f286805 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -552,6 +552,7 @@ struct block_device *bdget(dev_t dev) if (inode->i_state & I_NEW) { bdev->bd_contains = NULL; + bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; -- cgit v1.1 From d3fb612076eebec6f67257db0c7a9666ac7e5892 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 18:37:50 -0400 Subject: switch posix_acl_create() to umode_t * so we can pass &inode->i_mode to it Signed-off-by: Al Viro --- fs/9p/acl.c | 4 ++-- fs/9p/acl.h | 4 ++-- fs/9p/vfs_inode_dotl.c | 6 +++--- fs/btrfs/acl.c | 5 +---- fs/ext2/acl.c | 4 +--- fs/ext3/acl.c | 5 +---- fs/ext4/acl.c | 5 +---- fs/generic_acl.c | 7 +++---- fs/gfs2/acl.c | 4 ++-- fs/jffs2/acl.c | 2 +- fs/jffs2/acl.h | 2 +- fs/jffs2/fs.c | 2 +- fs/jffs2/os-linux.h | 2 +- fs/jfs/acl.c | 4 +--- fs/nfs/nfs3acl.c | 2 +- fs/nfs/nfs3proc.c | 6 +++--- fs/ocfs2/acl.c | 2 +- fs/posix_acl.c | 6 +++--- fs/reiserfs/xattr_acl.c | 6 +----- fs/xfs/linux-2.6/xfs_acl.c | 4 ++-- include/linux/nfs_fs.h | 4 ++-- include/linux/posix_acl.h | 2 +- 22 files changed, 35 insertions(+), 53 deletions(-) diff --git a/fs/9p/acl.c b/fs/9p/acl.c index e9cb57f..ad734e3 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -182,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry, return 0; } -int v9fs_acl_mode(struct inode *dir, mode_t *modep, +int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { int retval = 0; - mode_t mode = *modep; + umode_t mode = *modep; struct posix_acl *acl = NULL; if (!S_ISLNK(mode)) { diff --git a/fs/9p/acl.h b/fs/9p/acl.h index ddb7ae1..5595564 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -20,7 +20,7 @@ extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, struct posix_acl **, struct posix_acl **); -extern int v9fs_acl_mode(struct inode *dir, mode_t *modep, +extern int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl); #else #define v9fs_iop_get_acl NULL @@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry, { return 0; } -static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep, +static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl) { diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 9a26dce..b6c8ed2 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -206,7 +206,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, int err = 0; gid_t gid; int flags; - mode_t mode; + umode_t mode; char *name = NULL; struct file *filp; struct p9_qid qid; @@ -348,7 +348,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct p9_fid *fid = NULL, *dfid = NULL; gid_t gid; char *name; - mode_t mode; + umode_t mode; struct inode *inode; struct p9_qid qid; struct dentry *dir_dentry; @@ -751,7 +751,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, int err; gid_t gid; char *name; - mode_t mode; + umode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; struct inode *inode; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 
65a735d..5908614 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -222,19 +222,16 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, } if (IS_POSIXACL(dir) && acl) { - mode_t mode = inode->i_mode; - if (S_ISDIR(inode->i_mode)) { ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_DEFAULT); if (ret) goto failed; } - ret = posix_acl_create(&acl, GFP_NOFS, &mode); + ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (ret < 0) return ret; - inode->i_mode = mode; if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 52c0537..0ce7404 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -253,16 +253,14 @@ ext2_init_acl(struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_KERNEL, &mode); + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 6c29bf0..74a3c64 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -261,19 +261,16 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - mode_t mode = inode->i_mode; - if (S_ISDIR(inode->i_mode)) { error = ext3_set_acl(handle, inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_NOFS, &mode); + error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index dca2d1d..74e469c 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -259,19 +259,16 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - mode_t mode = inode->i_mode; - if (S_ISDIR(inode->i_mode)) { error = ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - error = posix_acl_create(&acl, GFP_NOFS, &mode); + error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/generic_acl.c b/fs/generic_acl.c index d5e33a0..2dd434d 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -125,21 +125,20 @@ int generic_acl_init(struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; - mode_t mode = inode->i_mode; int error; - inode->i_mode = mode & ~current_umask(); if (!S_ISLNK(inode->i_mode)) acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); if (acl) { if (S_ISDIR(inode->i_mode)) set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); - error = posix_acl_create(&acl, GFP_KERNEL, &mode); + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); if (error < 0) return error; - inode->i_mode = mode; if (error > 0) set_cached_acl(inode, ACL_TYPE_ACCESS, acl); + } else { + inode->i_mode &= ~current_umask(); } error = 0; diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 884c9af..0ac3c53 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c 
@@ -72,7 +72,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) return gfs2_acl_get(GFS2_I(inode), type); } -static int gfs2_set_mode(struct inode *inode, mode_t mode) +static int gfs2_set_mode(struct inode *inode, umode_t mode) { int error = 0; @@ -117,7 +117,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct posix_acl *acl; - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; int error = 0; if (!sdp->sd_args.ar_posix_acl) diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 27c511a..6372a84 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -259,7 +259,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return rc; } -int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode) +int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode) { struct posix_acl *acl; int rc; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index b3421c7..9b47724 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { struct posix_acl *jffs2_get_acl(struct inode *inode, int type); extern int jffs2_acl_chmod(struct inode *); -extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *); +extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); extern int jffs2_init_acl_post(struct inode *); extern const struct xattr_handler jffs2_acl_access_xattr_handler; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index b81b35d..bbcb975 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, fill in the raw_inode while you're at it. 
*/ -struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri) +struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri) { struct inode *inode; struct super_block *sb = dir_i->i_sb; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 526979c..6c1755c 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode, int flags); -struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, +struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); int jffs2_remount_fs (struct super_block *, int *, char *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index b3a32ca..45559dc 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -127,16 +127,14 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) return PTR_ERR(acl); if (acl) { - mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); if (rc) goto cleanup; } - rc = posix_acl_create(&acl, GFP_KERNEL, &mode); + rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); if (rc < 0) goto cleanup; /* posix_acl_release(NULL) is no-op */ - inode->i_mode = mode; if (rc > 0) rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); cleanup: diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index e49e731..7ef2397 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -415,7 +415,7 @@ fail: } int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - mode_t mode) + umode_t mode) { struct posix_acl *dfacl, *acl; int error = 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 38053d8..85f1690 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags, struct nfs_open_context *ctx) { struct nfs3_createdata *data; - mode_t mode = sattr->ia_mode; + umode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call create %s\n", dentry->d_name.name); @@ -562,7 +562,7 @@ static int nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { struct nfs3_createdata *data; - int mode = sattr->ia_mode; + umode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call mkdir %s\n", dentry->d_name.name); @@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { struct nfs3_createdata *data; - mode_t mode = sattr->ia_mode; + umode_t mode = sattr->ia_mode; int status = -ENOMEM; dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 783c58d..fbafc6e 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -351,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0, ret2; - mode_t mode; + umode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { diff --git a/fs/posix_acl.c b/fs/posix_acl.c index d43729a7..f0a017e 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -279,11 +279,11 @@ check_perm: * system calls. All permissions that are not granted by the acl are removed. * The permissions in the acl are changed to reflect the mode_p parameter. 
*/ -static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) +static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) { struct posix_acl_entry *pa, *pe; struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; - mode_t mode = *mode_p; + umode_t mode = *mode_p; int not_equiv = 0; /* assert(atomic_read(acl->a_refcount) == 1); */ @@ -382,7 +382,7 @@ static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) } int -posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) +posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 7362cf4..89ebc77 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -354,8 +354,6 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, return PTR_ERR(acl); if (acl) { - mode_t mode = inode->i_mode; - /* Copy the default ACL to the default ACL of a new directory */ if (S_ISDIR(inode->i_mode)) { err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, @@ -366,12 +364,10 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, /* Now we reconcile the new ACL and the mode, potentially modifying both */ - err = posix_acl_create(&acl, GFP_NOFS, &mode); + err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); if (err < 0) return err; - inode->i_mode = mode; - /* If we need an ACL.. */ if (err > 0) err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 44ce516..bb85500 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -221,7 +221,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } static int -xfs_set_mode(struct inode *inode, mode_t mode) +xfs_set_mode(struct inode *inode, umode_t mode) { int error = 0; @@ -267,7 +267,7 @@ posix_acl_default_exists(struct inode *inode) int xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; int error = 0, inherit = 0; if (S_ISDIR(inode->i_mode)) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8b579be..dda2ac8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -568,12 +568,12 @@ extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type); extern int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl); extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - mode_t mode); + umode_t mode); extern void nfs3_forget_cached_acls(struct inode *inode); #else static inline int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - mode_t mode) + umode_t mode) { return 0; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 9a53b99..bd8d005 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -77,7 +77,7 @@ extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); -extern int posix_acl_create(struct posix_acl **, gfp_t, mode_t *); +extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); -- cgit v1.1 From 
d6952123b53cc8b334df69bba2cd0063b0d88f68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 18:56:36 -0400 Subject: switch posix_acl_equiv_mode() to umode_t * ... so that &inode->i_mode could be passed to it Signed-off-by: Al Viro --- fs/9p/acl.c | 2 +- fs/btrfs/acl.c | 5 +---- fs/ext2/acl.c | 4 +--- fs/ext3/acl.c | 4 +--- fs/ext4/acl.c | 4 +--- fs/generic_acl.c | 6 +----- fs/gfs2/acl.c | 2 +- fs/jffs2/acl.c | 2 +- fs/jfs/xattr.c | 4 +--- fs/ocfs2/acl.c | 2 +- fs/posix_acl.c | 4 ++-- fs/reiserfs/xattr_acl.c | 4 +--- fs/xfs/linux-2.6/xfs_acl.c | 2 +- include/linux/posix_acl.h | 2 +- 14 files changed, 15 insertions(+), 32 deletions(-) diff --git a/fs/9p/acl.c b/fs/9p/acl.c index ad734e3..9a1d426 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -319,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; retval = posix_acl_equiv_mode(acl, &mode); if (retval < 0) goto err_out; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 5908614..4cc5c01 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -111,7 +111,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, int ret, size = 0; const char *name; char *value = NULL; - mode_t mode; if (acl) { ret = posix_acl_valid(acl); @@ -122,13 +121,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, switch (type) { case ACL_TYPE_ACCESS: - mode = inode->i_mode; name = POSIX_ACL_XATTR_ACCESS; if (acl) { - ret = posix_acl_equiv_mode(acl, &mode); + ret = posix_acl_equiv_mode(acl, &inode->i_mode); if (ret < 0) return ret; - inode->i_mode = mode; } ret = 0; break; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 0ce7404..35d6a3c 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -194,12 +194,10 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); if (error == 0) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 74a3c64..3091f62 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; inode->i_ctime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); if (error == 0) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 74e469c..a5c29bb 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -198,12 +198,10 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); if (error == 0) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 2dd434d..d0dddac 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -82,18 +82,14 @@ generic_acl_set(struct dentry *dentry, const 
char *name, const void *value, return PTR_ERR(acl); } if (acl) { - mode_t mode; - error = posix_acl_valid(acl); if (error) goto failed; switch (type) { case ACL_TYPE_ACCESS: - mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) goto failed; - inode->i_mode = mode; inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 0ac3c53..34501b6 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -276,7 +276,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 6372a84..926d020 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) case ACL_TYPE_ACCESS: xprefix = JFFS2_XPREFIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; rc = posix_acl_equiv_mode(acl, &mode); if (rc < 0) return rc; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 24838f1..e87fede 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -693,8 +693,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return rc; } if (acl) { - mode_t mode = inode->i_mode; - rc = posix_acl_equiv_mode(acl, &mode); + rc = posix_acl_equiv_mode(acl, &inode->i_mode); posix_acl_release(acl); if (rc < 0) { printk(KERN_ERR @@ -702,7 +701,6 @@ static int can_set_system_xattr(struct inode *inode, const char *name, rc); return rc; } - inode->i_mode = mode; mark_inode_dirty(inode); } /* diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index fbafc6e..a721907 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle, case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; ret = posix_acl_equiv_mode(acl, &mode); if (ret < 0) return ret; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f0a017e..3d943be 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -149,10 +149,10 @@ posix_acl_valid(const struct posix_acl *acl) * file mode permission bits, or else 1. Returns -E... on error. 
*/ int -posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) +posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) { const struct posix_acl_entry *pa, *pe; - mode_t mode = 0; + umode_t mode = 0; int not_equiv = 0; FOREACH_ACL_ENTRY(pa, acl, pe) { diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 89ebc77..6da0396e 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -272,12 +272,10 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; if (error == 0) acl = NULL; } diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index bb85500..b6c4b37 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -381,7 +381,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, goto out_release; if (type == ACL_TYPE_ACCESS) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); if (error <= 0) { diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index bd8d005..529c32a 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -76,7 +76,7 @@ extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); -extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); +extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); -- cgit v1.1 From 3a5fba19b080b365d67866db38e32e6a4a2089e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 19:01:48 -0400 Subject: switch posix_acl_from_mode() to umode_t ... seeing that this is what all callers pass to it anyway. Signed-off-by: Al Viro --- fs/posix_acl.c | 2 +- include/linux/posix_acl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3d943be..4e16e80 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -188,7 +188,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) * Create an ACL representing the file mode permission bits of an inode. 
*/ struct posix_acl * -posix_acl_from_mode(mode_t mode, gfp_t flags) +posix_acl_from_mode(umode_t mode, gfp_t flags) { struct posix_acl *acl = posix_acl_alloc(3, flags); if (!acl) diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 529c32a..16ecfb4 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -75,7 +75,7 @@ extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern int posix_acl_valid(const struct posix_acl *); extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); -extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); +extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); -- cgit v1.1 From 86bc704db0ab7e69230f79bc7d124e063259abc6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jul 2011 19:03:11 -0400 Subject: switch posix_acl_chmod() to umode_t again, that's what all callers pass to it Signed-off-by: Al Viro --- fs/posix_acl.c | 4 ++-- include/linux/posix_acl.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4e16e80..10027b4 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -336,7 +336,7 @@ static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) /* * Modify the ACL for the chmod syscall. */ -static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) +static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode) { struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; struct posix_acl_entry *pa, *pe; @@ -400,7 +400,7 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p) EXPORT_SYMBOL(posix_acl_create); int -posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode) +posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode) { struct posix_acl *clone = posix_acl_clone(*acl, gfp); int err = -ENOMEM; diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 16ecfb4..951bba8 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -78,7 +78,7 @@ extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *); -extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); +extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *); -- cgit v1.1 From 35f40ef00204c456f5c181c0e7f54e25bb93cd49 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Jun 2011 14:09:10 +0100 Subject: VFS: Remove detached-dentry counter from shrink_dcache_for_umount_subtree() Remove the detached-dentry counter from shrink_dcache_for_umount_subtree() as the value it computes is no longer used as of commit 312d3ca856d369bb04d0443846b85b4cdde6fa8a which made the nr_dentry counters summed per-CPU rather than global atomic. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index b05aac3..7559057 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -828,7 +828,6 @@ EXPORT_SYMBOL(shrink_dcache_sb); static void shrink_dcache_for_umount_subtree(struct dentry *dentry) { struct dentry *parent; - unsigned detached = 0; BUG_ON(!IS_ROOT(dentry)); @@ -892,8 +891,6 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) spin_unlock(&parent->d_lock); } - detached++; - inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; -- cgit v1.1 From c6627c60c07c43b51ef88e352627fa786d1e1592 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Jun 2011 14:09:20 +0100 Subject: VFS: Remove dentry->d_lock locking from shrink_dcache_for_umount_subtree() Locks of the dcache_lock were replaced by locks of dentry->d_lock in commits such as: 2304450783dfde7b0b94ae234edd0dbffa865073 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc as part of the RCU-based pathwalk changes, despite the fact that the caller (shrink_dcache_for_umount()) notes in the banner comment the reasons that d_lock is not necessary in these functions: /* * destroy the dentries attached to a superblock on unmounting * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore * any dentries belonging to this superblock that it comes across * - the filesystem itself is no longer permitted to rearrange the dentries * in this superblock */ So remove these locks. If the locks are actually necessary, then this banner comment should be altered instead. The hash table chains are protected by 1-bit locks in the hash table heads, so those shouldn't be a problem. Note that to make this work, __d_drop() has to be split so that the RCUwalk barrier can be avoided. This causes problems otherwise as it has an assertion that dentry->d_lock is locked - but there is no need for that as no one else can be trying to access this dentry, except to step over it (and that should be handled by d_free(), I think). Signed-off-by: David Howells Cc: Nick Piggin Signed-off-by: Al Viro --- fs/dcache.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 7559057..9df8b86 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -301,6 +301,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) return parent; } +/* + * Unhash a dentry without inserting an RCU walk barrier or checking that + * dentry->d_lock is locked. The caller must take care of that, if + * appropriate. 
+ */ +static void __d_shrink(struct dentry *dentry) +{ + if (!d_unhashed(dentry)) { + struct hlist_bl_head *b; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); + } +} + /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -319,17 +340,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - + __d_shrink(dentry); dentry_rcuwalk_barrier(dentry); } } @@ -832,10 +843,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dentry->d_lock); dentry_lru_del(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); + __d_shrink(dentry); for (;;) { /* descend to the first leaf in the current subtree */ @@ -844,16 +853,11 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - spin_lock_nested(&loop->d_lock, - DENTRY_D_LOCK_NESTED); dentry_lru_del(loop); - __d_drop(loop); - spin_unlock(&loop->d_lock); + __d_shrink(loop); } - spin_unlock(&dentry->d_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -885,10 +889,8 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) list_del(&dentry->d_u.d_child); } else { parent = dentry->d_parent; - spin_lock(&parent->d_lock); parent->d_count--; list_del(&dentry->d_u.d_child); - spin_unlock(&parent->d_lock); } inode = dentry->d_inode; @@ -935,9 +937,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - spin_lock(&dentry->d_lock); dentry->d_count--; - spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { -- cgit v1.1 From 43c1c9cd244098012441b90c32304f11f1258d43 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Jun 2011 14:09:30 +0100 Subject: VFS: Reorganise shrink_dcache_for_umount_subtree() after demise of dcache_lock Reorganise shrink_dcache_for_umount_subtree() in light of the demise of dcache_lock. Without that dcache_lock, there is no need for the batching of removal of dentries from the system under it (we wanted to make intensive use of the locked data whilst we held it, but didn't want to hold it for long at a time). This works, provided the preceding patch is correct in its removal of locking on dentry->d_lock on the basis that no one should be locking these dentries any more as the whole superblock is defunct. With this patch, the calls to dentry_lru_del() and __d_shrink() are placed at the point where each dentry is detached handled. It is possible that, as an alternative, the batching should still be done - but only for dentry_lru_del() of all a dentry's children in one go. In such a case, the batching would be done under dcache_lru_lock. 
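The batching alternative mentioned above would look roughly like the sketch below. This is hypothetical and not part of the patch; it assumes fs/dcache.c's dcache_lru_lock and a lock-held variant of dentry_lru_del() (the helper name __dentry_lru_del() is an assumption):

	/* hypothetical: drop all of a dentry's children from the LRU in one batch */
	spin_lock(&dcache_lru_lock);
	list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) {
		if (!list_empty(&loop->d_lru))
			__dentry_lru_del(loop);	/* assumed lock-held variant */
	}
	spin_unlock(&dcache_lru_lock);
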
Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 9df8b86..2347cdb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -842,33 +842,21 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); - /* detach this root from the system */ - dentry_lru_del(dentry); - __d_shrink(dentry); - for (;;) { /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) { - struct dentry *loop; - - /* this is a branch with children - detach all of them - * from the system in one go */ - list_for_each_entry(loop, &dentry->d_subdirs, - d_u.d_child) { - dentry_lru_del(loop); - __d_shrink(loop); - } - - /* move to the first child */ + while (!list_empty(&dentry->d_subdirs)) dentry = list_entry(dentry->d_subdirs.next, struct dentry, d_u.d_child); - } /* consume the dentries from this leaf up through its parents * until we find one with children or run out altogether */ do { struct inode *inode; + /* detach from the system */ + dentry_lru_del(dentry); + __d_shrink(dentry); + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" -- cgit v1.1 From 206d440f64030b6425841bf7cb38e26a5ea0c382 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Tue, 26 Jul 2011 11:15:20 +0200 Subject: xfs: Fix build breakage in xfs_iops.c when CONFIG_FS_POSIX_ACL is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4e34e719e45, that takes the ACL checks to common code, accidentely broke the build when CONFIG_FS_POSIX_ACL is not set: CC fs/xfs/linux-2.6/xfs_iops.o fs/xfs/linux-2.6/xfs_iops.c:1025:14: error: ‘xfs_get_acl’ undeclared here (not in a function) Fix this by declaring xfs_get_acl a static inline function. Signed-off-by: Markus Trippelsdorf Signed-off-by: Al Viro --- fs/xfs/xfs_acl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 2c656ef..39632d9 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -51,7 +51,10 @@ extern int posix_acl_default_exists(struct inode *inode); extern const struct xattr_handler xfs_xattr_acl_access_handler; extern const struct xattr_handler xfs_xattr_acl_default_handler; #else -# define xfs_get_acl(inode, type) NULL +static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) +{ + return NULL; +} # define xfs_inherit_acl(inode, default_acl) 0 # define xfs_acl_chmod(inode) 0 # define posix_acl_access_exists(inode) 0 -- cgit v1.1 From dc889f18646325d07eb24ef634f082d60b0beebb Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 31 Jul 2011 23:16:43 +0200 Subject: ALSA: asihpi - Don't leak firmware if mem alloc fails We leak the memory allocated to 'firmware' when we fail to release_firmware() after a kmalloc() failure in hpi_dsp_code_open(). This patch should take care of the leak. 
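The shape of the fix is the usual kernel error-unwind idiom: keep a single error variable, jump to a label that releases whatever has already been acquired, and return that variable from one place. A minimal sketch of the idiom under hypothetical names (demo_load(), struct demo_state and "demo/fw.bin" are made up here; the real driver returns its own HPI_ERROR_* codes):

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/firmware.h>
#include <linux/slab.h>

struct demo_state {
        const struct firmware *fw;
};

static int demo_load(struct device *parent, struct demo_state **out)
{
        const struct firmware *fw;
        struct demo_state *st;
        int err;

        err = request_firmware(&fw, "demo/fw.bin", parent);
        if (err)
                return err;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st) {
                err = -ENOMEM;
                goto err_fw;    /* must not return before releasing fw */
        }

        st->fw = fw;
        *out = st;
        return 0;

err_fw:
        release_firmware(fw);
        return err;
}

Every failure path after request_firmware() funnels through err_fw, so the firmware reference cannot be leaked the way an early "return HPI_ERROR_MEMORY_ALLOC" could.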
Signed-off-by: Jesper Juhl Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpidspcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c index 3a7afa3..71d32c8 100644 --- a/sound/pci/asihpi/hpidspcd.c +++ b/sound/pci/asihpi/hpidspcd.c @@ -43,6 +43,7 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, struct pci_dev *dev = os_data; struct code_header header; char fw_name[20]; + short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND; int err; sprintf(fw_name, "asihpi/dsp%04x.bin", adapter); @@ -85,8 +86,10 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name); dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL); - if (!dsp_code->pvt) - return HPI_ERROR_MEMORY_ALLOC; + if (!dsp_code->pvt) { + err_ret = HPI_ERROR_MEMORY_ALLOC; + goto error2; + } dsp_code->pvt->dev = dev; dsp_code->pvt->firmware = firmware; @@ -99,7 +102,7 @@ error2: release_firmware(firmware); error1: dsp_code->block_length = 0; - return HPI_ERROR_DSP_FILE_NOT_FOUND; + return err_ret; } /*-------------------------------------------------------------------*/ -- cgit v1.1 From 2a1690d8cd5b7a4271f993f59a4484d41e2884c3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Aug 2011 02:23:15 -0700 Subject: MAINTAINERS: Remove Pekka Savola from ipv6. Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a99fed1..5d4ceba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4443,7 +4443,6 @@ F: include/linux/netdevice.h NETWORKING [IPv4/IPv6] M: "David S. Miller" M: Alexey Kuznetsov -M: "Pekka Savola (ipv6)" M: James Morris M: Hideaki YOSHIFUJI M: Patrick McHardy -- cgit v1.1 From a1889c0d2039a53ae04abb9f20c62500bd312bf3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 Jul 2011 02:46:01 +0000 Subject: net: adjust array index Convert array index from the loop bound to the loop index. A simplified version of the semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e1,e2,ar; @@ for(e1 = 0; e1 < e2; e1++) { <... ar[ - e2 + e1 ] ...> } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f1d27f6..283c0a2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; for (j=0; jsfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; -- cgit v1.1 From 956837f7c954443f426a82ba6f17b33488cf9a0c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 Jul 2011 02:46:03 +0000 Subject: drivers/net/niu.c: adjust array index Convert array index from the loop bound to the loop index. A simplified version of the semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e1,e2,ar; @@ for(e1 = 0; e1 < e2; e1++) { <... ar[ - e2 + e1 ] ...> } // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- drivers/net/niu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index cd6c231..ed47585 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -9201,7 +9201,7 @@ static int __devinit niu_ldg_init(struct niu *np) first_chan = 0; for (i = 0; i < port; i++) - first_chan += parent->rxchan_per_port[port]; + first_chan += parent->rxchan_per_port[i]; num_chan = parent->rxchan_per_port[port]; for (i = first_chan; i < (first_chan + num_chan); i++) { @@ -9217,7 +9217,7 @@ static int __devinit niu_ldg_init(struct niu *np) first_chan = 0; for (i = 0; i < port; i++) - first_chan += parent->txchan_per_port[port]; + first_chan += parent->txchan_per_port[i]; num_chan = parent->txchan_per_port[port]; for (i = first_chan; i < (first_chan + num_chan); i++) { err = niu_ldg_assign_ldn(np, parent, -- cgit v1.1 From e1738bd9cecc5c867b0e2996470c1ff20f66ba79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 19:22:42 +0000 Subject: sch_sfq: fix sfq_enqueue() commit 8efa88540635 (sch_sfq: avoid giving spurious NET_XMIT_CN signals) forgot to call qdisc_tree_decrease_qlen() to signal upper levels that a packet (from another flow) was dropped, leading to various problems. With help from Michal Soltys and Michal Pokrywka, who did a bisection. Bugzilla ref: https://bugzilla.kernel.org/show_bug.cgi?id=39372 Debian ref: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=631945 Reported-by: Lucas Bocchi Reported-and-bisected-by: Michal Pokrywka Signed-off-by: Eric Dumazet CC: Michal Soltys Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4536ee6..4f5510e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* Return Congestion Notification only if we dropped a packet * from this flow. */ - return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS; + if (qlen != slot->qlen) + return NET_XMIT_CN; + + /* As we dropped a packet, better let upper stack know this */ + qdisc_tree_decrease_qlen(sch, 1); + return NET_XMIT_SUCCESS; } static struct sk_buff * -- cgit v1.1 From 33853a0dde359ded0534204eb6857ad5166d515b Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Mon, 1 Aug 2011 06:32:19 -0400 Subject: ext4: use the correct error exit path in ext4_init_inode_table() This patch lets ext4_init_inode_table() handle errors right. ext4_init_inode_table() should down_write() alloc_sem which has been up_write()ed and stop the started journal handle. Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 21bb2f6..9c63f27 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1287,7 +1287,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, group, used_blks, ext4_itable_unused_count(sb, gdp)); ret = 1; - goto out; + goto err_out; } blk = ext4_inode_table(sb, gdp) + used_blks; -- cgit v1.1 From 28c1b6d60e3ad0aecf48aaefc6995904e2ab1b9e Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 22 Jul 2011 22:47:21 +0300 Subject: of: address: use resource_size helper that should be the approved way of calculating the size of resources. No functional changes. 
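resource_size(), from <linux/ioport.h>, evaluates to res->end - res->start + 1 for the inclusive [start, end] range a struct resource describes, which is exactly the open-coded expression being replaced. A minimal sketch of the equivalence, with a hypothetical demo_iomap() standing in for of_iomap():

#include <linux/io.h>
#include <linux/ioport.h>

static void __iomem *demo_iomap(const struct resource *res)
{
        /* same effect as ioremap(res->start, 1 + res->end - res->start) */
        return ioremap(res->start, resource_size(res));
}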
Cc: Grant Likely Signed-off-by: Felipe Balbi Signed-off-by: Grant Likely --- drivers/of/address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index da1f4b9..72c33fb 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -610,6 +610,6 @@ void __iomem *of_iomap(struct device_node *np, int index) if (of_address_to_resource(np, index, &res)) return NULL; - return ioremap(res.start, 1 + res.end - res.start); + return ioremap(res.start, resource_size(&res)); } EXPORT_SYMBOL(of_iomap); -- cgit v1.1 From 9933fc0ac1ac14b795819cd63d05ea92112f690a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 08:45:02 -0400 Subject: ext4: introduce ext4_kvmalloc(), ext4_kzalloc(), and ext4_kvfree() Introduce new helper functions which try kmalloc, and then fall back to vmalloc if necessary, and use them for allocating and deallocating s_flex_groups. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/super.c | 54 ++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ba2009b..db9fead 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1874,6 +1874,9 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern void *ext4_kvmalloc(size_t size, gfp_t flags); +extern void *ext4_kvzalloc(size_t size, gfp_t flags); +extern void ext4_kvfree(void *ptr); extern void __ext4_error(struct super_block *, const char *, unsigned int, const char *, ...) __attribute__ ((format (printf, 4, 5))); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfe9f39..658f586 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -110,6 +110,35 @@ static struct file_system_type ext3_fs_type = { #define IS_EXT3_SB(sb) (0) #endif +void *ext4_kvmalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kmalloc(size, flags); + if (!ret) + ret = __vmalloc(size, flags, PAGE_KERNEL); + return ret; +} + +void *ext4_kvzalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kmalloc(size, flags); + if (!ret) + ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); + return ret; +} + +void ext4_kvfree(void *ptr) +{ + if (is_vmalloc_addr(ptr)) + vfree(ptr); + else + kfree(ptr); + +} + ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { @@ -791,10 +820,7 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); - if (is_vmalloc_addr(sbi->s_flex_groups)) - vfree(sbi->s_flex_groups); - else - kfree(sbi->s_flex_groups); + ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -1977,15 +2003,11 @@ static int ext4_fill_flex_info(struct super_block *sb) ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; size = flex_group_count * sizeof(struct flex_groups); - sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); + sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); if (sbi->s_flex_groups == NULL) { - sbi->s_flex_groups = vzalloc(size); - if (sbi->s_flex_groups == NULL) { - ext4_msg(sb, KERN_ERR, - "not enough memory for %u flex groups", - flex_group_count); - goto failed; - } + ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", + flex_group_count); + goto failed; } for (i = 0; i < 
sbi->s_groups_count; i++) { @@ -3750,12 +3772,8 @@ failed_mount_wq: } failed_mount3: del_timer(&sbi->s_err_report); - if (sbi->s_flex_groups) { - if (is_vmalloc_addr(sbi->s_flex_groups)) - vfree(sbi->s_flex_groups); - else - kfree(sbi->s_flex_groups); - } + if (sbi->s_flex_groups) + ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); -- cgit v1.1 From f18a5f21c25707b4fe64b326e2b4d150565e7300 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 08:45:38 -0400 Subject: ext4: use ext4_kvzalloc()/ext4_kvmalloc() for s_group_desc and s_group_info Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 6 +++--- fs/ext4/resize.c | 13 +++++++------ fs/ext4/super.c | 9 +++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fa716c9..d5021e8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2331,7 +2331,7 @@ static int ext4_mb_init_backend(struct super_block *sb) /* An 8TB filesystem with 64-bit pointers requires a 4096 byte * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. * So a two level scheme suffices for now. */ - sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); + sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); if (sbi->s_group_info == NULL) { printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); return -ENOMEM; @@ -2365,7 +2365,7 @@ err_freebuddy: kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); err_freesgi: - kfree(sbi->s_group_info); + ext4_kvfree(sbi->s_group_info); return -ENOMEM; } @@ -2559,7 +2559,7 @@ int ext4_mb_release(struct super_block *sb) EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) kfree(sbi->s_group_info[i]); - kfree(sbi->s_group_info); + ext4_kvfree(sbi->s_group_info); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 71085df..707d3f1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -467,12 +467,13 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, if (unlikely(err)) goto exit_dindj; - n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_NOFS); + n_group_desc = ext4_kvmalloc((gdb_num + 1) * + sizeof(struct buffer_head *), + GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; - ext4_warning(sb, - "not enough memory for %lu groups", gdb_num + 1); + ext4_warning(sb, "not enough memory for %lu groups", + gdb_num + 1); goto exit_inode; } @@ -507,7 +508,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, n_group_desc[gdb_num] = gdb_bh; EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; - kfree(o_group_desc); + ext4_kvfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); @@ -517,7 +518,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: - kfree(n_group_desc); + ext4_kvfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 658f586..e2d88ba 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -819,7 +819,7 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + ext4_kvfree(sbi->s_group_desc); ext4_kvfree(sbi->s_flex_groups); 
percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -3439,8 +3439,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), - GFP_KERNEL); + sbi->s_group_desc = ext4_kvmalloc(db_count * + sizeof(struct buffer_head *), + GFP_KERNEL); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; @@ -3783,7 +3784,7 @@ failed_mount3: failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + ext4_kvfree(sbi->s_group_desc); failed_mount: if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); -- cgit v1.1 From 5f9288af065537be1fe6d1d5bd54ba51d6df721a Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 13 Jul 2011 15:46:21 +0200 Subject: input: xilinx_ps2: Add missing of_address.h header Add missing header. Error log: CC drivers/input/serio/xilinx_ps2.o drivers/input/serio/xilinx_ps2.c: In function 'xps2_of_probe': drivers/input/serio/xilinx_ps2.c:249: error: implicit declaration of function 'of_address_to_resource' Signed-off-by: Michal Simek Signed-off-by: Grant Likely --- drivers/input/serio/xilinx_ps2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c index 80baa53..d64c5a4 100644 --- a/drivers/input/serio/xilinx_ps2.c +++ b/drivers/input/serio/xilinx_ps2.c @@ -23,7 +23,7 @@ #include #include #include - +#include #include #include -- cgit v1.1 From c1227340ca65c2069222a956a68b6842d460c4f4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Jul 2011 15:01:02 +0200 Subject: ath9k: initialize tx chainmask before testing channel tx power values With an uninitialized chainmask, the per-channel power will only contain the power limits for a single chain instead of the combined tx power. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index dceaa4a..2e96346 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -669,8 +669,10 @@ static void ath9k_init_band_txpower(struct ath_softc *sc, int band) static void ath9k_init_txpower_limits(struct ath_softc *sc) { struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(sc->sc_ah); struct ath9k_channel *curchan = ah->curchan; + ah->txchainmask = common->tx_chainmask; if (ah->caps.hw_caps & ATH9K_HW_CAP_2GHZ) ath9k_init_band_txpower(sc, IEEE80211_BAND_2GHZ); if (ah->caps.hw_caps & ATH9K_HW_CAP_5GHZ) -- cgit v1.1 From 17e859a899712d16c3e70b045d61ad9e02c53f8a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 27 Jul 2011 15:37:43 +0200 Subject: iwlegacy: set tx power after rxon_assoc If settings of tx power was deferred during scan or changing channel we have to setup them during commit rxon. Fix problem on 3945 (4965 already has this fix). Optimize code to apply tx settings only when tx power was actually changed. Cc: stable@kernel.org # 2.6.39+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlegacy/iwl-3945.c | 6 +++++- drivers/net/wireless/iwlegacy/iwl-4965.c | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/iwl-3945.c b/drivers/net/wireless/iwlegacy/iwl-3945.c index dab67a1..73fe3cd 100644 --- a/drivers/net/wireless/iwlegacy/iwl-3945.c +++ b/drivers/net/wireless/iwlegacy/iwl-3945.c @@ -1746,7 +1746,11 @@ int iwl3945_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx) } memcpy(active_rxon, staging_rxon, sizeof(*active_rxon)); - + /* + * We do not commit tx power settings while channel changing, + * do it now if tx power changed. + */ + iwl_legacy_set_tx_power(priv, priv->tx_power_next, false); return 0; } diff --git a/drivers/net/wireless/iwlegacy/iwl-4965.c b/drivers/net/wireless/iwlegacy/iwl-4965.c index bd4b000..ecdc6e5 100644 --- a/drivers/net/wireless/iwlegacy/iwl-4965.c +++ b/drivers/net/wireless/iwlegacy/iwl-4965.c @@ -1235,7 +1235,12 @@ static int iwl4965_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *c memcpy(active_rxon, &ctx->staging, sizeof(*active_rxon)); iwl_legacy_print_rx_config_cmd(priv, ctx); - goto set_tx_power; + /* + * We do not commit tx power settings while channel changing, + * do it now if tx power changed. + */ + iwl_legacy_set_tx_power(priv, priv->tx_power_next, false); + return 0; } /* If we are currently associated and the new config requires @@ -1315,7 +1320,6 @@ static int iwl4965_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *c iwl4965_init_sensitivity(priv); -set_tx_power: /* If we issue a new RXON command which required a tune then we must * send a new TXPOWER command or we won't be able to Tx any frames */ ret = iwl_legacy_set_tx_power(priv, priv->tx_power_next, true); -- cgit v1.1 From 84404623da45aac04595a8f5760a58df0e955d87 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Jul 2011 11:52:18 +0300 Subject: cfg80211: off by one in nl80211_trigger_scan() The test is off by one so we'd read past the end of the wiphy->bands[] array on the next line. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 28d2aa1..e83e7fe 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3464,7 +3464,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) tmp) { enum ieee80211_band band = nla_type(attr); - if (band < 0 || band > IEEE80211_NUM_BANDS) { + if (band < 0 || band >= IEEE80211_NUM_BANDS) { err = -EINVAL; goto out_free; } -- cgit v1.1 From d4930086bdd0c08a8b3a4d66a9c702297cb74a99 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 29 Jul 2011 15:59:08 +0200 Subject: ath9k: skip ->config_pci_powersave() if PCIe port has ASPM disabled We receive many bug reports about system hang during suspend/resume when ath9k driver is in use. Adrian Chadd remarked that this problem happens on systems that have ASPM disabled. To do not hit the bug, skip doing ->config_pci_powersave magic if PCIe downstream port device, which ath9k device is connected to, has ASPM disabled. Bug was introduced by: commit 53bc7aa08b48e5cd745f986731cc7dc24eef2a9f Author: Vivek Natarajan Date: Mon Apr 5 14:48:04 2010 +0530 ath9k: Add support for newer AR9285 chipsets. 
Patch should address: https://bugzilla.kernel.org/show_bug.cgi?id=37462 https://bugzilla.kernel.org/show_bug.cgi?id=37082 https://bugzilla.redhat.com/show_bug.cgi?id=697157 however I did not receive confirmation about that, except from Camilo Mesias, whose system stops hang regularly with this patch (but still hangs from time to time, but this is probably some other bug). Tested-by: Camilo Mesias Cc: stable@kernel.org # 2.6.35+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9002_hw.c | 6 +----- drivers/net/wireless/ath/ath9k/ar9003_hw.c | 6 +----- drivers/net/wireless/ath/ath9k/hw.c | 11 +++++++++-- drivers/net/wireless/ath/ath9k/hw.h | 3 ++- drivers/net/wireless/ath/ath9k/pci.c | 27 +++++++++++++++++++++++++++ 5 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9002_hw.c b/drivers/net/wireless/ath/ath9k/ar9002_hw.c index 9ff7c30..44d9d8d 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_hw.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_hw.c @@ -309,11 +309,7 @@ static void ar9002_hw_configpcipowersave(struct ath_hw *ah, u8 i; u32 val; - if (ah->is_pciexpress != true) - return; - - /* Do not touch SerDes registers */ - if (ah->config.pcie_powersave_enable == 2) + if (ah->is_pciexpress != true || ah->aspm_enabled != true) return; /* Nothing to do on restore for 11N */ diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c index 8efdec2..ad2bb2b 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c @@ -519,11 +519,7 @@ static void ar9003_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off) { - if (ah->is_pciexpress != true) - return; - - /* Do not touch SerDes registers */ - if (ah->config.pcie_powersave_enable == 2) + if (ah->is_pciexpress != true || ah->aspm_enabled != true) return; /* Nothing to do on restore for 11N */ diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 8006ce0..8dcefe7 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -318,6 +318,14 @@ static void ath9k_hw_disablepcie(struct ath_hw *ah) REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000); } +static void ath9k_hw_aspm_init(struct ath_hw *ah) +{ + struct ath_common *common = ath9k_hw_common(ah); + + if (common->bus_ops->aspm_init) + common->bus_ops->aspm_init(common); +} + /* This should work for all families including legacy */ static bool ath9k_hw_chip_test(struct ath_hw *ah) { @@ -378,7 +386,6 @@ static void ath9k_hw_init_config(struct ath_hw *ah) ah->config.additional_swba_backoff = 0; ah->config.ack_6mb = 0x0; ah->config.cwm_ignore_extcca = 0; - ah->config.pcie_powersave_enable = 0; ah->config.pcie_clock_req = 0; ah->config.pcie_waen = 0; ah->config.analog_shiftreg = 1; @@ -598,7 +605,7 @@ static int __ath9k_hw_init(struct ath_hw *ah) if (ah->is_pciexpress) - ath9k_hw_configpcipowersave(ah, 0, 0); + ath9k_hw_aspm_init(ah); else ath9k_hw_disablepcie(ah); diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 6acd0f9..c798890 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -219,7 +219,6 @@ struct ath9k_ops_config { int additional_swba_backoff; int ack_6mb; u32 cwm_ignore_extcca; - u8 pcie_powersave_enable; bool pcieSerDesWrite; u8 pcie_clock_req; u32 pcie_waen; @@ -673,6 +672,7 @@ struct ath_hw { bool sw_mgmt_crypto; bool is_pciexpress; + bool 
aspm_enabled; bool is_monitoring; bool need_an_top2_fixup; u16 tx_trig_level; @@ -874,6 +874,7 @@ struct ath_bus_ops { bool (*eeprom_read)(struct ath_common *common, u32 off, u16 *data); void (*bt_coex_prep)(struct ath_common *common); void (*extn_synch_en)(struct ath_common *common); + void (*aspm_init)(struct ath_common *common); }; static inline struct ath_common *ath9k_hw_common(struct ath_hw *ah) diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 3bad0b2..be4ea13 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -16,6 +16,7 @@ #include #include +#include #include #include "ath9k.h" @@ -115,12 +116,38 @@ static void ath_pci_extn_synch_enable(struct ath_common *common) pci_write_config_byte(pdev, sc->sc_ah->caps.pcie_lcr_offset, lnkctl); } +static void ath_pci_aspm_init(struct ath_common *common) +{ + struct ath_softc *sc = (struct ath_softc *) common->priv; + struct ath_hw *ah = sc->sc_ah; + struct pci_dev *pdev = to_pci_dev(sc->dev); + struct pci_dev *parent; + int pos; + u8 aspm; + + if (!pci_is_pcie(pdev)) + return; + + parent = pdev->bus->self; + if (WARN_ON(!parent)) + return; + + pos = pci_pcie_cap(parent); + pci_read_config_byte(parent, pos + PCI_EXP_LNKCTL, &aspm); + if (aspm & (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)) { + ah->aspm_enabled = true; + /* Initialize PCIe PM and SERDES registers. */ + ath9k_hw_configpcipowersave(ah, 0, 0); + } +} + static const struct ath_bus_ops ath_pci_bus_ops = { .ath_bus_type = ATH_PCI, .read_cachesize = ath_pci_read_cachesize, .eeprom_read = ath_pci_eeprom_read, .bt_coex_prep = ath_pci_bt_coex_prep, .extn_synch_en = ath_pci_extn_synch_enable, + .aspm_init = ath_pci_aspm_init, }; static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -- cgit v1.1 From b6b67df3f24c45af0012ee3c8af2f62ca083ae18 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 29 Jul 2011 10:53:12 -0500 Subject: rtlwifi: Fix kernel oops on ARM SOC This driver uses information from the self member of the pci_bus struct to get information regarding the bridge to which the PCIe device is attached. Unfortunately, this member is not established on all architectures, which leads to a kernel oops. Skipping the entire block that uses the self member to determine the bridge vendor will only affect RTL8192DE devices as that driver sets the ASPM support flag differently when the bridge vendor is Intel. If the self member is available, there is no functional change. This patch fixes Bugzilla No. 40212. Reported-by: Hubert Liao Signed-off-by: Larry Finger Cc: Stable [back to 2.6.38] Signed-off-by: John W. 
Linville --- drivers/net/wireless/rtlwifi/pci.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 5efd578..56f1235 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1696,15 +1696,17 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev, pcipriv->ndis_adapter.devnumber = PCI_SLOT(pdev->devfn); pcipriv->ndis_adapter.funcnumber = PCI_FUNC(pdev->devfn); - /*find bridge info */ - pcipriv->ndis_adapter.pcibridge_vendorid = bridge_pdev->vendor; - for (tmp = 0; tmp < PCI_BRIDGE_VENDOR_MAX; tmp++) { - if (bridge_pdev->vendor == pcibridge_vendors[tmp]) { - pcipriv->ndis_adapter.pcibridge_vendor = tmp; - RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, - ("Pci Bridge Vendor is found index: %d\n", - tmp)); - break; + if (bridge_pdev) { + /*find bridge info if available */ + pcipriv->ndis_adapter.pcibridge_vendorid = bridge_pdev->vendor; + for (tmp = 0; tmp < PCI_BRIDGE_VENDOR_MAX; tmp++) { + if (bridge_pdev->vendor == pcibridge_vendors[tmp]) { + pcipriv->ndis_adapter.pcibridge_vendor = tmp; + RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, + ("Pci Bridge Vendor is found index:" + " %d\n", tmp)); + break; + } } } -- cgit v1.1 From b52398b6e4522176dd125722c72c301015d24520 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Sat, 30 Jul 2011 13:32:56 +0200 Subject: rt2x00: rt2800: fix zeroing skb structure We should clear skb->data not skb itself. Bug was introduced by: commit 0b8004aa12d13ec750d102ba4082a95f0107c649 "rt2x00: Properly reserve room for descriptors in skbs". Cc: stable@kernel.org # 2.6.36+ Signed-off-by: Stanislaw Gruszka Acked-by: Gertjan van Wingerde Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 75d2c6c..f94d669 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -703,8 +703,7 @@ void rt2800_write_beacon(struct queue_entry *entry, struct txentry_desc *txdesc) /* * Add space for the TXWI in front of the skb. */ - skb_push(entry->skb, TXWI_DESC_SIZE); - memset(entry->skb, 0, TXWI_DESC_SIZE); + memset(skb_push(entry->skb, TXWI_DESC_SIZE), 0, TXWI_DESC_SIZE); /* * Register descriptor details in skb frame descriptor. -- cgit v1.1 From 3bdb65ec95e6cccffc40102d7c003047c45da90c Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 30 Jun 2011 14:12:00 -0500 Subject: kdb: cleanup unused variables missed in the original kdb merge The BTARGS and BTSYMARG variables do not have any function in the mainline version of kdb. 
Reported-by: Tim Bird Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_bt.c | 5 ++--- kernel/debug/kdb/kdb_cmds | 4 ---- kernel/debug/kdb/kdb_main.c | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 2f62fe8..7179eac 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv) unsigned long addr; long offset; - kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ - kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each - * proc in bta */ + /* Prompt after each proc in bta */ + kdbgetintenv("BTAPROMPT", &btaprompt); if (strcmp(argv[0], "bta") == 0) { struct task_struct *g, *p; diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds index 56c88e4..9834ad3 100644 --- a/kernel/debug/kdb/kdb_cmds +++ b/kernel/debug/kdb/kdb_cmds @@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging" endefcmd defcmd dumpall "" "First line debugging" - set BTSYMARG 1 - set BTARGS 9 pid R -dumpcommon -bta endefcmd defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" - set BTSYMARG 1 - set BTARGS 9 pid R -dumpcommon -btc diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index be14779..b33116e 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -145,7 +145,6 @@ static char *__env[] = { #endif "RADIX=16", "MDCOUNT=8", /* lines of md output */ - "BTARGS=9", /* 9 possible args in bt */ KDB_PLATFORM_ENV, "DTABCOUNT=30", "NOSECT=1", @@ -172,6 +171,7 @@ static char *__env[] = { (char *)0, (char *)0, (char *)0, + (char *)0, }; static const int __nenv = (sizeof(__env) / sizeof(char *)); -- cgit v1.1 From f679c4985bb2e7de9d39a5d40b6031361c4ad861 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 23 May 2011 13:17:41 -0500 Subject: kdb,kgdb: Implement switch and pass buffer from kdb -> gdb When switching from kdb mode to kgdb mode packets were getting lost depending on the size of the fifo queue of the serial chip. When gdb initially connects if it is in kdb mode it should entirely send any character buffer over to the gdbstub when switching connections. Previously kdb was zero'ing out the character buffer and this could lead to gdb failing to connect at all, or a lengthy pause could occur on the initial connect. Signed-off-by: Jason Wessel --- kernel/debug/gdbstub.c | 22 +++++++++++++++------- kernel/debug/kdb/kdb_debugger.c | 17 +++++++---------- kernel/debug/kdb/kdb_io.c | 10 ++++++---- kernel/debug/kdb/kdb_private.h | 1 + 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index a11db95..3487248 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -42,6 +42,8 @@ /* Our I/O buffers. */ static char remcom_in_buffer[BUFMAX]; static char remcom_out_buffer[BUFMAX]; +static int gdbstub_use_prev_in_buf; +static int gdbstub_prev_in_buf_pos; /* Storage for the registers, in GDB format. 
*/ static unsigned long gdb_regs[(NUMREGBYTES + @@ -58,6 +60,13 @@ static int gdbstub_read_wait(void) int ret = -1; int i; + if (unlikely(gdbstub_use_prev_in_buf)) { + if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf) + return remcom_in_buffer[gdbstub_prev_in_buf_pos++]; + else + gdbstub_use_prev_in_buf = 0; + } + /* poll any additional I/O interfaces that are defined */ while (ret < 0) for (i = 0; kdb_poll_funcs[i] != NULL; i++) { @@ -109,7 +118,6 @@ static void get_packet(char *buffer) buffer[count] = ch; count = count + 1; } - buffer[count] = 0; if (ch == '#') { xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; @@ -124,6 +132,7 @@ static void get_packet(char *buffer) if (dbg_io_ops->flush) dbg_io_ops->flush(); } + buffer[count] = 0; } while (checksum != xmitcsum); } @@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) case 'c': strcpy(remcom_in_buffer, cmd); return 0; - case '?': - gdb_cmd_status(ks); - break; - case '\0': - strcpy(remcom_out_buffer, ""); - break; + case '$': + strcpy(remcom_in_buffer, cmd); + gdbstub_use_prev_in_buf = strlen(remcom_in_buffer); + gdbstub_prev_in_buf_pos = 0; + return 0; } dbg_io_ops->write_char('+'); put_packet(remcom_out_buffer); diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index dd0b1b7..fe422d27 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs); int kdb_poll_idx = 1; EXPORT_SYMBOL_GPL(kdb_poll_idx); +static struct kgdb_state *kdb_ks; + int kdb_stub(struct kgdb_state *ks) { int error = 0; @@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks) kdb_dbtrap_t db_result = KDB_DB_NOBPT; int i; + kdb_ks = ks; if (KDB_STATE(REENTRY)) { reason = KDB_REASON_SWITCH; KDB_STATE_CLEAR(REENTRY); @@ -124,16 +127,6 @@ int kdb_stub(struct kgdb_state *ks) kdbnearsym_cleanup(); if (error == KDB_CMD_KGDB) { if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { - /* - * This inteface glue which allows kdb to transition in into - * the gdb stub. In order to do this the '?' or '' gdb serial - * packet response is processed here. And then control is - * passed to the gdbstub. 
- */ - if (KDB_STATE(DOING_KGDB)) - gdbstub_state(ks, "?"); - else - gdbstub_state(ks, ""); KDB_STATE_CLEAR(DOING_KGDB); KDB_STATE_CLEAR(DOING_KGDB2); } @@ -166,3 +159,7 @@ int kdb_stub(struct kgdb_state *ks) return kgdb_info[ks->cpu].ret_state; } +void kdb_gdb_state_pass(char *buf) +{ + gdbstub_state(kdb_ks, buf); +} diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 96fdaac..bd23326 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -35,8 +35,8 @@ static void kgdb_transition_check(char *buffer) { int slen = strlen(buffer); if (strncmp(buffer, "$?#3f", slen) != 0 && - strncmp(buffer, "$qSupported#37", slen) != 0 && - strncmp(buffer, "+$qSupported#37", slen) != 0) { + strncmp(buffer, "$qSupported", slen) != 0 && + strncmp(buffer, "+$qSupported", slen) != 0) { KDB_STATE_SET(KGDB_TRANS); kdb_printf("%s", buffer); } @@ -390,12 +390,14 @@ poll_again: /* Special escape to kgdb */ if (lastchar - buffer >= 5 && strcmp(lastchar - 5, "$?#3f") == 0) { + kdb_gdb_state_pass(lastchar - 5); strcpy(buffer, "kgdb"); KDB_STATE_SET(DOING_KGDB); return buffer; } - if (lastchar - buffer >= 14 && - strcmp(lastchar - 14, "$qSupported#37") == 0) { + if (lastchar - buffer >= 11 && + strcmp(lastchar - 11, "$qSupported") == 0) { + kdb_gdb_state_pass(lastchar - 11); strcpy(buffer, "kgdb"); KDB_STATE_SET(DOING_KGDB2); return buffer; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 35d69ed..03d332e 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -218,6 +218,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); extern char *kdb_getstr(char *, size_t, char *); +extern void kdb_gdb_state_pass(char *buf); /* Defines for kdb_symbol_print */ #define KDB_SP_SPACEB 0x0001 /* Space before string */ -- cgit v1.1 From d613d828e8987a1f794378022f900b454fa95403 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 23 May 2011 13:22:54 -0500 Subject: kdb: Remove all references to DOING_KGDB2 The DOING_KGDB2 was originally a state variable for one of the two ways to automatically transition from kdb to kgdb. Purge all these variables and just use one single state for the transition. 
Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_debugger.c | 4 +--- kernel/debug/kdb/kdb_io.c | 2 +- kernel/debug/kdb/kdb_main.c | 2 +- kernel/debug/kdb/kdb_private.h | 2 -- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index fe422d27..d9ca9aa 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -126,10 +126,8 @@ int kdb_stub(struct kgdb_state *ks) KDB_STATE_CLEAR(PAGER); kdbnearsym_cleanup(); if (error == KDB_CMD_KGDB) { - if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { + if (KDB_STATE(DOING_KGDB)) KDB_STATE_CLEAR(DOING_KGDB); - KDB_STATE_CLEAR(DOING_KGDB2); - } return DBG_PASS_EVENT; } kdb_bp_install(ks->linux_regs); diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index bd23326..0dbcdfb 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -399,7 +399,7 @@ poll_again: strcmp(lastchar - 11, "$qSupported") == 0) { kdb_gdb_state_pass(lastchar - 11); strcpy(buffer, "kgdb"); - KDB_STATE_SET(DOING_KGDB2); + KDB_STATE_SET(DOING_KGDB); return buffer; } } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index b33116e..63786e7 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error, } if (result == KDB_CMD_KGDB) { - if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2))) + if (!KDB_STATE(DOING_KGDB)) kdb_printf("Entering please attach debugger " "or use $D#44+ or $3#33\n"); break; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 03d332e..e381d10 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -21,7 +21,6 @@ #define KDB_CMD_SS (-1003) #define KDB_CMD_SSB (-1004) #define KDB_CMD_KGDB (-1005) -#define KDB_CMD_KGDB2 (-1006) /* Internal debug flags */ #define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ @@ -146,7 +145,6 @@ extern int kdb_state; * keyboard on this cpu */ #define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ #define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ -#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */ #define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ #define KDB_STATE_ARCH 0xff000000 /* Reserved for arch * specific use */ -- cgit v1.1 From 37f86b469d73fc2f2a925536fb99b8f513f641b7 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Tue, 24 May 2011 10:43:06 -0500 Subject: kdb,kgdb: Allow arbitrary kgdb magic knock sequences The first packet that gdb sends when the kernel is in kdb mode seems to change with every release of gdb. Instead of continuing to add many different gdb packets, change kdb to automatically look for any thing that looks like a gdb packet. Example 1 cold start test: echo g > /proc/sysrq-trigger $D#44+ Example 2 cold start test: echo g > /proc/sysrq-trigger $3#33 The second one should re-enter kdb's shell right away and is purely a test. 
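For reference, "$D#44" and "$3#33" look like gdb packets because of the remote-serial-protocol framing the knock detection now keys on: '$', a payload, '#', then the payload's byte sum modulo 256 as two hex digits (the in-kernel check only verifies that a '#' sits third from the end of the buffer). A stand-alone C sketch of that framing, illustrative only and not kernel code:

#include <stdio.h>

/* Frame a gdb remote-protocol packet: $<payload>#<two-hex-digit checksum>. */
static void gdb_frame(const char *payload, char *out, size_t outlen)
{
        unsigned int sum = 0;
        const char *p;

        for (p = payload; *p; p++)
                sum = (sum + (unsigned char)*p) & 0xff;

        snprintf(out, outlen, "$%s#%02x", payload, sum);
}

int main(void)
{
        char buf[64];

        gdb_frame("D", buf, sizeof(buf));       /* buf becomes "$D#44" */
        puts(buf);
        gdb_frame("3", buf, sizeof(buf));       /* buf becomes "$3#33" */
        puts(buf);
        return 0;
}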
Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_io.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 0dbcdfb..4802eb5 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN]; int kdb_trap_printk; -static void kgdb_transition_check(char *buffer) +static int kgdb_transition_check(char *buffer) { - int slen = strlen(buffer); - if (strncmp(buffer, "$?#3f", slen) != 0 && - strncmp(buffer, "$qSupported", slen) != 0 && - strncmp(buffer, "+$qSupported", slen) != 0) { + if (buffer[0] != '+' && buffer[0] != '$') { KDB_STATE_SET(KGDB_TRANS); kdb_printf("%s", buffer); + } else { + int slen = strlen(buffer); + if (slen > 3 && buffer[slen - 3] == '#') { + kdb_gdb_state_pass(buffer); + strcpy(buffer, "kgdb"); + KDB_STATE_SET(DOING_KGDB); + return 1; + } } + return 0; } static int kdb_read_get_key(char *buffer, size_t bufsize) @@ -251,6 +257,10 @@ poll_again: case 13: /* enter */ *lastchar++ = '\n'; *lastchar++ = '\0'; + if (!KDB_STATE(KGDB_TRANS)) { + KDB_STATE_SET(KGDB_TRANS); + kdb_printf("%s", buffer); + } kdb_printf("\n"); return buffer; case 4: /* Del */ @@ -382,10 +392,12 @@ poll_again: * printed characters if we think that * kgdb is connecting, until the check * fails */ - if (!KDB_STATE(KGDB_TRANS)) - kgdb_transition_check(buffer); - else + if (!KDB_STATE(KGDB_TRANS)) { + if (kgdb_transition_check(buffer)) + return buffer; + } else { kdb_printf("%c", key); + } } /* Special escape to kgdb */ if (lastchar - buffer >= 5 && -- cgit v1.1 From a0f98dde11a1afe9fbf5c98f57968e086e98b6f5 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Mon, 18 Jul 2011 12:19:35 +0000 Subject: Btrfs:don't check the return value of __btrfs_add_inode_defrag Don't need to check the return value of __btrfs_add_inode_defrag(), since it will always return 0. 
Signed-off-by: Wanlong Gao Signed-off-by: Chris Mason --- fs/btrfs/file.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 41ca5fd..7f134a7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -74,7 +74,7 @@ struct inode_defrag { * If an existing record is found the defrag item you * pass in is freed */ -static int __btrfs_add_inode_defrag(struct inode *inode, +static void __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode, BTRFS_I(inode)->in_defrag = 1; rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); - return 0; + return; exists: kfree(defrag); - return 0; + return; } @@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, { struct btrfs_root *root = BTRFS_I(inode)->root; struct inode_defrag *defrag; - int ret = 0; u64 transid; if (!btrfs_test_opt(root, AUTO_DEFRAG)) @@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) - ret = __btrfs_add_inode_defrag(inode, defrag); + __btrfs_add_inode_defrag(inode, defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); - return ret; + return 0; } /* -- cgit v1.1 From b532402e4d147e4f409c4e7f50d4413e8450101d Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 19 Jul 2011 07:27:20 +0000 Subject: Btrfs: return error to caller when btrfs_unlink() failes When btrfs_unlink_inode() and btrfs_orphan_add() in btrfs_unlink() are error, the error code is returned to the caller instead of BUG_ON(). Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/inode.c | 10 +++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 55bddffe..db2057f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7201,7 +7201,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, inode = lookup_free_space_inode(root, block_group, path); if (!IS_ERR(inode)) { - btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); clear_nlink(inode); /* One for the block groups ref */ spin_lock(&block_group->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4360ccb..5da43ae 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2217,7 +2217,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (!root->orphan_block_rsv) { block_rsv = btrfs_alloc_block_rsv(root); - BUG_ON(!block_rsv); + if (!block_rsv) + return -ENOMEM; } spin_lock(&root->orphan_lock); @@ -3002,13 +3003,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - BUG_ON(ret); + if (ret) + goto out; if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + if (ret) + goto out; } +out: nr = trans->blocks_used; __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); -- cgit v1.1 From b6973aa62253f3791ef6fa5e9f9de099645fc2bd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Jul 2011 03:46:35 +0000 Subject: Btrfs: fix readahead in file defrag We passed the wrong value to btrfs_force_ra(). 
Fix this by changing the argument of btrfs_force_ra() from last_index to nr_page. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 11 ++++++++--- fs/btrfs/inode.c | 13 ------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3be57c6..7ac4d25 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2520,6 +2520,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, #define PageChecked PageFsMisc #endif +/* This forces readahead on a given range of bytes in an inode */ +static inline void btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, unsigned long req_size) +{ + page_cache_sync_readahead(mapping, ra, file, offset, req_size); +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, @@ -2548,9 +2556,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5da43ae..69e448e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6697,19 +6697,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, return 0; } -/* helper function for file defrag and space balancing. This - * forces readahead on a given range of bytes in an inode - */ -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index) -{ - pgoff_t req_size = last_index - offset + 1; - - page_cache_sync_readahead(mapping, ra, file, offset, req_size); - return offset + req_size; -} - struct inode *btrfs_alloc_inode(struct super_block *sb) { struct btrfs_inode *ei; -- cgit v1.1 From 1bf85046e493c88be1c1bad9084428373089f618 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 21 Jul 2011 16:56:09 +0000 Subject: btrfs: Make extent-io callbacks that never fail return void The set/clear bit and the extent split/merge hooks only ever return 0. Changing them to return void simplifies the error handling cases later. This patch changes the hook prototypes, the single implementation of each, and the functions that call them to return void instead. Since all four of these hooks execute under a spinlock, they're necessarily simple. Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 52 ++++++++++++++-------------------------------------- fs/btrfs/extent_io.h | 18 +++++++++--------- fs/btrfs/inode.c | 26 +++++++++++--------------- 3 files changed, 34 insertions(+), 62 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5bbdb24..789d0b2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, * * This should be called with the tree lock held. 
*/ -static int merge_state(struct extent_io_tree *tree, - struct extent_state *state) +static void merge_state(struct extent_io_tree *tree, + struct extent_state *state) { struct extent_state *other; struct rb_node *other_node; if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) - return 0; + return; other_node = rb_prev(&state->rb_node); if (other_node) { @@ -287,19 +287,13 @@ static int merge_state(struct extent_io_tree *tree, free_extent_state(other); } } - - return 0; } -static int set_state_cb(struct extent_io_tree *tree, +static void set_state_cb(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - if (tree->ops && tree->ops->set_bit_hook) { - return tree->ops->set_bit_hook(tree->mapping->host, - state, bits); - } - - return 0; + if (tree->ops && tree->ops->set_bit_hook) + tree->ops->set_bit_hook(tree->mapping->host, state, bits); } static void clear_state_cb(struct extent_io_tree *tree, @@ -325,7 +319,6 @@ static int insert_state(struct extent_io_tree *tree, { struct rb_node *node; int bits_to_set = *bits & ~EXTENT_CTLBITS; - int ret; if (end < start) { printk(KERN_ERR "btrfs end < start %llu %llu\n", @@ -335,9 +328,7 @@ static int insert_state(struct extent_io_tree *tree, } state->start = start; state->end = end; - ret = set_state_cb(tree, state, bits); - if (ret) - return ret; + set_state_cb(tree, state, bits); if (bits_to_set & EXTENT_DIRTY) tree->dirty_bytes += end - start + 1; @@ -357,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree, return 0; } -static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, +static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, u64 split) { if (tree->ops && tree->ops->split_extent_hook) - return tree->ops->split_extent_hook(tree->mapping->host, - orig, split); - return 0; + tree->ops->split_extent_hook(tree->mapping->host, orig, split); } /* @@ -670,23 +659,18 @@ out: return 0; } -static int set_state_bits(struct extent_io_tree *tree, +static void set_state_bits(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - int ret; int bits_to_set = *bits & ~EXTENT_CTLBITS; - ret = set_state_cb(tree, state, bits); - if (ret) - return ret; + set_state_cb(tree, state, bits); if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; tree->dirty_bytes += range; } state->state |= bits_to_set; - - return 0; } static void cache_state(struct extent_state *state, @@ -779,9 +763,7 @@ hit_next: goto out; } - err = set_state_bits(tree, state, &bits); - if (err) - goto out; + set_state_bits(tree, state, &bits); cache_state(state, cached_state); merge_state(tree, state); @@ -830,9 +812,7 @@ hit_next: if (err) goto out; if (state->end <= end) { - err = set_state_bits(tree, state, &bits); - if (err) - goto out; + set_state_bits(tree, state, &bits); cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) @@ -893,11 +873,7 @@ hit_next: err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - err = set_state_bits(tree, prealloc, &bits); - if (err) { - prealloc = NULL; - goto out; - } + set_state_bits(tree, prealloc, &bits); cache_state(prealloc, cached_state); merge_state(tree, prealloc); prealloc = NULL; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 21a7ca9..d6871dc 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -76,15 +76,15 @@ struct extent_io_ops { struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 
end, struct extent_state *state, int uptodate); - int (*set_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - int (*merge_extent_hook)(struct inode *inode, - struct extent_state *new, - struct extent_state *other); - int (*split_extent_hook)(struct inode *inode, - struct extent_state *orig, u64 split); + void (*set_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + void (*merge_extent_hook)(struct inode *inode, + struct extent_state *new, + struct extent_state *other); + void (*split_extent_hook)(struct inode *inode, + struct extent_state *orig, u64 split); int (*write_cache_pages_lock_hook)(struct page *page); }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 69e448e..34195f9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1283,17 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } -static int btrfs_split_extent_hook(struct inode *inode, - struct extent_state *orig, u64 split) +static void btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) { /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) - return 0; + return; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); - return 0; } /* @@ -1302,18 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode, * extents, such as when we are doing sequential writes, so we can properly * account for the metadata space we'll need. */ -static int btrfs_merge_extent_hook(struct inode *inode, - struct extent_state *new, - struct extent_state *other) +static void btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) { /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) - return 0; + return; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->lock); - return 0; } /* @@ -1321,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. */ -static int btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_set_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* @@ -1352,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1396,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* -- cgit v1.1 From 61cfea9bb84d41b09e59822c33b3eb27acc48358 Mon Sep 17 00:00:00 2001 From: WuBo Date: Tue, 26 Jul 2011 03:30:11 +0000 Subject: Btrfs: Protect the readonly flag of block group The access for ro in btrfs_block_group_cache should be protected because of the racy lock in relocation. 
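The race being closed is the familiar test-outside-the-lock pattern: reading cache->ro before taking the spinlocks leaves a window in which relocation can flip it. A hypothetical sketch of the resulting shape, with made-up names (demo_group, demo_set_ro) rather than btrfs's cache/sinfo structures, showing the check done only once the lock is held and a single unlock path:

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_group {
        spinlock_t lock;
        u64 reserved;
        int ro;
};

static int demo_set_ro(struct demo_group *g, u64 need)
{
        int ret = -ENOSPC;

        spin_lock(&g->lock);
        if (g->ro) {            /* tested only after the lock is held */
                ret = 0;
                goto out;
        }
        if (g->reserved >= need) {
                g->ro = 1;
                ret = 0;
        }
out:
        spin_unlock(&g->lock);
        return ret;
}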
Signed-off-by: Wu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index db2057f..a4db88d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6549,8 +6549,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) u64 min_allocable_bytes; int ret = -ENOSPC; - if (cache->ro) - return 0; /* * We need some metadata space and system metadata space for @@ -6566,6 +6564,12 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); + + if (cache->ro) { + ret = 0; + goto out; + } + num_bytes = cache->key.offset - cache->reserved - cache->pinned - cache->bytes_super - btrfs_block_group_used(&cache->item); @@ -6579,7 +6583,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) cache->ro = 1; ret = 0; } - +out: spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); return ret; -- cgit v1.1 From ff1f2b4407454d926d8b143bf37e4dce66026a28 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 27 Jul 2011 09:49:18 +0000 Subject: Btrfs: fix oops while writing data to SSD partitions Here I have a two SSD-partitions btrfs, and they are defaultly set to "data=raid0, metadata=raid1", then I try to fill my btrfs partition till "No space left on device", via "dd if=/dev/zero of=/mnt/btrfs/tmp". I get an oops panic from kernel BUG at fs/btrfs/extent-tree.c:5199!, which refers to find_free_extent's BUG_ON(index != get_block_group_index(block_group)); In SSD mode, in order to find enough space to alloc, we may check the block_group cache which has been checked sometime before, but the index is not updated, where it hits the BUG_ON. Signed-off-by: Liu Bo Acked-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a4db88d..b172b7f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5079,7 +5079,9 @@ have_block_group: * group is does point to and try again */ if (!last_ptr_loop && last_ptr->block_group && - last_ptr->block_group != block_group) { + last_ptr->block_group != block_group && + index <= + get_block_group_index(last_ptr->block_group)) { btrfs_put_block_group(block_group); block_group = last_ptr->block_group; -- cgit v1.1 From bf5f32ecb6caac52b4d1c083251b3dd4f40a0b7a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 14 Jul 2011 21:23:06 +0000 Subject: btrfs: make btrfs_set_root_node void This is fairly trivial - btrfs_set_root_node() - always returns zero so we can just make it void. All callers ignore the return code now anyway. I also made sure to check that none of the functions that btrfs_set_root_node() calls returns an error that we might have needed to catch and pass back. 
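The cleanup pattern here is simple: a helper whose only possible return value is zero carries no information in its return code, so it can become void and callers can stop pretending to handle an error. A generic sketch with made-up names, not the btrfs code itself:

    /* before: the return value is always 0, yet callers must "handle" it */
    static int set_node_fields(struct item *item, struct node *node)
    {
            item->bytenr = node->start;
            item->level  = node->level;
            return 0;
    }
            ...
            ret = set_node_fields(item, node);
            BUG_ON(ret);

    /* after: void, and call sites drop the ret/BUG_ON boilerplate */
    static void set_node_fields(struct item *item, struct node *node)
    {
            item->bytenr = node->start;
            item->level  = node->level;
    }
            ...
            set_node_fields(item, node);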
Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/root-tree.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7ac4d25..34ce414 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2406,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); int btrfs_find_orphan_roots(struct btrfs_root *tree_root); -int btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node); +void btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node); void btrfs_check_and_init_root_item(struct btrfs_root_item *item); /* dir-item.c */ diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ebe4544..f409990 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -71,13 +71,12 @@ out: return ret; } -int btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node) +void btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node) { btrfs_set_root_bytenr(item, node->start); btrfs_set_root_level(item, btrfs_header_level(node)); btrfs_set_root_generation(item, btrfs_header_generation(node)); - return 0; } /* -- cgit v1.1 From b783e62d9620445d1ca3de76badf0468cdd329ba Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Jul 2011 15:03:50 +0000 Subject: Btrfs: don't print the leaf if we had an error In __btrfs_free_extent we will print the leaf if we fail to find the extent we wanted, but the problem is if we get an error we won't have a leaf so often this leads to a NULL pointer dereference and we lose the error that actually occurred. So only print the leaf if ret > 0, which means we didn't find the item we were looking for but we didn't error either. This way the error is preserved. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b172b7f..66bac22 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4462,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, (unsigned long long)bytenr); - btrfs_print_leaf(extent_root, path->nodes[0]); + if (ret > 0) + btrfs_print_leaf(extent_root, + path->nodes[0]); } BUG_ON(ret); extent_slot = path->slots[0]; -- cgit v1.1 From 77906a5075a4eb767026c2e07b1a412d08aea5be Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:16:00 +0000 Subject: Btrfs: copy string correctly in INO_LOOKUP ioctl Memory areas [ptr, ptr+total_len] and [name, name+total_len] may overlap, so it's wrong to use memcpy(). 
Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd252ff..2bb0886 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1757,11 +1757,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.objectid = key.offset; key.offset = (u64)-1; dirid = key.objectid; - } if (ptr < name) goto out; - memcpy(name, ptr, total_len); + memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; out: -- cgit v1.1 From e55179b3d7d41d83fd6b5f59325f4a8d8ac9700a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:16:47 +0000 Subject: Btrfs: check the nodatasum flag when writing compressed files If mounting with nodatasum option, we won't csum file data for general write or direct-io write, and this rule should also be applied when writing compressed files. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bfe42b0..8ec5d86 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, u64 first_byte = disk_start; struct block_device *bdev; int ret; + int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); @@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); + if (!skip_sum) { + ret = btrfs_csum_one_bio(root, inode, bio, + start, 1); + BUG_ON(ret); + } ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); @@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); + if (!skip_sum) { + ret = btrfs_csum_one_bio(root, inode, bio, start, 1); + BUG_ON(ret); + } ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); -- cgit v1.1 From 72d63ed6427cf233e2b352c0b80c3e5c5a444986 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:17:00 +0000 Subject: Btrfs: use wait_event() Use wait_event() when possible to avoid code duplication. 
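For reference, the open-coded loop being removed and its wait_event() equivalent look roughly like this (the queue and condition names are placeholders); wait_event() sleeps in TASK_UNINTERRUPTIBLE just like the hand-rolled version:

    /* open-coded wait loop */
    DEFINE_WAIT(wait);

    while (1) {
            prepare_to_wait(&waitq, &wait, TASK_UNINTERRUPTIBLE);
            if (condition)
                    break;
            schedule();
    }
    finish_wait(&waitq, &wait);

    /* equivalent one-liner */
    wait_event(waitq, condition);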
Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 59 ++++++-------------------------------------------- 1 file changed, 7 insertions(+), 52 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index eb55863..ff5549f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root) spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { - DEFINE_WAIT(wait); atomic_inc(&cur_trans->use_count); spin_unlock(&root->fs_info->trans_lock); - while (1) { - prepare_to_wait(&root->fs_info->transaction_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (!cur_trans->blocked) - break; - schedule(); - } - finish_wait(&root->fs_info->transaction_wait, &wait); + + wait_event(root->fs_info->transaction_wait, + !cur_trans->blocked); put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); @@ -360,15 +354,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { - DEFINE_WAIT(wait); - while (!commit->commit_done) { - prepare_to_wait(&commit->commit_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (commit->commit_done) - break; - schedule(); - } - finish_wait(&commit->commit_wait, &wait); + wait_event(commit->commit_wait, commit->commit_done); return 0; } @@ -1085,22 +1071,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) static void wait_current_trans_commit_start(struct btrfs_root *root, struct btrfs_transaction *trans) { - DEFINE_WAIT(wait); - - if (trans->in_commit) - return; - - while (1) { - prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (trans->in_commit) { - finish_wait(&root->fs_info->transaction_blocked_wait, - &wait); - break; - } - schedule(); - finish_wait(&root->fs_info->transaction_blocked_wait, &wait); - } + wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); } /* @@ -1110,24 +1081,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, struct btrfs_transaction *trans) { - DEFINE_WAIT(wait); - - if (trans->commit_done || (trans->in_commit && !trans->blocked)) - return; - - while (1) { - prepare_to_wait(&root->fs_info->transaction_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (trans->commit_done || - (trans->in_commit && !trans->blocked)) { - finish_wait(&root->fs_info->transaction_wait, - &wait); - break; - } - schedule(); - finish_wait(&root->fs_info->transaction_wait, - &wait); - } + wait_event(root->fs_info->transaction_wait, + trans->commit_done || (trans->in_commit && !trans->blocked)); } /* -- cgit v1.1 From b9c8300c2ac354d850159f301d5b3ead13854cdd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:17:14 +0000 Subject: Btrfs: remove a BUG_ON() in btrfs_commit_transaction() wait_for_commit() always returns 0. 
Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ff5549f..7dc36fa 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -351,11 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root } /* wait for a transaction commit to be fully complete */ -static noinline int wait_for_commit(struct btrfs_root *root, +static noinline void wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { wait_event(commit->commit_wait, commit->commit_done); - return 0; } int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) @@ -1189,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, atomic_inc(&cur_trans->use_count); btrfs_end_transaction(trans, root); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + wait_for_commit(root, cur_trans); put_transaction(cur_trans); -- cgit v1.1 From 15de900d08bb132833c8622610ddb1be660a6018 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:17:27 +0000 Subject: Btrfs: remove remaining ref-cache code Since commit f2a97a9dbd86eb1ef956bdf20e05c507b32beb96 ("btrfs: remove all unused functions"), there's no extern functions at all in ref-cache.c, so just remove the remaining dead code. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ref-cache.c | 68 ---------------------------------------------------- fs/btrfs/ref-cache.h | 52 ---------------------------------------- 2 files changed, 120 deletions(-) delete mode 100644 fs/btrfs/ref-cache.c delete mode 100644 fs/btrfs/ref-cache.h diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c deleted file mode 100644 index 82d569c..0000000 --- a/fs/btrfs/ref-cache.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#include -#include -#include -#include "ctree.h" -#include "ref-cache.h" -#include "transaction.h" - -static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, - struct rb_node *node) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct btrfs_leaf_ref *entry; - - while (*p) { - parent = *p; - entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); - - if (bytenr < entry->bytenr) - p = &(*p)->rb_left; - else if (bytenr > entry->bytenr) - p = &(*p)->rb_right; - else - return parent; - } - - entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); - rb_link_node(node, parent, p); - rb_insert_color(node, root); - return NULL; -} - -static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) -{ - struct rb_node *n = root->rb_node; - struct btrfs_leaf_ref *entry; - - while (n) { - entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); - WARN_ON(!entry->in_tree); - - if (bytenr < entry->bytenr) - n = n->rb_left; - else if (bytenr > entry->bytenr) - n = n->rb_right; - else - return n; - } - return NULL; -} diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h deleted file mode 100644 index 24f7001..0000000 --- a/fs/btrfs/ref-cache.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ -#ifndef __REFCACHE__ -#define __REFCACHE__ - -struct btrfs_extent_info { - /* bytenr and num_bytes find the extent in the extent allocation tree */ - u64 bytenr; - u64 num_bytes; - - /* objectid and offset find the back reference for the file */ - u64 objectid; - u64 offset; -}; - -struct btrfs_leaf_ref { - struct rb_node rb_node; - struct btrfs_leaf_ref_tree *tree; - int in_tree; - atomic_t usage; - - u64 root_gen; - u64 bytenr; - u64 owner; - u64 generation; - int nritems; - - struct list_head list; - struct btrfs_extent_info extents[]; -}; - -static inline size_t btrfs_leaf_ref_size(int nr_extents) -{ - return sizeof(struct btrfs_leaf_ref) + - sizeof(struct btrfs_extent_info) * nr_extents; -} -#endif -- cgit v1.1 From 9b89d95a143bb0a9abc4ba0fdcdda78211930f1a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:17:39 +0000 Subject: Btrfs: make acl functions really no-op if acl is not enabled So there's no overhead for something we don't use. 
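The shape of the change: compile the real implementation only when the Kconfig option is set, and let the header provide static inline no-ops otherwise, so call sites stay #ifdef-free and the disabled case is optimized away entirely. A generic sketch with made-up names:

    /* Makefile: only build the object when the feature is configured */
    obj-$(CONFIG_FOO_FEATURE) += foo.o

    /* header */
    #ifdef CONFIG_FOO_FEATURE
    int foo_apply(struct inode *inode);
    #else
    static inline int foo_apply(struct inode *inode)
    {
            return 0;       /* feature disabled: succeed as a no-op */
    }
    #endif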
Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 +++- fs/btrfs/acl.c | 17 ----------------- fs/btrfs/ctree.h | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9b72dcf..40e6ac0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ + export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o + +btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f66fc99..b206d4c 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -28,8 +28,6 @@ #include "btrfs_inode.h" #include "xattr.h" -#ifdef CONFIG_BTRFS_FS_POSIX_ACL - static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; @@ -318,18 +316,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = { .get = btrfs_xattr_acl_get, .set = btrfs_xattr_acl_set, }; - -#else /* CONFIG_BTRFS_FS_POSIX_ACL */ - -int btrfs_acl_chmod(struct inode *inode) -{ - return 0; -} - -int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) -{ - return 0; -} - -#endif /* CONFIG_BTRFS_FS_POSIX_ACL */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 34ce414..a6263bd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2650,12 +2650,21 @@ do { \ /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); -#else -#define btrfs_check_acl NULL -#endif int btrfs_init_acl(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); +#else +#define btrfs_check_acl NULL +static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) +{ + return 0; +} +static inline int btrfs_acl_chmod(struct inode *inode) +{ + return 0; +} +#endif /* relocation.c */ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); -- cgit v1.1 From 85d85a743da894029723e002eb556ceeebc03658 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:17:52 +0000 Subject: Btrfs: remove redundant code for dir item lookup When we search a dir item with a specific hash code, we can just return NULL without further checking if btrfs_search_slot() returns 1. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/dir-item.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c360a84..31d84e7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -198,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); @@ -209,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) { - if (path->slots[0] == 0) - return NULL; - path->slots[0]--; - } - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != dir || - btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || - found_key.offset != key.offset) + if (ret > 0) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -315,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); @@ -324,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) { - if (path->slots[0] == 0) - return NULL; - path->slots[0]--; - } - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != dir || - btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || - found_key.offset != key.offset) + if (ret > 0) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); -- cgit v1.1 From 7e016a038e829c7d1271e1d57b8002860bbdf0db Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:18:03 +0000 Subject: Btrfs: clean up search_extent_mapping() rb_node returned by __tree_search() can be a valid pointer or NULL, but won't be some errno. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2d04103..911a9db 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -379,23 +379,12 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree, em = rb_entry(next, struct extent_map, rb_node); goto found; } - if (!rb_node) { - em = NULL; - goto out; - } - if (IS_ERR(rb_node)) { - em = ERR_CAST(rb_node); - goto out; - } - em = rb_entry(rb_node, struct extent_map, rb_node); - goto found; - - em = NULL; - goto out; + if (!rb_node) + return NULL; + em = rb_entry(rb_node, struct extent_map, rb_node); found: atomic_inc(&em->refs); -out: return em; } -- cgit v1.1 From ed64f06652210b4a52fe0ea65ac43f9c6af1d988 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:18:15 +0000 Subject: Btrfs: clean up code for extent_map lookup lookup_extent_map() and search_extent_map() can share most of code. 
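The consolidation that follows is the usual "one internal helper, two thin wrappers" refactor: the shared search logic moves into a common function that takes a strictness flag, and the exported lookups become one-liners. A sketch with illustrative names (do_search(), overlaps() and grab_ref() are stand-ins):

    static struct item *__lookup(struct tree *tree, u64 start, u64 len, int strict)
    {
            struct item *item = do_search(tree, start, len);

            if (!item)
                    return NULL;
            if (strict && !overlaps(item, start, len))
                    return NULL;    /* exact lookups reject nearby-only hits */
            grab_ref(item);
            return item;
    }

    struct item *lookup_exact(struct tree *tree, u64 start, u64 len)
    {
            return __lookup(tree, start, len, 1);
    }

    struct item *lookup_nearby(struct tree *tree, u64 start, u64 len)
    {
            return __lookup(tree, start, len, 0);
    }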
Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 85 ++++++++++++++++++--------------------------------- 1 file changed, 29 insertions(+), 56 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 911a9db..df7a803 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -299,19 +299,8 @@ static u64 range_end(u64 start, u64 len) return start + len; } -/** - * lookup_extent_mapping - lookup extent_map - * @tree: tree to lookup in - * @start: byte offset to start the search - * @len: length of the lookup range - * - * Find and return the first extent_map struct in @tree that intersects the - * [start, len] range. There may be additional objects in the tree that - * intersect, so check the object returned carefully to make sure that no - * additional lookups are needed. - */ -struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 len) +struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len, int strict) { struct extent_map *em; struct rb_node *rb_node; @@ -320,38 +309,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 end = range_end(start, len); rb_node = __tree_search(&tree->map, start, &prev, &next); - if (!rb_node && prev) { - em = rb_entry(prev, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - } - if (!rb_node && next) { - em = rb_entry(next, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - } if (!rb_node) { - em = NULL; - goto out; - } - if (IS_ERR(rb_node)) { - em = ERR_CAST(rb_node); - goto out; + if (prev) + rb_node = prev; + else if (next) + rb_node = next; + else + return NULL; } + em = rb_entry(rb_node, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - em = NULL; - goto out; + if (strict && !(end > em->start && start < extent_map_end(em))) + return NULL; -found: atomic_inc(&em->refs); -out: return em; } /** + * lookup_extent_mapping - lookup extent_map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. There may be additional objects in the tree that + * intersect, so check the object returned carefully to make sure that no + * additional lookups are needed. 
+ */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len) +{ + return __lookup_extent_mapping(tree, start, len, 1); +} + +/** * search_extent_mapping - find a nearby extent map * @tree: tree to lookup in * @start: byte offset to start the search @@ -365,27 +358,7 @@ out: struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len) { - struct extent_map *em; - struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; - - rb_node = __tree_search(&tree->map, start, &prev, &next); - if (!rb_node && prev) { - em = rb_entry(prev, struct extent_map, rb_node); - goto found; - } - if (!rb_node && next) { - em = rb_entry(next, struct extent_map, rb_node); - goto found; - } - if (!rb_node) - return NULL; - - em = rb_entry(rb_node, struct extent_map, rb_node); -found: - atomic_inc(&em->refs); - return em; + return __lookup_extent_mapping(tree, start, len, 0); } /** -- cgit v1.1 From 4d2c8f62f12a6652db67cc0c1f4a4a498b05ddbc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jul 2011 03:18:33 +0000 Subject: Btrfs: clean up code for merging extent maps unpin_extent_cache() and add_extent_mapping() shares the same code that merges extent maps. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 59 ++++++++++++++++++--------------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index df7a803..7c97b33 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) { - int ret = 0; struct extent_map *merge = NULL; struct rb_node *rb; - struct extent_map *em; - - write_lock(&tree->lock); - em = lookup_extent_mapping(tree, start, len); - - WARN_ON(!em || em->start != start); - - if (!em) - goto out; - - clear_bit(EXTENT_FLAG_PINNED, &em->flags); if (em->start != 0) { rb = rb_prev(&em->rb_node); @@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) merge->in_tree = 0; free_extent_map(merge); } +} + +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +{ + int ret = 0; + struct extent_map *em; + + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, start, len); + + WARN_ON(!em || em->start != start); + + if (!em) + goto out; + + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + + try_merge_map(tree, em); free_extent_map(em); out: @@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *merge = NULL; struct rb_node *rb; struct extent_map *exist; @@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; } atomic_inc(&em->refs); - if (em->start != 0) { - rb = rb_prev(&em->rb_node); - if (rb) - merge = rb_entry(rb, struct extent_map, rb_node); - if (rb && mergable_maps(merge, em)) { - em->start = merge->start; - em->len += merge->len; - em->block_len += merge->block_len; - em->block_start = merge->block_start; - merge->in_tree = 0; - rb_erase(&merge->rb_node, &tree->map); - free_extent_map(merge); - } - } - rb = rb_next(&em->rb_node); - if (rb) - merge = rb_entry(rb, struct extent_map, rb_node); - if (rb && mergable_maps(em, merge)) { - em->len += merge->len; - em->block_len += merge->len; - 
rb_erase(&merge->rb_node, &tree->map); - merge->in_tree = 0; - free_extent_map(merge); - } + + try_merge_map(tree, em); out: return ret; } -- cgit v1.1 From 3a6d457ec79d4cdf2313189b4e852e53f2b8d2b2 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 14 Jul 2011 03:18:52 +0000 Subject: Btrfs: remove unused members from struct extent_state These members are not used at all. Signed-off-by: Xiao Guangrong Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index d6871dc..7b2f0c3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -108,8 +108,6 @@ struct extent_state { wait_queue_head_t wq; atomic_t refs; unsigned long state; - u64 split_start; - u64 split_end; /* for use by the FS */ u64 private; -- cgit v1.1 From 3150b6996934455d7f2da243939d80a9b839085a Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 14 Jul 2011 03:19:08 +0000 Subject: Btrfs: clean up for insert_state() Don't duplicate set_state_bits(). Signed-off-by: Xiao Guangrong Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 789d0b2..0d69425 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -303,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree, tree->ops->clear_bit_hook(tree->mapping->host, state, bits); } +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, int *bits); + /* * insert an extent_state struct into the tree. 'bits' are set on the * struct before it is inserted. @@ -318,7 +321,6 @@ static int insert_state(struct extent_io_tree *tree, int *bits) { struct rb_node *node; - int bits_to_set = *bits & ~EXTENT_CTLBITS; if (end < start) { printk(KERN_ERR "btrfs end < start %llu %llu\n", @@ -328,11 +330,9 @@ static int insert_state(struct extent_io_tree *tree, } state->start = start; state->end = end; - set_state_cb(tree, state, bits); - if (bits_to_set & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits_to_set; + set_state_bits(tree, state, bits); + node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; -- cgit v1.1 From ded91f0814a349e36b3ca0eff497f816ad518fba Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 14 Jul 2011 03:19:27 +0000 Subject: Btrfs: clean up for wait_extent_bit() We can just use cond_resched_lock(). Signed-off-by: Xiao Guangrong Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0d69425..a0811e6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -648,11 +648,7 @@ again: if (start > end) break; - if (need_resched()) { - spin_unlock(&tree->lock); - cond_resched(); - spin_lock(&tree->lock); - } + cond_resched_lock(&tree->lock); } out: spin_unlock(&tree->lock); -- cgit v1.1 From 69261c4b6a394ead1b5ca2966c908469355eb603 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 14 Jul 2011 03:19:45 +0000 Subject: Btrfs: clean up for find_first_extent_bit() find_first_extent_bit() and find_first_extent_bit_state() share most of the code, and we can just make the former call the latter. 
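Same idea as the extent_map cleanups above: keep the locking in the exported function and let it delegate to the already-locked helper instead of duplicating the tree walk. A sketch with illustrative names, where the helper's locking requirement is stated in a comment:

    /* caller must hold tree->lock */
    static struct state *first_state_with_bits(struct tree *tree, u64 start, int bits);

    int first_range_with_bits(struct tree *tree, u64 start,
                              u64 *start_ret, u64 *end_ret, int bits)
    {
            struct state *state;
            int ret = 1;

            spin_lock(&tree->lock);
            state = first_state_with_bits(tree, start, bits);
            if (state) {
                    *start_ret = state->start;
                    *end_ret = state->end;
                    ret = 0;
            }
            spin_unlock(&tree->lock);
            return ret;
    }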
Signed-off-by: Xiao Guangrong Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 64 ++++++++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a0811e6..a59ebc6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1031,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) return 0; } -/* - * find the first offset in the io tree with 'bits' set. zero is - * returned if we find something, and *start_ret and *end_ret are - * set to reflect the state struct that was found. - * - * If nothing was found, 1 is returned, < 0 on error - */ -int find_first_extent_bit(struct extent_io_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - if (!node) - goto out; - - while (1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - spin_unlock(&tree->lock); - return ret; -} - /* find the first state struct with 'bits' set after 'start', and * return it. tree->lock must be held. NULL will returned if * nothing was found after 'start' @@ -1103,6 +1063,30 @@ out: } /* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found. + * + * If nothing was found, 1 is returned, < 0 on error + */ +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct extent_state *state; + int ret = 1; + + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, bits); + if (state) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + spin_unlock(&tree->lock); + return ret; +} + +/* * find a contiguous range of bytes in the file marked as delalloc, not * more than 'max_bytes'. start and end are used to return the range, * -- cgit v1.1 From 341d14f161a475ebdbc9adff1f7e681e1185dee9 Mon Sep 17 00:00:00 2001 From: Mitch Harder Date: Tue, 12 Jul 2011 19:43:45 +0000 Subject: Btrfs: Remove unused variable 'last_index' in file.c The variable 'last_index' is calculated in the __btrfs_buffered_write function and passed as a parameter to the prepare_pages function, but is not used anywhere in the prepare_pages function. Remove instances of 'last_index' in these functions. 
Signed-off-by: Mitch Harder Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7f134a7..010aec8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, - unsigned long last_index, size_t write_bytes) + size_t write_bytes) { struct extent_state *cached_state = NULL; int i; @@ -1159,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; unsigned long first_index; - unsigned long last_index; size_t num_written = 0; int nrptrs; int ret = 0; @@ -1172,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, return -ENOMEM; first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; while (iov_iter_count(i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); @@ -1206,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * contents of pages from loop to loop */ ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, last_index, - write_bytes); + pos, first_index, write_bytes); if (ret) { btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); -- cgit v1.1 From 0d10ee2e6deb5c8409ae65b970846344897d5e4e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 1 Aug 2011 14:37:36 -0400 Subject: Btrfs: don't call writepages from within write_full_page When doing a writepage we call writepages to try and write out any other dirty pages in the area. This could cause problems where we commit a transaction and then have somebody else dirtying metadata in the area as we could end up writing out a lot more than we care about, which could cause latency on anybody who is waiting for the transaction to completely finish committing. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a59ebc6..8491712 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2502,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; - struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -2510,18 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; - struct writeback_control wbc_writepages = { - .sync_mode = wbc->sync_mode, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; ret = __extent_writepage(page, wbc, &epd); - extent_write_cache_pages(tree, mapping, &wbc_writepages, - __extent_writepage, &epd, flush_write_bio); flush_epd_write_bio(&epd); return ret; } -- cgit v1.1 From 9d8b9ec44234b2f6e0225300632d250210c04f11 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 17:41:35 -0400 Subject: ext4: use ext4_msg() instead of printk in mballoc Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 79 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d5021e8..70d1b3e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -493,10 +493,11 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { - printk(KERN_ERR "corruption in group %u " - "at byte %u(%u): %x in copy != %x " - "on disk/prealloc\n", - e4b->bd_group, i, i * 8, b1[i], b2[i]); + ext4_msg(e4b->bd_sb, KERN_ERR, + "corruption in group %u " + "at byte %u(%u): %x in copy != %x " + "on disk/prealloc", + e4b->bd_group, i, i * 8, b1[i], b2[i]); BUG(); } } @@ -2224,8 +2225,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, EXT4_DESC_PER_BLOCK_BITS(sb); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate mem for a " - "buddy group\n"); + ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " + "for a buddy group"); goto exit_meta_group_info; } sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = @@ -2238,7 +2239,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); if (meta_group_info[i] == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); + ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); goto exit_group_info; } memset(meta_group_info[i], 0, kmem_cache_size(cachep)); @@ -2333,12 +2334,12 @@ static int ext4_mb_init_backend(struct super_block *sb) * So a two level scheme suffices for now. 
*/ sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL); if (sbi->s_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); + ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } sbi->s_buddy_cache = new_inode(sb); if (sbi->s_buddy_cache == NULL) { - printk(KERN_ERR "EXT4-fs: can't get new inode\n"); + ext4_msg(sb, KERN_ERR, "can't get new inode"); goto err_freesgi; } sbi->s_buddy_cache->i_ino = get_next_ino(); @@ -2346,8 +2347,7 @@ static int ext4_mb_init_backend(struct super_block *sb) for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { - printk(KERN_ERR - "EXT4-fs: can't read descriptor %u\n", i); + ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i); goto err_freebuddy; } if (ext4_mb_add_groupinfo(sb, i, desc) != 0) @@ -2411,7 +2411,8 @@ static int ext4_groupinfo_create_slab(size_t size) mutex_unlock(&ext4_grpinfo_slab_create_mutex); if (!cachep) { - printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); + printk(KERN_EMERG + "EXT4-fs: no memory for groupinfo slab cache\n"); return -ENOMEM; } @@ -2566,25 +2567,25 @@ int ext4_mb_release(struct super_block *sb) if (sbi->s_buddy_cache) iput(sbi->s_buddy_cache); if (sbi->s_mb_stats) { - printk(KERN_INFO - "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %u blocks %u reqs (%u success)", atomic_read(&sbi->s_bal_allocated), atomic_read(&sbi->s_bal_reqs), atomic_read(&sbi->s_bal_success)); - printk(KERN_INFO - "EXT4-fs: mballoc: %u extents scanned, %u goal hits, " - "%u 2^N hits, %u breaks, %u lost\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %u extents scanned, %u goal hits, " + "%u 2^N hits, %u breaks, %u lost", atomic_read(&sbi->s_bal_ex_scanned), atomic_read(&sbi->s_bal_goals), atomic_read(&sbi->s_bal_2orders), atomic_read(&sbi->s_bal_breaks), atomic_read(&sbi->s_mb_lost_chunks)); - printk(KERN_INFO - "EXT4-fs: mballoc: %lu generated and it took %Lu\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %lu generated and it took %Lu", sbi->s_mb_buddies_generated, sbi->s_mb_generation_time); - printk(KERN_INFO - "EXT4-fs: mballoc: %u preallocated, %u discarded\n", + ext4_msg(sb, KERN_INFO, + "mballoc: %u preallocated, %u discarded", atomic_read(&sbi->s_mb_preallocated), atomic_read(&sbi->s_mb_discarded)); } @@ -3024,9 +3025,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical) { - printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n", - (unsigned long) start, (unsigned long) size, - (unsigned long) ac->ac_o_ex.fe_logical); + ext4_msg(ac->ac_sb, KERN_ERR, + "start %lu, size %lu, fe_logical %lu", + (unsigned long) start, (unsigned long) size, + (unsigned long) ac->ac_o_ex.fe_logical); } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); @@ -3607,10 +3609,11 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, bit = next + 1; } if (free != pa->pa_free) { - printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n", - pa, (unsigned long) pa->pa_lstart, - (unsigned long) pa->pa_pstart, - (unsigned long) pa->pa_len); + ext4_msg(e4b->bd_sb, KERN_CRIT, + "pa %p: logic %lu, phys. 
%lu, len %lu", + pa, (unsigned long) pa->pa_lstart, + (unsigned long) pa->pa_pstart, + (unsigned long) pa->pa_len); ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", free, pa->pa_free); /* @@ -3798,7 +3801,8 @@ repeat: * use preallocation while we're discarding it */ spin_unlock(&pa->pa_lock); spin_unlock(&ei->i_prealloc_lock); - printk(KERN_ERR "uh-oh! used pa while discarding\n"); + ext4_msg(sb, KERN_ERR, + "uh-oh! used pa while discarding"); WARN_ON(1); schedule_timeout_uninterruptible(HZ); goto repeat; @@ -3875,12 +3879,13 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) return; - printk(KERN_ERR "EXT4-fs: Can't allocate:" - " Allocation context details:\n"); - printk(KERN_ERR "EXT4-fs: status %d flags %d\n", + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" + " Allocation context details:"); + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", ac->ac_status, ac->ac_flags); - printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, " - "best %lu/%lu/%lu@%lu cr %d\n", + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " + "goal %lu/%lu/%lu@%lu, " + "best %lu/%lu/%lu@%lu cr %d", (unsigned long)ac->ac_o_ex.fe_group, (unsigned long)ac->ac_o_ex.fe_start, (unsigned long)ac->ac_o_ex.fe_len, @@ -3894,9 +3899,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) (unsigned long)ac->ac_b_ex.fe_len, (unsigned long)ac->ac_b_ex.fe_logical, (int)ac->ac_criteria); - printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, - ac->ac_found); - printk(KERN_ERR "EXT4-fs: groups: \n"); + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", + ac->ac_ex_scanned, ac->ac_found); + ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); ngroups = ext4_get_groups_count(sb); for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); -- cgit v1.1 From 48e6061bf4bb25eec151b91f22fd90a5b9a4920a Mon Sep 17 00:00:00 2001 From: Yu Jian Date: Mon, 1 Aug 2011 17:41:39 -0400 Subject: ext4: use EXT4_BAD_INO for buddy cache to avoid colliding with valid inode # Signed-off-by: Yu Jian Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 70d1b3e..e41620b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2342,7 +2342,11 @@ static int ext4_mb_init_backend(struct super_block *sb) ext4_msg(sb, KERN_ERR, "can't get new inode"); goto err_freesgi; } - sbi->s_buddy_cache->i_ino = get_next_ino(); + /* To avoid potentially colliding with an valid on-disk inode number, + * use EXT4_BAD_INO for the buddy cache inode number. This inode is + * not in the inode hash, so it should never be found by iget(), but + * this will avoid confusion if it ever shows up during debugging. */ + sbi->s_buddy_cache->i_ino = EXT4_BAD_INO; EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); -- cgit v1.1 From 79a77c5ac34cc27ccbfbdf7113b41cdd93534eab Mon Sep 17 00:00:00 2001 From: Yu Jian Date: Mon, 1 Aug 2011 17:41:46 -0400 Subject: ext4: prevent memory leaks from ext4_mb_init_backend() on error path In ext4_mb_init(), if the s_locality_group allocation fails it will currently cause the allocations made in ext4_mb_init_backend() to be leaked. Moving the ext4_mb_init_backend() allocation after the s_locality_group allocation avoids that problem. 
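The underlying rule is about unwind order: do the allocation that is hardest to clean up after everything that can fail with a simple free, so an early failure never strands it. A generic sketch of the ordering with goto-style unwind (names are illustrative, not the ext4 code):

    int subsys_init(struct ctx *ctx)
    {
            int ret;

            ctx->groups = kcalloc(ctx->nr_groups, sizeof(*ctx->groups),
                                  GFP_KERNEL);
            if (!ctx->groups)
                    return -ENOMEM;

            /* The multi-step backend setup goes last: if it fails it
             * cleans up after itself, and no later step can fail and
             * leak what it allocated. */
            ret = init_backend(ctx);        /* placeholder */
            if (ret)
                    goto err_free_groups;

            return 0;

    err_free_groups:
            kfree(ctx->groups);
            return ret;
    }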
Signed-off-by: Yu Jian Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e41620b..17a5a57 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2465,12 +2465,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) i++; } while (i <= sb->s_blocksize_bits + 1); - /* init file for buddy data */ - ret = ext4_mb_init_backend(sb); - if (ret != 0) { - goto out; - } - spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_bal_lock); @@ -2507,6 +2501,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) spin_lock_init(&lg->lg_prealloc_lock); } + /* init file for buddy data */ + ret = ext4_mb_init_backend(sb); + if (ret != 0) { + goto out; + } + if (sbi->s_proc) proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); -- cgit v1.1 From 62ac41a6e443ef26b9de862c6e20c088e2b04dde Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 28 Jul 2011 12:55:14 -0700 Subject: drm/i915: don't use uninitialized EDID bpc values when picking pipe bpp The EDID parser will zero out the bpc value, and the driver needs to handle that case. In our picker, we'll just ignore 0 values as far as bpp picking goes. Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39323. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2bf5bb6..35364e6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4600,7 +4600,9 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, if (connector->encoder != encoder) continue; - if (connector->display_info.bpc < display_bpc) { + /* Don't use an invalid EDID bpc value */ + if (connector->display_info.bpc && + connector->display_info.bpc < display_bpc) { DRM_DEBUG_DRIVER("clamping display bpc (was %d) to EDID reported max of %d\n", display_bpc, connector->display_info.bpc); display_bpc = connector->display_info.bpc; } @@ -5215,7 +5217,8 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, temp |= PIPE_12BPC; break; default: - WARN(1, "intel_choose_pipe_bpp returned invalid value\n"); + WARN(1, "intel_choose_pipe_bpp returned invalid value %d\n", + pipe_bpp); temp |= PIPE_8BPC; pipe_bpp = 24; break; -- cgit v1.1 From 11bee43ebba0bfc92165c059f6e9869197ea8889 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 1 Aug 2011 15:02:20 -0700 Subject: drm/i915/dp: wait for previous AUX channel activity to clear Before initiating a new read or write on the DP AUX channel, wait for any outstanding activity to complete. This may happen during normal retry behavior. If the wait fails (i.e. after 1ms the AUX channel is still busy) dump a backtrace to make the caller easier to spot. v2: use msleep instead, and timeout after 3ms (only ever saw 1 retry with msleep in testing) v3: fix backtrace check to trigger if the 3ms wait times out Fixes https://bugs.freedesktop.org/show_bug.cgi?id=38136. 
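The added wait is a standard bounded poll: re-read the status a fixed number of times, sleeping between attempts, and only warn and bail out once the budget is exhausted. A fragment sketching the shape (read_status() and STATUS_BUSY are stand-ins, not the i915 register interface; msleep() and WARN() are the real kernel helpers):

    u32 status;
    int try;

    for (try = 0; try < 3; try++) {
            status = read_status(dev);      /* stand-in accessor */
            if (!(status & STATUS_BUSY))    /* stand-in busy bit */
                    break;
            msleep(1);                      /* ~3ms worst case in total */
    }
    if (try == 3) {
            WARN(1, "still busy, status 0x%08x\n", status);
            return -EBUSY;
    }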
Signed-off-by: Jesse Barnes Reviewed-by: Keith Packard Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_dp.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4493641..ba72fbc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -315,9 +315,17 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, else precharge = 5; - if (I915_READ(ch_ctl) & DP_AUX_CH_CTL_SEND_BUSY) { - DRM_ERROR("dp_aux_ch not started status 0x%08x\n", - I915_READ(ch_ctl)); + /* Try to wait for any previous AUX channel activity */ + for (try = 0; try < 3; try++) { + status = I915_READ(ch_ctl); + if ((status & DP_AUX_CH_CTL_SEND_BUSY) == 0) + break; + msleep(1); + } + + if (try == 3) { + WARN(1, "dp_aux_ch not started status 0x%08x\n", + I915_READ(ch_ctl)); return -EBUSY; } -- cgit v1.1 From a08af810cdc29d2ca930e8a869d3d01744c392d8 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Mon, 1 Aug 2011 17:56:14 -0700 Subject: atm: br2864: sent packets truncated in VC routed mode Reported-by: Pascal Hambourg Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/br2684.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 2252c20..52cfd0c 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, if (brdev->payload == p_bridged) { skb_push(skb, 2); memset(skb->data, 0, 2); - } else { /* p_routed */ - skb_pull(skb, ETH_HLEN); } } skb_debug(skb); -- cgit v1.1 From 87a0874cf19f1bc9bd25bd7d053a0ea25ccf8373 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 1 Aug 2011 11:10:26 +0000 Subject: doc: Update the MAINTAINERS info for Paul Moore My @hp.com will no longer be valid starting August 5, 2011 so an update is necessary. My new email address is employer independent so we don't have to worry about doing this again any time soon. Signed-off-by: Paul Moore Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5d4ceba..ca87f47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4395,10 +4395,10 @@ F: net/*/netfilter/ F: net/netfilter/ NETLABEL -M: Paul Moore +M: Paul Moore W: http://netlabel.sf.net L: netdev@vger.kernel.org -S: Supported +S: Maintained F: Documentation/netlabel/ F: include/net/netlabel.h F: net/netlabel/ @@ -4455,7 +4455,7 @@ F: include/net/ip* F: arch/x86/net/* NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK) -M: Paul Moore +M: Paul Moore L: netdev@vger.kernel.org S: Maintained -- cgit v1.1 From 82c21bfab41a77bc01affe21bea9727d776774a7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 1 Aug 2011 11:10:33 +0000 Subject: doc: Update the email address for Paul Moore in various source files My @hp.com will no longer be valid starting August 5, 2011 so an update is necessary. My new email address is employer independent so we don't have to worry about doing this again any time soon. Signed-off-by: Paul Moore Signed-off-by: Paul Moore Signed-off-by: David S. 
Miller --- include/net/cipso_ipv4.h | 2 +- include/net/netlabel.h | 2 +- net/netlabel/Makefile | 2 -- net/netlabel/netlabel_addrlist.c | 2 +- net/netlabel/netlabel_addrlist.h | 2 +- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_cipso_v4.h | 2 +- net/netlabel/netlabel_domainhash.c | 2 +- net/netlabel/netlabel_domainhash.h | 2 +- net/netlabel/netlabel_kapi.c | 2 +- net/netlabel/netlabel_mgmt.c | 2 +- net/netlabel/netlabel_mgmt.h | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlabel/netlabel_unlabeled.h | 2 +- net/netlabel/netlabel_user.c | 2 +- net/netlabel/netlabel_user.h | 2 +- security/selinux/hooks.c | 2 +- security/selinux/include/netif.h | 2 +- security/selinux/include/netlabel.h | 2 +- security/selinux/include/netnode.h | 2 +- security/selinux/include/netport.h | 2 +- security/selinux/netif.c | 2 +- security/selinux/netlabel.c | 2 +- security/selinux/netnode.c | 2 +- security/selinux/netport.c | 2 +- security/selinux/selinuxfs.c | 2 +- security/selinux/ss/ebitmap.c | 2 +- security/selinux/ss/mls.c | 2 +- security/selinux/ss/mls.h | 2 +- security/selinux/ss/policydb.c | 2 +- security/selinux/ss/services.c | 2 +- security/smack/smack_lsm.c | 2 +- 32 files changed, 31 insertions(+), 33 deletions(-) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 3b93874..9808877 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -8,7 +8,7 @@ * have chosen to adopt the protocol and over the years it has become a * de-facto standard for labeled networking. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f21a16e..f674409 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -4,7 +4,7 @@ * The NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index ea750e9..d2732fc 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -1,8 +1,6 @@ # # Makefile for the NetLabel subsystem. # -# Feb 9, 2006, Paul Moore -# # base objects obj-y := netlabel_user.o netlabel_kapi.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index c051913..96b749d 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 2b9644e..fdbc1d2 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index dd53a36..6bf8783 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. 
* - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index af7f335..d24d774 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 2aa975e5..7d8083c 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 0261dda..bfcc0f7 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b528dd9..58107d0 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -5,7 +5,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index dff8a08..bfa5558 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 8db37f4..5a9f31c 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index f1ecf84..e6e8236 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 0bc8dc3..700af49 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index a3fd75a..9fae63f 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. 
* - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index f4fc4c9..8196978 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a38316b..266a229 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -14,7 +14,7 @@ * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P. - * Paul Moore + * Paul Moore * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd. * Yuichi Nakamura * diff --git a/security/selinux/include/netif.h b/security/selinux/include/netif.h index ce23edd..43d5072 100644 --- a/security/selinux/include/netif.h +++ b/security/selinux/include/netif.h @@ -8,7 +8,7 @@ * * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. - * Paul Moore, + * Paul Moore * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index cf2f628..8c59b8f 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -1,7 +1,7 @@ /* * SELinux interface to the NetLabel subsystem * - * Author : Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/include/netnode.h b/security/selinux/include/netnode.h index 1b94450..df7a5ed 100644 --- a/security/selinux/include/netnode.h +++ b/security/selinux/include/netnode.h @@ -6,7 +6,7 @@ * needed to reduce the lookup overhead since most of these queries happen on * a per-packet basis. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/include/netport.h b/security/selinux/include/netport.h index 8991752..4d965b8 100644 --- a/security/selinux/include/netport.h +++ b/security/selinux/include/netport.h @@ -5,7 +5,7 @@ * mapping is maintained as part of the normal policy but a fast cache is * needed to reduce the lookup overhead. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/netif.c b/security/selinux/netif.c index 58cc481..326f22c 100644 --- a/security/selinux/netif.c +++ b/security/selinux/netif.c @@ -8,7 +8,7 @@ * * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. - * Paul Moore + * Paul Moore * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index c3bf3ed..da4b8b2 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -4,7 +4,7 @@ * This file provides the necessary glue to tie NetLabel into the SELinux * subsystem. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index 8b691a8..3bf46ab 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -6,7 +6,7 @@ * needed to reduce the lookup overhead since most of these queries happen on * a per-packet basis. 
* - * Author: Paul Moore + * Author: Paul Moore * * This code is heavily based on the "netif" concept originally developed by * James Morris diff --git a/security/selinux/netport.c b/security/selinux/netport.c index ae76e29..0b62bd11 100644 --- a/security/selinux/netport.c +++ b/security/selinux/netport.c @@ -5,7 +5,7 @@ * mapping is maintained as part of the normal policy but a fast cache is * needed to reduce the lookup overhead. * - * Author: Paul Moore + * Author: Paul Moore * * This code is heavily based on the "netif" concept originally developed by * James Morris diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index de7900e..55d92cb 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2,7 +2,7 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support for the policy capability bitmap * diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index d42951f..30f119b 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -4,7 +4,7 @@ * Author : Stephen Smalley, */ /* - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support to import/export the NetLabel category bitmap * diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index e961742..fbf9c58 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -11,7 +11,7 @@ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ /* - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support to import/export the MLS label from NetLabel * diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 037bf9d..e4369e3 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -11,7 +11,7 @@ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ /* - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support to import/export the MLS label from NetLabel * diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index d246aca..2381d0d 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -13,7 +13,7 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support for the policy capability bitmap * diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 973e00e..f6917bc 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -13,7 +13,7 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support for NetLabel * Added support for the policy capability bitmap diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index f375eb2..b9c5e14 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -9,7 +9,7 @@ * * Copyright (C) 2007 Casey Schaufler * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. - * Paul Moore + * Paul Moore * Copyright (C) 2010 Nokia Corporation * * This program is free software; you can redistribute it and/or modify -- cgit v1.1 From 8fb4e1399137dbbf7f1071bc723b5eff8ad46d1a Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 1 Aug 2011 17:59:44 -0700 Subject: Documentation/bonding.txt: Update to 3.x version numbers Signed-off-by: Jesper Juhl Signed-off-by: David S. 
Miller --- Documentation/networking/bonding.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 675612f..5dd960d 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -599,7 +599,7 @@ num_unsol_na affect only the active-backup mode. These options were added for bonding versions 3.3.0 and 3.4.0 respectively. - From Linux 2.6.40 and bonding version 3.7.1, these notifications + From Linux 3.0 and bonding version 3.7.1, these notifications are generated by the ipv4 and ipv6 code and the numbers of repetitions cannot be set independently. -- cgit v1.1 From 22019b17821ab3543090827491e465c5816cbcd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 18:37:31 +0000 Subject: net: add kerneldoc to skb_copy_bits() Since skb_copy_bits() is called from assembly, add a fat comment to make clear we should think twice before changing its prototype. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2beda82..27002df 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1369,8 +1369,21 @@ pull_pages: } EXPORT_SYMBOL(__pskb_pull_tail); -/* Copy some data bits from skb to kernel buffer. */ - +/** + * skb_copy_bits - copy bits from skb to kernel buffer + * @skb: source skb + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy the specified number of bytes from the source skb to the + * destination buffer. + * + * CAUTION ! : + * If its prototype is ever changed, + * check arch/{*}/net/{*}.S files, + * since it is called from BPF assembly code. + */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); -- cgit v1.1 From c027a474a68065391c8773f6e83ed5412657e369 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 30 Jul 2011 16:35:02 +0200 Subject: oom: task->mm == NULL doesn't mean the memory was freed exit_mm() sets ->mm == NULL then it does mmput()->exit_mmap() which frees the memory. However select_bad_process() checks ->mm != NULL before TIF_MEMDIE, so it continues to kill other tasks even if we have the oom-killed task freeing its memory. Change select_bad_process() to check ->mm after TIF_MEMDIE, but skip the tasks which have already passed exit_notify() to ensure a zombie with TIF_MEMDIE set can't block oom-killer. Alternatively we could probably clear TIF_MEMDIE after exit_mmap(). Signed-off-by: Oleg Nesterov Reviewed-by: KOSAKI Motohiro Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index eafff89..626303b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, do_each_thread(g, p) { unsigned int points; - if (!p->mm) + if (p->exit_state) continue; if (oom_unkillable_task(p, mem, nodemask)) continue; @@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) return ERR_PTR(-1UL); + if (!p->mm) + continue; if (p->flags & PF_EXITING) { /* -- cgit v1.1 From da5aa861bea09197e6ae4d7c46618616064891e4 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 2 Aug 2011 02:17:48 +0200 Subject: fix block device fallout from ->fsync() changes blkdev_fsync() needs to write pages in pagecache... Signed-off-by: Rafael J. Wysocki Signed-off-by: Al Viro --- fs/block_dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index f286805..ff77262 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); int error; + + error = filemap_write_and_wait_range(filp->f_mapping, start, end); + if (error) + return error; /* * There is no need to serialise calls to blkdev_issue_flush with -- cgit v1.1 From 8997621bb2daaf19a4e9d82f118224159d8054e2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 2 Aug 2011 00:45:48 -0400 Subject: ACPI print OSI(Linux) warning only once This message gets repeated on some machines: https://bugzilla.kernel.org/show_bug.cgi?id=29292 Signed-off-by: Len Brown --- drivers/acpi/osl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 52ca964..3765980 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported) { if (!strcmp("Linux", interface)) { - printk(KERN_NOTICE FW_BUG PREFIX + printk_once(KERN_NOTICE FW_BUG PREFIX "BIOS _OSI(Linux) query %s%s\n", osi_linux.enable ? "honored" : "ignored", osi_linux.cmdline ? " via cmdline" : -- cgit v1.1 From 08f984c7f7f569b66acbbd163676b4bc7f64addc Mon Sep 17 00:00:00 2001 From: Eliot Blennerhassett Date: Tue, 2 Aug 2011 09:44:24 +1200 Subject: ALSA: asihpi - Clarify adapter index validity check Avoids assigning possibly invalid address to pa, even if it is never dereferenced. Correct error response to reflect request object/function ids. Signed-off-by: Eliot Blennerhassett Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpioctl.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 9683f84..a32502e 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -177,16 +177,21 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else { u16 __user *ptr = NULL; u32 size = 0; - + u32 adapter_present; /* -1=no data 0=read from user mem, 1=write to user mem */ int wrflag = -1; - u32 adapter = hm->h.adapter_index; - struct hpi_adapter *pa = &adapters[adapter]; + struct hpi_adapter *pa; + + if (hm->h.adapter_index < HPI_MAX_ADAPTERS) { + pa = &adapters[hm->h.adapter_index]; + adapter_present = pa->type; + } else { + adapter_present = 0; + } - if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) { - hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER, - HPI_ADAPTER_OPEN, - HPI_ERROR_BAD_ADAPTER_NUMBER); + if (!adapter_present) { + hpi_init_response(&hr->r0, hm->h.object, + hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER); uncopied_bytes = copy_to_user(puhr, hr, sizeof(hr->h)); -- cgit v1.1 From bea1906620ce72b63f83735c4cc2642b25ec54ae Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 20 Jul 2011 15:03:39 +0200 Subject: watchdog: shwdt: fix usage of mod_timer Fix the usage of mod_timer() and make the driver usable. mod_timer() must be called with an absolute timeout in jiffies. The old implementation used a relative timeout thus the hardware watchdog was never triggered. 
Signed-off-by: David Engraf Signed-off-by: Paul Mundt Signed-off-by: Wim Van sebroeck Signed-off-by: Andrew Morton Cc: stable --- drivers/watchdog/shwdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index db84f23..a267dc0 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -64,7 +64,7 @@ * misses its deadline, the kernel timer will allow the WDT to overflow. */ static int clock_division_ratio = WTCSR_CKS_4096; -#define next_ping_period(cks) msecs_to_jiffies(cks - 4) +#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4)) static const struct watchdog_info sh_wdt_info; static struct platform_device *sh_wdt_dev; -- cgit v1.1 From 6b01d30eef64456ad9e261d2173266a3244da8e1 Mon Sep 17 00:00:00 2001 From: Mart Gerrits Date: Sat, 30 Jul 2011 16:59:12 +0200 Subject: watchdog: Fix POST failure on ASUS P5N32-E SLI and similar boards At present the module does not unset the NO_REBOOT bit upon shutdown, this causes the BIOS to fail the POST once and reset. During the next boot it displays the following error message: ***** Warning: System BOOT Fail ***** Your system last boot fail or POST interrupted. Please enter setup to load default and reboot again. Press F1 to continue, DEL to enter SETUP With this patch the NO_REBOOT flag will be unset on shutdown and thus stop this failure from occurring. Tested on 'ASUS P5N32-E SLI with BIOS revision 1801' and 'ASUS P5N32-E SLI PLUS with BIOS revision 1502'. Signed-off-by: Mart Gerrits Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/nv_tco.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index afa78a5..809f41c 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -458,7 +458,15 @@ static int __devexit nv_tco_remove(struct platform_device *dev) static void nv_tco_shutdown(struct platform_device *dev) { + u32 val; + tco_timer_stop(); + + /* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not + * unset during shutdown. */ + pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val); + val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT; + pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val); } static struct platform_driver nv_tco_driver = { -- cgit v1.1 From 7abdd34dbf58bf271db7ee6551f2ff72204a83fd Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Sat, 30 Jul 2011 10:18:48 -0400 Subject: watchdog: Cleanup WATCHDOG_CORE help text The newly added WATCHDOG_CORE option is a bool, but the help text suggests it can be built as a module. Fix it up. Signed-off-by: Josh Boyer Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index f441726..86b0735 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -36,9 +36,6 @@ config WATCHDOG_CORE and gives them the /dev/watchdog interface (and later also the sysfs interface). - To compile this driver as a module, choose M here: the module will - be called watchdog. 
- config WATCHDOG_NOWAYOUT bool "Disable watchdog shutdown on close" help -- cgit v1.1 From 87b7ba3d24a25cf18aece447de27d7804fa9668c Mon Sep 17 00:00:00 2001 From: Vladislav Zolotarov Date: Tue, 2 Aug 2011 01:35:43 -0700 Subject: bnx2x: Prevent restarting Tx during bnx2x_nic_unload Tx queues were stopped before bp->state was changed to a value different from BNX2X_STATE_OPEN, which allowed the bnx2x_tx_int() called from the NAPI context to re-enable it. This then allowed the netdev->ndo_start_xmit() to be called in the middle of the function reset and rings freeing. This patch changes bp->state to a value different from BNX2X_STATE_OPEN BEFORE disabling the Tx queues in order to restore the broken protection against the above race in the bnx2x_tx_int(). Signed-off-by: Vladislav Zolotarov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_cmn.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c index 5b0dba6..d724a18 100644 --- a/drivers/net/bnx2x/bnx2x_cmn.c +++ b/drivers/net/bnx2x/bnx2x_cmn.c @@ -1989,14 +1989,20 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) return -EINVAL; } + /* + * It's important to set the bp->state to the value different from + * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int() + * may restart the Tx from the NAPI context (see bnx2x_tx_int()). + */ + bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT; + smp_mb(); + /* Stop Tx */ bnx2x_tx_disable(bp); #ifdef BCM_CNIC bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); #endif - bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT; - smp_mb(); bp->rx_mode = BNX2X_RX_MODE_NONE; -- cgit v1.1 From 762a80d9fc9f690a3a35983f3b4619a220650808 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:00 +0100 Subject: dm snapshot: flush disk cache when merging This patch makes dm-snapshot flush disk cache when writing metadata for merging snapshot. Without cache flushing the disk may reorder metadata write and other data writes and there is a possibility of data corruption in case of power fault. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap-persistent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 135c2f1..e4ecadf 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -753,7 +753,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, WRITE); + r = area_io(ps, WRITE_FLUSH_FUA); if (r < 0) return r; -- cgit v1.1 From 286f367dad40beb3234a18c17391d03ba939a7f3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:00 +0100 Subject: dm mpath: fix potential NULL pointer in feature arg processing Avoid dereferencing a NULL pointer if the number of feature arguments supplied is fewer than indicated. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon Cc: stable@kernel.org --- drivers/md/dm-mpath.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c354701..adf851a 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -807,6 +807,11 @@ static int parse_features(struct arg_set *as, struct multipath *m) if (!argc) return 0; + if (argc > as->argc) { + ti->error = "not enough arguments for features"; + return -EINVAL; + } + do { param_name = shift(as); argc--; -- cgit v1.1 From bb91bc7bacb906c9f3a9b22744c53fa7564b51ba Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm io: flush cpu cache with vmapped io For normal kernel pages, CPU cache is synchronized by the dma layer. However, this is not done for pages allocated with vmalloc. If we do I/O to/from vmallocated pages, we must synchronize CPU cache explicitly. Prior to doing I/O on vmallocated page we must call flush_kernel_vmap_range to flush dirty cache on the virtual address. After finished read we must call invalidate_kernel_vmap_range to invalidate cache on the virtual address, so that accesses to the virtual address return newly read data and not stale data from CPU cache. This patch fixes metadata corruption on dm-snapshots on PA-RISC and possibly other architectures with caches indexed by virtual address. Cc: stable Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-io.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 2067288..ad2eba4 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -38,6 +38,8 @@ struct io { struct dm_io_client *client; io_notify_fn callback; void *context; + void *vma_invalidate_address; + unsigned long vma_invalidate_size; } __attribute__((aligned(DM_IO_MAX_REGIONS))); static struct kmem_cache *_dm_io_cache; @@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error) set_bit(region, &io->error_bits); if (atomic_dec_and_test(&io->count)) { + if (io->vma_invalidate_size) + invalidate_kernel_vmap_range(io->vma_invalidate_address, + io->vma_invalidate_size); + if (io->sleeper) wake_up_process(io->sleeper); @@ -159,6 +165,9 @@ struct dpages { unsigned context_u; void *context_ptr; + + void *vma_invalidate_address; + unsigned long vma_invalidate_size; }; /* @@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->sleeper = current; io->client = client; + io->vma_invalidate_address = dp->vma_invalidate_address; + io->vma_invalidate_size = dp->vma_invalidate_size; + dispatch_io(rw, num_regions, where, dp, io, 1); while (1) { @@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->callback = fn; io->context = context; + io->vma_invalidate_address = dp->vma_invalidate_address; + io->vma_invalidate_size = dp->vma_invalidate_size; + dispatch_io(rw, num_regions, where, dp, io, 0); return 0; } -static int dp_init(struct dm_io_request *io_req, struct dpages *dp) +static int dp_init(struct dm_io_request *io_req, struct dpages *dp, + unsigned long size) { /* Set up dpages based on memory type */ + + dp->vma_invalidate_address = NULL; + dp->vma_invalidate_size = 0; + switch (io_req->mem.type) { case DM_IO_PAGE_LIST: list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); @@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) 
break; case DM_IO_VMA: + flush_kernel_vmap_range(io_req->mem.ptr.vma, size); + if ((io_req->bi_rw & RW_MASK) == READ) { + dp->vma_invalidate_address = io_req->mem.ptr.vma; + dp->vma_invalidate_size = size; + } vm_dp_init(dp, io_req->mem.ptr.vma); break; @@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, int r; struct dpages dp; - r = dp_init(io_req, &dp); + r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); if (r) return r; -- cgit v1.1 From d15b774c2920d55e3d58275c97fbe3adc3afde38 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm: fix idr leak on module removal Destroy _minor_idr when unloading the core dm module. (Found by kmemleak.) Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0cf68b4..41abc6d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -37,6 +37,8 @@ static const char *_name = DM_NAME; static unsigned int major = 0; static unsigned int _major = 0; +static DEFINE_IDR(_minor_idr); + static DEFINE_SPINLOCK(_minor_lock); /* * For bio-based dm. @@ -313,6 +315,12 @@ static void __exit dm_exit(void) while (i--) _exits[i](); + + /* + * Should be empty by this point. + */ + idr_remove_all(&_minor_idr); + idr_destroy(&_minor_idr); } /* @@ -1705,8 +1713,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ -static DEFINE_IDR(_minor_idr); - static void free_minor(int minor) { spin_lock(&_minor_lock); -- cgit v1.1 From 283a8328ca5b987e547848de8ff0e28edcfb9e08 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm: suppress endian warnings Suppress sparse warnings about cpu_to_le32() by using __le32 types for on-disk data etc. 
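As background, a minimal sketch of the pattern (the structures here are hypothetical, not the ones touched by this patch): on-disk fields are declared with the __le32/__le64 types so sparse can confirm that every access goes through an explicit endian conversion.

	#include <linux/types.h>
	#include <asm/byteorder.h>

	struct example_core_header {		/* CPU-endian, in-memory form */
		uint32_t magic;
		uint64_t nr_regions;
	};

	struct example_disk_header {		/* little-endian, on-disk form */
		__le32 magic;
		__le64 nr_regions;
	} __packed;

	static void example_header_to_disk(const struct example_core_header *core,
					   struct example_disk_header *disk)
	{
		/* Assigning a plain u32 here would now trigger a sparse warning. */
		disk->magic = cpu_to_le32(core->magic);
		disk->nr_regions = cpu_to_le64(core->nr_regions);
	}
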
Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 6 ++-- drivers/md/dm-log.c | 20 ++++++++---- drivers/md/dm-snap-persistent.c | 71 ++++++++++++++++++++++------------------- 3 files changed, 54 insertions(+), 43 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bae6c4e..f540676 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -239,7 +239,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); + *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); return 0; } @@ -248,7 +248,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); return 0; } @@ -415,7 +415,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; memset(iv, 0, cc->iv_size); - *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); crypto_cipher_encrypt_one(essiv_tfm, iv, iv); return 0; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 948e3f4..5f06fb6 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy); #define MIRROR_DISK_VERSION 2 #define LOG_OFFSET 2 -struct log_header { - uint32_t magic; +struct log_header_disk { + __le32 magic; /* * Simple, incrementing version. no backward * compatibility. */ + __le32 version; + __le64 nr_regions; +} __packed; + +struct log_header_core { + uint32_t magic; uint32_t version; - sector_t nr_regions; + uint64_t nr_regions; }; struct log_c { @@ -239,10 +245,10 @@ struct log_c { int log_dev_failed; int log_dev_flush_failed; struct dm_dev *log_dev; - struct log_header header; + struct log_header_core header; struct dm_io_region header_location; - struct log_header *disk_header; + struct log_header_disk *disk_header; }; /* @@ -271,14 +277,14 @@ static inline void log_clear_bit(struct log_c *l, /*---------------------------------------------------------------- * Header IO *--------------------------------------------------------------*/ -static void header_to_disk(struct log_header *core, struct log_header *disk) +static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk) { disk->magic = cpu_to_le32(core->magic); disk->version = cpu_to_le32(core->version); disk->nr_regions = cpu_to_le64(core->nr_regions); } -static void header_from_disk(struct log_header *core, struct log_header *disk) +static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk) { core->magic = le32_to_cpu(disk->magic); core->version = le32_to_cpu(disk->version); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index e4ecadf..39becbe 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -58,25 +58,30 @@ #define NUM_SNAPSHOT_HDR_CHUNKS 1 struct disk_header { - uint32_t magic; + __le32 magic; /* * Is this snapshot valid. There is no way of recovering * an invalid snapshot. */ - uint32_t valid; + __le32 valid; /* * Simple, incrementing version. no backward * compatibility. 
*/ - uint32_t version; + __le32 version; /* In sectors */ - uint32_t chunk_size; -}; + __le32 chunk_size; +} __packed; struct disk_exception { + __le64 old_chunk; + __le64 new_chunk; +} __packed; + +struct core_exception { uint64_t old_chunk; uint64_t new_chunk; }; @@ -396,32 +401,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) } static void read_exception(struct pstore *ps, - uint32_t index, struct disk_exception *result) + uint32_t index, struct core_exception *result) { - struct disk_exception *e = get_exception(ps, index); + struct disk_exception *de = get_exception(ps, index); /* copy it */ - result->old_chunk = le64_to_cpu(e->old_chunk); - result->new_chunk = le64_to_cpu(e->new_chunk); + result->old_chunk = le64_to_cpu(de->old_chunk); + result->new_chunk = le64_to_cpu(de->new_chunk); } static void write_exception(struct pstore *ps, - uint32_t index, struct disk_exception *de) + uint32_t index, struct core_exception *e) { - struct disk_exception *e = get_exception(ps, index); + struct disk_exception *de = get_exception(ps, index); /* copy it */ - e->old_chunk = cpu_to_le64(de->old_chunk); - e->new_chunk = cpu_to_le64(de->new_chunk); + de->old_chunk = cpu_to_le64(e->old_chunk); + de->new_chunk = cpu_to_le64(e->new_chunk); } static void clear_exception(struct pstore *ps, uint32_t index) { - struct disk_exception *e = get_exception(ps, index); + struct disk_exception *de = get_exception(ps, index); /* clear it */ - e->old_chunk = 0; - e->new_chunk = 0; + de->old_chunk = 0; + de->new_chunk = 0; } /* @@ -437,13 +442,13 @@ static int insert_exceptions(struct pstore *ps, { int r; unsigned int i; - struct disk_exception de; + struct core_exception e; /* presume the area is full */ *full = 1; for (i = 0; i < ps->exceptions_per_area; i++) { - read_exception(ps, i, &de); + read_exception(ps, i, &e); /* * If the new_chunk is pointing at the start of @@ -451,7 +456,7 @@ static int insert_exceptions(struct pstore *ps, * is we know that we've hit the end of the * exceptions. Therefore the area is not full. */ - if (de.new_chunk == 0LL) { + if (e.new_chunk == 0LL) { ps->current_committed = i; *full = 0; break; @@ -460,13 +465,13 @@ static int insert_exceptions(struct pstore *ps, /* * Keep track of the start of the free chunks. */ - if (ps->next_free <= de.new_chunk) - ps->next_free = de.new_chunk + 1; + if (ps->next_free <= e.new_chunk) + ps->next_free = e.new_chunk + 1; /* * Otherwise we add the exception to the snapshot. */ - r = callback(callback_context, de.old_chunk, de.new_chunk); + r = callback(callback_context, e.old_chunk, e.new_chunk); if (r) return r; } @@ -641,12 +646,12 @@ static void persistent_commit_exception(struct dm_exception_store *store, { unsigned int i; struct pstore *ps = get_info(store); - struct disk_exception de; + struct core_exception ce; struct commit_callback *cb; - de.old_chunk = e->old_chunk; - de.new_chunk = e->new_chunk; - write_exception(ps, ps->current_committed++, &de); + ce.old_chunk = e->old_chunk; + ce.new_chunk = e->new_chunk; + write_exception(ps, ps->current_committed++, &ce); /* * Add the callback to the back of the array. 
This code @@ -701,7 +706,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, chunk_t *last_new_chunk) { struct pstore *ps = get_info(store); - struct disk_exception de; + struct core_exception ce; int nr_consecutive; int r; @@ -722,9 +727,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, ps->current_committed = ps->exceptions_per_area; } - read_exception(ps, ps->current_committed - 1, &de); - *last_old_chunk = de.old_chunk; - *last_new_chunk = de.new_chunk; + read_exception(ps, ps->current_committed - 1, &ce); + *last_old_chunk = ce.old_chunk; + *last_new_chunk = ce.new_chunk; /* * Find number of consecutive chunks within the current area, @@ -733,9 +738,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, for (nr_consecutive = 1; nr_consecutive < ps->current_committed; nr_consecutive++) { read_exception(ps, ps->current_committed - 1 - nr_consecutive, - &de); - if (de.old_chunk != *last_old_chunk - nr_consecutive || - de.new_chunk != *last_new_chunk - nr_consecutive) + &ce); + if (ce.old_chunk != *last_old_chunk - nr_consecutive || + ce.new_chunk != *last_new_chunk - nr_consecutive) break; } -- cgit v1.1 From 936688d7eb0f39be96c5791be1a04994cc8d6aa0 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm table: fix discard support Remove 'discards_supported' from the dm_table structure. The same information can be easily discovered from the table's target(s) in dm_table_supports_discards(). Before this fix dm_table_supports_discards() would skip checking the individual targets' 'discards_supported' flag if any one target in the table didn't set num_discard_requests > 0. Now the per-target 'discards_supported' flag is effective at insuring the final DM device advertises discard support. But, to be clear, targets that don't support discards (!num_discard_requests) will not receive discard requests. Also DMWARN if a target sets 'discards_supported' override but forgets to set 'num_discard_requests'. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 15 +++++++-------- drivers/md/dm.c | 3 ++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index bfe9c23..3909fa2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -54,7 +54,6 @@ struct dm_table { sector_t *highs; struct dm_target *targets; - unsigned discards_supported:1; unsigned integrity_supported:1; /* @@ -209,7 +208,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); atomic_set(&t->holders, 0); - t->discards_supported = 1; if (!num_targets) num_targets = KEYS_PER_NODE; @@ -791,8 +789,9 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests) - t->discards_supported = 0; + if (!tgt->num_discard_requests && tgt->discards_supported) + DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", + dm_device_name(t->md), type); return 0; @@ -1359,19 +1358,19 @@ bool dm_table_supports_discards(struct dm_table *t) struct dm_target *ti; unsigned i = 0; - if (!t->discards_supported) - return 0; - /* * Unless any target used by the table set discards_supported, * require at least one underlying device to support discards. 
* t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets - * supporting discard must provide. + * supporting discard selectively must provide. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); + if (!ti->num_discard_requests) + continue; + if (ti->discards_supported) return 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 41abc6d..aeb0fa1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1179,7 +1179,8 @@ static int __clone_and_map_discard(struct clone_info *ci) /* * Even though the device advertised discard support, - * reconfiguration might have changed that since the + * that does not mean every target supports it, and + * reconfiguration might also have changed that since the * check was performed. */ if (!ti->num_discard_requests) -- cgit v1.1 From c8f543e0786785d8c7118fc2878e42bc0193a88d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 2 Aug 2011 12:32:01 +0100 Subject: dm log: clean up bit little endian bitops Using __test_and_{set,clear}_bit_le() with ignoring its return value can be replaced with __{set,clear}_bit_le(). This also removes unnecessary casts. Signed-off-by: Akinobu Mita Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 5f06fb6..306ce12 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -257,20 +257,20 @@ struct log_c { */ static inline int log_test_bit(uint32_t *bs, unsigned bit) { - return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0; + return test_bit_le(bit, bs) ? 1 : 0; } static inline void log_set_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __test_and_set_bit_le(bit, (unsigned long *) bs); + __set_bit_le(bit, bs); l->touched_cleaned = 1; } static inline void log_clear_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __test_and_clear_bit_le(bit, (unsigned long *) bs); + __clear_bit_le(bit, bs); l->touched_dirtied = 1; } @@ -745,8 +745,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) return 0; do { - *region = find_next_zero_bit_le( - (unsigned long *) lc->sync_bits, + *region = find_next_zero_bit_le(lc->sync_bits, lc->region_count, lc->sync_search); lc->sync_search = *region + 1; -- cgit v1.1 From 6c9b27ab08aaf46426515b8b858ad9c60731c7a1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 2 Aug 2011 12:32:02 +0100 Subject: dm log: userspace use list_move Replace list_del() followed by list_add() with list_move(). Signed-off-by: Kirill A. Shutemov Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log-userspace-base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index aa2e0c3..1021c89 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list) group[count] = fe->region; count++; - list_del(&fe->list); - list_add(&fe->list, &tmp_list); + list_move(&fe->list, &tmp_list); type = fe->type; if (count >= MAX_FLUSH_GROUP_COUNT) -- cgit v1.1 From e29e65aacbd9e628378084905cbcf62a9fa4a8cc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 2 Aug 2011 12:32:02 +0100 Subject: dm: use vzalloc Use vzalloc() instead of vmalloc()+memset(). 
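A minimal before/after sketch (the helper name is hypothetical):

	#include <linux/vmalloc.h>

	static void *alloc_zeroed_bitset(unsigned long bitset_size)
	{
		/* Before: allocate, then zero by hand:
		 *	void *p = vmalloc(bitset_size);
		 *	if (p)
		 *		memset(p, 0, bitset_size);
		 *	return p;
		 */

		/* After: vzalloc() returns already-zeroed memory. */
		return vzalloc(bitset_size);
	}
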
Signed-off-by: Joe Perches Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log.c | 3 +-- drivers/md/dm-snap-persistent.c | 3 +-- drivers/md/dm-table.c | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 306ce12..3b52bb7 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -492,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); lc->sync_count = (sync == NOSYNC) ? region_count : 0; - lc->recovering_bits = vmalloc(bitset_size); + lc->recovering_bits = vzalloc(bitset_size); if (!lc->recovering_bits) { DMWARN("couldn't allocate sync bitset"); vfree(lc->sync_bits); @@ -504,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, kfree(lc); return -ENOMEM; } - memset(lc->recovering_bits, 0, bitset_size); lc->sync_search = 0; log->context = lc; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 39becbe..1a0acb8 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -174,10 +174,9 @@ static int alloc_area(struct pstore *ps) if (!ps->area) goto err_area; - ps->zero_area = vmalloc(len); + ps->zero_area = vzalloc(len); if (!ps->zero_area) goto err_zero_area; - memset(ps->zero_area, 0, len); ps->header_area = vmalloc(len); if (!ps->header_area) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3909fa2..8dc6755 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -153,9 +153,7 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) return NULL; size = nmemb * elem_size; - addr = vmalloc(size); - if (addr) - memset(addr, 0, size); + addr = vzalloc(size); return addr; } -- cgit v1.1 From 4622afb3f50e03ce6da42002e7ed3675dfafc187 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:02 +0100 Subject: dm kcopyd: remove offset field from job structure The offset field in struct kcopyd_job is always zero so remove it. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 320401d..e7926fa 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -224,7 +224,6 @@ struct kcopyd_job { unsigned int num_dests; struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; - sector_t offset; unsigned int nr_pages; struct page_list *pages; @@ -380,7 +379,7 @@ static int run_io_job(struct kcopyd_job *job) .bi_rw = job->rw, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, - .mem.offset = job->offset, + .mem.offset = 0, .notify.fn = complete_io, .notify.context = job, .client = job->kc->io_client, @@ -398,8 +397,7 @@ static int run_pages_job(struct kcopyd_job *job) { int r; - job->nr_pages = dm_div_up(job->dests[0].count + job->offset, - PAGE_SIZE >> 9); + job->nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); if (!r) { /* this job is ready for io */ @@ -602,7 +600,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); - job->offset = 0; job->nr_pages = 0; job->pages = NULL; -- cgit v1.1 From 5bf45a3dcdba9ff43959f7b5b44523fab254c19c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:02 +0100 Subject: dm kcopyd: remove nr_pages field from job structure The nr_pages field in struct kcopyd_job is only used temporarily in run_pages_job() to count the number of required pages. We can use a local variable instead. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index e7926fa..98725e1 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -224,7 +224,6 @@ struct kcopyd_job { unsigned int num_dests; struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; - unsigned int nr_pages; struct page_list *pages; /* @@ -396,9 +395,9 @@ static int run_io_job(struct kcopyd_job *job) static int run_pages_job(struct kcopyd_job *job) { int r; + unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); - job->nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); - r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); + r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); if (!r) { /* this job is ready for io */ push(&job->kc->io_jobs, job); @@ -600,7 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); - job->nr_pages = 0; job->pages = NULL; job->fn = fn; -- cgit v1.1 From aa3f0794d279cd154ac100f92ff3904ea1f56de2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:03 +0100 Subject: dm snapshot: remove unused definitions Remove a couple of unused #defines. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9ecff5f..9d6daa0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -30,16 +30,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; ((ti)->type->name == dm_snapshot_merge_target_name) /* - * The percentage increment we will wake up users at - */ -#define WAKE_UP_PERCENT 5 - -/* - * kcopyd priority of snapshot operations - */ -#define SNAPSHOT_COPY_PRIORITY 2 - -/* * The size of the mempool used to track chunks in use. */ #define MIN_IOS 256 -- cgit v1.1 From a2d2b0345a0f30c169b7d08b8cebdd4853fcb0f8 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:03 +0100 Subject: dm snapshot: style cleanups Coding style cleanups. Signed-off-by: Alasdair G Kergon Signed-off-by: Jonathan Brassow --- drivers/md/dm-snap-persistent.c | 4 ++-- drivers/md/dm-snap.c | 14 ++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 1a0acb8..d1f1d70 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -567,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store, ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / sizeof(struct disk_exception); ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); + sizeof(*ps->callbacks)); if (!ps->callbacks) return -ENOMEM; @@ -674,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, * If we completely filled the current area, then wipe the next one. */ if ((ps->current_committed == ps->exceptions_per_area) && - zero_disk_area(ps, ps->current_area + 1)) + zero_disk_area(ps, ps->current_area + 1)) ps->valid = 0; /* diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9d6daa0..94dee05 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1045,8 +1045,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) { - ti->error = "Cannot allocate snapshot context private " - "structure"; + ti->error = "Cannot allocate private snapshot structure"; r = -ENOMEM; goto bad; } @@ -1405,7 +1404,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) */ dm_insert_exception(&s->complete, e); - out: +out: dm_remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); @@ -1470,8 +1469,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - dm_kcopyd_copy(s->kcopyd_client, - &src, 1, &dest, 0, copy_callback, pe); + dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } static struct dm_snap_pending_exception * @@ -1618,9 +1616,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, map_context->ptr = track_chunk(s, chunk); } - out_unlock: +out_unlock: up_write(&s->lock); - out: +out: return r; } @@ -1964,7 +1962,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, pe_to_start_now = pe; } - next_snapshot: +next_snapshot: up_write(&snap->lock); if (pe_to_start_now) { -- cgit v1.1 From 13c87583ea4e867211fc3e7edab750c353c47c95 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:03 +0100 Subject: dm raid: cleanup 
parameter handling Re-order the parameters so they are handled consistently in the same order where defined, parsed and output. Only include rebuild parameters in the STATUSTYPE_TABLE output if they were supplied in the original table line. Correct the parameter count when outputting rebuild: there are two words, not one. Use case-independent checks for keywords (as in other device-mapper targets). Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e5d8904..efa960f 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -43,13 +43,14 @@ struct raid_dev { /* * Flags for rs->print_flags field. */ -#define DMPF_DAEMON_SLEEP 0x1 -#define DMPF_MAX_WRITE_BEHIND 0x2 -#define DMPF_SYNC 0x4 -#define DMPF_NOSYNC 0x8 -#define DMPF_STRIPE_CACHE 0x10 -#define DMPF_MIN_RECOVERY_RATE 0x20 -#define DMPF_MAX_RECOVERY_RATE 0x40 +#define DMPF_SYNC 0x1 +#define DMPF_NOSYNC 0x2 +#define DMPF_REBUILD 0x4 +#define DMPF_DAEMON_SLEEP 0x8 +#define DMPF_MIN_RECOVERY_RATE 0x10 +#define DMPF_MAX_RECOVERY_RATE 0x20 +#define DMPF_MAX_WRITE_BEHIND 0x40 +#define DMPF_STRIPE_CACHE 0x80 struct raid_set { struct dm_target *ti; @@ -275,13 +276,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv, set_bit(In_sync, &rs->dev[i].rdev.flags); for (i = 0; i < num_raid_params; i++) { - if (!strcmp(argv[i], "nosync")) { + if (!strcasecmp(argv[i], "nosync")) { rs->md.recovery_cp = MaxSector; rs->print_flags |= DMPF_NOSYNC; rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } - if (!strcmp(argv[i], "sync")) { + if (!strcasecmp(argv[i], "sync")) { rs->md.recovery_cp = 0; rs->print_flags |= DMPF_SYNC; rs->md.flags |= MD_SYNC_STATE_FORCED; @@ -300,7 +301,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } - if (!strcmp(key, "rebuild")) { + if (!strcasecmp(key, "rebuild")) { if (++rebuild_cnt > rs->raid_type->parity_devs) { rs->ti->error = "Too many rebuild drives given"; return -EINVAL; @@ -311,7 +312,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } clear_bit(In_sync, &rs->dev[value].rdev.flags); rs->dev[value].rdev.recovery_offset = 0; - } else if (!strcmp(key, "max_write_behind")) { + rs->print_flags |= DMPF_REBUILD; + } else if (!strcasecmp(key, "max_write_behind")) { rs->print_flags |= DMPF_MAX_WRITE_BEHIND; /* @@ -324,14 +326,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } rs->md.bitmap_info.max_write_behind = value; - } else if (!strcmp(key, "daemon_sleep")) { + } else if (!strcasecmp(key, "daemon_sleep")) { rs->print_flags |= DMPF_DAEMON_SLEEP; if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { rs->ti->error = "daemon sleep period out of range"; return -EINVAL; } rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcmp(key, "stripe_cache")) { + } else if (!strcasecmp(key, "stripe_cache")) { rs->print_flags |= DMPF_STRIPE_CACHE; /* @@ -348,14 +350,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "Bad stripe_cache size"; return -EINVAL; } - } else if (!strcmp(key, "min_recovery_rate")) { + } else if (!strcasecmp(key, "min_recovery_rate")) { rs->print_flags |= DMPF_MIN_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "min_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_min = (int)value; - } else if (!strcmp(key, "max_recovery_rate")) { + } else if (!strcasecmp(key, 
"max_recovery_rate")) { rs->print_flags |= DMPF_MAX_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "max_recovery_rate out of range"; @@ -547,11 +549,12 @@ static int raid_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: /* The string you would use to construct this array */ for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && + if ((rs->print_flags & DMPF_REBUILD) && + rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) - raid_param_cnt++; /* for rebuilds */ + raid_param_cnt += 2; /* for rebuilds */ - raid_param_cnt += (hweight64(rs->print_flags) * 2); + raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) raid_param_cnt--; @@ -565,7 +568,8 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" nosync"); for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && + if ((rs->print_flags & DMPF_REBUILD) && + rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) DMEMIT(" rebuild %u", i); -- cgit v1.1 From 2ca4c92f58f9386e080b26f9ccd78c9ca5825a42 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 2 Aug 2011 12:32:03 +0100 Subject: dm ioctl: prevent empty message Detect invalid empty messages in core dm instead of requiring every target to check this. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 4cacdad..1622a6b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1402,6 +1402,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out; } + if (!argc) { + DMWARN("Empty message received."); + goto out; + } + table = dm_get_live_table(md); if (!table) goto out_argv; -- cgit v1.1 From 3e8dbb7f3966c80d77272f8b4f430babc99f6595 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 2 Aug 2011 12:32:03 +0100 Subject: dm raid: tidy includes A dm target only needs to use include/linux dm headers. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index efa960f..d4e95b2 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -9,9 +9,10 @@ #include "md.h" #include "raid5.h" -#include "dm.h" #include "bitmap.h" +#include + #define DM_MSG_PREFIX "raid" /* -- cgit v1.1 From 08649012545cfb116798260352547cf4d47064ec Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:04 +0100 Subject: dm table: clean dm_get_device and move exports There is no need for __table_get_device to be factored out. Also move the exports to the end of their respective functions. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 8dc6755..d58b200 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -157,6 +157,7 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) return addr; } +EXPORT_SYMBOL(dm_vcalloc); /* * highs, and targets are managed as dynamic arrays during a @@ -277,6 +278,7 @@ void dm_table_get(struct dm_table *t) { atomic_inc(&t->holders); } +EXPORT_SYMBOL(dm_table_get); void dm_table_put(struct dm_table *t) { @@ -286,6 +288,7 @@ void dm_table_put(struct dm_table *t) smp_mb__before_atomic_dec(); atomic_dec(&t->holders); } +EXPORT_SYMBOL(dm_table_put); /* * Checks to see if we need to extend highs or targets. @@ -451,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * Add a device to the list, or just increment the usage count if * it's already present. */ -static int __table_get_device(struct dm_table *t, struct dm_target *ti, - const char *path, fmode_t mode, struct dm_dev **result) +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) { int r; dev_t uninitialized_var(dev); struct dm_dev_internal *dd; unsigned int major, minor; + struct dm_table *t = ti->table; BUG_ON(!t); @@ -505,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, *result = &dd->dm_dev; return 0; } +EXPORT_SYMBOL(dm_get_device); int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -543,15 +548,8 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } EXPORT_SYMBOL_GPL(dm_set_device_limits); -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, - struct dm_dev **result) -{ - return __table_get_device(ti->table, ti, path, mode, result); -} - - /* - * Decrement a devices use count and remove it if necessary. + * Decrement a device's use count and remove it if necessary. */ void dm_put_device(struct dm_target *ti, struct dm_dev *d) { @@ -564,6 +562,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) kfree(dd); } } +EXPORT_SYMBOL(dm_put_device); /* * Checks to see if the target joins onto the end of the table. @@ -1074,11 +1073,13 @@ void dm_table_event(struct dm_table *t) t->event_fn(t->event_context); mutex_unlock(&_event_lock); } +EXPORT_SYMBOL(dm_table_event); sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? 
(t->highs[t->num_targets - 1] + 1) : 0; } +EXPORT_SYMBOL(dm_table_get_size); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) { @@ -1234,6 +1235,7 @@ fmode_t dm_table_get_mode(struct dm_table *t) { return t->mode; } +EXPORT_SYMBOL(dm_table_get_mode); static void suspend_targets(struct dm_table *t, unsigned postsuspend) { @@ -1342,6 +1344,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) { return t->md; } +EXPORT_SYMBOL(dm_table_get_md); static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -1379,13 +1382,3 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; } - -EXPORT_SYMBOL(dm_vcalloc); -EXPORT_SYMBOL(dm_get_device); -EXPORT_SYMBOL(dm_put_device); -EXPORT_SYMBOL(dm_table_event); -EXPORT_SYMBOL(dm_table_get_size); -EXPORT_SYMBOL(dm_table_get_mode); -EXPORT_SYMBOL(dm_table_get_md); -EXPORT_SYMBOL(dm_table_put); -EXPORT_SYMBOL(dm_table_get); -- cgit v1.1 From d5b9dd04bd74b774b8e8d93ced7a0d15ad403fa9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:04 +0100 Subject: dm: ignore merge_bvec for snapshots when safe Add a new flag DMF_MERGE_IS_OPTIONAL to struct mapped_device to indicate whether the device can accept bios larger than the size its merge function returns. When set, use this to send large bios to snapshots which can split them if necessary. Snapshot I/O may be significantly fragmented and this approach seems to improve peformance. Before the patch, dm_set_device_limits restricted bio size to page size if the underlying device had a merge function and the target didn't provide a merge function. After the patch, dm_set_device_limits restricts bio size to page size if the underlying device has a merge function, doesn't have DMF_MERGE_IS_OPTIONAL flag and the target doesn't provide a merge function. The snapshot target can't provide a merge function because when the merge function is called, it is impossible to determine where the bio will be remapped. Previously this led us to impose a 4k limit, which we can now remove if the snapshot store is located on a device without a merge function. Together with another patch for optimizing full chunk writes, it improves performance from 29MB/s to 40MB/s when writing to the filesystem on snapshot store. If the snapshot store is placed on a non-dm device with a merge function (such as md-raid), device mapper still limits all bios to page size. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 3 +-- drivers/md/dm.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm.h | 2 ++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d58b200..03516c7 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -540,8 +540,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, * If not we'll force DM to use PAGE_SIZE or * smaller I/O, just to be safe. */ - - if (q->merge_bvec_fn && !ti->type->merge) + if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) blk_limits_max_hw_sectors(limits, (unsigned int) (PAGE_SIZE >> 9)); return 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index aeb0fa1..1000eaf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_MERGE_IS_OPTIONAL 6 /* * Work processed by per-device workqueue. 
@@ -1993,6 +1994,59 @@ static void __set_size(struct mapped_device *md, sector_t size) } /* + * Return 1 if the queue has a compulsory merge_bvec_fn function. + * + * If this function returns 0, then the device is either a non-dm + * device without a merge_bvec_fn, or it is a dm device that is + * able to split any bios it receives that are too big. + */ +int dm_queue_merge_is_compulsory(struct request_queue *q) +{ + struct mapped_device *dev_md; + + if (!q->merge_bvec_fn) + return 0; + + if (q->make_request_fn == dm_request) { + dev_md = q->queuedata; + if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) + return 0; + } + + return 1; +} + +static int dm_device_merge_is_compulsory(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct block_device *bdev = dev->bdev; + struct request_queue *q = bdev_get_queue(bdev); + + return dm_queue_merge_is_compulsory(q); +} + +/* + * Return 1 if it is acceptable to ignore merge_bvec_fn based + * on the properties of the underlying devices. + */ +static int dm_table_merge_is_optional(struct dm_table *table) +{ + unsigned i = 0; + struct dm_target *ti; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) + return 0; + } + + return 1; +} + +/* * Returns old map, which caller must destroy. */ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, @@ -2002,6 +2056,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct request_queue *q = md->queue; sector_t size; unsigned long flags; + int merge_is_optional; size = dm_table_get_size(t); @@ -2027,10 +2082,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); + merge_is_optional = dm_table_merge_is_optional(t); + write_lock_irqsave(&md->map_lock, flags); old_map = md->map; md->map = t; dm_table_set_restrictions(t, q, limits); + if (merge_is_optional) + set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); + else + clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); write_unlock_irqrestore(&md->map_lock, flags); return old_map; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 1aaf167..6745dbd 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); +int dm_queue_merge_is_compulsory(struct request_queue *q); + void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); void dm_set_md_type(struct mapped_device *md, unsigned type); -- cgit v1.1 From a6e50b409d3f9e0833e69c3c9cca822e8fa4adbb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:04 +0100 Subject: dm snapshot: skip reading origin when overwriting complete chunk If we write a full chunk in the snapshot, skip reading the origin device because the whole chunk will be overwritten anyway. This patch changes the snapshot write logic when a full chunk is written. In this case: 1. allocate the exception 2. dispatch the bio (but don't report the bio completion to device mapper) 3. write the exception record 4. report bio completed Callbacks must be done through the kcopyd thread, because callbacks must not race with each other. So we create two new functions: dm_kcopyd_prepare_callback: allocate a job structure and prepare the callback. 
(This function must not be called from interrupt context.) dm_kcopyd_do_callback: submit callback. (This function may be called from interrupt context.) Performance test (on snapshots with 4k chunk size): without the patch: non-direct-io sequential write (dd): 17.7MB/s direct-io sequential write (dd): 20.9MB/s non-direct-io random write (mkfs.ext2): 0.44s with the patch: non-direct-io sequential write (dd): 26.5MB/s direct-io sequential write (dd): 33.2MB/s non-direct-io random write (mkfs.ext2): 0.27s Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 31 ++++++++++++++++++++++++ drivers/md/dm-snap.c | 60 ++++++++++++++++++++++++++++++++++++++++++++--- include/linux/dm-kcopyd.h | 15 ++++++++++++ 3 files changed, 103 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 98725e1..f821470 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -617,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, } EXPORT_SYMBOL(dm_kcopyd_copy); +void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, + dm_kcopyd_notify_fn fn, void *context) +{ + struct kcopyd_job *job; + + job = mempool_alloc(kc->job_pool, GFP_NOIO); + + memset(job, 0, sizeof(struct kcopyd_job)); + job->kc = kc; + job->fn = fn; + job->context = context; + + atomic_inc(&kc->nr_jobs); + + return job; +} +EXPORT_SYMBOL(dm_kcopyd_prepare_callback); + +void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) +{ + struct kcopyd_job *job = j; + struct dm_kcopyd_client *kc = job->kc; + + job->read_err = read_err; + job->write_err = write_err; + + push(&kc->complete_jobs, job); + wake(kc); +} +EXPORT_SYMBOL(dm_kcopyd_do_callback); + /* * Cancels a kcopyd job, eg. someone might be deactivating a * mirror. diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 94dee05..6f75887 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -170,6 +170,13 @@ struct dm_snap_pending_exception { * kcopyd. */ int started; + + /* + * For writing a complete chunk, bypassing the copy. + */ + struct bio *full_bio; + bio_end_io_t *full_bio_end_io; + void *full_bio_private; }; /* @@ -1369,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; struct bio *snapshot_bios = NULL; + struct bio *full_bio = NULL; int error = 0; if (!success) { @@ -1408,6 +1416,11 @@ out: dm_remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); + full_bio = pe->full_bio; + if (full_bio) { + full_bio->bi_end_io = pe->full_bio_end_io; + full_bio->bi_private = pe->full_bio_private; + } free_pending_exception(pe); increment_pending_exceptions_done_count(); @@ -1415,10 +1428,15 @@ out: up_write(&s->lock); /* Submit any pending write bios */ - if (error) + if (error) { + if (full_bio) + bio_io_error(full_bio); error_bios(snapshot_bios); - else + } else { + if (full_bio) + bio_endio(full_bio, 0); flush_bios(snapshot_bios); + } retry_origin_bios(s, origin_bios); } @@ -1472,6 +1490,32 @@ static void start_copy(struct dm_snap_pending_exception *pe) dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } +static void full_bio_end_io(struct bio *bio, int error) +{ + void *callback_data = bio->bi_private; + + dm_kcopyd_do_callback(callback_data, 0, error ? 
1 : 0); +} + +static void start_full_bio(struct dm_snap_pending_exception *pe, + struct bio *bio) +{ + struct dm_snapshot *s = pe->snap; + void *callback_data; + + pe->full_bio = bio; + pe->full_bio_end_io = bio->bi_end_io; + pe->full_bio_private = bio->bi_private; + + callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, + copy_callback, pe); + + bio->bi_end_io = full_bio_end_io; + bio->bi_private = callback_data; + + generic_make_request(bio); +} + static struct dm_snap_pending_exception * __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) { @@ -1507,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s, bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); pe->started = 0; + pe->full_bio = NULL; if (s->store->type->prepare_exception(s->store, &pe->e)) { free_pending_exception(pe); @@ -1600,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } remap_exception(s, &pe->e, bio, chunk); - bio_list_add(&pe->snapshot_bios, bio); r = DM_MAPIO_SUBMITTED; + if (!pe->started && + bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { + pe->started = 1; + up_write(&s->lock); + start_full_bio(pe, bio); + goto out; + } + + bio_list_add(&pe->snapshot_bios, bio); + if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 298d587..5e54458 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -42,5 +42,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned num_dests, struct dm_io_region *dests, unsigned flags, dm_kcopyd_notify_fn fn, void *context); +/* + * Prepare a callback and submit it via the kcopyd thread. + * + * dm_kcopyd_prepare_callback allocates a callback structure and returns it. + * It must not be called from interrupt context. + * The returned value should be passed into dm_kcopyd_do_callback. + * + * dm_kcopyd_do_callback submits the callback. + * It may be called from interrupt context. + * The callback is issued from the kcopyd thread. + */ +void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, + dm_kcopyd_notify_fn fn, void *context); +void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); + #endif /* __KERNEL__ */ #endif /* _LINUX_DM_KCOPYD_H */ -- cgit v1.1 From 498f0103ea13123e007660def9072a0b7dd1c599 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:04 +0100 Subject: dm table: share target argument parsing functions Move multipath target argument parsing code into dm-table so other targets can share it. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 7 +- drivers/md/dm-mpath.c | 152 ++++++++++++------------------------------ drivers/md/dm-table.c | 57 ++++++++++++++++ include/linux/device-mapper.h | 43 ++++++++++++ 4 files changed, 147 insertions(+), 112 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f540676..b79e747 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -30,7 +30,6 @@ #include #define DM_MSG_PREFIX "crypt" -#define MESG_STR(x) x, sizeof(x) /* * context holding the current state of a multi-part conversion @@ -1770,12 +1769,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) if (argc < 2) goto error; - if (!strnicmp(argv[0], MESG_STR("key"))) { + if (!strcasecmp(argv[0], "key")) { if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { DMWARN("not suspended during key manipulation."); return -EINVAL; } - if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) { + if (argc == 3 && !strcasecmp(argv[1], "set")) { ret = crypt_set_key(cc, argv[2]); if (ret) return ret; @@ -1783,7 +1782,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) ret = cc->iv_gen_ops->init(cc); return ret; } - if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) { + if (argc == 2 && !strcasecmp(argv[1], "wipe")) { if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { ret = cc->iv_gen_ops->wipe(cc); if (ret) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index adf851a..5e0090e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -22,7 +22,6 @@ #include #define DM_MSG_PREFIX "multipath" -#define MESG_STR(x) x, sizeof(x) #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) @@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work) * <#paths> <#per-path selector args> * [ []* ]+ ]+ *---------------------------------------------------------------*/ -struct param { - unsigned min; - unsigned max; - char *error; -}; - -static int read_param(struct param *param, char *str, unsigned *v, char **error) -{ - if (!str || - (sscanf(str, "%u", v) != 1) || - (*v < param->min) || - (*v > param->max)) { - *error = param->error; - return -EINVAL; - } - - return 0; -} - -struct arg_set { - unsigned argc; - char **argv; -}; - -static char *shift(struct arg_set *as) -{ - char *r; - - if (as->argc) { - as->argc--; - r = *as->argv; - as->argv++; - return r; - } - - return NULL; -} - -static void consume(struct arg_set *as, unsigned n) -{ - BUG_ON (as->argc < n); - as->argc -= n; - as->argv += n; -} - -static int parse_path_selector(struct arg_set *as, struct priority_group *pg, +static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, struct dm_target *ti) { int r; struct path_selector_type *pst; unsigned ps_argc; - static struct param _params[] = { + static struct dm_arg _args[] = { {0, 1024, "invalid number of path selector args"}, }; - pst = dm_get_path_selector(shift(as)); + pst = dm_get_path_selector(dm_shift_arg(as)); if (!pst) { ti->error = "unknown path selector type"; return -EINVAL; } - r = read_param(_params, shift(as), &ps_argc, &ti->error); + r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); if (r) { dm_put_path_selector(pst); return -EINVAL; } - if (ps_argc > as->argc) { - dm_put_path_selector(pst); - ti->error = "not enough arguments for path selector"; - return -EINVAL; - } - r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { dm_put_path_selector(pst); @@ -587,12 +535,12 @@ static int 
parse_path_selector(struct arg_set *as, struct priority_group *pg, } pg->ps.type = pst; - consume(as, ps_argc); + dm_consume_args(as, ps_argc); return 0; } -static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, +static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, struct dm_target *ti) { int r; @@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), + r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), &p->path.dev); if (r) { ti->error = "error getting device"; @@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, return ERR_PTR(r); } -static struct priority_group *parse_priority_group(struct arg_set *as, +static struct priority_group *parse_priority_group(struct dm_arg_set *as, struct multipath *m) { - static struct param _params[] = { + static struct dm_arg _args[] = { {1, 1024, "invalid number of paths"}, {0, 1024, "invalid number of selector args"} }; int r; - unsigned i, nr_selector_args, nr_params; + unsigned i, nr_selector_args, nr_args; struct priority_group *pg; struct dm_target *ti = m->ti; @@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as, /* * read the paths */ - r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error); + r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); if (r) goto bad; - r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error); + r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); if (r) goto bad; - nr_params = 1 + nr_selector_args; + nr_args = 1 + nr_selector_args; for (i = 0; i < pg->nr_pgpaths; i++) { struct pgpath *pgpath; - struct arg_set path_args; + struct dm_arg_set path_args; - if (as->argc < nr_params) { + if (as->argc < nr_args) { ti->error = "not enough path parameters"; r = -EINVAL; goto bad; } - path_args.argc = nr_params; + path_args.argc = nr_args; path_args.argv = as->argv; pgpath = parse_path(&path_args, &pg->ps, ti); @@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, pgpath->pg = pg; list_add_tail(&pgpath->list, &pg->pgpaths); - consume(as, nr_params); + dm_consume_args(as, nr_args); } return pg; @@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as, return ERR_PTR(r); } -static int parse_hw_handler(struct arg_set *as, struct multipath *m) +static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) { unsigned hw_argc; int ret; struct dm_target *ti = m->ti; - static struct param _params[] = { + static struct dm_arg _args[] = { {0, 1024, "invalid number of hardware handler args"}, }; - if (read_param(_params, shift(as), &hw_argc, &ti->error)) + if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) return -EINVAL; if (!hw_argc) return 0; - if (hw_argc > as->argc) { - ti->error = "not enough arguments for hardware handler"; - return -EINVAL; - } - - m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); + m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); request_module("scsi_dh_%s", m->hw_handler_name); if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { ti->error = "unknown hardware handler type"; @@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m) for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) j = sprintf(p, "%s", as->argv[i]); } - consume(as, hw_argc - 1); + 
dm_consume_args(as, hw_argc - 1); return 0; fail: @@ -787,52 +730,45 @@ fail: return ret; } -static int parse_features(struct arg_set *as, struct multipath *m) +static int parse_features(struct dm_arg_set *as, struct multipath *m) { int r; unsigned argc; struct dm_target *ti = m->ti; - const char *param_name; + const char *arg_name; - static struct param _params[] = { + static struct dm_arg _args[] = { {0, 5, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; - r = read_param(_params, shift(as), &argc, &ti->error); + r = dm_read_arg_group(_args, as, &argc, &ti->error); if (r) return -EINVAL; if (!argc) return 0; - if (argc > as->argc) { - ti->error = "not enough arguments for features"; - return -EINVAL; - } - do { - param_name = shift(as); + arg_name = dm_shift_arg(as); argc--; - if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { + if (!strcasecmp(arg_name, "queue_if_no_path")) { r = queue_if_no_path(m, 1, 0); continue; } - if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && + if (!strcasecmp(arg_name, "pg_init_retries") && (argc >= 1)) { - r = read_param(_params + 1, shift(as), - &m->pg_init_retries, &ti->error); + r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); argc--; continue; } - if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) && + if (!strcasecmp(arg_name, "pg_init_delay_msecs") && (argc >= 1)) { - r = read_param(_params + 2, shift(as), - &m->pg_init_delay_msecs, &ti->error); + r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); argc--; continue; } @@ -847,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m) static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - /* target parameters */ - static struct param _params[] = { + /* target arguments */ + static struct dm_arg _args[] = { {0, 1024, "invalid number of priority groups"}, {0, 1024, "invalid initial priority group number"}, }; int r; struct multipath *m; - struct arg_set as; + struct dm_arg_set as; unsigned pg_count = 0; unsigned next_pg_num; @@ -876,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, if (r) goto bad; - r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); + r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); if (r) goto bad; - r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); + r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); if (r) goto bad; @@ -1510,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) } if (argc == 1) { - if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { + if (!strcasecmp(argv[0], "queue_if_no_path")) { r = queue_if_no_path(m, 1, 0); goto out; - } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { + } else if (!strcasecmp(argv[0], "fail_if_no_path")) { r = queue_if_no_path(m, 0, 0); goto out; } @@ -1524,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - if (!strnicmp(argv[0], MESG_STR("disable_group"))) { + if (!strcasecmp(argv[0], "disable_group")) { r = bypass_pg_num(m, argv[1], 1); goto out; - } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { + } else if (!strcasecmp(argv[0], "enable_group")) { r = bypass_pg_num(m, argv[1], 0); goto out; - } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { + } else if (!strcasecmp(argv[0], "switch_group")) { r = switch_pg_num(m, 
argv[1]); goto out; - } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) + } else if (!strcasecmp(argv[0], "reinstate_path")) action = reinstate_path; - else if (!strnicmp(argv[0], MESG_STR("fail_path"))) + else if (!strcasecmp(argv[0], "fail_path")) action = fail_path; else { DMWARN("Unrecognised multipath message received."); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 03516c7..259ce99 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -797,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type, return r; } +/* + * Target argument parsing helpers. + */ +static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error, unsigned grouped) +{ + const char *arg_str = dm_shift_arg(arg_set); + + if (!arg_str || + (sscanf(arg_str, "%u", value) != 1) || + (*value < arg->min) || + (*value > arg->max) || + (grouped && arg_set->argc < *value)) { + *error = arg->error; + return -EINVAL; + } + + return 0; +} + +int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error) +{ + return validate_next_arg(arg, arg_set, value, error, 0); +} +EXPORT_SYMBOL(dm_read_arg); + +int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error) +{ + return validate_next_arg(arg, arg_set, value, error, 1); +} +EXPORT_SYMBOL(dm_read_arg_group); + +const char *dm_shift_arg(struct dm_arg_set *as) +{ + char *r; + + if (as->argc) { + as->argc--; + r = *as->argv; + as->argv++; + return r; + } + + return NULL; +} +EXPORT_SYMBOL(dm_shift_arg); + +void dm_consume_args(struct dm_arg_set *as, unsigned num_args) +{ + BUG_ON(as->argc < num_args); + as->argc -= num_args; + as->argv += num_args; +} +EXPORT_SYMBOL(dm_consume_args); + static int dm_table_set_type(struct dm_table *t) { unsigned i; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4427e04..3fa1f3d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -208,6 +208,49 @@ struct dm_target_callbacks { int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); +/* + * Target argument parsing. + */ +struct dm_arg_set { + unsigned argc; + char **argv; +}; + +/* + * The minimum and maximum value of a numeric argument, together with + * the error message to use if the number is found to be outside that range. + */ +struct dm_arg { + unsigned min; + unsigned max; + char *error; +}; + +/* + * Validate the next argument, either returning it as *value or, if invalid, + * returning -EINVAL and setting *error. + */ +int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error); + +/* + * Process the next argument as the start of a group containing between + * arg->min and arg->max further arguments. Either return the size as + * *num_args or, if invalid, return -EINVAL and set *error. + */ +int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *num_args, char **error); + +/* + * Return the current argument and shift to the next. + */ +const char *dm_shift_arg(struct dm_arg_set *as); + +/* + * Move through num_args arguments. + */ +void dm_consume_args(struct dm_arg_set *as, unsigned num_args); + /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. * Drop the reference with dm_put when you finish with the object. 
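A minimal usage sketch of the helpers added above (not part of the patch: the constructor example_ctr, its argument layout and the feature-count bounds are invented for illustration; only the dm_arg/dm_arg_set helpers, dm_get_device and dm_table_get_mode are real device-mapper interfaces):

static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	/* Bounds and error message for the grouped feature-argument count. */
	static struct dm_arg _args[] = {
		{0, 4, "Invalid number of feature args"},
	};
	struct dm_arg_set as;
	struct dm_dev *dev;
	const char *devname;
	unsigned num_features;
	int r;

	as.argc = argc;
	as.argv = argv;

	/* First positional argument: the underlying device. */
	devname = dm_shift_arg(&as);
	if (!devname) {
		ti->error = "Device name missing";
		return -EINVAL;
	}

	/* A count followed by that many grouped feature words. */
	r = dm_read_arg_group(_args, &as, &num_features, &ti->error);
	if (r)
		return r;

	/* This sketch does not interpret the feature words; skip over them. */
	dm_consume_args(&as, num_features);

	/* Grab the device last so nothing is held if parsing fails above. */
	r = dm_get_device(ti, devname, dm_table_get_mode(ti->table), &dev);
	if (r) {
		ti->error = "Device lookup failed";
		return r;
	}

	/*
	 * A real target would stash 'dev' in its private context and
	 * release it with dm_put_device() in its destructor.
	 */
	return 0;
}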
-- cgit v1.1 From 30e4171bfe3d1c49689803338005cc0071dddaff Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:05 +0100 Subject: dm flakey: use dm_target_offset and support discards Use dm_target_offset() and support discards. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-flakey.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ea79062..70a69b2 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -79,6 +79,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; + ti->num_discard_requests = 1; ti->private = fc; return 0; @@ -99,7 +100,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector) { struct flakey_c *fc = ti->private; - return fc->start + (bi_sector - ti->begin); + return fc->start + dm_target_offset(ti, bi_sector); } static void flakey_map_bio(struct dm_target *ti, struct bio *bio) -- cgit v1.1 From dfd068b01f02653c6650f1c0eda443b2655d1471 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:05 +0100 Subject: dm flakey: support feature args Add the ability to specify arbitrary feature flags when creating a flakey target. This code uses the same target argument helpers that the multipath target does. Also remove the superfluous 'dm-flakey' prefixes from the error messages, as they already contain the prefix 'flakey'. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-flakey.c | 83 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 70a69b2..dd963dc 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -27,54 +27,99 @@ struct flakey_c { unsigned down_interval; }; +static int parse_features(struct dm_arg_set *as, struct dm_target *ti) +{ + int r; + unsigned argc; + const char *arg_name; + + static struct dm_arg _args[] = { + {0, 0, "Invalid number of feature args"}, + }; + + /* No feature arguments supplied. 
*/ + if (!as->argc) + return 0; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + while (argc && !r) { + arg_name = dm_shift_arg(as); + argc--; + + ti->error = "Unrecognised flakey feature requested"; + r = -EINVAL; + } + + return r; +} + /* - * Construct a flakey mapping: + * Construct a flakey mapping: + * [<#feature args> []*] */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { + static struct dm_arg _args[] = { + {0, UINT_MAX, "Invalid up interval"}, + {0, UINT_MAX, "Invalid down interval"}, + }; + + int r; struct flakey_c *fc; - unsigned long long tmp; + unsigned long long tmpll; + struct dm_arg_set as; + const char *devname; - if (argc != 4) { - ti->error = "dm-flakey: Invalid argument count"; + as.argc = argc; + as.argv = argv; + + if (argc < 4) { + ti->error = "Invalid argument count"; return -EINVAL; } fc = kmalloc(sizeof(*fc), GFP_KERNEL); if (!fc) { - ti->error = "dm-flakey: Cannot allocate linear context"; + ti->error = "Cannot allocate linear context"; return -ENOMEM; } fc->start_time = jiffies; - if (sscanf(argv[1], "%llu", &tmp) != 1) { - ti->error = "dm-flakey: Invalid device sector"; + devname = dm_shift_arg(&as); + + if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) { + ti->error = "Invalid device sector"; goto bad; } - fc->start = tmp; + fc->start = tmpll; - if (sscanf(argv[2], "%u", &fc->up_interval) != 1) { - ti->error = "dm-flakey: Invalid up interval"; + r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error); + if (r) goto bad; - } - if (sscanf(argv[3], "%u", &fc->down_interval) != 1) { - ti->error = "dm-flakey: Invalid down interval"; + r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error); + if (r) goto bad; - } if (!(fc->up_interval + fc->down_interval)) { - ti->error = "dm-flakey: Total (up + down) interval is zero"; + ti->error = "Total (up + down) interval is zero"; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { - ti->error = "dm-flakey: Interval overflow"; + ti->error = "Interval overflow"; goto bad; } - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) { - ti->error = "dm-flakey: Device lookup failed"; + r = parse_features(&as, ti); + if (r) + goto bad; + + if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { + ti->error = "Device lookup failed"; goto bad; } @@ -178,7 +223,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, -- cgit v1.1 From b26f5e3d7127487e934758c1fbe05d683b082cb0 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:05 +0100 Subject: dm flakey: add drop_writes Add 'drop_writes' option to drop writes silently while the device is 'down'. Reads are not touched. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-flakey.txt | 29 +++++++++++--- drivers/md/dm-flakey.c | 63 +++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt index c8efdfd..1b66c86 100644 --- a/Documentation/device-mapper/dm-flakey.txt +++ b/Documentation/device-mapper/dm-flakey.txt @@ -1,17 +1,34 @@ dm-flakey ========= -This target is the same as the linear target except that it returns I/O -errors periodically. 
It's been found useful in simulating failing -devices for testing purposes. +This target is the same as the linear target except that it exhibits +unreliable behaviour periodically. It's been found useful in simulating +failing devices for testing purposes. Starting from the time the table is loaded, the device is available for - seconds, then returns errors for seconds, -and then this cycle repeats. + seconds, then exhibits unreliable behaviour for seconds, and then this cycle repeats. -Parameters: +Also, consider using this in combination with the dm-delay target too, +which can delay reads and writes and/or send them to different +underlying devices. + +Table parameters +---------------- + \ + [ []] + +Mandatory parameters: : Full pathname to the underlying block-device, or a "major:minor" device-number. : Starting sector within the device. : Number of seconds device is available. : Number of seconds device returns errors. + +Optional feature parameters: + If no feature parameters are present, during the periods of + unreliability, all I/O returns errors. + + drop_writes: + All write I/O is silently ignored. + Read I/O is handled correctly. diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index dd963dc..e7c4c2a 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -25,16 +25,22 @@ struct flakey_c { sector_t start; unsigned up_interval; unsigned down_interval; + unsigned long flags; }; -static int parse_features(struct dm_arg_set *as, struct dm_target *ti) +enum feature_flag_bits { + DROP_WRITES +}; + +static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, + struct dm_target *ti) { int r; unsigned argc; const char *arg_name; static struct dm_arg _args[] = { - {0, 0, "Invalid number of feature args"}, + {0, 1, "Invalid number of feature args"}, }; /* No feature arguments supplied. */ @@ -49,6 +55,18 @@ static int parse_features(struct dm_arg_set *as, struct dm_target *ti) arg_name = dm_shift_arg(as); argc--; + /* + * drop_writes + */ + if (!strcasecmp(arg_name, "drop_writes")) { + if (test_and_set_bit(DROP_WRITES, &fc->flags)) { + ti->error = "Feature drop_writes duplicated"; + return -EINVAL; + } + + continue; + } + ti->error = "Unrecognised flakey feature requested"; r = -EINVAL; } @@ -59,6 +77,9 @@ static int parse_features(struct dm_arg_set *as, struct dm_target *ti) /* * Construct a flakey mapping: * [<#feature args> []*] + * + * Feature args: + * [drop_writes] */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -81,7 +102,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - fc = kmalloc(sizeof(*fc), GFP_KERNEL); + fc = kzalloc(sizeof(*fc), GFP_KERNEL); if (!fc) { ti->error = "Cannot allocate linear context"; return -ENOMEM; @@ -114,7 +135,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - r = parse_features(&as, ti); + r = parse_features(&as, fc, ti); if (r) goto bad; @@ -162,12 +183,31 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, { struct flakey_c *fc = ti->private; unsigned elapsed; + unsigned rw; /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; - if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) + if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { + rw = bio_data_dir(bio); + + /* + * Drop writes. Map reads as normal. 
+ */ + if (test_bit(DROP_WRITES, &fc->flags)) { + if (rw == WRITE) { + bio_endio(bio, 0); + return DM_MAPIO_SUBMITTED; + } + goto map_bio; + } + + /* + * Default setting errors all I/O. + */ return -EIO; + } +map_bio: flakey_map_bio(ti, bio); return DM_MAPIO_REMAPPED; @@ -176,7 +216,9 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, static int flakey_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { + unsigned sz = 0; struct flakey_c *fc = ti->private; + unsigned drop_writes; switch (type) { case STATUSTYPE_INFO: @@ -184,9 +226,14 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name, - (unsigned long long)fc->start, fc->up_interval, - fc->down_interval); + DMEMIT("%s %llu %u %u ", fc->dev->name, + (unsigned long long)fc->start, fc->up_interval, + fc->down_interval); + + drop_writes = test_bit(DROP_WRITES, &fc->flags); + DMEMIT("%u ", drop_writes); + if (drop_writes) + DMEMIT("drop_writes "); break; } return 0; -- cgit v1.1 From a3998799fb4df0b0af8271a7d50c4269032397aa Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:06 +0100 Subject: dm flakey: add corrupt_bio_byte feature Add corrupt_bio_byte feature to simulate corruption by overwriting a byte at a specified position with a specified value during intervals when the device is "down". Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-flakey.txt | 19 ++++ drivers/md/dm-flakey.c | 155 +++++++++++++++++++++++++++--- 2 files changed, 159 insertions(+), 15 deletions(-) diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt index 1b66c86..6ff5c23 100644 --- a/Documentation/device-mapper/dm-flakey.txt +++ b/Documentation/device-mapper/dm-flakey.txt @@ -32,3 +32,22 @@ Optional feature parameters: drop_writes: All write I/O is silently ignored. Read I/O is handled correctly. + + corrupt_bio_byte : + During , replace of the data of + each matching bio with . + + : The offset of the byte to replace. + Counting starts at 1, to replace the first byte. + : Either 'r' to corrupt reads or 'w' to corrupt writes. + 'w' is incompatible with drop_writes. + : The value (from 0-255) to write. + : Perform the replacement only if bio->bi_rw has all the + selected flags set. + +Examples: + corrupt_bio_byte 32 r 1 0 + - replaces the 32nd byte of READ bios with the value 1 + + corrupt_bio_byte 224 w 0 32 + - replaces the 224th byte of REQ_META (=32) bios with the value 0 diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index e7c4c2a..89f73ca 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2003 Sistina Software (UK) Limited. - * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -15,6 +15,9 @@ #define DM_MSG_PREFIX "flakey" +#define all_corrupt_bio_flags_match(bio, fc) \ + (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) + /* * Flakey: Used for testing only, simulates intermittent, * catastrophic device failure. 
@@ -26,6 +29,10 @@ struct flakey_c { unsigned up_interval; unsigned down_interval; unsigned long flags; + unsigned corrupt_bio_byte; + unsigned corrupt_bio_rw; + unsigned corrupt_bio_value; + unsigned corrupt_bio_flags; }; enum feature_flag_bits { @@ -40,7 +47,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, const char *arg_name; static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, + {0, 6, "Invalid number of feature args"}, + {1, UINT_MAX, "Invalid corrupt bio byte"}, + {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, + {0, UINT_MAX, "Invalid corrupt bio flags mask"}, }; /* No feature arguments supplied. */ @@ -49,9 +59,9 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, r = dm_read_arg_group(_args, as, &argc, &ti->error); if (r) - return -EINVAL; + return r; - while (argc && !r) { + while (argc) { arg_name = dm_shift_arg(as); argc--; @@ -67,11 +77,61 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, continue; } + /* + * corrupt_bio_byte + */ + if (!strcasecmp(arg_name, "corrupt_bio_byte")) { + if (!argc) + ti->error = "Feature corrupt_bio_byte requires parameters"; + + r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); + if (r) + return r; + argc--; + + /* + * Direction r or w? + */ + arg_name = dm_shift_arg(as); + if (!strcasecmp(arg_name, "w")) + fc->corrupt_bio_rw = WRITE; + else if (!strcasecmp(arg_name, "r")) + fc->corrupt_bio_rw = READ; + else { + ti->error = "Invalid corrupt bio direction (r or w)"; + return -EINVAL; + } + argc--; + + /* + * Value of byte (0-255) to write in place of correct one. + */ + r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error); + if (r) + return r; + argc--; + + /* + * Only corrupt bios with these flags set. + */ + r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); + if (r) + return r; + argc--; + + continue; + } + ti->error = "Unrecognised flakey feature requested"; - r = -EINVAL; + return -EINVAL; } - return r; + if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { + ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; + return -EINVAL; + } + + return 0; } /* @@ -80,6 +140,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, * * Feature args: * [drop_writes] + * [corrupt_bio_byte ] + * + * Nth_byte starts from 1 for the first byte. + * Direction is r for READ or w for WRITE. + * bio_flags is ignored if 0. */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -178,31 +243,64 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); } +static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) +{ + unsigned bio_bytes = bio_cur_bytes(bio); + char *data = bio_data(bio); + + /* + * Overwrite the Nth byte of the data returned. + */ + if (data && bio_bytes >= fc->corrupt_bio_byte) { + data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; + + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", + bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, + (bio_data_dir(bio) == WRITE) ? 'w' : 'r', + bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); + } +} + static int flakey_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct flakey_c *fc = ti->private; unsigned elapsed; - unsigned rw; /* Are we alive ? 
*/ elapsed = (jiffies - fc->start_time) / HZ; if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { - rw = bio_data_dir(bio); + /* + * Flag this bio as submitted while down. + */ + map_context->ll = 1; + + /* + * Map reads as normal. + */ + if (bio_data_dir(bio) == READ) + goto map_bio; /* - * Drop writes. Map reads as normal. + * Drop writes? */ if (test_bit(DROP_WRITES, &fc->flags)) { - if (rw == WRITE) { - bio_endio(bio, 0); - return DM_MAPIO_SUBMITTED; - } + bio_endio(bio, 0); + return DM_MAPIO_SUBMITTED; + } + + /* + * Corrupt matching writes. + */ + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { + if (all_corrupt_bio_flags_match(bio, fc)) + corrupt_bio_data(bio, fc); goto map_bio; } /* - * Default setting errors all I/O. + * By default, error all I/O. */ return -EIO; } @@ -213,6 +311,24 @@ map_bio: return DM_MAPIO_REMAPPED; } +static int flakey_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct flakey_c *fc = ti->private; + unsigned bio_submitted_while_down = map_context->ll; + + /* + * Corrupt successful READs while in down state. + * If flags were specified, only corrupt those that match. + */ + if (!error && bio_submitted_while_down && + (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) + corrupt_bio_data(bio, fc); + + return error; +} + static int flakey_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { @@ -231,9 +347,17 @@ static int flakey_status(struct dm_target *ti, status_type_t type, fc->down_interval); drop_writes = test_bit(DROP_WRITES, &fc->flags); - DMEMIT("%u ", drop_writes); + DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); + if (drop_writes) DMEMIT("drop_writes "); + + if (fc->corrupt_bio_byte) + DMEMIT("corrupt_bio_byte %u %c %u %u ", + fc->corrupt_bio_byte, + (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r', + fc->corrupt_bio_value, fc->corrupt_bio_flags); + break; } return 0; @@ -275,6 +399,7 @@ static struct target_type flakey_target = { .ctr = flakey_ctr, .dtr = flakey_dtr, .map = flakey_map, + .end_io = flakey_end_io, .status = flakey_status, .ioctl = flakey_ioctl, .merge = flakey_merge, -- cgit v1.1 From 0ddf9644cc26e74ed671525e61a17bdbebf18da6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:06 +0100 Subject: dm ioctl: fill in device parameters in more ioctls Move parameter filling from find_device to __find_device_hash_cell. This patch causes ioctls using __find_device_hash_cell (DM_DEV_REMOVE_CMD, DM_DEV_SUSPEND_CMD - resume, DM_TABLE_CLEAR_CMD) to return device parameters, bringing them into line with the other ioctls. 
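To illustrate what "return device parameters" means for a caller, a rough user-space sketch follows (invented for illustration, not part of the patch; the device name is made up, error handling is minimal, and real tools would use libdevmapper instead of raw ioctls):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dm-ioctl.h>

int main(void)
{
	struct dm_ioctl io;
	int fd = open("/dev/mapper/control", O_RDWR);

	if (fd < 0)
		return 1;

	memset(&io, 0, sizeof(io));
	io.version[0] = DM_VERSION_MAJOR;
	io.version[1] = DM_VERSION_MINOR;
	io.version[2] = DM_VERSION_PATCHLEVEL;
	io.data_size = sizeof(io);
	io.data_start = sizeof(io);
	strncpy(io.name, "example-dev", sizeof(io.name) - 1);

	if (ioctl(fd, DM_DEV_REMOVE, &io) == 0)
		/* With this change, uuid and the status flags
		 * (e.g. DM_INACTIVE_PRESENT_FLAG) come back filled in too. */
		printf("removed %s uuid=%s flags=0x%x\n",
		       io.name, io.uuid, io.flags);

	close(fd);
	return 0;
}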
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 63 +++++++++++++++++++++++++++--------------------- include/linux/dm-ioctl.h | 4 +-- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1622a6b..99d38a6 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -719,24 +719,49 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { struct mapped_device *md; - void *mdptr = NULL; + struct hash_cell *hc = NULL; - if (*param->uuid) - return __get_uuid_cell(param->uuid); + if (*param->uuid) { + hc = __get_uuid_cell(param->uuid); + if (!hc) + return NULL; + goto fill_params; + } - if (*param->name) - return __get_name_cell(param->name); + if (*param->name) { + hc = __get_name_cell(param->name); + if (!hc) + return NULL; + goto fill_params; + } md = dm_get_md(huge_decode_dev(param->dev)); if (!md) - goto out; + return NULL; - mdptr = dm_get_mdptr(md); - if (!mdptr) + hc = dm_get_mdptr(md); + if (!hc) { dm_put(md); + return NULL; + } -out: - return mdptr; +fill_params: + /* + * Sneakily write in both the name and the uuid + * while we have the cell. + */ + strlcpy(param->name, hc->name, sizeof(param->name)); + if (hc->uuid) + strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); + else + param->uuid[0] = '\0'; + + if (hc->new_map) + param->flags |= DM_INACTIVE_PRESENT_FLAG; + else + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; + + return hc; } static struct mapped_device *find_device(struct dm_ioctl *param) @@ -746,24 +771,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param) down_read(&_hash_lock); hc = __find_device_hash_cell(param); - if (hc) { + if (hc) md = hc->md; - - /* - * Sneakily write in both the name and the uuid - * while we have the cell. - */ - strlcpy(param->name, hc->name, sizeof(param->name)); - if (hc->uuid) - strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); - else - param->uuid[0] = '\0'; - - if (hc->new_map) - param->flags |= DM_INACTIVE_PRESENT_FLAG; - else - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; - } up_read(&_hash_lock); return md; diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 3708455..0cb8eff 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 20 +#define DM_VERSION_MINOR 21 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-02-02)" +#define DM_VERSION_EXTRA "-ioctl (2011-07-06)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.1 From ba2e19b0f4ccd6920fe175a86521ff18ede260cb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:06 +0100 Subject: dm ioctl: introduce __get_dev_cell Move logic to find device based on major/minor number to a separate function __get_dev_cell (similar to __get_uuid_cell and __get_name_cell). This makes the function __find_device_hash_cell more straightforward. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 99d38a6..4c35091 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str) return NULL; } +static struct hash_cell *__get_dev_cell(uint64_t dev) +{ + struct mapped_device *md; + struct hash_cell *hc; + + md = dm_get_md(huge_decode_dev(dev)); + if (!md) + return NULL; + + hc = dm_get_mdptr(md); + if (!hc) { + dm_put(md); + return NULL; + } + + return hc; +} + /*----------------------------------------------------------------- * Inserting, removing and renaming a device. *---------------------------------------------------------------*/ @@ -718,34 +736,23 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) */ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { - struct mapped_device *md; struct hash_cell *hc = NULL; if (*param->uuid) { hc = __get_uuid_cell(param->uuid); if (!hc) return NULL; - goto fill_params; - } - - if (*param->name) { + } else if (*param->name) { hc = __get_name_cell(param->name); if (!hc) return NULL; - goto fill_params; - } - - md = dm_get_md(huge_decode_dev(param->dev)); - if (!md) - return NULL; - - hc = dm_get_mdptr(md); - if (!hc) { - dm_put(md); + } else if (param->dev) { + hc = __get_dev_cell(param->dev); + if (!hc) + return NULL; + } else return NULL; - } -fill_params: /* * Sneakily write in both the name and the uuid * while we have the cell. -- cgit v1.1 From 759dea204cce9f1fc2a5d00ea25211299fc7a4a0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 2 Aug 2011 12:32:06 +0100 Subject: dm ioctl: forbid multiple device specifiers Exactly one of name, uuid or device must be specified when referencing an existing device. This removes the ambiguity (risking the wrong device being updated) if two conflicting parameters were specified. Previously one parameter got used and any others were ignored silently. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 4c35091..2e9a3ca 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -739,10 +739,16 @@ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) struct hash_cell *hc = NULL; if (*param->uuid) { + if (*param->name || param->dev) + return NULL; + hc = __get_uuid_cell(param->uuid); if (!hc) return NULL; } else if (*param->name) { + if (param->dev) + return NULL; + hc = __get_name_cell(param->name); if (!hc) return NULL; -- cgit v1.1 From c0a2fa1ef1057a1e9450d6f055f1cde2ad4f85a2 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:06 +0100 Subject: dm raid: improve table parameters documentation Add more information about some dm-raid table parameters and clarify how parameters are printed when 'dmsetup table' is issued. 
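For orientation, a short hypothetical session driving such a table from user space (the mapping name raid_ex is invented; the table line is the RAID4 example carried in the documentation below):

  echo "0 1960893648 raid raid4 1 2048 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81" | dmsetup create raid_ex
  dmsetup table raid_ex    # prints the constructor table, optional args in canonical order
  dmsetup status raid_ex   # prints the health/progress line described in the documentation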
Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-raid.txt | 124 ++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 46 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 33b6b70..4f9dd3c 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -1,49 +1,75 @@ -Device-mapper RAID (dm-raid) is a bridge from DM to MD. It -provides a way to use device-mapper interfaces to access the MD RAID -drivers. +dm-raid +------- -As with all device-mapper targets, the nominal public interfaces are the -constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO -and STATUSTYPE_TABLE). The CTR table looks like the following: +The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. +It allows the MD RAID drivers to be accessed using a device-mapper +interface. -1: raid \ -2: <#raid_params> \ -3: <#raid_devs> .. - -Line 1 contains the standard first three arguments to any device-mapper -target - the start, length, and target type fields. The target type in -this case is "raid". - -Line 2 contains the arguments that define the particular raid -type/personality/level, the required arguments for that raid type, and -any optional arguments. Possible raid types include: raid4, raid5_la, -raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is -planned for the future.) The list of required and optional parameters -is the same for all the current raid types. The required parameters are -positional, while the optional parameters are given as key/value pairs. -The possible parameters are as follows: - Chunk size in sectors. - [[no]sync] Force/Prevent RAID initialization - [rebuild ] Rebuild the drive indicated by the index - [daemon_sleep ] Time between bitmap daemon work to clear bits - [min_recovery_rate ] Throttle RAID initialization - [max_recovery_rate ] Throttle RAID initialization - [max_write_behind ] See '-write-behind=' (man mdadm) - [stripe_cache ] Stripe cache size for higher RAIDs - -Line 3 contains the list of devices that compose the array in -metadata/data device pairs. If the metadata is stored separately, a '-' -is given for the metadata device position. If a drive has failed or is -missing at creation time, a '-' can be given for both the metadata and -data drives for a given position. - -NB. Currently all metadata devices must be specified as '-'. - -Examples: +The target is named "raid" and it accepts the following parameters: + + <#raid_params> \ + <#raid_devs> [.. ] + +: + raid4 RAID4 dedicated parity disk + raid5_la RAID5 left asymmetric + - rotating parity 0 with data continuation + raid5_ra RAID5 right asymmetric + - rotating parity N with data continuation + raid5_ls RAID5 left symmetric + - rotating parity 0 with data restart + raid5_rs RAID5 right symmetric + - rotating parity N with data restart + raid6_zr RAID6 zero restart + - rotating parity zero (left-to-right) with data restart + raid6_nr RAID6 N restart + - rotating parity N (right-to-left) with data restart + raid6_nc RAID6 N continue + - rotating parity N (right-to-left) with data continuation + + Refererence: Chapter 4 of + http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf + +<#raid_params>: The number of parameters that follow. + + consists of + Mandatory parameters: + : Chunk size in sectors. This parameter is often known as + "stripe size". 
It is the only mandatory parameter and + is placed first. + + followed by optional parameters (in any order): + [sync|nosync] Force or prevent RAID initialization. + + [rebuild ] Rebuild drive number idx (first drive is 0). + + [daemon_sleep ] + Interval between runs of the bitmap daemon that + clear bits. A longer interval means less bitmap I/O but + resyncing after a failure is likely to take longer. + + [min_recovery_rate ] Throttle RAID initialization + [max_recovery_rate ] Throttle RAID initialization + [max_write_behind ] See '-write-behind=' (man mdadm) + [stripe_cache ] Stripe cache size (higher RAIDs only) + +<#raid_devs>: The number of devices composing the array. + Each device consists of two entries. The first is the device + containing the metadata (if any); the second is the one containing the + data. Currently, separate metadata devices are not supported and '-' + is required in place of the metadata device. + + If a drive has failed or is missing at creation time, a '-' can be + given for both the metadata and data drives for a given position. + + +Example tables +-------------- # RAID4 - 4 data drives, 1 parity # No metadata devices specified to hold superblock/bitmap info # Chunk size of 1MiB # (Lines separated for easy reading) + 0 1960893648 raid \ raid4 1 2048 \ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 @@ -51,20 +77,26 @@ Examples: # RAID4 - 4 data drives, 1 parity (no metadata devices) # Chunk size of 1MiB, force RAID initialization, # min recovery rate at 20 kiB/sec/disk + 0 1960893648 raid \ raid4 4 2048 min_recovery_rate 20 sync\ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 -Performing a 'dmsetup table' should display the CTR table used to -construct the mapping (with possible reordering of optional -parameters). +'dmsetup table' displays the table used to construct the mapping. +The optional parameters will always be printed in the order listed +above with "sync" or "nosync" always output ahead of the other +arguments, regardless of the order used when originally loading the table. -Performing a 'dmsetup status' will yield information on the state and -health of the array. The output is as follows: +'dmsetup status' yields information on the state and health of the +array. +The output is as follows: 1: raid \ 2: <#devices> <1 health char for each dev> -Line 1 is standard DM output. Line 2 is best shown by example: +Line 1 is the standard output produced by device-mapper. +Line 2 is produced by the raid target, and best explained by example: 0 1960893648 raid raid4 5 AAAAA 2/490221568 Here we can see the RAID type is raid4, there are 5 devices - all of which are 'A'live, and the array is 2/490221568 complete with recovery. +Faulty or missing devices are marked 'D'. Devices that are out-of-sync +are marked 'a'. -- cgit v1.1 From c1084561bb85da3630540ebe951749a8cd8fc714 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:07 +0100 Subject: dm raid: add region_size parameter Allow the user to specify the region_size. Ensures that the supplied value meets md's constraints, viz. the number of regions does not exceed 2^21. 
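To make the 2^21 region limit concrete, a hedged worked example (the array size and table line are invented for illustration, following the documentation's example layout): a 1 TiB array is 2^31 512-byte sectors, so the smallest acceptable region_size is 2^31 / 2^21 = 1024 sectors (512 KiB). Since that minimum is below the driver's 8192-sector (4 MiB) default, the default would be chosen automatically unless the table overrides it, e.g.:

  0 2147483648 raid raid4 3 2048 region_size 8192 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81

Here the raid parameter count is 3 (the chunk size plus the region_size key and value), and the chosen region_size must be a power of two, no smaller than the chunk size and no larger than the array.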
Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-raid.txt | 4 ++ drivers/md/dm-raid.c | 82 +++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 4f9dd3c..be4419a 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -52,6 +52,10 @@ The target is named "raid" and it accepts the following parameters: [max_recovery_rate ] Throttle RAID initialization [max_write_behind ] See '-write-behind=' (man mdadm) [stripe_cache ] Stripe cache size (higher RAIDs only) + [region_size ] + The region_size multiplied by the number of regions is the + logical size of the array. The bitmap records the device + synchronisation state for each region. <#raid_devs>: The number of devices composing the array. Each device consists of two entries. The first is the device diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d4e95b2..a8a1915 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -52,7 +52,7 @@ struct raid_dev { #define DMPF_MAX_RECOVERY_RATE 0x20 #define DMPF_MAX_WRITE_BEHIND 0x40 #define DMPF_STRIPE_CACHE 0x80 - +#define DMPF_REGION_SIZE 0X100 struct raid_set { struct dm_target *ti; @@ -237,6 +237,67 @@ static int dev_parms(struct raid_set *rs, char **argv) } /* + * validate_region_size + * @rs + * @region_size: region size in sectors. If 0, pick a size (4MiB default). + * + * Set rs->md.bitmap_info.chunksize (which really refers to 'region size'). + * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap. + * + * Returns: 0 on success, -EINVAL on failure. + */ +static int validate_region_size(struct raid_set *rs, unsigned long region_size) +{ + unsigned long min_region_size = rs->ti->len / (1 << 21); + + if (!region_size) { + /* + * Choose a reasonable default. All figures in sectors. + */ + if (min_region_size > (1 << 13)) { + DMINFO("Choosing default region size of %lu sectors", + region_size); + region_size = min_region_size; + } else { + DMINFO("Choosing default region size of 4MiB"); + region_size = 1 << 13; /* sectors */ + } + } else { + /* + * Validate user-supplied value. + */ + if (region_size > rs->ti->len) { + rs->ti->error = "Supplied region size is too large"; + return -EINVAL; + } + + if (region_size < min_region_size) { + DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", + region_size, min_region_size); + rs->ti->error = "Supplied region size is too small"; + return -EINVAL; + } + + if (!is_power_of_2(region_size)) { + rs->ti->error = "Region size is not a power of 2"; + return -EINVAL; + } + + if (region_size < rs->md.chunk_sectors) { + rs->ti->error = "Region size is smaller than the chunk size"; + return -EINVAL; + } + } + + /* + * Convert sectors to bytes. + */ + rs->md.bitmap_info.chunksize = (region_size << 9); + + return 0; +} + +/* * Possible arguments are... 
* RAID456: * [optional_args] @@ -249,12 +310,13 @@ static int dev_parms(struct raid_set *rs, char **argv) * [max_recovery_rate ] Throttle RAID initialization * [max_write_behind ] See '-write-behind=' (man mdadm) * [stripe_cache ] Stripe cache size for higher RAIDs + * [region_size ] Defines granularity of bitmap */ static int parse_raid_params(struct raid_set *rs, char **argv, unsigned num_raid_params) { unsigned i, rebuild_cnt = 0; - unsigned long value; + unsigned long value, region_size = 0; char *key; /* @@ -365,6 +427,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } rs->md.sync_speed_max = (int)value; + } else if (!strcasecmp(key, "region_size")) { + rs->print_flags |= DMPF_REGION_SIZE; + region_size = value; } else { DMERR("Unable to parse RAID parameter: %s", key); rs->ti->error = "Unable to parse RAID parameters"; @@ -372,6 +437,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } } + if (validate_region_size(rs, region_size)) + return -EINVAL; + + if (rs->md.chunk_sectors) + rs->ti->split_io = rs->md.chunk_sectors; + else + rs->ti->split_io = region_size; + /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; @@ -469,7 +542,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; INIT_WORK(&rs->md.event_work, do_table_event); - ti->split_io = rs->md.chunk_sectors; ti->private = rs; mutex_lock(&rs->md.reconfig_mutex); @@ -596,6 +668,10 @@ static int raid_status(struct dm_target *ti, status_type_t type, conf ? conf->max_nr_stripes * 2 : 0); } + if (rs->print_flags & DMPF_REGION_SIZE) + DMEMIT(" region_size %lu", + rs->md.bitmap_info.chunksize >> 9); + DMEMIT(" %d", rs->md.raid_disks); for (i = 0; i < rs->md.raid_disks; i++) { DMEMIT(" -"); /* metadata device */ -- cgit v1.1 From 46bed2b5c16bb7c82e1088d7ae75fb958c8a8c4e Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:07 +0100 Subject: dm raid: add write_mostly parameter Add the write_mostly parameter to RAID1 dm-raid tables. This allows the user to set the WriteMostly flag on a RAID1 device that should normally be avoided for read I/O. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-raid.txt | 4 +++- drivers/md/dm-raid.c | 26 +++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index be4419a..a7d1c4a 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -50,6 +50,7 @@ The target is named "raid" and it accepts the following parameters: [min_recovery_rate ] Throttle RAID initialization [max_recovery_rate ] Throttle RAID initialization + [write_mostly ] Drive index is write-mostly [max_write_behind ] See '-write-behind=' (man mdadm) [stripe_cache ] Stripe cache size (higher RAIDs only) [region_size ] @@ -87,9 +88,10 @@ Example tables 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 'dmsetup table' displays the table used to construct the mapping. -The optional parameters will always be printed in the order listed +The optional parameters are always printed in the order listed above with "sync" or "nosync" always output ahead of the other arguments, regardless of the order used when originally loading the table. +Arguments that can be repeated are ordered by value. 'dmsetup status' yields information on the state and health of the array. 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index a8a1915..88143a0 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -308,6 +308,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) * [daemon_sleep ] Time between bitmap daemon work to clear bits * [min_recovery_rate ] Throttle RAID initialization * [max_recovery_rate ] Throttle RAID initialization + * [write_mostly ] Indicate a write mostly drive via index * [max_write_behind ] See '-write-behind=' (man mdadm) * [stripe_cache ] Stripe cache size for higher RAIDs * [region_size ] Defines granularity of bitmap @@ -376,7 +377,21 @@ static int parse_raid_params(struct raid_set *rs, char **argv, clear_bit(In_sync, &rs->dev[value].rdev.flags); rs->dev[value].rdev.recovery_offset = 0; rs->print_flags |= DMPF_REBUILD; + } else if (!strcasecmp(key, "write_mostly")) { + if (rs->raid_type->level != 1) { + rs->ti->error = "write_mostly option is only valid for RAID1"; + return -EINVAL; + } + if (value > rs->md.raid_disks) { + rs->ti->error = "Invalid write_mostly drive index given"; + return -EINVAL; + } + set_bit(WriteMostly, &rs->dev[value].rdev.flags); } else if (!strcasecmp(key, "max_write_behind")) { + if (rs->raid_type->level != 1) { + rs->ti->error = "max_write_behind option is only valid for RAID1"; + return -EINVAL; + } rs->print_flags |= DMPF_MAX_WRITE_BEHIND; /* @@ -621,11 +636,15 @@ static int raid_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: /* The string you would use to construct this array */ - for (i = 0; i < rs->md.raid_disks; i++) + for (i = 0; i < rs->md.raid_disks; i++) { if ((rs->print_flags & DMPF_REBUILD) && rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) raid_param_cnt += 2; /* for rebuilds */ + if (rs->dev[i].data_dev && + test_bit(WriteMostly, &rs->dev[i].rdev.flags)) + raid_param_cnt += 2; + } raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) @@ -656,6 +675,11 @@ static int raid_status(struct dm_target *ti, status_type_t type, if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); + for (i = 0; i < rs->md.raid_disks; i++) + if (rs->dev[i].data_dev && + test_bit(WriteMostly, &rs->dev[i].rdev.flags)) + DMEMIT(" write_mostly %u", i); + if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) DMEMIT(" max_write_behind %lu", rs->md.bitmap_info.max_write_behind); -- cgit v1.1 From b12d437b73d32203a41fde0d407e91812c866844 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:07 +0100 Subject: dm raid: support metadata devices Add the ability to parse and use metadata devices to dm-raid. Although not strictly required, without the metadata devices, many features of RAID are unavailable. They are used to store a superblock and bitmap. The role, or position in the array, of each device must be recorded in its superblock. This is to help with fault handling, array reshaping, and sanity checks. RAID 4/5/6 devices must be loaded in a specific order: in this way, the 'array_position' field helps validate the correctness of the mapping when it is loaded. It can be used during reshaping to identify which devices are added/removed. Fault handling is impossible without this field. For example, when a device fails it is recorded in the superblock. 
If this is a RAID1 device and the offending device is removed from the array, there must be a way during subsequent array assembly to determine that the failed device was the one removed. This is done by correlating the 'array_position' field and the bit-field variable 'failed_devices'. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-raid.txt | 12 +- drivers/md/Kconfig | 5 +- drivers/md/dm-raid.c | 419 ++++++++++++++++++++++++++++++-- 3 files changed, 412 insertions(+), 24 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index a7d1c4a..2a8c113 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -11,6 +11,7 @@ The target is named "raid" and it accepts the following parameters: <#raid_devs> [.. ] : + raid1 RAID1 mirroring raid4 RAID4 dedicated parity disk raid5_la RAID5 left asymmetric - rotating parity 0 with data continuation @@ -61,8 +62,7 @@ The target is named "raid" and it accepts the following parameters: <#raid_devs>: The number of devices composing the array. Each device consists of two entries. The first is the device containing the metadata (if any); the second is the one containing the - data. Currently, separate metadata devices are not supported and '-' - is required in place of the metadata device. + data. If a drive has failed or is missing at creation time, a '-' can be given for both the metadata and data drives for a given position. @@ -70,7 +70,7 @@ The target is named "raid" and it accepts the following parameters: Example tables -------------- -# RAID4 - 4 data drives, 1 parity +# RAID4 - 4 data drives, 1 parity (no metadata devices) # No metadata devices specified to hold superblock/bitmap info # Chunk size of 1MiB # (Lines separated for easy reading) @@ -79,13 +79,13 @@ Example tables raid4 1 2048 \ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 -# RAID4 - 4 data drives, 1 parity (no metadata devices) +# RAID4 - 4 data drives, 1 parity (with metadata devices) # Chunk size of 1MiB, force RAID initialization, # min recovery rate at 20 kiB/sec/disk 0 1960893648 raid \ - raid4 4 2048 min_recovery_rate 20 sync\ - 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 + raid4 4 2048 sync min_recovery_rate 20 \ + 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 'dmsetup table' displays the table used to construct the mapping. The optional parameters are always printed in the order listed diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 8420129..f75a66e 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -241,12 +241,13 @@ config DM_MIRROR needed for live data migration tools such as 'pvmove'. config DM_RAID - tristate "RAID 4/5/6 target (EXPERIMENTAL)" + tristate "RAID 1/4/5/6 target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL + select MD_RAID1 select MD_RAID456 select BLK_DEV_MD ---help--- - A dm target that supports RAID4, RAID5 and RAID6 mappings + A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 88143a0..6987380 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -16,12 +16,10 @@ #define DM_MSG_PREFIX "raid" /* - * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then - * make it so the flag doesn't set anything. + * The following flags are used by dm-raid.c to set up the array state. 
+ * They must be cleared before md_run is called. */ -#ifndef MD_SYNC_STATE_FORCED -#define MD_SYNC_STATE_FORCED 0 -#endif +#define FirstUse 10 /* rdev flag */ struct raid_dev { /* @@ -149,9 +147,16 @@ static void context_free(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) + for (i = 0; i < rs->md.raid_disks; i++) { + if (rs->dev[i].meta_dev) + dm_put_device(rs->ti, rs->dev[i].meta_dev); + if (rs->dev[i].rdev.sb_page) + put_page(rs->dev[i].rdev.sb_page); + rs->dev[i].rdev.sb_page = NULL; + rs->dev[i].rdev.sb_loaded = 0; if (rs->dev[i].data_dev) dm_put_device(rs->ti, rs->dev[i].data_dev); + } kfree(rs); } @@ -161,7 +166,16 @@ static void context_free(struct raid_set *rs) * : meta device name or '-' if missing * : data device name or '-' if missing * - * This code parses those words. + * The following are permitted: + * - - + * - + * + * + * The following is not allowed: + * - + * + * This code parses those words. If there is a failure, + * the caller must use context_free to unwind the operations. */ static int dev_parms(struct raid_set *rs, char **argv) { @@ -184,8 +198,16 @@ static int dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.mddev = &rs->md; if (strcmp(argv[0], "-")) { - rs->ti->error = "Metadata devices not supported"; - return -EINVAL; + ret = dm_get_device(rs->ti, argv[0], + dm_table_get_mode(rs->ti->table), + &rs->dev[i].meta_dev); + rs->ti->error = "RAID metadata device lookup failure"; + if (ret) + return ret; + + rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); + if (!rs->dev[i].rdev.sb_page) + return -ENOMEM; } if (!strcmp(argv[1], "-")) { @@ -195,6 +217,10 @@ static int dev_parms(struct raid_set *rs, char **argv) return -EINVAL; } + rs->ti->error = "No data device supplied with metadata device"; + if (rs->dev[i].meta_dev) + return -EINVAL; + continue; } @@ -206,6 +232,10 @@ static int dev_parms(struct raid_set *rs, char **argv) return ret; } + if (rs->dev[i].meta_dev) { + metadata_available = 1; + rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; + } rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) @@ -334,22 +364,39 @@ static int parse_raid_params(struct raid_set *rs, char **argv, num_raid_params--; /* - * Second, parse the unordered optional arguments + * We set each individual device as In_sync with a completed + * 'recovery_offset'. If there has been a device failure or + * replacement then one of the following cases applies: + * + * 1) User specifies 'rebuild'. + * - Device is reset when param is read. + * 2) A new device is supplied. + * - No matching superblock found, resets device. + * 3) Device failure was transient and returns on reload. + * - Failure noticed, resets device for bitmap replay. + * 4) Device hadn't completed recovery after previous failure. + * - Superblock is read and overrides recovery_offset. + * + * What is found in the superblocks of the devices is always + * authoritative, unless 'rebuild' or '[no]sync' was specified. 
*/ - for (i = 0; i < rs->md.raid_disks; i++) + for (i = 0; i < rs->md.raid_disks; i++) { set_bit(In_sync, &rs->dev[i].rdev.flags); + rs->dev[i].rdev.recovery_offset = MaxSector; + } + /* + * Second, parse the unordered optional arguments + */ for (i = 0; i < num_raid_params; i++) { if (!strcasecmp(argv[i], "nosync")) { rs->md.recovery_cp = MaxSector; rs->print_flags |= DMPF_NOSYNC; - rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } if (!strcasecmp(argv[i], "sync")) { rs->md.recovery_cp = 0; rs->print_flags |= DMPF_SYNC; - rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } @@ -482,13 +529,344 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) } /* + * This structure is never routinely used by userspace, unlike md superblocks. + * Devices with this superblock should only ever be accessed via device-mapper. + */ +#define DM_RAID_MAGIC 0x64526D44 +struct dm_raid_superblock { + __le32 magic; /* "DmRd" */ + __le32 features; /* Used to indicate possible future changes */ + + __le32 num_devices; /* Number of devices in this array. (Max 64) */ + __le32 array_position; /* The position of this drive in the array */ + + __le64 events; /* Incremented by md when superblock updated */ + __le64 failed_devices; /* Bit field of devices to indicate failures */ + + /* + * This offset tracks the progress of the repair or replacement of + * an individual drive. + */ + __le64 disk_recovery_offset; + + /* + * This offset tracks the progress of the initial array + * synchronisation/parity calculation. + */ + __le64 array_resync_offset; + + /* + * RAID characteristics + */ + __le32 level; + __le32 layout; + __le32 stripe_sectors; + + __u8 pad[452]; /* Round struct to 512 bytes. */ + /* Always set to 0 when writing. */ +} __packed; + +static int read_disk_sb(mdk_rdev_t *rdev, int size) +{ + BUG_ON(!rdev->sb_page); + + if (rdev->sb_loaded) + return 0; + + if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { + DMERR("Failed to read device superblock"); + return -EINVAL; + } + + rdev->sb_loaded = 1; + + return 0; +} + +static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev) +{ + mdk_rdev_t *r, *t; + uint64_t failed_devices; + struct dm_raid_superblock *sb; + + sb = page_address(rdev->sb_page); + failed_devices = le64_to_cpu(sb->failed_devices); + + rdev_for_each(r, t, mddev) + if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) + failed_devices |= (1ULL << r->raid_disk); + + memset(sb, 0, sizeof(*sb)); + + sb->magic = cpu_to_le32(DM_RAID_MAGIC); + sb->features = cpu_to_le32(0); /* No features yet */ + + sb->num_devices = cpu_to_le32(mddev->raid_disks); + sb->array_position = cpu_to_le32(rdev->raid_disk); + + sb->events = cpu_to_le64(mddev->events); + sb->failed_devices = cpu_to_le64(failed_devices); + + sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); + sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); + + sb->level = cpu_to_le32(mddev->level); + sb->layout = cpu_to_le32(mddev->layout); + sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); +} + +/* + * super_load + * + * This function creates a superblock if one is not found on the device + * and will decide which superblock to use if there's a choice. 
+ * + * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise + */ +static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev) +{ + int ret; + struct dm_raid_superblock *sb; + struct dm_raid_superblock *refsb; + uint64_t events_sb, events_refsb; + + rdev->sb_start = 0; + rdev->sb_size = sizeof(*sb); + + ret = read_disk_sb(rdev, rdev->sb_size); + if (ret) + return ret; + + sb = page_address(rdev->sb_page); + if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) { + super_sync(rdev->mddev, rdev); + + set_bit(FirstUse, &rdev->flags); + + /* Force writing of superblocks to disk */ + set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); + + /* Any superblock is better than none, choose that if given */ + return refdev ? 0 : 1; + } + + if (!refdev) + return 1; + + events_sb = le64_to_cpu(sb->events); + + refsb = page_address(refdev->sb_page); + events_refsb = le64_to_cpu(refsb->events); + + return (events_sb > events_refsb) ? 1 : 0; +} + +static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev) +{ + int role; + struct raid_set *rs = container_of(mddev, struct raid_set, md); + uint64_t events_sb; + uint64_t failed_devices; + struct dm_raid_superblock *sb; + uint32_t new_devs = 0; + uint32_t rebuilds = 0; + mdk_rdev_t *r, *t; + struct dm_raid_superblock *sb2; + + sb = page_address(rdev->sb_page); + events_sb = le64_to_cpu(sb->events); + failed_devices = le64_to_cpu(sb->failed_devices); + + /* + * Initialise to 1 if this is a new superblock. + */ + mddev->events = events_sb ? : 1; + + /* + * Reshaping is not currently allowed + */ + if ((le32_to_cpu(sb->level) != mddev->level) || + (le32_to_cpu(sb->layout) != mddev->layout) || + (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { + DMERR("Reshaping arrays not yet supported."); + return -EINVAL; + } + + /* We can only change the number of devices in RAID1 right now */ + if ((rs->raid_type->level != 1) && + (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { + DMERR("Reshaping arrays not yet supported."); + return -EINVAL; + } + + if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))) + mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); + + /* + * During load, we set FirstUse if a new superblock was written. + * There are two reasons we might not have a superblock: + * 1) The array is brand new - in which case, all of the + * devices must have their In_sync bit set. Also, + * recovery_cp must be 0, unless forced. + * 2) This is a new device being added to an old array + * and the new device needs to be rebuilt - in which + * case the In_sync bit will /not/ be set and + * recovery_cp must be MaxSector. 
+ */ + rdev_for_each(r, t, mddev) { + if (!test_bit(In_sync, &r->flags)) { + if (!test_bit(FirstUse, &r->flags)) + DMERR("Superblock area of " + "rebuild device %d should have been " + "cleared.", r->raid_disk); + set_bit(FirstUse, &r->flags); + rebuilds++; + } else if (test_bit(FirstUse, &r->flags)) + new_devs++; + } + + if (!rebuilds) { + if (new_devs == mddev->raid_disks) { + DMINFO("Superblocks created for new array"); + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + } else if (new_devs) { + DMERR("New device injected " + "into existing array without 'rebuild' " + "parameter specified"); + return -EINVAL; + } + } else if (new_devs) { + DMERR("'rebuild' devices cannot be " + "injected into an array with other first-time devices"); + return -EINVAL; + } else if (mddev->recovery_cp != MaxSector) { + DMERR("'rebuild' specified while array is not in-sync"); + return -EINVAL; + } + + /* + * Now we set the Faulty bit for those devices that are + * recorded in the superblock as failed. + */ + rdev_for_each(r, t, mddev) { + if (!r->sb_page) + continue; + sb2 = page_address(r->sb_page); + sb2->failed_devices = 0; + + /* + * Check for any device re-ordering. + */ + if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { + role = le32_to_cpu(sb2->array_position); + if (role != r->raid_disk) { + if (rs->raid_type->level != 1) { + rs->ti->error = "Cannot change device " + "positions in RAID array"; + return -EINVAL; + } + DMINFO("RAID1 device #%d now at position #%d", + role, r->raid_disk); + } + + /* + * Partial recovery is performed on + * returning failed devices. + */ + if (failed_devices & (1 << role)) + set_bit(Faulty, &r->flags); + } + } + + return 0; +} + +static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev) +{ + struct dm_raid_superblock *sb = page_address(rdev->sb_page); + + /* + * If mddev->events is not set, we know we have not yet initialized + * the array. + */ + if (!mddev->events && super_init_validation(mddev, rdev)) + return -EINVAL; + + mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */ + rdev->mddev->bitmap_info.default_offset = 4096 >> 9; + if (!test_bit(FirstUse, &rdev->flags)) { + rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); + if (rdev->recovery_offset != MaxSector) + clear_bit(In_sync, &rdev->flags); + } + + /* + * If a device comes back, set it as not In_sync and no longer faulty. + */ + if (test_bit(Faulty, &rdev->flags)) { + clear_bit(Faulty, &rdev->flags); + clear_bit(In_sync, &rdev->flags); + rdev->saved_raid_disk = rdev->raid_disk; + rdev->recovery_offset = 0; + } + + clear_bit(FirstUse, &rdev->flags); + + return 0; +} + +/* + * Analyse superblocks and select the freshest. + */ +static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) +{ + int ret; + mdk_rdev_t *rdev, *freshest, *tmp; + mddev_t *mddev = &rs->md; + + freshest = NULL; + rdev_for_each(rdev, tmp, mddev) { + if (!rdev->meta_bdev) + continue; + + ret = super_load(rdev, freshest); + + switch (ret) { + case 1: + freshest = rdev; + break; + case 0: + break; + default: + ti->error = "Failed to load superblock"; + return ret; + } + } + + if (!freshest) + return 0; + + /* + * Validation of the freshest device provides the source of + * validation for the remaining devices. 
+ */ + ti->error = "Unable to assemble array: Invalid superblocks"; + if (super_validate(mddev, freshest)) + return -EINVAL; + + rdev_for_each(rdev, tmp, mddev) + if ((rdev != freshest) && super_validate(mddev, rdev)) + return -EINVAL; + + return 0; +} + +/* * Construct a RAID4/5/6 mapping: * Args: * <#raid_params> \ * <#raid_devs> { .. } * - * ** metadata devices are not supported yet, use '-' instead ** - * * varies by . See 'parse_raid_params' for * details on possible . */ @@ -556,6 +934,11 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) if (ret) goto bad; + rs->md.sync_super = super_sync; + ret = analyse_superblocks(ti, rs); + if (ret) + goto bad; + INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; @@ -698,7 +1081,10 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" %d", rs->md.raid_disks); for (i = 0; i < rs->md.raid_disks; i++) { - DMEMIT(" -"); /* metadata device */ + if (rs->dev[i].meta_dev) + DMEMIT(" %s", rs->dev[i].meta_dev->name); + else + DMEMIT(" -"); if (rs->dev[i].data_dev) DMEMIT(" %s", rs->dev[i].data_dev->name); @@ -755,6 +1141,7 @@ static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; + bitmap_load(&rs->md); mddev_resume(&rs->md); } -- cgit v1.1 From 327372797c88b24953f454cd51a3734c02697bdd Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 2 Aug 2011 12:32:07 +0100 Subject: dm raid: add md raid1 support Support the MD RAID1 personality through dm-raid. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 6987380..a002dd8 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -8,6 +8,7 @@ #include #include "md.h" +#include "raid1.h" #include "raid5.h" #include "bitmap.h" @@ -72,6 +73,7 @@ static struct raid_type { const unsigned level; /* RAID level. */ const unsigned algorithm; /* RAID algorithm. */ } raid_types[] = { + {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, @@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra } sectors_per_dev = ti->len; - if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { + if ((raid_type->level > 1) && + sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { ti->error = "Target length not divisible by number of data devices"; return ERR_PTR(-EINVAL); } @@ -329,13 +332,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) /* * Possible arguments are... 
- * RAID456: * [optional_args] * - * Optional args: - * [[no]sync] Force or prevent recovery of the entire array + * Argument definitions + * The number of sectors per disk that + * will form the "stripe" + * [[no]sync] Force or prevent recovery of the + * entire array * [rebuild ] Rebuild the drive indicated by the index - * [daemon_sleep ] Time between bitmap daemon work to clear bits + * [daemon_sleep ] Time between bitmap daemon work to + * clear bits * [min_recovery_rate ] Throttle RAID initialization * [max_recovery_rate ] Throttle RAID initialization * [write_mostly ] Indicate a write mostly drive via index @@ -352,11 +358,21 @@ static int parse_raid_params(struct raid_set *rs, char **argv, /* * First, parse the in-order required arguments + * "chunk_size" is the only argument of this type. */ - if ((strict_strtoul(argv[0], 10, &value) < 0) || - !is_power_of_2(value) || (value < 8)) { + if ((strict_strtoul(argv[0], 10, &value) < 0)) { rs->ti->error = "Bad chunk size"; return -EINVAL; + } else if (rs->raid_type->level == 1) { + if (value) + DMERR("Ignoring chunk size parameter for RAID 1"); + value = 0; + } else if (!is_power_of_2(value)) { + rs->ti->error = "Chunk size must be a power of 2"; + return -EINVAL; + } else if (value < 8) { + rs->ti->error = "Chunk size value is too small"; + return -EINVAL; } rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; @@ -413,8 +429,12 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } if (!strcasecmp(key, "rebuild")) { - if (++rebuild_cnt > rs->raid_type->parity_devs) { - rs->ti->error = "Too many rebuild drives given"; + rebuild_cnt++; + if (((rs->raid_type->level != 1) && + (rebuild_cnt > rs->raid_type->parity_devs)) || + ((rs->raid_type->level == 1) && + (rebuild_cnt > (rs->md.raid_disks - 1)))) { + rs->ti->error = "Too many rebuild devices specified for given RAID type"; return -EINVAL; } if (value > rs->md.raid_disks) { @@ -507,6 +527,11 @@ static int parse_raid_params(struct raid_set *rs, char **argv, else rs->ti->split_io = region_size; + if (rs->md.chunk_sectors) + rs->ti->split_io = rs->md.chunk_sectors; + else + rs->ti->split_io = region_size; + /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; @@ -525,6 +550,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) { struct raid_set *rs = container_of(cb, struct raid_set, callbacks); + if (rs->raid_type->level == 1) + return md_raid1_congested(&rs->md, bits); + return md_raid5_congested(&rs->md, bits); } @@ -955,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); + mddev_suspend(&rs->md); return 0; bad: @@ -1147,7 +1176,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.1 From 772ae5f54d69c38a5e3c4352c5fdbdaff141af21 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 2 Aug 2011 12:32:08 +0100 Subject: dm crypt: optionally support discard requests Add optional parameter field to dmcrypt table and support "allow_discards" option. Discard requests bypass crypt queue processing. Bio is simple remapped to underlying device. Note that discard will be never enabled by default because of security consequences. It is up to the administrator to enable it for encrypted devices. 
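As a rough illustration (the device, sizes and key below are placeholders, not taken from this patch), enabling the option amounts to appending the optional-parameter count and name to an ordinary crypt table and loading it with dmsetup, since cryptsetup cannot pass it yet (see the note below):

  dmsetup create cryptdev --table "0 417792 crypt aes-cbc-essiv:sha256 0123456789abcdef0123456789abcdef 0 /dev/sdb 0 1 allow_discards"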
(Note that userspace cryptsetup does not understand new optional parameters yet. Support for this will come later. Until then, you should use 'dmsetup' to enable and disable this.) Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/dm-crypt.txt | 21 +++++++++++++- drivers/md/dm-crypt.c | 49 +++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index 6b5c42d..2c656ae 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -4,7 +4,8 @@ dm-crypt Device-Mapper's "crypt" target provides transparent encryption of block devices using the kernel crypto API. -Parameters: +Parameters: \ + [<#opt_params> ] Encryption cipher and an optional IV generation mode. @@ -37,6 +38,24 @@ Parameters: Starting sector within the device where the encrypted data begins. +<#opt_params> + Number of optional parameters. If there are no optional parameters, + the optional paramaters section can be skipped or #opt_params can be zero. + Otherwise #opt_params is the number of following arguments. + + Example of optional parameters section: + 1 allow_discards + +allow_discards + Block discard requests (a.k.a. TRIM) are passed through the crypt device. + The default is to ignore discard requests. + + WARNING: Assess the specific security risks carefully before enabling this + option. For example, allowing discards on encrypted devices may lead to + the leak of information about the ciphertext device (filesystem type, + used space etc.) if the discarded blocks can be located easily on the + device later. + Example scripts =============== LUKS (Linux Unified Key Setup) is now the preferred way to set up disk diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b79e747..49da55c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1574,11 +1574,17 @@ bad_mem: static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - unsigned int key_size; + unsigned int key_size, opt_params; unsigned long long tmpll; int ret; + struct dm_arg_set as; + const char *opt_string; + + static struct dm_arg _args[] = { + {0, 1, "Invalid number of feature args"}, + }; - if (argc != 5) { + if (argc < 5) { ti->error = "Not enough arguments"; return -EINVAL; } @@ -1647,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; + argv += 5; + argc -= 5; + + /* Optional parameters */ + if (argc) { + as.argc = argc; + as.argv = argv; + + ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (ret) + goto bad; + + opt_string = dm_shift_arg(&as); + + if (opt_params == 1 && opt_string && + !strcasecmp(opt_string, "allow_discards")) + ti->num_discard_requests = 1; + else if (opt_params) { + ret = -EINVAL; + ti->error = "Invalid feature arguments"; + goto bad; + } + } + ret = -ENOMEM; cc->io_queue = alloc_workqueue("kcryptd_io", WQ_NON_REENTRANT| @@ -1681,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, struct dm_crypt_io *io; struct crypt_config *cc; - if (bio->bi_rw & REQ_FLUSH) { + /* + * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. 
+ * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight + * - for REQ_DISCARD caller must use flush if IO ordering matters + */ + if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { cc = ti->private; bio->bi_bdev = cc->dev->bdev; + if (bio_sectors(bio)) + bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } @@ -1726,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); + + if (ti->num_discard_requests) + DMEMIT(" 1 allow_discards"); + break; } return 0; @@ -1822,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.1 From ed8b752bccf2560e305e25125721d2f0ac759e88 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 2 Aug 2011 12:32:08 +0100 Subject: dm table: set flush capability based on underlying devices DM has always advertised both REQ_FLUSH and REQ_FUA flush capabilities regardless of whether or not a given DM device's underlying devices also advertised a need for them. Block's flush-merge changes from 2.6.39 have proven to be more costly for DM devices. Performance regressions have been reported even when DM's underlying devices do not advertise that they have a write cache. Fix the performance regressions by configuring a DM device's flushing capabilities based on those of the underlying devices' capabilities. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 43 +++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm.c | 1 - 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 259ce99..986b875 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1248,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t) blk_get_integrity(template_disk)); } +static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned flush = (*(unsigned *)data); + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && (q->flush_flags & flush); +} + +static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) +{ + struct dm_target *ti; + unsigned i = 0; + + /* + * Require at least one underlying device to support flushes. + * t->devices includes internal dm devices such as mirror logs + * so we need to use iterate_devices here, which targets + * supporting flushes must provide. 
+ */ + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_flush_requests) + continue; + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_flush_capable, &flush)) + return 1; + } + + return 0; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { + unsigned flush = 0; + /* * Copy table's limits to the DM device's request_queue */ @@ -1261,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + if (dm_table_supports_flush(t, REQ_FLUSH)) { + flush |= REQ_FLUSH; + if (dm_table_supports_flush(t, REQ_FUA)) + flush |= REQ_FUA; + } + blk_queue_flush(q, flush); + dm_table_set_integrity(t); /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1000eaf..52b39f3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1808,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); blk_queue_merge_bvec(md->queue, dm_merge_bvec); - blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); } /* -- cgit v1.1 From 50658b6602dad6dc76c226917f5d8cec0f680eab Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 2 Aug 2011 11:29:24 +0200 Subject: spi/pl022: remove function cannot exit The remove function in the PL022 driver cannot abort the remove function any way, so restructure the code so as not to make that assumption. Remove will now proceed no matter whether it can stop the transfer queue or not. Reported-by: Russell King Signed-off-by: Linus Walleij Signed-off-by: Grant Likely --- drivers/spi/spi-pl022.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index eba88c7..730b4a3 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -2267,17 +2267,13 @@ static int __devexit pl022_remove(struct amba_device *adev) { struct pl022 *pl022 = amba_get_drvdata(adev); - int status = 0; + if (!pl022) return 0; /* Remove the queue */ - status = destroy_queue(pl022); - if (status != 0) { - dev_err(&adev->dev, - "queue remove failed (%d)\n", status); - return status; - } + if (destroy_queue(pl022) != 0) + dev_err(&adev->dev, "queue remove failed\n"); load_ssp_default_config(pl022); pl022_dma_remove(pl022); free_irq(adev->irq[0], pl022); @@ -2289,7 +2285,6 @@ pl022_remove(struct amba_device *adev) spi_unregister_master(pl022->master); spi_master_put(pl022->master); amba_set_drvdata(adev, NULL); - dev_dbg(&adev->dev, "remove succeeded\n"); return 0; } -- cgit v1.1 From aac285c6cb9622f1cc05ed162604bf5ad2da9a8d Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 2 Aug 2011 15:45:07 +0100 Subject: of: constify property name parameters for helper functions The helper functions for reading u32 integers, u32 arrays and strings should have the property name as a const pointer. Cc: Grant Likely Signed-off-by: Jamie Iles Signed-off-by: Grant Likely --- drivers/of/base.c | 7 ++++--- include/linux/of.h | 15 +++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 02ed367..3ff22e3 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -610,8 +610,9 @@ EXPORT_SYMBOL(of_find_node_by_phandle); * * The out_value is modified only if a valid u32 value can be decoded. 
*/ -int of_property_read_u32_array(const struct device_node *np, char *propname, - u32 *out_values, size_t sz) +int of_property_read_u32_array(const struct device_node *np, + const char *propname, u32 *out_values, + size_t sz) { struct property *prop = of_find_property(np, propname, NULL); const __be32 *val; @@ -645,7 +646,7 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_array); * * The out_string pointer is modified only if a valid string can be decoded. */ -int of_property_read_string(struct device_node *np, char *propname, +int of_property_read_string(struct device_node *np, const char *propname, const char **out_string) { struct property *prop = of_find_property(np, propname, NULL); diff --git a/include/linux/of.h b/include/linux/of.h index bd716f8..0085bb0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -196,12 +196,13 @@ extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); extern int of_property_read_u32_array(const struct device_node *np, - char *propname, + const char *propname, u32 *out_values, size_t sz); -extern int of_property_read_string(struct device_node *np, char *propname, - const char **out_string); +extern int of_property_read_string(struct device_node *np, + const char *propname, + const char **out_string); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); @@ -242,13 +243,15 @@ static inline bool of_have_populated_dt(void) } static inline int of_property_read_u32_array(const struct device_node *np, - char *propname, u32 *out_values, size_t sz) + const char *propname, + u32 *out_values, size_t sz) { return -ENOSYS; } static inline int of_property_read_string(struct device_node *np, - char *propname, const char **out_string) + const char *propname, + const char **out_string) { return -ENOSYS; } @@ -256,7 +259,7 @@ static inline int of_property_read_string(struct device_node *np, #endif /* CONFIG_OF */ static inline int of_property_read_u32(const struct device_node *np, - char *propname, + const char *propname, u32 *out_value) { return of_property_read_u32_array(np, propname, out_value, 1); -- cgit v1.1 From d945fa0da7db9ace61fbb2140cfdb1e2d2b24e7e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 2 Aug 2011 14:13:26 +0900 Subject: MAINTAINERS: Add keyword match for of_match_table to device tree section If a patch is working with an of_match_table it's probably adding an OF binding to a driver in which case the binding ought to be reviewed by the device tree folks to make sure that things like the device matches are set up correctly. Signed-off-by: Mark Brown Signed-off-by: Grant Likely --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c9c6324..0d2fcda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4722,6 +4722,7 @@ S: Maintained F: drivers/of F: include/linux/of*.h K: of_get_property +K: of_match_table OPENRISC ARCHITECTURE M: Jonas Bonn -- cgit v1.1 From cc1a93e68f6c0d736b771f0746e8e4186f483fdc Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 1 Aug 2011 12:46:57 -0700 Subject: iwlagn: sysfs couldn't find the priv pointer This bug has been introduced by: d593411084a56124aa9d80aafa15db8463b2d8f7 Author: Emmanuel Grumbach Date: Mon Jul 11 10:48:51 2011 +0300 iwlagn: simplify the bus architecture Revert part of the buggy patch: dev_get_drvdata will now return iwl_priv as it did before the patch. 
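For illustration, a minimal sketch of the kind of sysfs accessor that depends on this behaviour (the attribute and field names are hypothetical, not copied from the driver):

static ssize_t show_temperature(struct device *d,
				struct device_attribute *attr, char *buf)
{
	/* With this revert, drvdata is struct iwl_priv again, not struct iwl_bus. */
	struct iwl_priv *priv = dev_get_drvdata(d);

	return sprintf(buf, "%d\n", priv->temperature);
}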
Signed-off-by: Emmanuel Grumbach Tested-by: Daniel Halperin Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-pci.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c index fb7e436..69d4ec4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-pci.c +++ b/drivers/net/wireless/iwlwifi/iwl-pci.c @@ -134,6 +134,7 @@ static void iwl_pci_apm_config(struct iwl_bus *bus) static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data) { bus->drv_data = drv_data; + pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data); } static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[], @@ -454,8 +455,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); } - pci_set_drvdata(pdev, bus); - bus->dev = &pdev->dev; bus->irq = pdev->irq; bus->ops = &pci_ops; @@ -494,11 +493,12 @@ static void iwl_pci_down(struct iwl_bus *bus) static void __devexit iwl_pci_remove(struct pci_dev *pdev) { - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); + void *bus_specific = priv->bus->bus_specific; - iwl_remove(bus->drv_data); + iwl_remove(priv); - iwl_pci_down(bus); + iwl_pci_down(bus_specific); } #ifdef CONFIG_PM @@ -506,20 +506,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev) static int iwl_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx. */ - return iwl_suspend(bus->drv_data); + return iwl_suspend(priv); } static int iwl_pci_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if @@ -532,7 +532,7 @@ static int iwl_pci_resume(struct device *device) */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); - return iwl_resume(bus->drv_data); + return iwl_resume(priv); } static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); -- cgit v1.1 From 449f94eadc91d69fa044fa92ef45c02e86559e13 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Tue, 2 Aug 2011 11:43:14 +0200 Subject: rt2x00: Fix compilation without CONFIG_RT2X00_LIB_CRYPTO This was introduced by commit 77b5621bac4a56b83b9081f48d4e7d1accdde400 (rt2x00: Don't use queue entry as parameter when creating TX descriptor.) Signed-off-by: Helmut Schaa Acked-by: Gertjan van Wingerde Acked-by: Ivo van Doorn Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/rt2x00lib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h index 15cdc7e..4cdf247 100644 --- a/drivers/net/wireless/rt2x00/rt2x00lib.h +++ b/drivers/net/wireless/rt2x00/rt2x00lib.h @@ -355,7 +355,8 @@ static inline enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf * return CIPHER_NONE; } -static inline void rt2x00crypto_create_tx_descriptor(struct queue_entry *entry, +static inline void rt2x00crypto_create_tx_descriptor(struct rt2x00_dev *rt2x00dev, + struct sk_buff *skb, struct txentry_desc *txdesc) { } -- cgit v1.1 From 00898a47269ae5e6dda04defad00234b96692d95 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 2 Aug 2011 13:29:02 +0200 Subject: rt2x00: fix usage of NULL queue We may call rt2x00queue_pause_queue(queue) with queue == NULL. Bug was introduced by commit 62fe778412b36791b7897cfa139342906fbbf07b "rt2x00: Fix stuck queue in tx failure case" . Cc: stable@kernel.org # 3.0+ Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00mac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 8efab39..4ccf238 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -113,7 +113,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) * due to possible race conditions in mac80211. */ if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags)) - goto exit_fail; + goto exit_free_skb; /* * Use the ATIM queue if appropriate and present. @@ -127,7 +127,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) ERROR(rt2x00dev, "Attempt to send packet over invalid queue %d.\n" "Please file bug report to %s.\n", qid, DRV_PROJECT); - goto exit_fail; + goto exit_free_skb; } /* @@ -159,6 +159,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) exit_fail: rt2x00queue_pause_queue(queue); + exit_free_skb: dev_kfree_skb_any(skb); } EXPORT_SYMBOL_GPL(rt2x00mac_tx); -- cgit v1.1 From f35291082294ca6737953bbe4e9491ede04ab822 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Tue, 2 Aug 2011 09:08:37 -0700 Subject: iwlagn: 5000 do not support idle mode 5000 series has issue supporting power save idle mode: commit 9dc2153315650eae220898668b6aa56a25c130be iwlwifi: always support idle mode for agn devices For agn devices, always support idle mode which help power consumption in idle unassociated state. the above changes cause 5000 become not stable when power management is "on" http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2312 Cc: stable@kernel.org #2.6.39, #3.0.0 Reported-by: Devin J Pohly Signed-off-by: Wey-Yi Guy Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-5000.c | 1 + drivers/net/wireless/iwlwifi/iwl-core.h | 2 ++ drivers/net/wireless/iwlwifi/iwl-power.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index 3eeb12e..c95cefd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -365,6 +365,7 @@ static struct iwl_base_params iwl5000_base_params = { .chain_noise_scale = 1000, .wd_timeout = IWL_LONG_WD_TIMEOUT, .max_event_log_size = 512, + .no_idle_support = true, }; static struct iwl_ht_params iwl5000_ht_params = { .ht_greenfield_support = true, diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h index 3e6bb734..02817a4 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.h +++ b/drivers/net/wireless/iwlwifi/iwl-core.h @@ -135,6 +135,7 @@ struct iwl_mod_params { * @temperature_kelvin: temperature report by uCode in kelvin * @max_event_log_size: size of event log buffer size for ucode event logging * @shadow_reg_enable: HW shadhow register bit + * @no_idle_support: do not support idle mode */ struct iwl_base_params { int eeprom_size; @@ -156,6 +157,7 @@ struct iwl_base_params { bool temperature_kelvin; u32 max_event_log_size; const bool shadow_reg_enable; + const bool no_idle_support; }; /* * @advanced_bt_coexist: support advanced bt coexist diff --git a/drivers/net/wireless/iwlwifi/iwl-power.c b/drivers/net/wireless/iwlwifi/iwl-power.c index 3ec619c..cd64df0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-power.c +++ b/drivers/net/wireless/iwlwifi/iwl-power.c @@ -349,7 +349,8 @@ static void iwl_power_build_cmd(struct iwl_priv *priv, if (priv->wowlan) iwl_static_sleep_cmd(priv, cmd, IWL_POWER_INDEX_5, dtimper); - else if (priv->hw->conf.flags & IEEE80211_CONF_IDLE) + else if (!priv->cfg->base_params->no_idle_support && + priv->hw->conf.flags & IEEE80211_CONF_IDLE) iwl_static_sleep_cmd(priv, cmd, IWL_POWER_INDEX_5, 20); else if (iwl_tt_is_low_power_state(priv)) { /* in thermal throttling low power state */ -- cgit v1.1 From ab92402af04c151c26541eb28e1c26286a429db5 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 28 Jul 2011 13:48:40 -0700 Subject: thermal: hide CONFIG_THERMAL_HWMON It's about time to revert 16d752397301b9 ("thermal: Create CONFIG_THERMAL_HWMON=n"). Anybody running a kernel >= 2.6.40 would also be running a recent enough version of lm-sensors. Actually having CONFIG_THERMAL_HWMON is pretty convenient so instead of dropping it, we keep it but hide it. Signed-off-by: Jean Delvare Cc: Rene Herman Acked-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- Documentation/feature-removal-schedule.txt | 9 --------- drivers/thermal/Kconfig | 8 ++------ 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index b1c921c..951143d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -310,15 +310,6 @@ Who: Ravikiran Thirumalai --------------------------- -What: CONFIG_THERMAL_HWMON -When: January 2009 -Why: This option was introduced just to allow older lm-sensors userspace - to keep working over the upgrade to 2.6.26. At the scheduled time of - removal fixed lm-sensors (2.x or 3.x) should be readily available. 
-Who: Rene Herman - ---------------------------- - What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS (in net/core/net-sysfs.c) When: After the only user (hal) has seen a release with the patches diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index bf7c687..f7f71b2 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -14,11 +14,7 @@ menuconfig THERMAL If you want this support, you should say Y or M here. config THERMAL_HWMON - bool "Hardware monitoring support" + bool depends on THERMAL depends on HWMON=y || HWMON=THERMAL - help - The generic thermal sysfs driver's hardware monitoring support - requires a 2.10.7/3.0.2 or later lm-sensors userspace. - - Say Y if your user-space is new enough. + default y -- cgit v1.1 From 0d97d7a494d43be77f57e688369be0aae33d1ade Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 28 Jul 2011 13:48:41 -0700 Subject: thermal: split hwmon lookup to a separate function We'll soon need to reuse it. Signed-off-by: Jean Delvare Cc: Rene Herman Acked-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 0b1c82a..b6353d6 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -469,22 +469,35 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, } -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +static struct thermal_hwmon_device * +thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; - int new_hwmon_device = 1; - int result; mutex_lock(&thermal_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) if (!strcmp(hwmon->type, tz->type)) { - new_hwmon_device = 0; mutex_unlock(&thermal_list_lock); - goto register_sys_interface; + return hwmon; } mutex_unlock(&thermal_list_lock); + return NULL; +} + +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + int new_hwmon_device = 1; + int result; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (hwmon) { + new_hwmon_device = 0; + goto register_sys_interface; + } + hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); if (!hwmon) return -ENOMEM; -- cgit v1.1 From 31f5396ad3bde23c8416e8d23ba425e27f413314 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 28 Jul 2011 13:48:42 -0700 Subject: thermal: make THERMAL_HWMON implementation fully internal THERMAL_HWMON is implemented inside the thermal_sys driver and has no effect on drivers implementing thermal zones, so they shouldn't see anything related to it in . Making the THERMAL_HWMON implementation fully internal has two advantages beyond the cleaner design: * This avoids rebuilding all thermal drivers if the THERMAL_HWMON implementation changes, or if CONFIG_THERMAL_HWMON gets enabled or disabled. * This avoids breaking the thermal kABI in these cases too, which should make distributions happy. The only drawback I can see is slightly higher memory fragmentation, as the number of kzalloc() calls will increase by one per thermal zone. But I doubt it will be a problem in practice, as I've never seen a system with more than two thermal zones. 
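For context only (paths and values are invented), the hwmon view of a thermal zone is unchanged by this refactoring; each zone type is still bridged to a hwmon device whose attributes are created by the code below:

  /sys/class/hwmon/hwmon0/name          the zone type, e.g. "acpitz"
  /sys/class/hwmon/hwmon0/temp1_input   zone temperature (typically millidegrees Celsius)
  /sys/class/hwmon/hwmon0/temp1_crit    critical trip point, if get_crit_temp is provided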
Signed-off-by: Jean Delvare Cc: Rene Herman Acked-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 117 +++++++++++++++++++++++++++++++++--------- include/linux/thermal.h | 22 -------- 2 files changed, 92 insertions(+), 47 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index b6353d6..708f8e9 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* hwmon sys I/F */ #include + +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; + +/* one temperature input for each thermal zone */ +struct thermal_hwmon_temp { + struct list_head hwmon_node; + struct thermal_zone_device *tz; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +}; + static LIST_HEAD(thermal_hwmon_list); static ssize_t @@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_input); + struct thermal_zone_device *tz = temp->tz; ret = tz->ops->get_temp(tz, &temperature); @@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); + struct thermal_zone_device *tz = temp->tz; long temperature; int ret; @@ -485,10 +510,29 @@ thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) return NULL; } +/* Find the temperature input matching a given thermal zone */ +static struct thermal_hwmon_temp * +thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, + const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_temp *temp; + + mutex_lock(&thermal_list_lock); + list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) + if (temp->tz == tz) { + mutex_unlock(&thermal_list_lock); + return temp; + } + mutex_unlock(&thermal_list_lock); + + return NULL; +} + static int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; int new_hwmon_device = 1; int result; @@ -515,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) goto free_mem; register_sys_interface: - tz->hwmon = hwmon; + temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); + if (!temp) { + result = -ENOMEM; + goto unregister_name; + } + + temp->tz = tz; hwmon->count++; - snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH, "temp%d_input", hwmon->count); - tz->temp_input.attr.attr.name = tz->temp_input.name; - tz->temp_input.attr.attr.mode = 0444; - tz->temp_input.attr.show = temp_input_show; - sysfs_attr_init(&tz->temp_input.attr.attr); - result = 
device_create_file(hwmon->device, &tz->temp_input.attr); + temp->temp_input.attr.attr.name = temp->temp_input.name; + temp->temp_input.attr.attr.mode = 0444; + temp->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&temp->temp_input.attr.attr); + result = device_create_file(hwmon->device, &temp->temp_input.attr); if (result) - goto unregister_name; + goto free_temp_mem; if (tz->ops->get_crit_temp) { unsigned long temperature; if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH, "temp%d_crit", hwmon->count); - tz->temp_crit.attr.attr.name = tz->temp_crit.name; - tz->temp_crit.attr.attr.mode = 0444; - tz->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&tz->temp_crit.attr.attr); + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); result = device_create_file(hwmon->device, - &tz->temp_crit.attr); + &temp->temp_crit.attr); if (result) goto unregister_input; } @@ -547,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) mutex_lock(&thermal_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&tz->hwmon_node, &hwmon->tz_list); + list_add_tail(&temp->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_list_lock); return 0; unregister_input: - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); + free_temp_mem: + kfree(temp); unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); @@ -569,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) static void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { - struct thermal_hwmon_device *hwmon = tz->hwmon; + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (unlikely(!hwmon)) { + /* Should never happen... */ + dev_dbg(&tz->device, "hwmon device lookup failed!\n"); + return; + } + + temp = thermal_hwmon_lookup_temp(hwmon, tz); + if (unlikely(!temp)) { + /* Should never happen... 
*/ + dev_dbg(&tz->device, "temperature input lookup failed!\n"); + return; + } - tz->hwmon = NULL; - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &tz->temp_crit.attr); + device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_list_lock); - list_del(&tz->hwmon_node); + list_del(&temp->hwmon_node); + kfree(temp); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_list_lock); return; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d3ec89f..47b4a27 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,22 +85,6 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) -#if defined(CONFIG_THERMAL_HWMON) -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; -#endif - struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -120,12 +104,6 @@ struct thermal_zone_device { struct mutex lock; /* protect cooling devices list */ struct list_head node; struct delayed_work poll_queue; -#if defined(CONFIG_THERMAL_HWMON) - struct list_head hwmon_node; - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -#endif }; /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" -- cgit v1.1 From f52e00c668669c9c290e84adf859c76db6d92a5a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 28 Jul 2011 13:48:43 -0700 Subject: ACPI: remove NID_INVAL b552a8c56db8 ("ACPI: remove NID_INVAL") removed the left over uses of NID_INVAL, but didn't actually remove the definition. Remove it. Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- include/linux/acpi.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1deb2a7..2312e85 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) -#define NID_INVAL (-1) int acpi_check_resource_conflict(const struct resource *res); -- cgit v1.1 From aa165971c2923d05988f920c978e438dbc7b0de6 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 28 Jul 2011 13:48:43 -0700 Subject: ACPI: add missing _OSI strings Linux supports some optional features, but it should notify the BIOS about them via the _OSI method. Currently Linux doesn't notify any, which might make such features not work because the BIOS doesn't know about them. Jarosz has a system which needs this to make ACPI processor aggregator device work. 
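For illustration only, not part of the patch: the defaults added below live in the same osi_setup_entries[] table that acpi_osi_setup() (visible in the hunk) fills for the acpi_osi= boot parameter, so they remain overridable. A hypothetical quirk masking one of the new strings again could look like the sketch below; the leading '!' disable prefix is assumed from that parameter's usual usage.

static void __init example_mask_pad_osi(void)
{
	/* hypothetical: ask acpi_osi_setup() to disable this string again */
	acpi_osi_setup("!Processor Aggregator Device");
}

Firmware ASL then only sees the strings that stay enabled when it evaluates _OSI ("...").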
Reported-by: "Jarosz, Sebastian" Signed-off-by: Shaohua Li Acked-by: Matthew Garrett Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- drivers/acpi/osl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 372f9b7..0e47857 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1083,7 +1083,13 @@ struct osi_setup_entry { bool enable; }; -static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; +static struct osi_setup_entry __initdata + osi_setup_entries[OSI_STRING_ENTRIES_MAX] = { + {"Module Device", true}, + {"Processor Device", true}, + {"3.0 _SCP Extensions", true}, + {"Processor Aggregator Device", true}, +}; void __init acpi_osi_setup(char *str) { -- cgit v1.1 From d1afa65ca59d4e6a5f1a8c1ab9bfa73f2fa8b777 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Aug 2011 12:35:04 +0200 Subject: arch/tile/mm/init.c: trivial: use BUG_ON Use BUG_ON(x) rather than if(x) BUG(); The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; @@ -if (x) BUG(); +BUG_ON(x); @@ identifier x; @@ -if (!x) BUG(); +BUG_ON(!x); // Signed-off-by: Julia Lawall Signed-off-by: Chris Metcalf --- arch/tile/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e10c40..7309988c 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -836,8 +836,7 @@ void __init mem_init(void) #endif #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); #endif #ifdef CONFIG_HIGHMEM -- cgit v1.1 From 6c8111e9a0e73ef1e58a1bf0a10c23ee1512e7a2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 2 Aug 2011 17:15:33 -0400 Subject: ACPI: delete stale reference in kernel-parameters.txt Says for acpi= See also Documentation/power/pm.txt, pci=noacpi but this file does not exist Reported-by: Alan Cox Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be7..53800c2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -161,8 +161,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rsdt -- prefer RSDT over (default) XSDT copy_dsdt -- copy DSDT to memory - See also Documentation/power/pm.txt, pci=noacpi - acpi_apic_instance= [ACPI, IOAPIC] Format: 2: use 2nd APIC table, if available -- cgit v1.1 From b728a5c806fb36f9adebf2a862bbd015e074afca Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 2 Aug 2011 15:08:30 -0700 Subject: efivars: fix warnings when CONFIG_PSTORE=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/firmware/efivars.c:161: warning: ‘utf16_strlen’ defined but not used utf16_strlen() is only used inside CONFIG_PSTORE - make this "static inline" to shut the compiler up [thanks to hpa for the suggestion]. 
drivers/firmware/efivars.c:602: warning: initialization from incompatible pointer type Between v1 and v2 of this patch series we decided to make the "part" number unsigned - but missed fixing the stub version of efi_pstore_write() Acked-by: Matthew Garrett Acked-by: Mike Waychison Signed-off-by: Tony Luck --- drivers/firmware/efivars.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index eacb05e..eb80b54 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength) return length; } -static unsigned long +static inline unsigned long utf16_strlen(efi_char16_t *s) { return utf16_strnlen(s, ~0UL); @@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return -1; } -static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) +static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) { return 0; } -- cgit v1.1 From d30c4b7a87e8b19583d5ef1402d9b38f51e30f44 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 31 Jul 2011 18:19:33 -0400 Subject: tools/power turbostat: fit output into 80 columns on snb-ep Reduce columns for package number to 1. If you can afford more than 9 packages, you can also afford a terminal with more than 80 columns:-) Also shave a column also off the package C-states Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6d8ef4a..8b2d37b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset) void print_header(void) { if (show_pkg) - fprintf(stderr, "pkg "); + fprintf(stderr, "pk"); if (show_core) - fprintf(stderr, "core"); + fprintf(stderr, " cr"); if (show_cpu) fprintf(stderr, " CPU"); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + fprintf(stderr, " %%c0 "); if (has_aperf) - fprintf(stderr, " GHz"); + fprintf(stderr, " GHz"); fprintf(stderr, " TSC"); if (do_nhm_cstates) - fprintf(stderr, " %%c1 "); + fprintf(stderr, " %%c1"); if (do_nhm_cstates) - fprintf(stderr, " %%c3 "); + fprintf(stderr, " %%c3"); if (do_nhm_cstates) - fprintf(stderr, " %%c6 "); + fprintf(stderr, " %%c6"); if (do_snb_cstates) - fprintf(stderr, " %%c7 "); + fprintf(stderr, " %%c7"); if (do_snb_cstates) - fprintf(stderr, " %%pc2 "); + fprintf(stderr, " %%pc2"); if (do_nhm_cstates) - fprintf(stderr, " %%pc3 "); + fprintf(stderr, " %%pc3"); if (do_nhm_cstates) - fprintf(stderr, " %%pc6 "); + fprintf(stderr, " %%pc6"); if (do_snb_cstates) - fprintf(stderr, " %%pc7 "); + fprintf(stderr, " %%pc7"); if (extra_msr_offset) - fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + fprintf(stderr, " MSR 0x%x ", extra_msr_offset); putc('\n', stderr); } @@ -194,14 +194,14 @@ void print_cnt(struct counters *p) /* topology columns, print blanks on 1st (average) line */ if (p == cnt_average) { if (show_pkg) - fprintf(stderr, " "); + fprintf(stderr, " "); if (show_core) fprintf(stderr, " "); if (show_cpu) fprintf(stderr, " "); } else { if (show_pkg) - fprintf(stderr, "%4d", p->pkg); + fprintf(stderr, "%d", p->pkg); if (show_core) fprintf(stderr, "%4d", p->core); if (show_cpu) @@ -241,22 +241,22 @@ void print_cnt(struct counters 
*p) if (!skip_c1) fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); else - fprintf(stderr, " ****"); + fprintf(stderr, " ****"); } if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); if (extra_msr_offset) fprintf(stderr, " 0x%016llx", p->extra_msr); putc('\n', stderr); -- cgit v1.1 From c28aa38567101bad4e020f4392df41d0bf6c165c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Tue, 2 Aug 2011 03:53:43 +0000 Subject: r8169 : MAC address change fix for the 8168e-vl. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=39252 Hayes suggested that the usual MAC{0, 4} register writes be completed with writes to extended GigaMAC registers : - 0xe0 .. 0xe5 - 0xf2 .. 0xf7 Registers 0xf0 and 0xf1 should be set to 0. Signed-off-by: Francois Romieu Cc: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/r8169.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index c77286e..02339b3 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1092,6 +1092,21 @@ rtl_w1w0_eri(void __iomem *ioaddr, int addr, u32 mask, u32 p, u32 m, int type) rtl_eri_write(ioaddr, addr, mask, (val & ~m) | p, type); } +struct exgmac_reg { + u16 addr; + u16 mask; + u32 val; +}; + +static void rtl_write_exgmac_batch(void __iomem *ioaddr, + const struct exgmac_reg *r, int len) +{ + while (len-- > 0) { + rtl_eri_write(ioaddr, r->addr, r->mask, r->val, ERIAR_EXGMAC); + r++; + } +} + static u8 rtl8168d_efuse_read(void __iomem *ioaddr, int reg_addr) { u8 value = 0xff; @@ -3117,6 +3132,18 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) RTL_W32(MAC0, low); RTL_R32(MAC0); + if (tp->mac_version == RTL_GIGA_MAC_VER_34) { + const struct exgmac_reg e[] = { + { .addr = 0xe0, ERIAR_MASK_1111, .val = low }, + { .addr = 0xe4, ERIAR_MASK_1111, .val = high }, + { .addr = 0xf0, ERIAR_MASK_1111, .val = low << 16 }, + { .addr = 0xf4, ERIAR_MASK_1111, .val = high << 16 | + low >> 16 }, + }; + + rtl_write_exgmac_batch(ioaddr, e, ARRAY_SIZE(e)); + } + RTL_W8(Cfg9346, Cfg9346_Lock); spin_unlock_irq(&tp->lock); -- cgit v1.1 From fc05a78efb8e91e884017bb0bc43f690aa5b4dcd Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 3 Aug 2011 03:17:43 +0000 Subject: Revert "cifs: advertise the right receive buffer size to the server" This reverts commit c4d3396b261473ded6f370edd1e79ba34e089d7e. 
Problems discovered with readdir to Samba due to not accounting for header size properly with this change --- fs/cifs/sess.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 243d587..d3e6196 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -124,8 +124,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) /* that we use in next few lines */ /* Note that header is initialized to zero in header_assemble */ pSMB->req.AndXCommand = 0xFF; - pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, CIFSMaxBufSize - 4, - USHRT_MAX)); + pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); pSMB->req.VcNumber = get_next_vcnum(ses); -- cgit v1.1 From b80289833463215d2f3d1d72cf735fc7ba78da57 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 26 Jul 2011 12:20:18 -0400 Subject: cifs: demote DFS referral lookup errors to cFYI cifs: demote DFS referral lookup errors to cFYI Now that we call into this routine on every mount, anyone who doesn't have the upcall configured will get multiple printks about failed lookups. Reported-and-Tested-by: Martijn Uffing Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 5 +++-- fs/cifs/dns_resolve.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 8d8f28c..6873bb6 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -141,10 +141,11 @@ char *cifs_compose_mount_options(const char *sb_mountdata, rc = dns_resolve_server_name_to_ip(*devname, &srvIP); if (rc < 0) { - cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", - __func__, *devname, rc); + cFYI(1, "%s: Failed to resolve server part of %s to IP: %d", + __func__, *devname, rc); goto compose_mount_options_err; } + /* md_len = strlen(...) + 12 for 'sep+prefixpath=' * assuming that we have 'unc=' and 'ip=' in * the original sb_mountdata diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 548f062..1d2d91d 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -79,8 +79,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) /* Perform the upcall */ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); if (rc < 0) - cERROR(1, "%s: unable to resolve: %*.*s", - __func__, len, len, hostname); + cFYI(1, "%s: unable to resolve: %*.*s", + __func__, len, len, hostname); else cFYI(1, "%s: resolved: %*.*s to %s", __func__, len, len, hostname, *ip_addr); -- cgit v1.1 From e95ade083939dcb4b0c51c1a2c8504ea9ef3d6ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Aug 2011 18:18:57 -0700 Subject: sparc: Minor tweaks to Niagara page copy/clear. Don't use floating point on Niagara2, use the traditional plain Niagara code instead. Unroll Niagara loops to 128 bytes for copy, and 256 bytes for clear. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/head_64.S | 2 +- arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/NG2page.S | 61 ------------------------ arch/sparc/lib/NGpage.S | 114 +++++++++++++++++++++++++++++--------------- 4 files changed, 77 insertions(+), 102 deletions(-) delete mode 100644 arch/sparc/lib/NG2page.S diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index c752603..0eac1b2 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -559,7 +559,7 @@ niagara2_patch: nop call niagara_patch_bzero nop - call niagara2_patch_pageops + call niagara_patch_pageops nop ba,a,pt %xcc, 80f diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 7f01b8f..c25f94d 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -31,7 +31,7 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o -lib-$(CONFIG_SPARC64) += NG2patch.o NG2page.o +lib-$(CONFIG_SPARC64) += NG2patch.o lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o diff --git a/arch/sparc/lib/NG2page.S b/arch/sparc/lib/NG2page.S deleted file mode 100644 index 73b6b7c..0000000 --- a/arch/sparc/lib/NG2page.S +++ /dev/null @@ -1,61 +0,0 @@ -/* NG2page.S: Niagara-2 optimized clear and copy page. - * - * Copyright (C) 2007 (davem@davemloft.net) - */ - -#include -#include -#include - - .text - .align 32 - - /* This is heavily simplified from the sun4u variants - * because Niagara-2 does not have any D-cache aliasing issues. - */ -NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ - prefetch [%o1 + 0x00], #one_read - prefetch [%o1 + 0x40], #one_read - VISEntryHalf - set PAGE_SIZE, %g7 - sub %o0, %o1, %g3 -1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P - subcc %g7, 64, %g7 - ldda [%o1] ASI_BLK_P, %f0 - stda %f0, [%o1 + %g3] ASI_BLK_P - add %o1, 64, %o1 - bne,pt %xcc, 1b - prefetch [%o1 + 0x40], #one_read - membar #Sync - VISExitHalf - retl - nop - -#define BRANCH_ALWAYS 0x10680000 -#define NOP 0x01000000 -#define NG_DO_PATCH(OLD, NEW) \ - sethi %hi(NEW), %g1; \ - or %g1, %lo(NEW), %g1; \ - sethi %hi(OLD), %g2; \ - or %g2, %lo(OLD), %g2; \ - sub %g1, %g2, %g1; \ - sethi %hi(BRANCH_ALWAYS), %g3; \ - sll %g1, 11, %g1; \ - srl %g1, 11 + 2, %g1; \ - or %g3, %lo(BRANCH_ALWAYS), %g3; \ - or %g3, %g1, %g3; \ - stw %g3, [%g2]; \ - sethi %hi(NOP), %g3; \ - or %g3, %lo(NOP), %g3; \ - stw %g3, [%g2 + 0x4]; \ - flush %g2; - - .globl niagara2_patch_pageops - .type niagara2_patch_pageops,#function -niagara2_patch_pageops: - NG_DO_PATCH(copy_user_page, NG2copy_user_page) - NG_DO_PATCH(_clear_page, NGclear_page) - NG_DO_PATCH(clear_user_page, NGclear_user_page) - retl - nop - .size niagara2_patch_pageops,.-niagara2_patch_pageops diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index 428920d..b9e790b 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S @@ -16,55 +16,91 @@ */ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ - prefetch [%o1 + 0x00], #one_read - mov 8, %g1 - mov 16, %g2 - mov 24, %g3 + save %sp, -192, %sp + rd %asi, %g3 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi set PAGE_SIZE, %g7 + prefetch [%i1 + 0x00], #one_read + prefetch [%i1 + 0x40], #one_read -1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 - ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 - prefetch [%o1 + 0x40], #one_read - add %o1, 32, %o1 - stxa %o2, [%o0 + %g0] 
ASI_BLK_INIT_QUAD_LDD_P - stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P - ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 - stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P - ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 - add %o1, 32, %o1 - add %o0, 32, %o0 - stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P - stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P - stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P - subcc %g7, 64, %g7 +1: prefetch [%i1 + 0x80], #one_read + prefetch [%i1 + 0xc0], #one_read + ldda [%i1 + 0x00] %asi, %o2 + ldda [%i1 + 0x10] %asi, %o4 + ldda [%i1 + 0x20] %asi, %l2 + ldda [%i1 + 0x30] %asi, %l4 + stxa %o2, [%i0 + 0x00] %asi + stxa %o3, [%i0 + 0x08] %asi + stxa %o4, [%i0 + 0x10] %asi + stxa %o5, [%i0 + 0x18] %asi + stxa %l2, [%i0 + 0x20] %asi + stxa %l3, [%i0 + 0x28] %asi + stxa %l4, [%i0 + 0x30] %asi + stxa %l5, [%i0 + 0x38] %asi + ldda [%i1 + 0x40] %asi, %o2 + ldda [%i1 + 0x50] %asi, %o4 + ldda [%i1 + 0x60] %asi, %l2 + ldda [%i1 + 0x70] %asi, %l4 + stxa %o2, [%i0 + 0x40] %asi + stxa %o3, [%i0 + 0x48] %asi + stxa %o4, [%i0 + 0x50] %asi + stxa %o5, [%i0 + 0x58] %asi + stxa %l2, [%i0 + 0x60] %asi + stxa %l3, [%i0 + 0x68] %asi + stxa %l4, [%i0 + 0x70] %asi + stxa %l5, [%i0 + 0x78] %asi + add %i1, 128, %i1 + subcc %g7, 128, %g7 bne,pt %xcc, 1b - add %o0, 32, %o0 + add %i0, 128, %i0 + wr %g3, 0x0, %asi membar #Sync - retl - nop + ret + restore - .globl NGclear_page, NGclear_user_page + .align 32 NGclear_page: /* %o0=dest */ NGclear_user_page: /* %o0=dest, %o1=vaddr */ - mov 8, %g1 - mov 16, %g2 - mov 24, %g3 + rd %asi, %g3 + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi set PAGE_SIZE, %g7 -1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P - stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P - stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P - add %o0, 32, %o0 - stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P - stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P - stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P - subcc %g7, 64, %g7 +1: stxa %g0, [%o0 + 0x00] %asi + stxa %g0, [%o0 + 0x08] %asi + stxa %g0, [%o0 + 0x10] %asi + stxa %g0, [%o0 + 0x18] %asi + stxa %g0, [%o0 + 0x20] %asi + stxa %g0, [%o0 + 0x28] %asi + stxa %g0, [%o0 + 0x30] %asi + stxa %g0, [%o0 + 0x38] %asi + stxa %g0, [%o0 + 0x40] %asi + stxa %g0, [%o0 + 0x48] %asi + stxa %g0, [%o0 + 0x50] %asi + stxa %g0, [%o0 + 0x58] %asi + stxa %g0, [%o0 + 0x60] %asi + stxa %g0, [%o0 + 0x68] %asi + stxa %g0, [%o0 + 0x70] %asi + stxa %g0, [%o0 + 0x78] %asi + stxa %g0, [%o0 + 0x80] %asi + stxa %g0, [%o0 + 0x88] %asi + stxa %g0, [%o0 + 0x90] %asi + stxa %g0, [%o0 + 0x98] %asi + stxa %g0, [%o0 + 0xa0] %asi + stxa %g0, [%o0 + 0xa8] %asi + stxa %g0, [%o0 + 0xb0] %asi + stxa %g0, [%o0 + 0xb8] %asi + stxa %g0, [%o0 + 0xc0] %asi + stxa %g0, [%o0 + 0xc8] %asi + stxa %g0, [%o0 + 0xd0] %asi + stxa %g0, [%o0 + 0xd8] %asi + stxa %g0, [%o0 + 0xe0] %asi + stxa %g0, [%o0 + 0xe8] %asi + stxa %g0, [%o0 + 0xf0] %asi + stxa %g0, [%o0 + 0xf8] %asi + subcc %g7, 256, %g7 bne,pt %xcc, 1b - add %o0, 32, %o0 + add %o0, 256, %o0 + wr %g3, 0x0, %asi membar #Sync retl nop -- cgit v1.1 From ef7c4d4675d2a9206f913f26ca1a5cd41bff9d41 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Jul 2011 09:42:07 -0700 Subject: sparc: Use popc if possible for hweight routines. Just like powerpc, we code patch at boot time. Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/bitops_64.h | 42 +++---------------------------- arch/sparc/kernel/entry.h | 7 ++++++ arch/sparc/kernel/setup_64.c | 27 ++++++++++++++++++++ arch/sparc/kernel/sparc_ksyms_64.c | 7 ++++++ arch/sparc/kernel/vmlinux.lds.S | 6 ++++- arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/hweight.S | 51 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 40 deletions(-) create mode 100644 arch/sparc/lib/hweight.S diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 325e295..3588807 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -42,45 +42,11 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr); * of bits set) of a N-bit word */ -#ifdef ULTRA_HAS_POPULATION_COUNT +extern unsigned long __arch_hweight64(__u64 w); +extern unsigned int __arch_hweight32(unsigned int w); +extern unsigned int __arch_hweight16(unsigned int w); +extern unsigned int __arch_hweight8(unsigned int w); -static inline unsigned int __arch_hweight64(unsigned long w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w)); - return res; -} - -static inline unsigned int __arch_hweight32(unsigned int w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffffffff)); - return res; -} - -static inline unsigned int __arch_hweight16(unsigned int w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffff)); - return res; -} - -static inline unsigned int __arch_hweight8(unsigned int w) -{ - unsigned int res; - - __asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xff)); - return res; -} - -#else - -#include - -#endif #include #include #endif /* __KERNEL__ */ diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index d1f1361..16d1545 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -42,6 +42,13 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr, extern void fpload(unsigned long *fpregs, unsigned long *fsr); #else /* CONFIG_SPARC32 */ +struct popc_3insn_patch_entry { + unsigned int addr; + unsigned int insns[3]; +}; +extern struct popc_3insn_patch_entry __popc_3insn_patch, + __popc_3insn_patch_end; + extern void __init per_cpu_patch(void); extern void __init sun4v_patch(void); extern void __init boot_cpu_id_too_large(int cpu); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 242dbb3..26d1141 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -272,6 +272,30 @@ void __init sun4v_patch(void) sun4v_hvapi_init(); } +static void __init popc_patch(void) +{ + struct popc_3insn_patch_entry *p3; + + p3 = &__popc_3insn_patch; + while (p3 < &__popc_3insn_patch_end) { + unsigned long addr = p3->addr; + + *(unsigned int *) (addr + 0) = p3->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = p3->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + *(unsigned int *) (addr + 8) = p3->insns[2]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + p3++; + } +} + #ifdef CONFIG_SMP void __init boot_cpu_id_too_large(int cpu) { @@ -424,6 +448,9 @@ static void __init init_sparc64_elf_hwcap(void) sparc64_elf_hwcap = cap | mdesc_caps; report_hwcaps(sparc64_elf_hwcap); + + if (sparc64_elf_hwcap & AV_SPARC_POPC) + popc_patch(); } void __init setup_arch(char **cmdline_p) diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c 
index 372ad59..d0ee65a 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -38,5 +39,11 @@ EXPORT_SYMBOL(sun4v_niagara_setperf); EXPORT_SYMBOL(sun4v_niagara2_getperf); EXPORT_SYMBOL(sun4v_niagara2_setperf); +/* from hweight.S */ +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); + /* Exporting a symbol from /init/main.c */ EXPORT_SYMBOL(saved_command_line); diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index c022075..de20c14 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -107,7 +107,11 @@ SECTIONS *(.sun4v_2insn_patch) __sun4v_2insn_patch_end = .; } - + .popc_3insn_patch : { + __popc_3insn_patch = .; + *(.popc_3insn_patch) + __popc_3insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index c25f94d..11c850a 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -37,7 +37,7 @@ lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o -lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o +lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o obj-y += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S new file mode 100644 index 0000000..95414e0 --- /dev/null +++ b/arch/sparc/lib/hweight.S @@ -0,0 +1,51 @@ +#include + + .text + .align 32 +ENTRY(__arch_hweight8) + ba,pt %xcc, __sw_hweight8 + nop + nop +ENDPROC(__arch_hweight8) + .section .popc_3insn_patch, "ax" + .word __arch_hweight8 + sllx %o0, 64-8, %g1 + retl + popc %g1, %o0 + .previous + +ENTRY(__arch_hweight16) + ba,pt %xcc, __sw_hweight16 + nop + nop +ENDPROC(__arch_hweight16) + .section .popc_3insn_patch, "ax" + .word __arch_hweight16 + sllx %o0, 64-16, %g1 + retl + popc %g1, %o0 + .previous + +ENTRY(__arch_hweight32) + ba,pt %xcc, __sw_hweight32 + nop + nop +ENDPROC(__arch_hweight32) + .section .popc_3insn_patch, "ax" + .word __arch_hweight32 + sllx %o0, 64-32, %g1 + retl + popc %g1, %o0 + .previous + +ENTRY(__arch_hweight64) + ba,pt %xcc, __sw_hweight64 + nop + nop +ENDPROC(__arch_hweight64) + .section .popc_3insn_patch, "ax" + .word __arch_hweight64 + retl + popc %o0, %o0 + nop + .previous -- cgit v1.1 From d600cbed0fe8fceec04500824f638dfe4996c653 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Aug 2011 19:41:12 -0700 Subject: sparc: Use hweight64() in popc emulation. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/unaligned_64.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 35cff16..76e4ac1 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -22,6 +22,7 @@ #include #include #include +#include #include enum direction { @@ -373,16 +374,11 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) } } -static char popc_helper[] = { -0, 1, 1, 2, 1, 2, 2, 3, -1, 2, 2, 3, 2, 3, 3, 4, -}; - int handle_popc(u32 insn, struct pt_regs *regs) { - u64 value; - int ret, i, rd = ((insn >> 25) & 0x1f); int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + int ret, rd = ((insn >> 25) & 0x1f); + u64 value; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); if (insn & 0x2000) { @@ -392,10 +388,7 @@ int handle_popc(u32 insn, struct pt_regs *regs) maybe_flush_windows(0, insn & 0x1f, rd, from_kernel); value = fetch_reg(insn & 0x1f, regs); } - for (ret = 0, i = 0; i < 16; i++) { - ret += popc_helper[value & 0xf]; - value >>= 4; - } + ret = hweight64(value); if (rd < 16) { if (rd) regs->u_regs[rd] = ret; -- cgit v1.1 From e2eb9f8158ead43a88c0f0b4d74257b1be938a18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Aug 2011 22:45:18 -0700 Subject: sparc: Add some missing hypervisor API groups. Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 6 ++++++ arch/sparc/kernel/hvapi.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 7a5f80d..2b7da27 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2940,11 +2940,17 @@ extern unsigned long sun4v_ncs_request(unsigned long request, #define HV_GRP_CORE 0x0001 #define HV_GRP_INTR 0x0002 #define HV_GRP_SOFT_STATE 0x0003 +#define HV_GRP_TM 0x0080 #define HV_GRP_PCI 0x0100 #define HV_GRP_LDOM 0x0101 #define HV_GRP_SVC_CHAN 0x0102 #define HV_GRP_NCS 0x0103 #define HV_GRP_RNG 0x0104 +#define HV_GRP_PBOOT 0x0105 +#define HV_GRP_TPM 0x0107 +#define HV_GRP_SDIO 0x0108 +#define HV_GRP_SDIO_ERR 0x0109 +#define HV_GRP_REBOOT_DATA 0x0110 #define HV_GRP_NIAG_PERF 0x0200 #define HV_GRP_FIRE_PERF 0x0201 #define HV_GRP_N2_CPU 0x0202 diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index d306e64..c2d055d 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -28,11 +28,17 @@ static struct api_info api_table[] = { { .group = HV_GRP_CORE, .flags = FLAG_PRE_API }, { .group = HV_GRP_INTR, }, { .group = HV_GRP_SOFT_STATE, }, + { .group = HV_GRP_TM, }, { .group = HV_GRP_PCI, .flags = FLAG_PRE_API }, { .group = HV_GRP_LDOM, }, { .group = HV_GRP_SVC_CHAN, .flags = FLAG_PRE_API }, { .group = HV_GRP_NCS, .flags = FLAG_PRE_API }, { .group = HV_GRP_RNG, }, + { .group = HV_GRP_PBOOT, }, + { .group = HV_GRP_TPM, }, + { .group = HV_GRP_SDIO, }, + { .group = HV_GRP_SDIO_ERR, }, + { .group = HV_GRP_REBOOT_DATA, }, { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, { .group = HV_GRP_FIRE_PERF, }, { .group = HV_GRP_N2_CPU, }, -- cgit v1.1 From ea5e7447ea9d555558e0f13798f5143dd51a915a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 1 Aug 2011 23:27:17 -0700 Subject: sparc: Set reboot-cmd using reboot data hypervisor call if available. Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/hypervisor.h | 7 +++++++ arch/sparc/kernel/ds.c | 30 ++++++++++++++++++++++++++++-- arch/sparc/kernel/hvcalls.S | 7 +++++++ arch/sparc/kernel/kernel.h | 9 +++++++++ arch/sparc/kernel/sstate.c | 9 ++------- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 2b7da27..015a761 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2927,6 +2927,13 @@ extern unsigned long sun4v_ncs_request(unsigned long request, #define HV_FAST_FIRE_GET_PERFREG 0x120 #define HV_FAST_FIRE_SET_PERFREG 0x121 +#define HV_FAST_REBOOT_DATA_SET 0x172 + +#ifndef __ASSEMBLY__ +extern unsigned long sun4v_reboot_data_set(unsigned long ra, + unsigned long len); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index dd1342c..490e541 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -15,12 +15,15 @@ #include #include +#include #include #include #include #include #include +#include "kernel.h" + #define DRV_MODULE_NAME "ds" #define PFX DRV_MODULE_NAME ": " #define DRV_MODULE_VERSION "1.0" @@ -828,18 +831,32 @@ void ldom_set_var(const char *var, const char *value) } } +static char full_boot_str[256] __attribute__((aligned(32))); +static int reboot_data_supported; + void ldom_reboot(const char *boot_command) { /* Don't bother with any of this if the boot_command * is empty. */ if (boot_command && strlen(boot_command)) { - char full_boot_str[256]; + unsigned long len; strcpy(full_boot_str, "boot "); strcpy(full_boot_str + strlen("boot "), boot_command); + len = strlen(full_boot_str); - ldom_set_var("reboot-command", full_boot_str); + if (reboot_data_supported) { + unsigned long ra = kimage_addr_to_ra(full_boot_str); + unsigned long hv_ret; + + hv_ret = sun4v_reboot_data_set(ra, len); + if (hv_ret != HV_EOK) + pr_err("SUN4V: Unable to set reboot data " + "hv_ret=%lu\n", hv_ret); + } else { + ldom_set_var("reboot-command", full_boot_str); + } } sun4v_mach_sir(); } @@ -1237,6 +1254,15 @@ static struct vio_driver ds_driver = { static int __init ds_init(void) { + unsigned long hv_ret, major, minor; + + hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor); + if (hv_ret == HV_EOK) { + pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n", + major, minor); + reboot_data_supported = 1; + } + kthread_run(ds_thread, NULL, "kldomd"); return vio_register_driver(&ds_driver); diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 8a5f35f..58d60de 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -798,3 +798,10 @@ ENTRY(sun4v_niagara2_setperf) retl nop ENDPROC(sun4v_niagara2_setperf) + +ENTRY(sun4v_reboot_data_set) + mov HV_FAST_REBOOT_DATA_SET, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_reboot_data_set) diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 8325d77..fd6c36b 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -4,6 +4,8 @@ #include #include +#include +#include /* cpu.c */ extern const char *sparc_pmu_type; @@ -14,6 +16,13 @@ extern int ncpus_probed; /* setup_64.c */ struct seq_file; extern void cpucap_info(struct seq_file *); + +static inline unsigned long kimage_addr_to_ra(const char *p) +{ + unsigned long val = (unsigned long) p; + + return kern_base + (val - KERNBASE); +} #endif #ifdef CONFIG_SPARC32 diff --git 
a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c index 8cdbe59..c59af54 100644 --- a/arch/sparc/kernel/sstate.c +++ b/arch/sparc/kernel/sstate.c @@ -14,14 +14,9 @@ #include #include -static int hv_supports_soft_state; - -static unsigned long kimage_addr_to_ra(const char *p) -{ - unsigned long val = (unsigned long) p; +#include "kernel.h" - return kern_base + (val - KERNBASE); -} +static int hv_supports_soft_state; static void do_set_sstate(unsigned long state, const char *msg) { -- cgit v1.1 From 56d205cc5c0a3032a605121d4253e111193bf923 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 2 Aug 2011 20:23:34 -0700 Subject: sparc: Use popc when possible for ffs/__ffs/ffz. Signed-off-by: David S. Miller --- arch/sparc/include/asm/bitops_64.h | 7 ++-- arch/sparc/kernel/entry.h | 7 ++++ arch/sparc/kernel/setup_64.c | 32 ++++++++++----- arch/sparc/kernel/sparc_ksyms_64.c | 4 ++ arch/sparc/kernel/vmlinux.lds.S | 5 +++ arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/ffs.S | 84 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 126 insertions(+), 15 deletions(-) create mode 100644 arch/sparc/lib/ffs.S diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 3588807..29011cc 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -26,16 +26,17 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr); #define smp_mb__before_clear_bit() barrier() #define smp_mb__after_clear_bit() barrier() -#include -#include #include #include #include #ifdef __KERNEL__ +extern int ffs(int x); +extern unsigned long __ffs(unsigned long); + +#include #include -#include /* * hweightN: returns the hamming weight (i.e. the number diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 16d1545..e27f8ea 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -49,6 +49,13 @@ struct popc_3insn_patch_entry { extern struct popc_3insn_patch_entry __popc_3insn_patch, __popc_3insn_patch_end; +struct popc_6insn_patch_entry { + unsigned int addr; + unsigned int insns[6]; +}; +extern struct popc_6insn_patch_entry __popc_6insn_patch, + __popc_6insn_patch_end; + extern void __init per_cpu_patch(void); extern void __init sun4v_patch(void); extern void __init boot_cpu_id_too_large(int cpu); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 26d1141..3e9daea 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -275,24 +275,34 @@ void __init sun4v_patch(void) static void __init popc_patch(void) { struct popc_3insn_patch_entry *p3; + struct popc_6insn_patch_entry *p6; p3 = &__popc_3insn_patch; while (p3 < &__popc_3insn_patch_end) { - unsigned long addr = p3->addr; + unsigned long i, addr = p3->addr; - *(unsigned int *) (addr + 0) = p3->insns[0]; - wmb(); - __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + for (i = 0; i < 3; i++) { + *(unsigned int *) (addr + (i * 4)) = p3->insns[i]; + wmb(); + __asm__ __volatile__("flush %0" + : : "r" (addr + (i * 4))); + } - *(unsigned int *) (addr + 4) = p3->insns[1]; - wmb(); - __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + p3++; + } - *(unsigned int *) (addr + 8) = p3->insns[2]; - wmb(); - __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + p6 = &__popc_6insn_patch; + while (p6 < &__popc_6insn_patch_end) { + unsigned long i, addr = p6->addr; - p3++; + for (i = 0; i < 6; i++) { + *(unsigned int *) (addr + (i * 4)) = p6->insns[i]; + wmb(); + __asm__ __volatile__("flush %0" + : : "r" (addr + (i * 
4))); + } + + p6++; } } diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index d0ee65a..83b47ab 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -45,5 +45,9 @@ EXPORT_SYMBOL(__arch_hweight16); EXPORT_SYMBOL(__arch_hweight32); EXPORT_SYMBOL(__arch_hweight64); +/* from ffs_ffz.S */ +EXPORT_SYMBOL(ffs); +EXPORT_SYMBOL(__ffs); + /* Exporting a symbol from /init/main.c */ EXPORT_SYMBOL(saved_command_line); diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index de20c14..94a9548 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -112,6 +112,11 @@ SECTIONS *(.popc_3insn_patch) __popc_3insn_patch_end = .; } + .popc_6insn_patch : { + __popc_6insn_patch = .; + *(.popc_6insn_patch) + __popc_6insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 11c850a..a3fc437 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -37,7 +37,7 @@ lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o -lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o +lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-y += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S new file mode 100644 index 0000000..b39389f --- /dev/null +++ b/arch/sparc/lib/ffs.S @@ -0,0 +1,84 @@ +#include + + .register %g2,#scratch + + .text + .align 32 + +ENTRY(ffs) + brnz,pt %o0, 1f + mov 1, %o1 + retl + clr %o0 + nop + nop +ENTRY(__ffs) + sllx %o0, 32, %g1 /* 1 */ + srlx %o0, 32, %g2 + + clr %o1 /* 2 */ + movrz %g1, %g2, %o0 + + movrz %g1, 32, %o1 /* 3 */ +1: clr %o2 + + sllx %o0, (64 - 16), %g1 /* 4 */ + srlx %o0, 16, %g2 + + movrz %g1, %g2, %o0 /* 5 */ + clr %o3 + + movrz %g1, 16, %o2 /* 6 */ + clr %o4 + + and %o0, 0xff, %g1 /* 7 */ + srlx %o0, 8, %g2 + + movrz %g1, %g2, %o0 /* 8 */ + clr %o5 + + movrz %g1, 8, %o3 /* 9 */ + add %o2, %o1, %o2 + + and %o0, 0xf, %g1 /* 10 */ + srlx %o0, 4, %g2 + + movrz %g1, %g2, %o0 /* 11 */ + add %o2, %o3, %o2 + + movrz %g1, 4, %o4 /* 12 */ + + and %o0, 0x3, %g1 /* 13 */ + srlx %o0, 2, %g2 + + movrz %g1, %g2, %o0 /* 14 */ + add %o2, %o4, %o2 + + movrz %g1, 2, %o5 /* 15 */ + + and %o0, 0x1, %g1 /* 16 */ + + add %o2, %o5, %o2 /* 17 */ + xor %g1, 0x1, %g1 + + retl /* 18 */ + add %o2, %g1, %o0 +ENDPROC(ffs) +ENDPROC(__ffs) + + .section .popc_6insn_patch, "ax" + .word ffs + brz,pn %o0, 98f + neg %o0, %g1 + xnor %o0, %g1, %o1 + popc %o1, %o0 +98: retl + nop + .word __ffs + neg %o0, %g1 + xnor %o0, %g1, %o1 + popc %o1, %o0 + retl + sub %o0, 1, %o0 + nop + .previous -- cgit v1.1 From 951c0d660a7c35286e401ca6d6ef38c9d49643c7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 3 Aug 2011 00:47:21 -0400 Subject: get rid of boilerplate switches in posix_acl.h the only potentially subtle thing here: get_cached_acl() is never called with the second argument other than ACL_TYPE_{ACCESS,DEFAULT}. IOW, that return ERR_PTR(-EINVAL) in there might as well be BUG(). 
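The helper replacing those switches is small enough to restate as a sketch here (names follow the patch; the posix_acl.h hunk below is authoritative):

static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
{
	switch (type) {
	case ACL_TYPE_ACCESS:
		return &inode->i_acl;
	case ACL_TYPE_DEFAULT:
		return &inode->i_default_acl;
	default:
		BUG();		/* no caller passes any other type */
	}
}

Each former switch user then collapses to a single struct posix_acl **p = acl_by_type(inode, type); dereference, and a hypothetical additional ACL type would only need to touch this one helper rather than every cache accessor.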
Signed-off-by: Al Viro --- include/linux/posix_acl.h | 62 +++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 42 deletions(-) diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 951bba8..a9c2fb2 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -84,20 +84,22 @@ extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *); #ifdef CONFIG_FS_POSIX_ACL -static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) +static inline struct posix_acl **acl_by_type(struct inode *inode, int type) { - struct posix_acl **p, *acl; switch (type) { case ACL_TYPE_ACCESS: - p = &inode->i_acl; - break; + return &inode->i_acl; case ACL_TYPE_DEFAULT: - p = &inode->i_default_acl; - break; + return &inode->i_default_acl; default: - return ERR_PTR(-EINVAL); + BUG(); } - acl = ACCESS_ONCE(*p); +} + +static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) +{ + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *acl = ACCESS_ONCE(*p); if (acl) { spin_lock(&inode->i_lock); acl = *p; @@ -110,18 +112,8 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) static inline int negative_cached_acl(struct inode *inode, int type) { - struct posix_acl **p, *acl; - switch (type) { - case ACL_TYPE_ACCESS: - p = &inode->i_acl; - break; - case ACL_TYPE_DEFAULT: - p = &inode->i_default_acl; - break; - default: - BUG(); - } - acl = ACCESS_ONCE(*p); + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *acl = ACCESS_ONCE(*p); if (acl) return 0; return 1; @@ -131,18 +123,11 @@ static inline void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct posix_acl *old = NULL; + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *old; spin_lock(&inode->i_lock); - switch (type) { - case ACL_TYPE_ACCESS: - old = inode->i_acl; - inode->i_acl = posix_acl_dup(acl); - break; - case ACL_TYPE_DEFAULT: - old = inode->i_default_acl; - inode->i_default_acl = posix_acl_dup(acl); - break; - } + old = *p; + *p = posix_dup_acl(acl); spin_unlock(&inode->i_lock); if (old != ACL_NOT_CACHED) posix_acl_release(old); @@ -150,18 +135,11 @@ static inline void set_cached_acl(struct inode *inode, static inline void forget_cached_acl(struct inode *inode, int type) { - struct posix_acl *old = NULL; + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *old; spin_lock(&inode->i_lock); - switch (type) { - case ACL_TYPE_ACCESS: - old = inode->i_acl; - inode->i_acl = ACL_NOT_CACHED; - break; - case ACL_TYPE_DEFAULT: - old = inode->i_default_acl; - inode->i_default_acl = ACL_NOT_CACHED; - break; - } + old = *p; + *p = ACL_NOT_CACHED; spin_unlock(&inode->i_lock); if (old != ACL_NOT_CACHED) posix_acl_release(old); -- cgit v1.1 From 3567866bf26190d1e734c975c907eb06e923ba23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 Aug 2011 21:32:13 -0400 Subject: RCUify freeing acls, let check_acl() go ahead in RCU mode if acl is cached Signed-off-by: Al Viro --- fs/namei.c | 17 ++++++----------- include/linux/posix_acl.h | 18 +++++++++--------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 445fd5d..3d607bd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask) #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *acl; - /* - * Under RCU walk, we cannot even do a "get_cached_acl()", - * 
because that involves locking and getting a refcount on - * a cached ACL. - * - * So the only case we handle during RCU walking is the - * case of a cached "no ACL at all", which needs no locks - * or refcounts. - */ if (mask & MAY_NOT_BLOCK) { - if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) + acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); + if (!acl) return -EAGAIN; - return -ECHILD; + /* no ->get_acl() calls in RCU mode... */ + if (acl == ACL_NOT_CACHED) + return -ECHILD; + return posix_acl_permission(inode, acl, mask); } acl = get_cached_acl(inode, ACL_TYPE_ACCESS); diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index a9c2fb2..b768110 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -9,6 +9,7 @@ #define __LINUX_POSIX_ACL_H #include +#include #define ACL_UNDEFINED_ID (-1) @@ -38,7 +39,10 @@ struct posix_acl_entry { }; struct posix_acl { - atomic_t a_refcount; + union { + atomic_t a_refcount; + struct rcu_head a_rcu; + }; unsigned int a_count; struct posix_acl_entry a_entries[0]; }; @@ -65,7 +69,7 @@ static inline void posix_acl_release(struct posix_acl *acl) { if (acl && atomic_dec_and_test(&acl->a_refcount)) - kfree(acl); + kfree_rcu(acl, a_rcu); } @@ -110,13 +114,9 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) return acl; } -static inline int negative_cached_acl(struct inode *inode, int type) +static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { - struct posix_acl **p = acl_by_type(inode, type); - struct posix_acl *acl = ACCESS_ONCE(*p); - if (acl) - return 0; - return 1; + return rcu_dereference(*acl_by_type(inode, type)); } static inline void set_cached_acl(struct inode *inode, @@ -127,7 +127,7 @@ static inline void set_cached_acl(struct inode *inode, struct posix_acl *old; spin_lock(&inode->i_lock); old = *p; - *p = posix_dup_acl(acl); + rcu_assign_pointer(*p, posix_acl_dup(acl)); spin_unlock(&inode->i_lock); if (old != ACL_NOT_CACHED) posix_acl_release(old); -- cgit v1.1 From 0a7a8fff7b8a3bc2d3528af07c9c88083250303d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 2 Aug 2011 15:40:22 -0700 Subject: Input: gpio_keys - fix two typos in devicetree documentation Signed-off-by: Tobias Klauser Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/gpio/gpio_keys.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt index 7190c99..5c2c021 100644 --- a/Documentation/devicetree/bindings/gpio/gpio_keys.txt +++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt @@ -10,7 +10,7 @@ Optional properties: Each button (key) is represented as a sub-node of "gpio-keys": Subnode properties: - - gpios: OF devcie-tree gpio specificatin. + - gpios: OF device-tree gpio specification. - label: Descriptive name of the key. - linux,code: Keycode to emit. -- cgit v1.1 From 4fecc2088fd02ce8a6580c12822987d3c6bab9b8 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 2 Aug 2011 15:41:37 -0700 Subject: Input: ad7879 - fix deficient device disable Input close or device disable should not interact with the exported gpiolib functionality. However that's the case. __ad7879_disable() clears the entire AD7879_REG_CTRL2, while it should just power down the ADC and its reference. 
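The essence of the fix below is a masked read-modify-write: only the power-management field of CTRL2 changes, so the GPIO configuration bits that share the register survive a device close. A hedged restatement of the idiom in a hypothetical helper, using the driver's macros and its cached register copy (spelled cmd_crtl2 in the driver):

static void ad7879_power_down(struct ad7879 *ts)
{
	u16 reg = ts->cmd_crtl2;		/* start from the cached CTRL2 value */

	reg &= ~AD7879_PM(-1);			/* clear just the PM bit field */
	reg |= AD7879_PM(AD7879_PM_SHUTDOWN);	/* power down ADC and reference only */
	ad7879_write(ts, AD7879_REG_CTRL2, reg);	/* GPIO bits in CTRL2 untouched */
}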
Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ad7879.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index bc3b518..131f9d1c 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -249,12 +249,14 @@ static void __ad7879_enable(struct ad7879 *ts) static void __ad7879_disable(struct ad7879 *ts) { + u16 reg = (ts->cmd_crtl2 & ~AD7879_PM(-1)) | + AD7879_PM(AD7879_PM_SHUTDOWN); disable_irq(ts->irq); if (del_timer_sync(&ts->timer)) ad7879_ts_event_release(ts); - ad7879_write(ts, AD7879_REG_CTRL2, AD7879_PM(AD7879_PM_SHUTDOWN)); + ad7879_write(ts, AD7879_REG_CTRL2, reg); } -- cgit v1.1 From dcd998ccdbf74a7d8fe0f0a44e85da1ed5975946 Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Wed, 3 Aug 2011 09:20:01 +0000 Subject: tcm_fc: Handle DDP/SW fc_frame_payload_get failures in ft_recv_write_data Problem: HW DDP context was not invalidated in case of ABORTS, etc... This leads to the problem where memory pages which are used for DDP as user descriptor could get reused for some other purpose (such as to satisfy new memory allocation request either by kernel or user mode threads) and since HW DDP context was not invalidated, HW continue to write to those pages, hence causing memory corruption. Fix: Either on incoming ABORTS or due to exchange time out, allowed the target to cleanup HW DDP context if it was setup for respective ft_cmd. Added new function to perform this cleanup, furthur it can be enhanced for other cleanup activity. Fix ft_recv_write_data() to properly handle fc_frame_payload_get to return pointer to payload if it exist. If there is no payload which is most common case (+ve case in case if DDP is working as expected, it will return NULL. Yes, scope of buf is limited to printk. Invalidation of HW context (which is done inside ft_invl_hw_context() is necessary in SUCCESS and FAILURE case of DDP. Hence invalidation is DONE as long as there was DDP setup (whether it worked correctly or not, NOTE: For some reason, if there is any error w.r.t DDP such as out of order packet reception, HW simply post the full packet in rx queue. Signed-off-by: Kiran Patil Cc: Robert W Love Signed-off-by: Nicholas A. 
Bellinger --- drivers/target/tcm_fc/tcm_fc.h | 5 ++ drivers/target/tcm_fc/tfc_cmd.c | 1 + drivers/target/tcm_fc/tfc_io.c | 121 ++++++++++++++++++++++++---------------- 3 files changed, 78 insertions(+), 49 deletions(-) diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index f7fff7e..bd4fe21 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller); ssize_t ft_format_wwn(char *, size_t, u64); +/* + * Underlying HW specific helper function + */ +void ft_invl_hw_context(struct ft_cmd *); + #endif /* __TCM_FC_H__ */ diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index a9e9a31..03977e8 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg) default: pr_debug("%s: unhandled frame r_ctl %x\n", __func__, fh->fh_r_ctl); + ft_invl_hw_context(cmd); fc_frame_free(fp); transport_generic_free_cmd(&cmd->se_cmd, 0, 0); break; diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index 11e6483..06943ee 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -214,62 +214,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp) if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF)) goto drop; + f_ctl = ntoh24(fh->fh_f_ctl); + ep = fc_seq_exch(seq); + lport = ep->lp; + if (cmd->was_ddp_setup) { + BUG_ON(!ep); + BUG_ON(!lport); + } + /* - * Doesn't expect even single byte of payload. Payload + * Doesn't expect payload if DDP is setup. Payload * is expected to be copied directly to user buffers - * due to DDP (Large Rx offload) feature, hence - * BUG_ON if BUF is non-NULL + * due to DDP (Large Rx offload), */ buf = fc_frame_payload_get(fp, 1); - if (cmd->was_ddp_setup && buf) { - pr_debug("%s: When DDP was setup, not expected to" - "receive frame with payload, Payload shall be" - "copied directly to buffer instead of coming " - "via. legacy receive queues\n", __func__); - BUG_ON(buf); - } + if (buf) + pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, " + "cmd->sg_cnt 0x%x. DDP was setup" + " hence not expected to receive frame with " + "payload, Frame will be dropped if " + "'Sequence Initiative' bit in f_ctl is " + "not set\n", __func__, ep->xid, f_ctl, + cmd->sg, cmd->sg_cnt); + /* + * Invalidate HW DDP context if it was setup for respective + * command. Invalidation of HW DDP context is requited in both + * situation (success and error). + */ + ft_invl_hw_context(cmd); /* - * If ft_cmd indicated 'ddp_setup', in that case only the last frame - * should come with 'TSI bit being set'. If 'TSI bit is not set and if - * data frame appears here, means error condition. In both the cases - * release the DDP context (ddp_put) and in error case, as well - * initiate error recovery mechanism. + * If "Sequence Initiative (TSI)" bit set in f_ctl, means last + * write data frame is received successfully where payload is + * posted directly to user buffer and only the last frame's + * header is posted in receive queue. + * + * If "Sequence Initiative (TSI)" bit is not set, means error + * condition w.r.t. DDP, hence drop the packet and let explict + * ABORTS from other end of exchange timer trigger the recovery. 
*/ - ep = fc_seq_exch(seq); - if (cmd->was_ddp_setup) { - BUG_ON(!ep); - lport = ep->lp; - BUG_ON(!lport); - } - if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) { - f_ctl = ntoh24(fh->fh_f_ctl); - /* - * If TSI bit set in f_ctl, means last write data frame is - * received successfully where payload is posted directly - * to user buffer and only the last frame's header is posted - * in legacy receive queue - */ - if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */ - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - goto last_frame; - } else { - /* - * Updating the write_data_len may be meaningless at - * this point, but just in case if required in future - * for debugging or any other purpose - */ - pr_err("%s: Received frame with TSI bit not" - " being SET, dropping the frame, " - "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n", - __func__, cmd->sg, cmd->sg_cnt); - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - lport->tt.seq_exch_abort(cmd->seq, 0); - goto drop; - } - } + if (f_ctl & FC_FC_SEQ_INIT) + goto last_frame; + else + goto drop; rel_off = ntohl(fh->fh_parm_offset); frame_len = fr_len(fp); @@ -332,3 +319,39 @@ last_frame: drop: fc_frame_free(fp); } + +/* + * Handle and cleanup any HW specific resources if + * received ABORTS, errors, timeouts. + */ +void ft_invl_hw_context(struct ft_cmd *cmd) +{ + struct fc_seq *seq = cmd->seq; + struct fc_exch *ep = NULL; + struct fc_lport *lport = NULL; + + BUG_ON(!cmd); + + /* Cleanup the DDP context in HW if DDP was setup */ + if (cmd->was_ddp_setup && seq) { + ep = fc_seq_exch(seq); + if (ep) { + lport = ep->lp; + if (lport && (ep->xid <= lport->lro_xid)) + /* + * "ddp_done" trigger invalidation of HW + * specific DDP context + */ + cmd->write_data_len = lport->tt.ddp_done(lport, + ep->xid); + + /* + * Resetting same variable to indicate HW's + * DDP context has been invalidated to avoid + * re_invalidation of same context (context is + * identified using ep->xid) + */ + cmd->was_ddp_setup = 0; + } + } +} -- cgit v1.1 From 9d5b36be64b42058e7fd12b71266bbf2bb7600fc Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:10 +0000 Subject: bnx2x: Fix missing pause on for 578xx When link speed is 1G and below, pause weren't sent due to missing pause setting in the UMAC. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 6 ++++++ drivers/net/bnx2x/bnx2x_reg.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index bcd8f00..aa9958e 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -1546,6 +1546,12 @@ static void bnx2x_umac_enable(struct link_params *params, vars->line_speed); break; } + if (!(vars->flow_ctrl & BNX2X_FLOW_CTRL_TX)) + val |= UMAC_COMMAND_CONFIG_REG_IGNORE_TX_PAUSE; + + if (!(vars->flow_ctrl & BNX2X_FLOW_CTRL_RX)) + val |= UMAC_COMMAND_CONFIG_REG_PAUSE_IGNORE; + REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val); udelay(50); diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h index 02461fe..d84642a 100644 --- a/drivers/net/bnx2x/bnx2x_reg.h +++ b/drivers/net/bnx2x/bnx2x_reg.h @@ -4771,9 +4771,11 @@ The fields are: [4:0] - tail pointer; 10:5] - Link List size; 15:11] - header pointer. 
*/ #define UCM_REG_XX_TABLE 0xe0300 +#define UMAC_COMMAND_CONFIG_REG_IGNORE_TX_PAUSE (0x1<<28) #define UMAC_COMMAND_CONFIG_REG_LOOP_ENA (0x1<<15) #define UMAC_COMMAND_CONFIG_REG_NO_LGTH_CHECK (0x1<<24) #define UMAC_COMMAND_CONFIG_REG_PAD_EN (0x1<<5) +#define UMAC_COMMAND_CONFIG_REG_PAUSE_IGNORE (0x1<<8) #define UMAC_COMMAND_CONFIG_REG_PROMIS_EN (0x1<<4) #define UMAC_COMMAND_CONFIG_REG_RX_ENA (0x1<<1) #define UMAC_COMMAND_CONFIG_REG_SW_RESET (0x1<<13) -- cgit v1.1 From b507766205f85d6d69892287e346a7c264a216b4 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:18 +0000 Subject: bnx2x: Fix chip hanging due to TX pipe stall. Fix a problem in which the 578xx chip hangs after running traffic, and then pulling the network cable. This occurs since TX pipe is stalled due to missing XON indication towards the NIG. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index aa9958e..0a7091d 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -1667,10 +1667,20 @@ static void bnx2x_xmac_disable(struct link_params *params) { u8 port = params->port; struct bnx2x *bp = params->bp; - u32 xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; + u32 pfc_ctrl, xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; if (REG_RD(bp, MISC_REG_RESET_REG_2) & MISC_REGISTERS_RESET_REG_2_XMAC) { + /* + * Send an indication to change the state in the NIG back to XON + * Clearing this bit enables the next set of this bit to get + * rising edge + */ + pfc_ctrl = REG_RD(bp, xmac_base + XMAC_REG_PFC_CTRL_HI); + REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI, + (pfc_ctrl & ~(1<<1))); + REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI, + (pfc_ctrl | (1<<1))); DP(NETIF_MSG_LINK, "Disable XMAC on port %x\n", port); REG_WR(bp, xmac_base + XMAC_REG_CTRL, 0); usleep_range(1000, 1000); -- cgit v1.1 From de6f3377d2da3b384ca3d716ffb8687ad175788a Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:25 +0000 Subject: bnx2x: Fix remote fault handling Fix couple of issues of remote fault detection and handling: Link may go down due to remote fault indications during link establishment. Possible link down after primary function migration. Remote fault was not detected on 578xx. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x/bnx2x_hsi.h | 2 + drivers/net/bnx2x/bnx2x_link.c | 110 ++++++++++++++++++++++++----------------- drivers/net/bnx2x/bnx2x_link.h | 2 +- 3 files changed, 69 insertions(+), 45 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_hsi.h b/drivers/net/bnx2x/bnx2x_hsi.h index 06727f3..dc24de4 100644 --- a/drivers/net/bnx2x/bnx2x_hsi.h +++ b/drivers/net/bnx2x/bnx2x_hsi.h @@ -1204,6 +1204,8 @@ struct drv_port_mb { #define LINK_STATUS_PFC_ENABLED 0x20000000 + #define LINK_STATUS_PHYSICAL_LINK_FLAG 0x40000000 + u32 port_stx; u32 stat_nig_timer; diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 0a7091d..8e68e15 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -1745,6 +1745,10 @@ static int bnx2x_emac_enable(struct link_params *params, DP(NETIF_MSG_LINK, "enabling EMAC\n"); + /* Disable BMAC */ + REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, + (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); + /* enable emac and not bmac */ REG_WR(bp, NIG_REG_EGRESS_EMAC0_PORT + port*4, 1); @@ -2599,12 +2603,6 @@ static int bnx2x_bmac1_enable(struct link_params *params, REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS, wb_data, 2); - if (vars->phy_flags & PHY_TX_ERROR_CHECK_FLAG) { - REG_RD_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_LSS_STATUS, - wb_data, 2); - if (wb_data[0] > 0) - return -ESRCH; - } return 0; } @@ -2670,16 +2668,6 @@ static int bnx2x_bmac2_enable(struct link_params *params, udelay(30); bnx2x_update_pfc_bmac2(params, vars, is_lb); - if (vars->phy_flags & PHY_TX_ERROR_CHECK_FLAG) { - REG_RD_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_RX_LSS_STAT, - wb_data, 2); - if (wb_data[0] > 0) { - DP(NETIF_MSG_LINK, "Got bad LSS status 0x%x\n", - wb_data[0]); - return -ESRCH; - } - } - return 0; } @@ -4369,6 +4357,9 @@ void bnx2x_link_status_update(struct link_params *params, vars->link_up = (vars->link_status & LINK_STATUS_LINK_UP); vars->phy_flags = PHY_XGXS_FLAG; + if (vars->link_status & LINK_STATUS_PHYSICAL_LINK_FLAG) + vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG; + if (vars->link_up) { DP(NETIF_MSG_LINK, "phy link up\n"); @@ -4460,6 +4451,8 @@ void bnx2x_link_status_update(struct link_params *params, /* indicate no mac active */ vars->mac_type = MAC_TYPE_NONE; + if (vars->link_status & LINK_STATUS_PHYSICAL_LINK_FLAG) + vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG; } /* Sync media type */ @@ -6176,6 +6169,7 @@ static int bnx2x_update_link_down(struct link_params *params, /* update shared memory */ vars->link_status &= ~(LINK_STATUS_SPEED_AND_DUPLEX_MASK | LINK_STATUS_LINK_UP | + LINK_STATUS_PHYSICAL_LINK_FLAG | LINK_STATUS_AUTO_NEGOTIATE_COMPLETE | LINK_STATUS_RX_FLOW_CONTROL_FLAG_MASK | LINK_STATUS_TX_FLOW_CONTROL_FLAG_MASK | @@ -6213,7 +6207,8 @@ static int bnx2x_update_link_up(struct link_params *params, u8 port = params->port; int rc = 0; - vars->link_status |= LINK_STATUS_LINK_UP; + vars->link_status |= (LINK_STATUS_LINK_UP | + LINK_STATUS_PHYSICAL_LINK_FLAG); vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG; if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) @@ -8132,7 +8127,6 @@ void bnx2x_handle_module_detect_int(struct link_params *params) offsetof(struct shmem_region, dev_info. port_feature_config[params->port]. 
config)); - bnx2x_set_gpio_int(bp, gpio_num, MISC_REGISTERS_GPIO_INT_OUTPUT_SET, gpio_port); @@ -8141,8 +8135,9 @@ void bnx2x_handle_module_detect_int(struct link_params *params) * Disable transmit for this module */ phy->media_type = ETH_PHY_NOT_PRESENT; - if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) == - PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER) + if (((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) == + PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER) || + CHIP_IS_E3(bp)) bnx2x_sfp_set_transmitter(params, phy, 0); } } @@ -8244,9 +8239,6 @@ static u8 bnx2x_8706_config_init(struct bnx2x_phy *phy, u16 cnt, val, tmp1; struct bnx2x *bp = params->bp; - /* SPF+ PHY: Set flag to check for Tx error */ - vars->phy_flags = PHY_TX_ERROR_CHECK_FLAG; - bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_2, MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port); /* HW reset */ @@ -8430,9 +8422,6 @@ static int bnx2x_8726_config_init(struct bnx2x_phy *phy, struct bnx2x *bp = params->bp; DP(NETIF_MSG_LINK, "Initializing BCM8726\n"); - /* SPF+ PHY: Set flag to check for Tx error */ - vars->phy_flags = PHY_TX_ERROR_CHECK_FLAG; - bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1<<15); bnx2x_wait_reset_complete(bp, phy, params); @@ -8601,9 +8590,6 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy, struct bnx2x *bp = params->bp; /* Enable PMD link, MOD_ABS_FLT, and 1G link alarm */ - /* SPF+ PHY: Set flag to check for Tx error */ - vars->phy_flags = PHY_TX_ERROR_CHECK_FLAG; - bnx2x_wait_reset_complete(bp, phy, params); rx_alarm_ctrl_val = (1<<2) | (1<<5) ; /* Should be 0x6 to enable XS on Tx side. */ @@ -10619,7 +10605,8 @@ static struct bnx2x_phy phy_warpcore = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT, .addr = 0xff, .def_md_devad = 0, - .flags = FLAGS_HW_LOCK_REQUIRED, + .flags = (FLAGS_HW_LOCK_REQUIRED | + FLAGS_TX_ERROR_CHECK), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10745,7 +10732,8 @@ static struct bnx2x_phy phy_8706 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706, .addr = 0xff, .def_md_devad = 0, - .flags = FLAGS_INIT_XGXS_FIRST, + .flags = (FLAGS_INIT_XGXS_FIRST | + FLAGS_TX_ERROR_CHECK), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10776,7 +10764,8 @@ static struct bnx2x_phy phy_8726 = { .addr = 0xff, .def_md_devad = 0, .flags = (FLAGS_HW_LOCK_REQUIRED | - FLAGS_INIT_XGXS_FIRST), + FLAGS_INIT_XGXS_FIRST | + FLAGS_TX_ERROR_CHECK), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -10807,7 +10796,8 @@ static struct bnx2x_phy phy_8727 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727, .addr = 0xff, .def_md_devad = 0, - .flags = FLAGS_FAN_FAILURE_DET_REQ, + .flags = (FLAGS_FAN_FAILURE_DET_REQ | + FLAGS_TX_ERROR_CHECK), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -12194,10 +12184,6 @@ static void bnx2x_analyze_link_error(struct link_params *params, u8 led_mode; u32 half_open_conn = (vars->phy_flags & PHY_HALF_OPEN_CONN_FLAG) > 0; - /*DP(NETIF_MSG_LINK, "CHECK LINK: %x half_open:%x-> lss:%x\n", - vars->link_up, - half_open_conn, lss_status);*/ - if ((lss_status ^ half_open_conn) == 0) return; @@ -12210,6 +12196,7 @@ static void bnx2x_analyze_link_error(struct link_params *params, * b. 
Update link_vars->link_up */ if (lss_status) { + DP(NETIF_MSG_LINK, "Remote Fault detected !!!\n"); vars->link_status &= ~LINK_STATUS_LINK_UP; vars->link_up = 0; vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG; @@ -12219,6 +12206,7 @@ static void bnx2x_analyze_link_error(struct link_params *params, */ led_mode = LED_MODE_OFF; } else { + DP(NETIF_MSG_LINK, "Remote Fault cleared\n"); vars->link_status |= LINK_STATUS_LINK_UP; vars->link_up = 1; vars->phy_flags &= ~PHY_HALF_OPEN_CONN_FLAG; @@ -12235,6 +12223,15 @@ static void bnx2x_analyze_link_error(struct link_params *params, bnx2x_notify_link_changed(bp); } +/****************************************************************************** +* Description: +* This function checks for half opened connection change indication. +* When such change occurs, it calls the bnx2x_analyze_link_error +* to check if Remote Fault is set or cleared. Reception of remote fault +* status message in the MAC indicates that the peer's MAC has detected +* a fault, for example, due to break in the TX side of fiber. +* +******************************************************************************/ static void bnx2x_check_half_open_conn(struct link_params *params, struct link_vars *vars) { @@ -12245,9 +12242,28 @@ static void bnx2x_check_half_open_conn(struct link_params *params, if ((vars->phy_flags & PHY_PHYSICAL_LINK_FLAG) == 0) return; - if (!CHIP_IS_E3(bp) && + if (CHIP_IS_E3(bp) && (REG_RD(bp, MISC_REG_RESET_REG_2) & - (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << params->port))) { + (MISC_REGISTERS_RESET_REG_2_XMAC))) { + /* Check E3 XMAC */ + /* + * Note that link speed cannot be queried here, since it may be + * zero while link is down. In case UMAC is active, LSS will + * simply not be set + */ + mac_base = (params->port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; + + /* Clear stick bits (Requires rising edge) */ + REG_WR(bp, mac_base + XMAC_REG_CLEAR_RX_LSS_STATUS, 0); + REG_WR(bp, mac_base + XMAC_REG_CLEAR_RX_LSS_STATUS, + XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS | + XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS); + if (REG_RD(bp, mac_base + XMAC_REG_RX_LSS_STATUS)) + lss_status = 1; + + bnx2x_analyze_link_error(params, vars, lss_status); + } else if (REG_RD(bp, MISC_REG_RESET_REG_2) & + (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << params->port)) { /* Check E1X / E2 BMAC */ u32 lss_status_reg; u32 wb_data[2]; @@ -12269,14 +12285,20 @@ static void bnx2x_check_half_open_conn(struct link_params *params, void bnx2x_period_func(struct link_params *params, struct link_vars *vars) { struct bnx2x *bp = params->bp; + u16 phy_idx; if (!params) { - DP(NETIF_MSG_LINK, "Ininitliazed params !\n"); + DP(NETIF_MSG_LINK, "Uninitialized params !\n"); return; } - /* DP(NETIF_MSG_LINK, "Periodic called vars->phy_flags 0x%x speed 0x%x - RESET_REG_2 0x%x\n", vars->phy_flags, vars->line_speed, - REG_RD(bp, MISC_REG_RESET_REG_2)); */ - bnx2x_check_half_open_conn(params, vars); + + for (phy_idx = INT_PHY; phy_idx < MAX_PHYS; phy_idx++) { + if (params->phy[phy_idx].flags & FLAGS_TX_ERROR_CHECK) { + bnx2x_set_aer_mmd(params, ¶ms->phy[phy_idx]); + bnx2x_check_half_open_conn(params, vars); + break; + } + } + if (CHIP_IS_E3(bp)) bnx2x_check_over_curr(params, vars); } diff --git a/drivers/net/bnx2x/bnx2x_link.h b/drivers/net/bnx2x/bnx2x_link.h index 6a7708d..7ee6b51 100644 --- a/drivers/net/bnx2x/bnx2x_link.h +++ b/drivers/net/bnx2x/bnx2x_link.h @@ -145,6 +145,7 @@ struct bnx2x_phy { #define FLAGS_SFP_NOT_APPROVED (1<<7) #define FLAGS_MDC_MDIO_WA (1<<8) #define FLAGS_DUMMY_READ (1<<9) 
+#define FLAGS_TX_ERROR_CHECK (1<<12) /* preemphasis values for the rx side */ u16 rx_preemphasis[4]; @@ -276,7 +277,6 @@ struct link_vars { #define PHY_PHYSICAL_LINK_FLAG (1<<2) #define PHY_HALF_OPEN_CONN_FLAG (1<<3) #define PHY_OVER_CURRENT_FLAG (1<<4) -#define PHY_TX_ERROR_CHECK_FLAG (1<<5) u8 mac_type; #define MAC_TYPE_NONE 0 -- cgit v1.1 From 157fa283a7cb5bc6a55dd4e0daf6eeef66adf354 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:32 +0000 Subject: bnx2x: Fix BCM578xx-B0 MDIO access Fix MDIO access to Warpcore on new chip version of 578xx. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 16 ++++++++++++++-- drivers/net/bnx2x/bnx2x_link.h | 1 + drivers/net/bnx2x/bnx2x_reg.h | 4 +++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 8e68e15..45255bd 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -2953,7 +2953,9 @@ static int bnx2x_cl45_read(struct bnx2x *bp, struct bnx2x_phy *phy, u32 val; u16 i; int rc = 0; - + if (phy->flags & FLAGS_MDC_MDIO_WA_B0) + bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS, + EMAC_MDIO_STATUS_10MB); /* address */ val = ((phy->addr << 21) | (devad << 16) | reg | EMAC_MDIO_COMM_COMMAND_ADDRESS | @@ -3007,6 +3009,9 @@ static int bnx2x_cl45_read(struct bnx2x *bp, struct bnx2x_phy *phy, } } + if (phy->flags & FLAGS_MDC_MDIO_WA_B0) + bnx2x_bits_dis(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS, + EMAC_MDIO_STATUS_10MB); return rc; } @@ -3016,6 +3021,9 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy, u32 tmp; u8 i; int rc = 0; + if (phy->flags & FLAGS_MDC_MDIO_WA_B0) + bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS, + EMAC_MDIO_STATUS_10MB); /* address */ @@ -3069,7 +3077,9 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy, bnx2x_cl45_read(bp, phy, devad, 0xf, &temp_val); } } - + if (phy->flags & FLAGS_MDC_MDIO_WA_B0) + bnx2x_bits_dis(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS, + EMAC_MDIO_STATUS_10MB); return rc; } @@ -11118,6 +11128,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port, */ if (CHIP_REV(bp) == CHIP_REV_Ax) phy->flags |= FLAGS_MDC_MDIO_WA; + else + phy->flags |= FLAGS_MDC_MDIO_WA_B0; } else { switch (switch_cfg) { case SWITCH_CFG_1G: diff --git a/drivers/net/bnx2x/bnx2x_link.h b/drivers/net/bnx2x/bnx2x_link.h index 7ee6b51..c12db6d 100644 --- a/drivers/net/bnx2x/bnx2x_link.h +++ b/drivers/net/bnx2x/bnx2x_link.h @@ -145,6 +145,7 @@ struct bnx2x_phy { #define FLAGS_SFP_NOT_APPROVED (1<<7) #define FLAGS_MDC_MDIO_WA (1<<8) #define FLAGS_DUMMY_READ (1<<9) +#define FLAGS_MDC_MDIO_WA_B0 (1<<10) #define FLAGS_TX_ERROR_CHECK (1<<12) /* preemphasis values for the rx side */ diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h index d84642a..27b5ecb 100644 --- a/drivers/net/bnx2x/bnx2x_reg.h +++ b/drivers/net/bnx2x/bnx2x_reg.h @@ -5624,8 +5624,9 @@ #define EMAC_MDIO_COMM_START_BUSY (1L<<29) #define EMAC_MDIO_MODE_AUTO_POLL (1L<<4) #define EMAC_MDIO_MODE_CLAUSE_45 (1L<<31) -#define EMAC_MDIO_MODE_CLOCK_CNT (0x3fL<<16) +#define EMAC_MDIO_MODE_CLOCK_CNT (0x3ffL<<16) #define EMAC_MDIO_MODE_CLOCK_CNT_BITSHIFT 16 +#define EMAC_MDIO_STATUS_10MB (1L<<1) #define EMAC_MODE_25G_MODE (1L<<5) #define EMAC_MODE_HALF_DUPLEX (1L<<1) #define EMAC_MODE_PORT_GMII (2L<<2) @@ -5636,6 +5637,7 @@ #define EMAC_REG_EMAC_MAC_MATCH 0x10 #define 
EMAC_REG_EMAC_MDIO_COMM 0xac #define EMAC_REG_EMAC_MDIO_MODE 0xb4 +#define EMAC_REG_EMAC_MDIO_STATUS 0xb0 #define EMAC_REG_EMAC_MODE 0x0 #define EMAC_REG_EMAC_RX_MODE 0xc8 #define EMAC_REG_EMAC_RX_MTU_SIZE 0x9c -- cgit v1.1 From 793bd450370bf85cd63cccaff5e2f1a62908a52f Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:40 +0000 Subject: bnx2x: Fix LED behavior This fix resolve two problems seen regarding LED: 1. LED doesn't flash during port identification. 2. Traffic LED sometimes do not blink. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 45255bd..d8f6eff 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -5922,20 +5922,30 @@ int bnx2x_set_led(struct link_params *params, tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED); EMAC_WR(bp, EMAC_REG_EMAC_LED, (tmp | EMAC_LED_OVERRIDE)); - return rc; + /* + * return here without enabling traffic + * LED blink andsetting rate in ON mode. + * In oper mode, enabling LED blink + * and setting rate is needed. + */ + if (mode == LED_MODE_ON) + return rc; } - } else if (SINGLE_MEDIA_DIRECT(params) && - (CHIP_IS_E1x(bp) || - CHIP_IS_E2(bp))) { + } else if (SINGLE_MEDIA_DIRECT(params)) { /* * This is a work-around for HW issue found when link * is up in CL73 */ - REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, 0); REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1); - } else { + if (CHIP_IS_E1x(bp) || + CHIP_IS_E2(bp) || + (mode == LED_MODE_ON)) + REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, 0); + else + REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, + hw_led_mode); + } else REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, hw_led_mode); - } REG_WR(bp, NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0 + port*4, 0); /* Set blinking rate to ~15.9Hz */ -- cgit v1.1 From 19af03a3c8cb1e07e31c070dfde9fac2e5e7796c Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:47 +0000 Subject: bnx2x: Fix link issue with DAC over 578xx Fix no-link issue on BCM578xx when direct attached cable is connected since Warpcore microcode restart was missing to re-read the new mode. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index d8f6eff..6a3fdf8 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -8029,6 +8029,9 @@ static void bnx2x_warpcore_set_limiting_mode(struct link_params *params, bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE, &val); + /* Restart microcode to re-read the new mode */ + bnx2x_warpcore_reset_lane(bp, phy, 1); + bnx2x_warpcore_reset_lane(bp, phy, 0); } -- cgit v1.1 From fd38f73eb936f9d9f28e4f7ff598cc0780e09424 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 22:59:53 +0000 Subject: bnx2x: Fix BCM84833 link BCM84833 fail to link due to incorrect auto-negotiation setting. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x/bnx2x_link.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 6a3fdf8..cf8b8d1 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -9268,7 +9268,13 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy, if (phy->req_duplex == DUPLEX_FULL) autoneg_val |= (1<<8); - bnx2x_cl45_write(bp, phy, + /* + * Always write this if this is not 84833. + * For 84833, write it only when it's a forced speed. + */ + if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) || + ((autoneg_val & (1<<12)) == 0)) + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_8481_LEGACY_MII_CTRL, autoneg_val); @@ -9282,13 +9288,12 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy, bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x3200); - } else if (phy->req_line_speed != SPEED_10 && - phy->req_line_speed != SPEED_100) { + } else bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_8481_10GBASE_T_AN_CTRL, 1); - } + /* Save spirom version */ bnx2x_save_848xx_spirom_version(phy, params); @@ -9781,11 +9786,9 @@ static void bnx2x_848x3_link_reset(struct bnx2x_phy *phy, bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD, 0x400f, &val16); - /* Put to low power mode on newer FW */ - if ((val16 & 0x303f) > 0x1009) - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, - MDIO_PMA_REG_CTRL, 0x800); + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, + MDIO_PMA_REG_CTRL, 0x800); } } -- cgit v1.1 From d2059a061164120a1e44a0ca46fe08044d6d7c2d Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 23:00:00 +0000 Subject: bnx2x: Fix BCM54618se invalid link indication After resetting BCM54618se, link partner would still see link since the PHY wasn't put into low-power state. Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index cf8b8d1..5e6f351 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -10219,8 +10219,15 @@ static void bnx2x_54618se_link_reset(struct bnx2x_phy *phy, u32 cfg_pin; u8 port; - /* This works with E3 only, no need to check the chip - before determining the port. */ + /* + * In case of no EPIO routed to reset the GPHY, put it + * in low power mode. + */ + bnx2x_cl22_write(bp, phy, MDIO_PMA_REG_CTRL, 0x800); + /* + * This works with E3 only, no need to check the chip + * before determining the port. + */ port = params->port; cfg_pin = (REG_RD(bp, params->shmem_base + offsetof(struct shmem_region, -- cgit v1.1 From afad009ad76ece72a3c9629bbc08f14459b9bba7 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 23:00:06 +0000 Subject: bnx2x: Fix BCM578xx MAC test Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x/bnx2x_link.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 5e6f351..01fb92c 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -11541,13 +11541,12 @@ void bnx2x_init_xmac_loopback(struct link_params *params, * Set WC to loopback mode since link is required to provide clock * to the XMAC in 20G mode */ - if (vars->line_speed == SPEED_20000) { - bnx2x_set_aer_mmd(params, ¶ms->phy[0]); - bnx2x_warpcore_reset_lane(bp, ¶ms->phy[0], 0); - params->phy[INT_PHY].config_loopback( + bnx2x_set_aer_mmd(params, ¶ms->phy[0]); + bnx2x_warpcore_reset_lane(bp, ¶ms->phy[0], 0); + params->phy[INT_PHY].config_loopback( ¶ms->phy[INT_PHY], params); - } + bnx2x_xmac_enable(params, vars, 1); REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); } -- cgit v1.1 From 28f4881cbf9ce285edfc245a8990af36d21c062f Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 2 Aug 2011 23:00:12 +0000 Subject: bnx2x: Clear MDIO access warning during first driver load Signed-off-by: Yaniv Rosner Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x/bnx2x_link.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c index 01fb92c..d45b155 100644 --- a/drivers/net/bnx2x/bnx2x_link.c +++ b/drivers/net/bnx2x/bnx2x_link.c @@ -11724,12 +11724,16 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars, bnx2x_set_led(params, vars, LED_MODE_OFF, 0); if (reset_ext_phy) { + bnx2x_set_mdio_clk(bp, params->chip_id, port); for (phy_index = EXT_PHY1; phy_index < params->num_phys; phy_index++) { - if (params->phy[phy_index].link_reset) + if (params->phy[phy_index].link_reset) { + bnx2x_set_aer_mmd(params, + ¶ms->phy[phy_index]); params->phy[phy_index].link_reset( ¶ms->phy[phy_index], params); + } if (params->phy[phy_index].flags & FLAGS_REARM_LATCH_SIGNAL) clear_latch_ind = 1; -- cgit v1.1 From f2c31e32b378a6653f8de606149d963baf11d7d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 19:00:53 +0000 Subject: net: fix NULL dereferences in check_peer_redir() Gergely Kalman reported crashes in check_peer_redir(). It appears commit f39925dbde778 (ipv4: Cache learned redirect information in inetpeer.) added a race, leading to possible NULL ptr dereference. Since we can now change dst neighbour, we should make sure a reader can safely use a neighbour. Add RCU protection to dst neighbour, and make sure check_peer_redir() can be called safely by different cpus in parallel. As neighbours are already freed after one RCU grace period, this patch should not add typical RCU penalty (cache cold effects) Many thanks to Gergely for providing a pretty report pointing to the bug. Reported-by: Gergely Kalman Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/dst.h | 17 +++++++++++++---- net/ipv4/ip_output.c | 10 ++++++++-- net/ipv4/route.c | 14 ++++++++------ net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ip6_output.c | 13 +++++++++++-- net/ipv6/route.c | 35 +++++++++++++++++++++++++---------- 7 files changed, 67 insertions(+), 26 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 29e2557..13d507d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -37,7 +37,7 @@ struct dst_entry { unsigned long _metrics; unsigned long expires; struct dst_entry *path; - struct neighbour *_neighbour; + struct neighbour __rcu *_neighbour; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -88,12 +88,17 @@ struct dst_entry { static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst) { - return dst->_neighbour; + return rcu_dereference(dst->_neighbour); +} + +static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst) +{ + return rcu_dereference_raw(dst->_neighbour); } static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) { - dst->_neighbour = neigh; + rcu_assign_pointer(dst->_neighbour, neigh); } extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); @@ -382,8 +387,12 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { if (dst) { - struct neighbour *n = dst_get_neighbour(dst); + struct neighbour *n; + + rcu_read_lock(); + n = dst_get_neighbour(dst); neigh_confirm(n); + rcu_read_unlock(); } } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ccaaa85..77d3ede 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1730689..6afc4eb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(dst_get_neighbour(&rt->dst)); - dst_set_neighbour(&rt->dst, NULL); - rt->rt_gateway = peer->redirect_learned.a4; - rt_bind_neighbour(rt); - n = dst_get_neighbour(&rt->dst); + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a55500c..f012ebd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (dst_get_neighbour(&rt->dst) == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 54a4678..320d91d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging 
clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32e5339..4c882cf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ + rcu_read_lock(); n = dst_get_neighbour(*dst); if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8987da..9e69eb0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -364,7 +364,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -373,8 +373,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? 
dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -387,8 +389,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -412,8 +417,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = dst_get_neighbour(&rt->dst); + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -430,6 +438,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -769,7 +778,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst))); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -803,7 +812,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1587,7 +1596,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour(&rt->dst)) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1682,7 +1691,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 
*/ - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -2326,6 +2335,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2414,8 +2424,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (dst_get_neighbour(&rt->dst)) - NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2608,12 +2621,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif + rcu_read_lock(); n = dst_get_neighbour(&rt->dst); if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, -- cgit v1.1 From 750f463a749e28464151ad26938d11b07b1c43cb Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 3 Aug 2011 11:28:14 +0100 Subject: dt: add of_alias_scan and of_alias_get_id The patch adds function of_alias_scan to populate a global lookup table with the properties of 'aliases' node and function of_alias_get_id for drivers to find alias id from the lookup table. Signed-off-by: Shawn Guo [grant.likely: add locking and rework parse loop] Signed-off-by: Grant Likely --- drivers/of/base.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/of/fdt.c | 4 +- include/linux/of.h | 8 +++ include/linux/of_fdt.h | 1 - 4 files changed, 141 insertions(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 3ff22e3..fb28b5a 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,14 +17,39 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include #include #include #include #include #include +/** + * struct alias_prop - Alias property in 'aliases' node + * @link: List node to link the structure in aliases_lookup list + * @alias: Alias property name + * @np: Pointer to device_node that the alias stands for + * @id: Index value from end of alias name + * @stem: Alias string without the index + * + * The structure represents one alias property of 'aliases' node as + * an entry in aliases_lookup list. + */ +struct alias_prop { + struct list_head link; + const char *alias; + struct device_node *np; + int id; + char stem[0]; +}; + +static LIST_HEAD(aliases_lookup); + struct device_node *allnodes; struct device_node *of_chosen; +struct device_node *of_aliases; + +static DEFINE_MUTEX(of_aliases_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. 
@@ -988,3 +1013,108 @@ out_unlock: } #endif /* defined(CONFIG_OF_DYNAMIC) */ +static void of_alias_add(struct alias_prop *ap, struct device_node *np, + int id, const char *stem, int stem_len) +{ + ap->id = id; + ap->np = np; + strncpy(ap->stem, stem, stem_len); + ap->stem[stem_len] = 0; + list_add_tail(&ap->link, &aliases_lookup); + pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n", + ap->alias, ap->stem, ap->id, np ? np->full_name : NULL); +} + +/** + * of_alias_scan() - Scan all properties of 'aliases' node + * + * The function scans all the properties of 'aliases' node and populate + * the global lookup table with the properties. It returns the + * number of alias_prop found, or error code in error case. + */ +__init void of_alias_scan(void) +{ + struct property *pp; + + if (!of_aliases) + return; + + for_each_property(pp, of_aliases->properties) { + const char *start = pp->name; + const char *end = start + strlen(start); + struct device_node *np; + struct alias_prop *ap; + int id, len; + + /* Skip those we do not want to proceed */ + if (!strcmp(pp->name, "name") || + !strcmp(pp->name, "phandle") || + !strcmp(pp->name, "linux,phandle")) + continue; + + np = of_find_node_by_path(pp->value); + if (!np) + continue; + + /* walk alias backwards to extract the id and 'stem' string */ + while (isdigit(*(end-1)) && end > start) + end--; + len = end - start; + id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1; + + /* Allocate an alias_prop with enough space for the stem */ + ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4); + if (!ap) + continue; + ap->alias = start; + of_alias_add(ap, np, id, start, len); + } +} + +/** + * of_alias_get_id() - Get alias id for the given device_node + * @np: Pointer to the given device_node + * @stem: Alias stem of the given device_node + * + * The function travels the lookup table to get alias id for the given + * device_node and alias stem. It returns the alias id if find it. + * If not, dynamically creates one in the lookup table and returns it, + * or returns error code if fail to create. 
+ */ +int of_alias_get_id(struct device_node *np, const char *stem) +{ + struct alias_prop *app; + int id = 0; + bool found = false; + + mutex_lock(&of_aliases_mutex); + list_for_each_entry(app, &aliases_lookup, link) { + if (strcmp(app->stem, stem) != 0) + continue; + + if (np == app->np) { + found = true; + id = app->id; + break; + } + + if (id <= app->id) + id = app->id + 1; + } + + /* If an id is not found, then allocate a new one */ + if (!found) { + app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4); + if (!app) { + id = -ENODEV; + goto out; + } + of_alias_add(app, np, id, stem, strlen(stem)); + } + + out: + mutex_unlock(&of_aliases_mutex); + + return id; +} +EXPORT_SYMBOL_GPL(of_alias_get_id); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 65200af..13d6d3a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -707,10 +707,12 @@ void __init unflatten_device_tree(void) __unflatten_device_tree(initial_boot_params, &allnodes, early_init_dt_alloc_memory_arch); - /* Get pointer to OF "/chosen" node for use everywhere */ + /* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */ of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); + of_aliases = of_find_node_by_path("/aliases"); + of_alias_scan(); } #endif /* CONFIG_OF_EARLY_FLATTREE */ diff --git a/include/linux/of.h b/include/linux/of.h index 0085bb0..bc3dc63 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -68,6 +68,7 @@ struct device_node { /* Pointer for first entry in chain of all nodes. */ extern struct device_node *allnodes; extern struct device_node *of_chosen; +extern struct device_node *of_aliases; extern rwlock_t devtree_lock; static inline bool of_have_populated_dt(void) @@ -209,6 +210,9 @@ extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); +#define for_each_property(pp, properties) \ + for (pp = properties; pp != NULL; pp = pp->next) + extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -221,6 +225,10 @@ extern int of_parse_phandles_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, struct device_node **out_node, const void **out_args); +extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align); +extern void of_alias_scan(void); +extern int of_alias_get_id(struct device_node *np, const char *stem); + extern int of_machine_is_compatible(const char *compat); extern int prom_add_property(struct device_node* np, struct property* prop); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index c84d900..b74b74f 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -97,7 +97,6 @@ extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_add_memory_arch(u64 base, u64 size); -extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); /* -- cgit v1.1 From be8d97a540cec5d272b1e08e27741a3c3aa38f66 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Wed, 3 Aug 2011 16:44:17 +0200 Subject: [S390] qdio: 2nd stage retry on SIGA-W busy conditions The SIGA-W may return with the busy bit set which means the device was blocked. 
The busy loop which retries the SIGA-W for 100us may not be long enough when running under a heavily loaded hypervisor. Extend the retry mechanism by adding a longer second stage which retries the SIGA-W for up to 10s. In difference to the first retry loop the second stage is using mdelay to stop the cpu between the retries and thereby avoid additional preassure in on the hypervisor. If the second stage retry is successfull a device reset is avoided. Signed-off-by: Jan Glauber Signed-off-by: Heiko Carstens --- drivers/s390/cio/qdio.h | 2 ++ drivers/s390/cio/qdio_main.c | 21 ++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 7bc643f..e5c9664 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -14,6 +14,8 @@ #include "chsc.h" #define QDIO_BUSY_BIT_PATIENCE (100 << 12) /* 100 microseconds */ +#define QDIO_BUSY_BIT_RETRY_DELAY 10 /* 10 milliseconds */ +#define QDIO_BUSY_BIT_RETRIES 1000 /* = 10s retry time */ #define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ /* diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index e58169c..288c914 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -313,7 +313,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit) unsigned long schid = *((u32 *) &q->irq_ptr->schid); unsigned int fc = QDIO_SIGA_WRITE; u64 start_time = 0; - int cc; + int retries = 0, cc; if (is_qebsm(q)) { schid = q->irq_ptr->sch_token; @@ -325,6 +325,7 @@ again: /* hipersocket busy condition */ if (unlikely(*busy_bit)) { WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2); + retries++; if (!start_time) { start_time = get_clock(); @@ -333,6 +334,11 @@ again: if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE) goto again; } + if (retries) { + DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, + "%4x cc2 BB1:%1d", SCH_NO(q), q->nr); + DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "count:%u", retries); + } return cc; } @@ -728,13 +734,14 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q) static int qdio_kick_outbound_q(struct qdio_q *q) { + int retries = 0, cc; unsigned int busy_bit; - int cc; if (!need_siga_out(q)) return 0; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr); +retry: qperf_inc(q, siga_write); cc = qdio_siga_output(q, &busy_bit); @@ -743,7 +750,11 @@ static int qdio_kick_outbound_q(struct qdio_q *q) break; case 2: if (busy_bit) { - DBF_ERROR("%4x cc2 REP:%1d", SCH_NO(q), q->nr); + while (++retries < QDIO_BUSY_BIT_RETRIES) { + mdelay(QDIO_BUSY_BIT_RETRY_DELAY); + goto retry; + } + DBF_ERROR("%4x cc2 BBC:%1d", SCH_NO(q), q->nr); cc |= QDIO_ERROR_SIGA_BUSY; } else DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w cc2:%1d", q->nr); @@ -753,6 +764,10 @@ static int qdio_kick_outbound_q(struct qdio_q *q) DBF_ERROR("%4x SIGA-W:%1d", SCH_NO(q), cc); break; } + if (retries) { + DBF_ERROR("%4x cc2 BB2:%1d", SCH_NO(q), q->nr); + DBF_ERROR("count:%u", retries); + } return cc; } -- cgit v1.1 From 944291de33b26a8b403f13f5eb0cc51fb982aa1e Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Wed, 3 Aug 2011 16:44:18 +0200 Subject: [S390] missing return in page_table_alloc_pgste MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compile warning for !CONFIG_PGSTE: CC arch/s390/mm/pgtable.o arch/s390/mm/pgtable.c: In function ‘page_table_alloc_pgste’: arch/s390/mm/pgtable.c:531:1: warning: no return statement in function returning non-void [-Wreturn-type] 
Signed-off-by: Jan Glauber Signed-off-by: Heiko Carstens --- arch/s390/mm/pgtable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2adb239..4d1f2bc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -528,6 +528,7 @@ static inline void page_table_free_pgste(unsigned long *table) static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { + return NULL; } static inline void page_table_free_pgste(unsigned long *table) -- cgit v1.1 From 7dd6b3343fdc190712d1620ee8848d25c4c77c33 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 3 Aug 2011 16:44:19 +0200 Subject: [S390] Add PSW restart shutdown trigger With this patch a new S390 shutdown trigger "restart" is added. If under z/VM "systerm restart" is entered or under the HMC the "PSW restart" button is pressed, the PSW located at 0 (31 bit) or 0x1a0 (64 bit) bit is loaded. Now we execute do_restart() that processes the restart action that is defined under /sys/firmware/shutdown_actions/on_restart. Currently the following actions are possible: reipl (default), stop, vmcmd, dump, and dump_reipl. Signed-off-by: Michael Holzheu Signed-off-by: Heiko Carstens --- arch/s390/include/asm/lowcore.h | 11 +++++++++-- arch/s390/include/asm/system.h | 1 + arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 28 ++++++++++++++++++++++++++++ arch/s390/kernel/entry64.S | 20 ++++++++++++++++++++ arch/s390/kernel/ipl.c | 39 ++++++++++++++++++++++++++++++++++++++- arch/s390/kernel/setup.c | 24 +++++++++++++++++++++++- arch/s390/kernel/smp.c | 11 ++++++++++- arch/s390/mm/maccess.c | 16 ++++++++++++++++ 9 files changed, 146 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index f26280d..e85c911 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -18,6 +18,7 @@ void system_call(void); void pgm_check_handler(void); void mcck_int_handler(void); void io_int_handler(void); +void psw_restart_int_handler(void); #ifdef CONFIG_32BIT @@ -150,7 +151,10 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ - __u8 pad_0x0e08[0x0f00-0x0e08]; /* 0x0e08 */ + + /* 64 bit save area */ + __u64 save_area_64; /* 0x0e08 */ + __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -286,7 +290,10 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + + /* 64 bit save area */ + __u64 save_area_64; /* 0x0e0c */ + __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index d382629..6582f69 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -113,6 +113,7 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); +extern void copy_to_absolute_zero(void *dest, void *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 05d8f38..199a537 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -142,6 +142,7 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct 
_lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); + DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3eab7cf..02ec8fe 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -849,6 +849,34 @@ restart_crash: restart_go: #endif +# +# PSW restart interrupt handler +# +ENTRY(psw_restart_int_handler) + st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + basr %r15,0 +0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack + l %r15,0(%r15) + ahi %r15,-SP_SIZE # make room for pt_regs + stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack + mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + basr %r14,0 +1: l %r14,.Ldo_restart-1b(%r14) + basr %r14,%r14 + + basr %r14,0 # load disabled wait PSW if +2: lpsw restart_psw_crash-2b(%r14) # do_restart returns + .align 4 +.Ldo_restart: + .long do_restart +.Lrestart_stack: + .long restart_stack + .align 8 +restart_psw_crash: + .long 0x000a0000,0x00000000 + restart_psw_crash + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 7a0fd42..5f729d6 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -865,6 +865,26 @@ restart_crash: restart_go: #endif +# +# PSW restart interrupt handler +# +ENTRY(psw_restart_int_handler) + stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + larl %r15,restart_stack # load restart stack + lg %r15,0(%r15) + aghi %r15,-SP_SIZE # make room for pt_regs + stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack + mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + brasl %r14,do_restart + + larl %r14,restart_psw_crash # load disabled wait PSW if + lpswe 0(%r14) # do_restart returns + .align 8 +restart_psw_crash: + .quad 0x0002000080000000,0x0000000000000000 + restart_psw_crash + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index a689070..059c590 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -45,11 +45,13 @@ * - halt * - power off * - reipl + * - restart */ #define ON_PANIC_STR "on_panic" #define ON_HALT_STR "on_halt" #define ON_POFF_STR "on_poff" #define ON_REIPL_STR "on_reboot" +#define ON_RESTART_STR "on_restart" struct shutdown_action; struct shutdown_trigger { @@ -1544,17 +1546,20 @@ static char vmcmd_on_reboot[128]; static char vmcmd_on_panic[128]; static char vmcmd_on_halt[128]; static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); static struct attribute *vmcmd_attrs[] = { &sys_vmcmd_on_reboot_attr.attr, &sys_vmcmd_on_panic_attr.attr, &sys_vmcmd_on_halt_attr.attr, &sys_vmcmd_on_poff_attr.attr, + 
&sys_vmcmd_on_restart_attr.attr, NULL, }; @@ -1576,6 +1581,8 @@ static void vmcmd_run(struct shutdown_trigger *trigger) cmd = vmcmd_on_halt; else if (strcmp(trigger->name, ON_POFF_STR) == 0) cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; else return; @@ -1707,6 +1714,34 @@ static void do_panic(void) stop_run(&on_panic_trigger); } +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &reipl_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} + +static struct kobj_attribute on_restart_attr = + __ATTR(on_restart, 0644, on_restart_show, on_restart_store); + +void do_restart(void) +{ + smp_send_stop(); + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} + /* on halt */ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; @@ -1783,7 +1818,9 @@ static void __init shutdown_triggers_init(void) if (sysfs_create_file(&shutdown_actions_kset->kobj, &on_poff_attr.attr)) goto fail; - + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_restart_attr.attr)) + goto fail; return; fail: panic("shutdown_triggers_init failed\n"); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0c35dee1..18640ff 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -346,7 +346,7 @@ setup_lowcore(void) lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; if (user_mode != HOME_SPACE_MODE) lc->restart_psw.mask |= PSW_ASC_HOME; lc->external_new_psw.mask = psw_kernel_bits; @@ -529,6 +529,27 @@ static void __init setup_memory_end(void) memory_end = memory_size; } +void *restart_stack __attribute__((__section__(".data"))); + +/* + * Setup new PSW and allocate stack for PSW restart interrupt + */ +static void __init setup_restart_psw(void) +{ + psw_t psw; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Setup restart PSW for absolute zero lowcore. 
This is necesary + * if PSW restart is done on an offline CPU that has lowcore zero + */ + psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; + copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); +} + static void __init setup_memory(void) { @@ -792,6 +813,7 @@ setup_arch(char **cmdline_p) setup_addressing_mode(); setup_memory(); setup_resources(); + setup_restart_psw(); setup_lowcore(); cpu_init(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a6d85c0..70f4b96 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -468,6 +468,11 @@ int __cpuinit start_secondary(void *cpuvoid) ipi_call_lock(); set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); + __ctl_clear_bit(0, 28); /* Disable lowcore protection */ + S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.addr = + PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; + __ctl_set_bit(0, 28); /* Enable lowcore protection */ /* Switch on interrupts */ local_irq_enable(); /* cpu_idle will call schedule for us */ @@ -507,7 +512,11 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - + lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.addr = + PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + if (user_mode != HOME_SPACE_MODE) + lowcore->restart_psw.mask |= PSW_ASC_HOME; #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { unsigned long save_area; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 51e5cd9..5dbbaa6 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -85,3 +85,19 @@ int memcpy_real(void *dest, void *src, size_t count) arch_local_irq_restore(flags); return rc; } + +/* + * Copy memory to absolute zero + */ +void copy_to_absolute_zero(void *dest, void *src, size_t count) +{ + unsigned long cr0; + + BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore)); + preempt_disable(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + memcpy_real(dest + store_prefix(), src, count); + __ctl_load(cr0, 0, 0); + preempt_enable(); +} -- cgit v1.1 From e4258d55bff06780cd424c671b576a90acc1592f Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Wed, 3 Aug 2011 16:44:20 +0200 Subject: [S390] dasd: use vmalloc for statistics input buffer The size of the buffer that is used to store DASD statistics input strings depends on the user input. If the input string is to large, the write operation could fail with -ENOMEM. To avoid this, use vmalloc instead of kmalloc. 
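As a minimal sketch of the pattern (the helper name is hypothetical, not taken from the patch): allocate the user-sized buffer from vmalloc space and pair it with vfree on every exit path, so that large inputs do not depend on high-order contiguous allocations.

#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/err.h>

/* Copy a user-supplied string of arbitrary length; sketch only. */
static char *copy_user_string(const char __user *user_buf, size_t user_len)
{
	char *buffer;

	buffer = vmalloc(user_len + 1);	/* virtually contiguous is sufficient here */
	if (!buffer)
		return ERR_PTR(-ENOMEM);
	if (copy_from_user(buffer, user_buf, user_len)) {
		vfree(buffer);		/* vmalloc memory must be freed with vfree */
		return ERR_PTR(-EFAULT);
	}
	buffer[user_len] = '\0';
	return buffer;
}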
Signed-off-by: Stefan Weinhuber Signed-off-by: Heiko Carstens --- drivers/s390/block/dasd.c | 9 +++++---- drivers/s390/block/dasd_proc.c | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 432444a..a1d3ddb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -888,11 +889,11 @@ char *dasd_get_user_string(const char __user *user_buf, size_t user_len) { char *buffer; - buffer = kmalloc(user_len + 1, GFP_KERNEL); + buffer = vmalloc(user_len + 1); if (buffer == NULL) return ERR_PTR(-ENOMEM); if (copy_from_user(buffer, user_buf, user_len) != 0) { - kfree(buffer); + vfree(buffer); return ERR_PTR(-EFAULT); } /* got the string, now strip linefeed. */ @@ -930,7 +931,7 @@ static ssize_t dasd_stats_write(struct file *file, dasd_profile_off(prof); } else rc = -EINVAL; - kfree(buffer); + vfree(buffer); return rc; } @@ -1042,7 +1043,7 @@ static ssize_t dasd_stats_global_write(struct file *file, dasd_global_profile_level = DASD_PROFILE_OFF; } else rc = -EINVAL; - kfree(buffer); + vfree(buffer); return rc; } diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 6c3c536..e12989f 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -312,14 +312,14 @@ static ssize_t dasd_stats_proc_write(struct file *file, pr_info("The statistics have been reset\n"); } else goto out_parse_error; - kfree(buffer); + vfree(buffer); return user_len; out_parse_error: rc = -EINVAL; pr_warning("%s is not a supported value for /proc/dasd/statistics\n", str); out_error: - kfree(buffer); + vfree(buffer); return rc; #else pr_warning("/proc/dasd/statistics: is not activated in this kernel\n"); -- cgit v1.1 From ef1daec8da2c04b0c6e91a34b9cac1aad33c6692 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 3 Aug 2011 16:44:21 +0200 Subject: [S390] Export store_status() function For kdump we need a store status function to save the registers for the current CPU. Therefore this patch exports a function "store_status()". In addition to that now also floating point registers are saved correctly. 
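From C the routine needs nothing more than a bare declaration; the prototype below is an assumption based on how the assembly is written (no arguments, all state is written to the lowcore save area), and the preconditions are the ones documented in reipl64.S.

/* Assumed C-side declaration; sketch only.
 * Callers must ensure, as documented in the assembly:
 *  - the prefix register is set to zero,
 *  - the original prefix register is stored in "dump_prefix_page",
 *  - lowcore protection is switched off.
 */
extern void store_status(void);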
Signed-off-by: Michael Holzheu Signed-off-by: Heiko Carstens --- arch/s390/kernel/reipl64.S | 80 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 78eb7cf..e690975 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp 2000,2009 + * Copyright IBM Corp 2000,2011 * Author(s): Holger Smolinski , * Denis Joseph Barrow, */ @@ -8,6 +8,64 @@ #include # +# store_status +# +# Prerequisites to run this function: +# - Prefix register is set to zero +# - Original prefix register is stored in "dump_prefix_page" +# - Lowcore protection is off +# +ENTRY(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_64(%r0) + lghi %r1,SAVE_AREA_BASE + /* General purpose registers */ + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lg %r2,__LC_SAVE_AREA_64(%r0) + stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Access registers */ + stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point registers */ + std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point control register */ + stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* CPU timer */ + stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Saved prefix register */ + larl %r2,dump_prefix_page + mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + /* Clock comparator - seven bytes */ + larl %r2,.Lclkcmp + stckc 0(%r2) + mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + /* Program status word */ + epsw %r2,%r3 + st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) + st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) + larl %r2,store_status + stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) + br %r14 +.align 8 +.Lclkcmp: .quad 0x0000000000000000 + +# # do_reipl_asm # Parameter: r2 = schid of reipl device # @@ -15,22 +73,7 @@ ENTRY(do_reipl_asm) basr %r13,0 .Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: # do store status of all registers - - stg %r1,.Lregsave-.Lpg0(%r13) - lghi %r1,0x1000 - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) - lg %r0,.Lregsave-.Lpg0(%r13) - stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1) - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1) - stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1) - lg %r10,.Ldump_pfx-.Lpg0(%r13) - mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10) - stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) - stckc .Lclkcmp-.Lpg0(%r13) - mvc 
__LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13) - stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) - stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) +.Lpg1: brasl %r14,store_status lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 @@ -67,10 +110,7 @@ ENTRY(do_reipl_asm) st %r14,.Ldispsw+12-.Lpg0(%r13) lpswe .Ldispsw-.Lpg0(%r13) .align 8 -.Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 -.Ldump_pfx: .quad dump_prefix_page -.Lregsave: .quad 0x0000000000000000 .align 16 /* * These addresses have to be 31 bit otherwise -- cgit v1.1 From 9dc7356ee1266d6a69fc80f28d006f71c20bf172 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 3 Aug 2011 16:44:22 +0200 Subject: [S390] Use diagnose 308 for system reset The diagnose 308 call is the prefered method for clearing all ongoing I/O. Therefore if it is available we use it instead of doing a manual reset. Signed-off-by: Michael Holzheu Signed-off-by: Heiko Carstens --- arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/base.S | 36 ++++++++++++++++++++++++++++++++++++ arch/s390/kernel/ipl.c | 6 ++++++ 3 files changed, 43 insertions(+) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 5e95d95..97cc440 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -167,5 +167,6 @@ enum diag308_rc { }; extern int diag308(unsigned long subcode, void *addr); +extern void diag308_reset(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 209938c..2554356 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -76,6 +76,42 @@ s390_base_pgm_handler_fn: .quad 0 .previous +# +# Calls diag 308 subcode 1 and continues execution +# +# The following conditions must be ensured before calling this function: +# * Prefix register = 0 +# * Lowcore protection is disabled +# +ENTRY(diag308_reset) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 + lghi %r3,0 + lg %r4,0(%r4) # Save PSW + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + diag %r1,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,0x12 # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + br %r14 +.align 16 +.Lrestart_psw: + .long 0x00080000,0x80000000 + .Lrestart_part2 + + .section .bss +.align 8 +.Lctlregs: + .rept 16 + .quad 0 + .endr + .previous + #else /* CONFIG_64BIT */ ENTRY(s390_base_mcck_handler) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 059c590..04361d5 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1996,6 +1996,12 @@ static void do_reset_calls(void) { struct reset_call *reset; +#ifdef CONFIG_64BIT + if (diag308_set_works) { + diag308_reset(); + return; + } +#endif list_for_each_entry(reset, &rcall, list) reset->fn(); } -- cgit v1.1 From 34cd551a31cd4e35cd3f9ed9f13bb46b4ee98508 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 3 Aug 2011 16:44:23 +0200 Subject: [S390] dasd: check if raw track access is supported To use raw track access some special storage server commands are needed. Older storage hardware may not support these commands. So check if raw track access is possible while setting the DASD online. 
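Condensed, the test added below has the following shape (taken from the hunk itself and reproduced here only to highlight the facility bit that matters; the real code unwinds via its error path rather than returning directly):

	if ((device->features & DASD_FEATURE_USERAW) &&
	    !private->rdc_data.facilities.RT_in_LR) {
		/* raw-track access was requested but the storage server
		 * does not implement the required commands */
		return -EINVAL;
	}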
Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- drivers/s390/block/dasd_eckd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 30fb979..6e835c9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1461,6 +1461,15 @@ dasd_eckd_check_characteristics(struct dasd_device *device) "Read device characteristic failed, rc=%d", rc); goto out_err3; } + + if ((device->features & DASD_FEATURE_USERAW) && + !(private->rdc_data.facilities.RT_in_LR)) { + dev_err(&device->cdev->dev, "The storage server does not " + "support raw-track access\n"); + rc = -EINVAL; + goto out_err3; + } + /* find the valid cylinder size */ if (private->rdc_data.no_cyl == LV_COMPAT_CYL && private->rdc_data.long_no_cyl) -- cgit v1.1 From 3a81b171429113ae0af2920f5d4ecfe571230627 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Aug 2011 16:44:24 +0200 Subject: [S390] Add support for IBM zEnterprise 114 Just fix up the Kconfig description and the elf platform. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 8 ++++---- arch/s390/kernel/setup.c | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c03fef7..17dbb43 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -273,11 +273,11 @@ config MARCH_Z10 on older machines. config MARCH_Z196 - bool "IBM zEnterprise 196" + bool "IBM zEnterprise 114 and 196" help - Select this to enable optimizations for IBM zEnterprise 196 - (2817 series). The kernel will be slightly faster but will not work - on older machines. + Select this to enable optimizations for IBM zEnterprise 114 and 196 + (2818 and 2817 series). The kernel will be slightly faster but will + not work on older machines. endchoice diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 18640ff..7b371c3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -752,6 +752,7 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z10"); break; case 0x2817: + case 0x2818: strcpy(elf_platform, "z196"); break; } -- cgit v1.1 From 7a0e42f168337d4af8fe230c1043cb04786c04c2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Aug 2011 16:44:25 +0200 Subject: [S390] asm offsets: fix coding style Because of readability reasons we ignore the 80 character line limit in asm offsets. Just one line per define, nothing else. 
Signed-off-by: Heiko Carstens --- arch/s390/kernel/asm-offsets.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 199a537..532fd43 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -27,12 +27,9 @@ int main(void) BLANK(); DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__THREAD_per_cause, - offsetof(struct task_struct, thread.per_event.cause)); - DEFINE(__THREAD_per_address, - offsetof(struct task_struct, thread.per_event.address)); - DEFINE(__THREAD_per_paid, - offsetof(struct task_struct, thread.per_event.paid)); + DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); BLANK(); DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); -- cgit v1.1 From 391c62feb1798b6d31bd88076eae649b091ad8bf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Aug 2011 16:44:26 +0200 Subject: [S390] signal: convert to use set_current_blocked() Convert to use set_current_blocked() like x86. Signed-off-by: Heiko Carstens --- arch/s390/kernel/compat_signal.c | 43 ++++++++--------------------- arch/s390/kernel/signal.c | 59 ++++++++++++---------------------------- 2 files changed, 30 insertions(+), 72 deletions(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index eee9998..a9a285b 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -380,20 +380,13 @@ asmlinkage long sys32_sigreturn(void) goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; if (restore_sigregs_gprs_high(regs, frame->gprs_high)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -413,31 +406,22 @@ asmlinkage long sys32_rt_sigreturn(void) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_gprs_high(regs, frame->gprs_high)) goto badframe; - err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); st.ss_sp = compat_ptr(ss_sp); err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); if (err) goto badframe; - set_fs (KERNEL_DS); do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]); set_fs (old_fs); - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -605,10 +589,10 @@ give_sigsegv: * OK, we're invoking a handler */ -int -handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int ret; /* Set up the 
stack frame */ @@ -616,15 +600,12 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame32(sig, ka, info, oldset, regs); else ret = setup_frame32(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - return ret; + if (ret) + return ret; + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, sig); + set_current_blocked(&blocked); + return 0; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index abbb3c3..38a88b6 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -57,17 +57,15 @@ typedef struct */ SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) { - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + sigset_t blocked; + current->saved_sigmask = current->blocked; + mask &= _BLOCKABLE; + siginitset(&blocked, mask); + set_current_blocked(&blocked); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_thread_flag(TIF_RESTORE_SIGMASK); - return -ERESTARTNOHAND; } @@ -172,18 +170,11 @@ SYSCALL_DEFINE0(sigreturn) goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -199,21 +190,14 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->gprs[15]) == -EFAULT) goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -385,14 +369,11 @@ give_sigsegv: return -EFAULT; } -/* - * OK, we're invoking a handler - */ - -static int -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +static int handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) { + sigset_t blocked; int ret; /* Set up the stack frame */ @@ -400,17 +381,13 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); else ret = setup_frame(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return ret; + if (ret) + return ret; + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, sig); + set_current_blocked(&blocked); + return 0; } /* -- cgit v1.1 From 
cc34321d5806b7919531a1d951bb6deb62c163c9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Aug 2011 16:44:27 +0200 Subject: [S390] cpu hotplug: on cpu start wait until being marked active This is the same as fd8a7de1 "x86: cpu-hotplug: Prevent softirq wakeup on wrong CPU". Unlike on x86 this doesn't fix a bug on s390 since we do not have threaded interrupt handlers. However we want to keep the same initialization order like on x86. This should prevent bugs caused by code which assumes (and relies on) the init order is the same on each architecture. Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 70f4b96..86371fd 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -473,7 +473,12 @@ int __cpuinit start_secondary(void *cpuvoid) S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ - /* Switch on interrupts */ + /* + * Wait until the cpu which brought this one up marked it + * active before enabling interrupts. + */ + while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) + cpu_relax(); local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); -- cgit v1.1 From ada5ed5484f492e4eb13d788c6c077cf243f53e6 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 3 Aug 2011 16:44:28 +0200 Subject: [S390] exec: remove redundant set_fs(USER_DS) The address limit is already set in flush_old_exec() so those calls to set_fs(USER_DS) are redundant. Signed-off-by: Mathias Krause Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 55dfcc8..a4b6229 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -119,14 +119,12 @@ struct stack_frame { * Do necessary setup to start up a new thread. */ #define start_thread(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ regs->psw.mask = psw_user_bits; \ regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ regs->gprs[15] = new_stackp; \ } while (0) #define start_thread31(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ regs->psw.mask = psw_user32_bits; \ regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ regs->gprs[15] = new_stackp; \ -- cgit v1.1 From d5f4d113cb9dac182ba44eb605ec4a2f1fdfd3ec Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Wed, 3 Aug 2011 16:44:29 +0200 Subject: [S390] sclp_async: Use kstrtoul_from_user This patch replaces the code for getting an unsigned long from a userspace buffer by a simple call to kstroul_from_user. This makes it easier to read and less error prone. 
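A minimal sketch of the resulting pattern (the wrapper name is hypothetical): kstrtoul_from_user() performs the bounded copy from user space, NUL termination and parsing that previously required copy_from_user() plus strict_strtoul().

#include <linux/kernel.h>
#include <linux/uaccess.h>

/* Parse a 0/1 flag written by user space; sketch only. */
static int parse_user_flag(const char __user *buffer, size_t count,
			   unsigned long *val)
{
	int rc;

	rc = kstrtoul_from_user(buffer, count, 0, val);	/* base 0 = auto-detect */
	if (rc)
		return rc;	/* -EFAULT or -EINVAL from the helper */
	if (*val != 0 && *val != 1)
		return -EINVAL;	/* mirror the driver's 0/1 restriction */
	return 0;
}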
Signed-off-by: Peter Huewe Signed-off-by: Heiko Carstens --- drivers/s390/char/sclp_async.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index 7ad30e7..5f9f929 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -82,12 +82,9 @@ static int proc_handler_callhome(struct ctl_table *ctl, int write, return -EFAULT; } else { len = *count; - rc = copy_from_user(buf, buffer, sizeof(buf)); - if (rc != 0) - return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - if (strict_strtoul(buf, 0, &val) != 0) - return -EINVAL; + rc = kstrtoul_from_user(buffer, len, 0, &val); + if (rc) + return rc; if (val != 0 && val != 1) return -EINVAL; callhome_enabled = val; -- cgit v1.1 From af6df871ba87ff93739babb16d26160c37c43183 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Wed, 3 Aug 2011 16:44:30 +0200 Subject: [S390] qdio: Use kstrtoul_from_user This patch replaces the code for getting an unsigned long from a userspace buffer by a simple call to kstroul_from_user. This makes it easier to read and less error prone. Signed-off-by: Peter Huewe Acked-by: Jan Glauber Signed-off-by: Heiko Carstens --- drivers/s390/cio/qdio_debug.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index f8b03a6..0e615cb 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -188,19 +188,13 @@ static ssize_t qperf_seq_write(struct file *file, const char __user *ubuf, struct qdio_irq *irq_ptr = seq->private; struct qdio_q *q; unsigned long val; - char buf[8]; int ret, i; if (!irq_ptr) return 0; - if (count >= sizeof(buf)) - return -EINVAL; - if (copy_from_user(&buf, ubuf, count)) - return -EFAULT; - buf[count] = 0; - - ret = strict_strtoul(buf, 10, &val); - if (ret < 0) + + ret = kstrtoul_from_user(ubuf, count, 10, &val); + if (ret) return ret; switch (val) { -- cgit v1.1 From b7f275042f5d69b6c31975341ce5f06cf2aff95f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Aug 2011 16:44:31 +0200 Subject: [S390] smp: remove pointless comments in startup_secondary() Remove pointless comments in startup_secondary(). There is not too much value in having comments like e.g. "call cpu notifiers" just before a call to notify_cpu*(). Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 86371fd..6ab16ac 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -452,19 +452,13 @@ out: */ int __cpuinit start_secondary(void *cpuvoid) { - /* Setup the cpu */ cpu_init(); preempt_disable(); - /* Enable TOD clock interrupts on the secondary cpu. */ init_cpu_timer(); - /* Enable cpu timer interrupts on the secondary cpu. */ init_cpu_vtimer(); - /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); - /* call cpu notifiers */ notify_cpu_starting(smp_processor_id()); - /* Mark this cpu as online */ ipi_call_lock(); set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); -- cgit v1.1 From 9e8ed3ae924b65ab5f088fe63ee6f4326f04590f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Aug 2011 16:44:32 +0200 Subject: [S390] signal: use set_restore_sigmask() helper We should call set_restore_sigmask() instead of directly setting TIF_RESTORE_SIGMASK. This change should have been done three years earlier... see 4e4c22 "signals: add set_restore_sigmask". 
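For context, the tail of sys_sigsuspend after this series looks like the sketch below (combining the set_current_blocked() conversion earlier in this series with this change); only the set_restore_sigmask() line is new here.

	set_current_blocked(&blocked);
	set_current_state(TASK_INTERRUPTIBLE);
	schedule();
	set_restore_sigmask();	/* instead of set_thread_flag(TIF_RESTORE_SIGMASK) */
	return -ERESTARTNOHAND;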
Signed-off-by: Heiko Carstens --- arch/s390/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 38a88b6..9a40e1c 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -65,7 +65,7 @@ SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) set_current_blocked(&blocked); set_current_state(TASK_INTERRUPTIBLE); schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); return -ERESTARTNOHAND; } -- cgit v1.1 From df013ffb8119c89f062ab05b7f544704315db47b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:22 +0800 Subject: Add Kconfig option ARCH_HAVE_NMI_SAFE_CMPXCHG cmpxchg() is widely used by lockless code, including NMI-safe lockless code. But on some architectures, the cmpxchg() implementation is not NMI-safe, on these architectures the lockless code may need a spin_trylock_irqsave() based implementation. This patch adds a Kconfig option: ARCH_HAVE_NMI_SAFE_CMPXCHG, so that NMI-safe lockless code can depend on it or provide different implementation according to it. On many architectures, cmpxchg is only NMI-safe for several specific operand sizes. So, ARCH_HAVE_NMI_SAFE_CMPXCHG define in this patch only guarantees cmpxchg is NMI-safe for sizeof(unsigned long). Signed-off-by: Huang Ying Acked-by: Mike Frysinger Acked-by: Paul Mundt Acked-by: Hans-Christian Egtvedt Acked-by: Benjamin Herrenschmidt Acked-by: Chris Metcalf Acked-by: Richard Henderson CC: Mikael Starvik Acked-by: David Howells CC: Yoshinori Sato CC: Tony Luck CC: Hirokazu Takata CC: Geert Uytterhoeven CC: Michal Simek Acked-by: Ralf Baechle CC: Kyle McMartin CC: Martin Schwidefsky CC: Chen Liqin CC: "David S. Miller" CC: Ingo Molnar CC: Chris Zankel Signed-off-by: Len Brown --- arch/Kconfig | 3 +++ arch/alpha/Kconfig | 1 + arch/avr32/Kconfig | 1 + arch/frv/Kconfig | 1 + arch/ia64/Kconfig | 1 + arch/m68k/Kconfig | 1 + arch/parisc/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/tile/Kconfig | 1 + arch/x86/Kconfig | 1 + 13 files changed, 15 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 26b0e23..4b0669c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX config HAVE_RCU_TABLE_FREE bool +config ARCH_HAVE_NMI_SAFE_CMPXCHG + bool + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 60219bf..23f8d8c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -13,6 +13,7 @@ config ALPHA select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_HAVE_NMI_SAFE_CMPXCHG help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index e9d689b..197e96f 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -10,6 +10,7 @@ config AVR32 select GENERIC_IRQ_PROBE select HARDIRQS_SW_RESEND select GENERIC_IRQ_SHOW + select ARCH_HAVE_NMI_SAFE_CMPXCHG help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index cb884e4..bad27a6 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,6 +7,7 @@ config FRV select HAVE_PERF_EVENTS select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW + select ARCH_HAVE_NMI_SAFE_CMPXCHG config 
ZONE_DMA bool diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 38280ef..b1227dd 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -27,6 +27,7 @@ config IA64 select GENERIC_PENDING_IRQ if SMP select IRQ_PER_CPU select GENERIC_IRQ_SHOW + select ARCH_HAVE_NMI_SAFE_CMPXCHG default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d66e34c..effe8f0 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -6,6 +6,7 @@ config M68K select GENERIC_ATOMIC64 if MMU select HAVE_GENERIC_HARDIRQS if !MMU select GENERIC_IRQ_SHOW if !MMU + select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 65adc86..e077b0b 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -15,6 +15,7 @@ config PARISC select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_PROBE select IRQ_PER_CPU + select ARCH_HAVE_NMI_SAFE_CMPXCHG help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2729c66..e55f754 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -134,6 +134,7 @@ config PPC select GENERIC_IRQ_SHOW_LEVEL select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS + select ARCH_HAVE_NMI_SAFE_CMPXCHG config EARLY_PRINTK bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c03fef7..0f98bbd 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -81,6 +81,7 @@ config S390 select INIT_ALL_POSSIBLE select HAVE_IRQ_WORK select HAVE_PERF_EVENTS + select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index bbdeb48..66405eb 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,6 +11,7 @@ config SUPERH select HAVE_DMA_ATTRS select HAVE_IRQ_WORK select HAVE_PERF_EVENTS + select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select PERF_USE_VMALLOC select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 253986b..a63cddf 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -53,6 +53,7 @@ config SPARC64 select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select IRQ_PREFLOW_FASTEOI + select ARCH_HAVE_NMI_SAFE_CMPXCHG config ARCH_DEFCONFIG string diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 0249b8b..b30f71a 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -12,6 +12,7 @@ config TILE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select SYS_HYPERVISOR + select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386 # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index da34972..a680a60 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -70,6 +70,7 @@ config X86 select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if (X86_64 && NET) + select ARCH_HAVE_NMI_SAFE_CMPXCHG config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) -- cgit v1.1 From f49f23abf3dd786ddcac1c1e7db3c2013b07413f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:23 +0800 Subject: lib, Add lock-less NULL terminated single list Cmpxchg is used to implement adding new entry to the list, deleting all entries from the list, deleting first entry of the list and some other operations. Because this is a single list, so the tail can not be accessed in O(1). 
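As a usage sketch (the work-item type and function names are hypothetical; the list API is the one added by this patch), a typical multi-producer, single-consumer setup looks like this:

#include <linux/kernel.h>
#include <linux/llist.h>

struct my_work {
	int payload;
	struct llist_node node;
};

static LLIST_HEAD(work_list);

/* Producers: may run concurrently, even from NMI context on architectures
 * that select ARCH_HAVE_NMI_SAFE_CMPXCHG. */
static void queue_work_item(struct my_work *w)
{
	llist_add(&w->node, &work_list);
}

/* Single consumer: detach the whole list at once and walk it; entries are
 * visited from the newest to the oldest added one. */
static void drain_work_items(void)
{
	struct llist_node *first = llist_del_all(&work_list);
	struct my_work *w;

	llist_for_each_entry(w, first, node)
		pr_info("work item %d\n", w->payload);
}

The rules for which combinations of operations need locking are summarized next.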
If there are multiple producers and multiple consumers, llist_add can be used in producers and llist_del_all can be used in consumers. They can work simultaneously without lock. But llist_del_first can not be used here. Because llist_del_first depends on list->first->next does not changed if list->first is not changed during its operation, but llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, llist_add) sequence in another consumer may violate that. If there are multiple producers and one consumer, llist_add can be used in producers and llist_del_all or llist_del_first can be used in the consumer. This can be summarized as follow: | add | del_first | del_all add | - | - | - del_first | | L | L del_all | | | - Where "-" stands for no lock is needed, while "L" stands for lock is needed. The list entries deleted via llist_del_all can be traversed with traversing function such as llist_for_each etc. But the list entries can not be traversed safely before deleted from the list. The order of deleted entries is from the newest to the oldest added one. If you want to traverse from the oldest to the newest, you must reverse the order by yourself before traversing. The basic atomic operation of this list is cmpxchg on long. On architectures that don't have NMI-safe cmpxchg implementation, the list can NOT be used in NMI handler. So code uses the list in NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Mathieu Desnoyers Cc: Andrew Morton Signed-off-by: Len Brown --- include/linux/llist.h | 126 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig | 3 ++ lib/Makefile | 2 + lib/llist.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 260 insertions(+) create mode 100644 include/linux/llist.h create mode 100644 lib/llist.c diff --git a/include/linux/llist.h b/include/linux/llist.h new file mode 100644 index 0000000..aa0c8b5 --- /dev/null +++ b/include/linux/llist.h @@ -0,0 +1,126 @@ +#ifndef LLIST_H +#define LLIST_H +/* + * Lock-less NULL terminated single linked list + * + * If there are multiple producers and multiple consumers, llist_add + * can be used in producers and llist_del_all can be used in + * consumers. They can work simultaneously without lock. But + * llist_del_first can not be used here. Because llist_del_first + * depends on list->first->next does not changed if list->first is not + * changed during its operation, but llist_del_first, llist_add, + * llist_add (or llist_del_all, llist_add, llist_add) sequence in + * another consumer may violate that. + * + * If there are multiple producers and one consumer, llist_add can be + * used in producers and llist_del_all or llist_del_first can be used + * in the consumer. + * + * This can be summarized as follow: + * + * | add | del_first | del_all + * add | - | - | - + * del_first | | L | L + * del_all | | | - + * + * Where "-" stands for no lock is needed, while "L" stands for lock + * is needed. + * + * The list entries deleted via llist_del_all can be traversed with + * traversing function such as llist_for_each etc. But the list + * entries can not be traversed safely before deleted from the list. + * The order of deleted entries is from the newest to the oldest added + * one. If you want to traverse from the oldest to the newest, you + * must reverse the order by yourself before traversing. + * + * The basic atomic operation of this list is cmpxchg on long. 
On + * architectures that don't have NMI-safe cmpxchg implementation, the + * list can NOT be used in NMI handler. So code uses the list in NMI + * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + */ + +struct llist_head { + struct llist_node *first; +}; + +struct llist_node { + struct llist_node *next; +}; + +#define LLIST_HEAD_INIT(name) { NULL } +#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) + +/** + * init_llist_head - initialize lock-less list head + * @head: the head for your lock-less list + */ +static inline void init_llist_head(struct llist_head *list) +{ + list->first = NULL; +} + +/** + * llist_entry - get the struct of this entry + * @ptr: the &struct llist_node pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the llist_node within the struct. + */ +#define llist_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * llist_for_each - iterate over some deleted entries of a lock-less list + * @pos: the &struct llist_node to use as a loop cursor + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each(pos, node) \ + for ((pos) = (node); pos; (pos) = (pos)->next) + +/** + * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type + * @pos: the type * to use as a loop cursor. + * @node: the fist entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry(pos, node, member) \ + for ((pos) = llist_entry((node), typeof(*(pos)), member); \ + &(pos)->member != NULL; \ + (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) + +/** + * llist_empty - tests whether a lock-less list is empty + * @head: the list to test + * + * Not guaranteed to be accurate or up to date. Just a quick way to + * test whether the list is empty without deleting something from the + * list. + */ +static inline int llist_empty(const struct llist_head *head) +{ + return ACCESS_ONCE(head->first) == NULL; +} + +void llist_add(struct llist_node *new, struct llist_head *head); +void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, + struct llist_head *head); +struct llist_node *llist_del_first(struct llist_head *head); +struct llist_node *llist_del_all(struct llist_head *head); +#endif /* LLIST_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 830181c..25c1967 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -262,4 +262,7 @@ config AVERAGE If unsure, say N. 
+config LLIST + bool + endmenu diff --git a/lib/Makefile b/lib/Makefile index 6b597fd..d770b81 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -112,6 +112,8 @@ obj-$(CONFIG_AVERAGE) += average.o obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o +obj-$(CONFIG_LLIST) += llist.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/llist.c b/lib/llist.c new file mode 100644 index 0000000..da44572 --- /dev/null +++ b/lib/llist.c @@ -0,0 +1,129 @@ +/* + * Lock-less NULL terminated single linked list + * + * The basic atomic operation of this list is cmpxchg on long. On + * architectures that don't have NMI-safe cmpxchg implementation, the + * list can NOT be used in NMI handler. So code uses the list in NMI + * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +#include + +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + */ +void llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add); + +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + */ +void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new_last->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add_batch); + +/** + * llist_del_first - delete the first entry of lock-less list + * @head: the head for your lock-less list + * + * If list is empty, return NULL, otherwise, return the first entry + * deleted, this is the newest added one. + * + * Only one llist_del_first user can be used simultaneously with + * multiple llist_add users without lock. Because otherwise + * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, + * llist_add) sequence in another user may change @head->first->next, + * but keep @head->first. If multiple consumers are needed, please + * use llist_del_all or use lock between consumers. 
+ */ +struct llist_node *llist_del_first(struct llist_head *head) +{ + struct llist_node *entry, *old_entry, *next; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + if (entry == NULL) + return NULL; + old_entry = entry; + next = entry->next; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + + return entry; +} +EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. + */ +struct llist_node *llist_del_all(struct llist_head *head) +{ +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + return xchg(&head->first, NULL); +} +EXPORT_SYMBOL_GPL(llist_del_all); -- cgit v1.1 From 7f184275aa306046fe7edcbef3229754f0d97402 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:24 +0800 Subject: lib, Make gen_pool memory allocator lockless This version of the gen_pool memory allocator supports lockless operation. This makes it safe to use in NMI handlers and other special unblockable contexts that could otherwise deadlock on locks. This is implemented by using atomic operations and retries on any conflicts. The disadvantage is that there may be livelocks in extreme cases. For better scalability, one gen_pool allocator can be used for each CPU. The lockless operation only works if there is enough memory available. If new memory is added to the pool a lock has to be still taken. So any user relying on locklessness has to ensure that sufficient memory is preallocated. The basic atomic operation of this allocator is cmpxchg on long. On architectures that don't have NMI-safe cmpxchg implementation, the allocator can NOT be used in NMI handler. So code uses the allocator in NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. Signed-off-by: Huang Ying Reviewed-by: Andi Kleen Reviewed-by: Mathieu Desnoyers Cc: Andrew Morton Signed-off-by: Len Brown --- include/linux/bitmap.h | 1 + include/linux/genalloc.h | 34 +++++- lib/bitmap.c | 2 - lib/genalloc.c | 300 ++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 272 insertions(+), 65 deletions(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index dcafe0b..907dd58 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -145,6 +145,7 @@ extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ ((nbits) % BITS_PER_LONG) ? \ diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 5bbebda..5e98eeb 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -1,8 +1,26 @@ /* - * Basic general purpose allocator for managing special purpose memory - * not managed by the regular kmalloc/kfree interface. - * Uses for this includes on-device special memory, uncached memory - * etc. + * Basic general purpose allocator for managing special purpose + * memory, for example, memory that is not managed by the regular + * kmalloc/kfree interface. 
Uses for this includes on-device special + * memory, uncached memory etc. + * + * It is safe to use the allocator in NMI handlers and other special + * unblockable contexts that could otherwise deadlock on locks. This + * is implemented by using atomic operations and retries on any + * conflicts. The disadvantage is that there may be livelocks in + * extreme cases. For better scalability, one allocator can be used + * for each CPU. + * + * The lockless operation only works if there is enough memory + * available. If new memory is added to the pool a lock has to be + * still taken. So any user relying on locklessness has to ensure + * that sufficient memory is preallocated. + * + * The basic atomic operation of this allocator is cmpxchg on long. + * On architectures that don't have NMI-safe cmpxchg implementation, + * the allocator can NOT be used in NMI handler. So code uses the + * allocator in NMI handler should depend on + * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. @@ -15,7 +33,7 @@ * General purpose special memory pool descriptor. */ struct gen_pool { - rwlock_t lock; + spinlock_t lock; struct list_head chunks; /* list of chunks in this pool */ int min_alloc_order; /* minimum allocation order */ }; @@ -24,8 +42,8 @@ struct gen_pool { * General purpose special memory pool chunk descriptor. */ struct gen_pool_chunk { - spinlock_t lock; struct list_head next_chunk; /* next chunk in pool */ + atomic_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* starting address of memory chunk */ unsigned long end_addr; /* ending address of memory chunk */ @@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); +extern void gen_pool_for_each_chunk(struct gen_pool *, + void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); +extern size_t gen_pool_avail(struct gen_pool *); +extern size_t gen_pool_size(struct gen_pool *); #endif /* __GENALLOC_H__ */ diff --git a/lib/bitmap.c b/lib/bitmap.c index 3f3b681..e3c9e99 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) - void bitmap_set(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); diff --git a/lib/genalloc.c b/lib/genalloc.c index 577ddf8..f352cc4 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -1,8 +1,26 @@ /* - * Basic general purpose allocator for managing special purpose memory - * not managed by the regular kmalloc/kfree interface. - * Uses for this includes on-device special memory, uncached memory - * etc. + * Basic general purpose allocator for managing special purpose + * memory, for example, memory that is not managed by the regular + * kmalloc/kfree interface. Uses for this includes on-device special + * memory, uncached memory etc. + * + * It is safe to use the allocator in NMI handlers and other special + * unblockable contexts that could otherwise deadlock on locks. This + * is implemented by using atomic operations and retries on any + * conflicts. The disadvantage is that there may be livelocks in + * extreme cases. 
For better scalability, one allocator can be used + * for each CPU. + * + * The lockless operation only works if there is enough memory + * available. If new memory is added to the pool a lock has to be + * still taken. So any user relying on locklessness has to ensure + * that sufficient memory is preallocated. + * + * The basic atomic operation of this allocator is cmpxchg on long. + * On architectures that don't have NMI-safe cmpxchg implementation, + * the allocator can NOT be used in NMI handler. So code uses the + * allocator in NMI handler should depend on + * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2005 (C) Jes Sorensen * @@ -13,8 +31,109 @@ #include #include #include +#include +#include #include +static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if (val & mask_to_set) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); + + return 0; +} + +static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if ((val & mask_to_clear) != mask_to_clear) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); + + return 0; +} + +/* + * bitmap_set_ll - set the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Set @nr bits start from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without lock. If two + * users set the same bit, one user will return remain bits, otherwise + * return 0. + */ +static int bitmap_set_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + if (set_bits_ll(p, mask_to_set)) + return nr; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + if (set_bits_ll(p, mask_to_set)) + return nr; + } + + return 0; +} + +/* + * bitmap_clear_ll - clear the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Clear @nr bits start from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without lock. If two + * users clear the same bit, one user will return remain bits, + * otherwise return 0. 
+ */ +static int bitmap_clear_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + if (clear_bits_ll(p, mask_to_clear)) + return nr; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + if (clear_bits_ll(p, mask_to_clear)) + return nr; + } + + return 0; +} /** * gen_pool_create - create a new special memory pool @@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); if (pool != NULL) { - rwlock_init(&pool->lock); + spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->chunks); pool->min_alloc_order = min_alloc_order; } @@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy if (unlikely(chunk == NULL)) return -ENOMEM; - spin_lock_init(&chunk->lock); chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size; + atomic_set(&chunk->avail, size); - write_lock(&pool->lock); - list_add(&chunk->next_chunk, &pool->chunks); - write_unlock(&pool->lock); + spin_lock(&pool->lock); + list_add_rcu(&chunk->next_chunk, &pool->chunks); + spin_unlock(&pool->lock); return 0; } @@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt); */ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; + phys_addr_t paddr = -1; - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); - - if (addr >= chunk->start_addr && addr < chunk->end_addr) - return chunk->phys_addr + addr - chunk->start_addr; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + paddr = chunk->phys_addr + (addr - chunk->start_addr); + break; + } } - read_unlock(&pool->lock); + rcu_read_unlock(); - return -1; + return paddr; } EXPORT_SYMBOL(gen_pool_virt_to_phys); @@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool) int order = pool->min_alloc_order; int bit, end_bit; - list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); list_del(&chunk->next_chunk); @@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy); * @size: number of bytes to allocate from the pool * * Allocate the requested number of bytes from the specified pool. - * Uses a first-fit algorithm. + * Uses a first-fit algorithm. Can not be used in NMI handler on + * architectures without NMI-safe cmpxchg implementation. 
*/ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long addr, flags; + unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit; + int nbits, start_bit = 0, end_bit, remain; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif if (size == 0) return 0; nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (size > atomic_read(&chunk->avail)) + continue; end_bit = (chunk->end_addr - chunk->start_addr) >> order; - - spin_lock_irqsave(&chunk->lock, flags); - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, - nbits, 0); - if (start_bit >= end_bit) { - spin_unlock_irqrestore(&chunk->lock, flags); +retry: + start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, + start_bit, nbits, 0); + if (start_bit >= end_bit) continue; + remain = bitmap_set_ll(chunk->bits, start_bit, nbits); + if (remain) { + remain = bitmap_clear_ll(chunk->bits, start_bit, + nbits - remain); + BUG_ON(remain); + goto retry; } addr = chunk->start_addr + ((unsigned long)start_bit << order); - - bitmap_set(chunk->bits, start_bit, nbits); - spin_unlock_irqrestore(&chunk->lock, flags); - read_unlock(&pool->lock); - return addr; + size = nbits << order; + atomic_sub(size, &chunk->avail); + break; } - read_unlock(&pool->lock); - return 0; + rcu_read_unlock(); + return addr; } EXPORT_SYMBOL(gen_pool_alloc); @@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc); * @addr: starting address of memory to free back to pool * @size: size in bytes of memory to free * - * Free previously allocated special memory back to the specified pool. + * Free previously allocated special memory back to the specified + * pool. Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. */ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long flags; int order = pool->min_alloc_order; - int bit, nbits; + int start_bit, nbits, remain; - nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + nbits = (size + (1UL << order) - 1) >> order; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { if (addr >= chunk->start_addr && addr < chunk->end_addr) { BUG_ON(addr + size > chunk->end_addr); - spin_lock_irqsave(&chunk->lock, flags); - bit = (addr - chunk->start_addr) >> order; - while (nbits--) - __clear_bit(bit++, chunk->bits); - spin_unlock_irqrestore(&chunk->lock, flags); - break; + start_bit = (addr - chunk->start_addr) >> order; + remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); + BUG_ON(remain); + size = nbits << order; + atomic_add(size, &chunk->avail); + rcu_read_unlock(); + return; } } - BUG_ON(nbits > 0); - read_unlock(&pool->lock); + rcu_read_unlock(); + BUG(); } EXPORT_SYMBOL(gen_pool_free); + +/** + * gen_pool_for_each_chunk - call func for every chunk of generic memory pool + * @pool: the generic memory pool + * @func: func to call + * @data: additional data used by @func + * + * Call @func for every chunk of generic memory pool. 
The @func is + * called with rcu_read_lock held. + */ +void gen_pool_for_each_chunk(struct gen_pool *pool, + void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), + void *data) +{ + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) + func(pool, chunk, data); + rcu_read_unlock(); +} +EXPORT_SYMBOL(gen_pool_for_each_chunk); + +/** + * gen_pool_avail - get available free space of the pool + * @pool: pool to get available free space + * + * Return available free space of the specified pool. + */ +size_t gen_pool_avail(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t avail = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + avail += atomic_read(&chunk->avail); + rcu_read_unlock(); + return avail; +} +EXPORT_SYMBOL_GPL(gen_pool_avail); + +/** + * gen_pool_size - get size in bytes of memory managed by the pool + * @pool: pool to get size + * + * Return size in bytes of memory managed by the pool. + */ +size_t gen_pool_size(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t size = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + size += chunk->end_addr - chunk->start_addr; + rcu_read_unlock(); + return size; +} +EXPORT_SYMBOL_GPL(gen_pool_size); -- cgit v1.1 From 67eb2e99076708cc790019a6a08ca3e0ae130a3a Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:25 +0800 Subject: ACPI, APEI, GHES, printk support for recoverable error via NMI Some APEI GHES recoverable errors are reported via NMI, but printk is not safe in NMI context. To solve the issue, a lock-less memory allocator is used to allocate memory in NMI handler, save the error record into the allocated memory, put the error record into a lock-less list. On the other hand, an irq_work is used to delay the operation from NMI context to IRQ context. The irq_work IRQ handler will remove nodes from lock-less list, printk the error record and do some further processing include recovery operation, then free the memory. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 2 + drivers/acpi/apei/ghes.c | 209 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 193 insertions(+), 18 deletions(-) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 3f45dde..35596ea 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -13,6 +13,8 @@ config ACPI_APEI_GHES bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select LLIST + select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b1390a6..d1a4021 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -12,7 +12,7 @@ * For more information about Generic Hardware Error Source, please * refer to ACPI Specification version 4.0, section 17.3.2.6 * - * Copyright 2010 Intel Corp. + * Copyright 2010,2011 Intel Corp. 
* Author: Huang Ying * * This program is free software; you can redistribute it and/or @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -53,6 +56,15 @@ #define GHES_PFX "GHES: " #define GHES_ESTATUS_MAX_SIZE 65536 +#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 + +#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 + +#define GHES_ESTATUS_NODE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_node) + (estatus_len)) +#define GHES_ESTATUS_FROM_NODE(estatus_node) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_node *)(estatus_node) + 1)) /* * One struct ghes is created for each generic hardware error source. @@ -77,6 +89,11 @@ struct ghes { }; }; +struct ghes_estatus_node { + struct llist_node llnode; + struct acpi_hest_generic *generic; +}; + int ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -124,6 +141,19 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +/* + * printk is not safe in NMI context. So in NMI handler, we allocate + * required memory from lock-less memory allocator + * (ghes_estatus_pool), save estatus into it, put them into lock-less + * list (ghes_estatus_llist), then delay printk into IRQ context via + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record + * required pool size by all NMI error source. + */ +static struct gen_pool *ghes_estatus_pool; +static unsigned long ghes_estatus_pool_size_request; +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -183,6 +213,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) __flush_tlb_one(vaddr); } +static int ghes_estatus_pool_init(void) +{ + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); + if (!ghes_estatus_pool) + return -ENOMEM; + return 0; +} + +static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, + struct gen_pool_chunk *chunk, + void *data) +{ + free_page(chunk->start_addr); +} + +static void ghes_estatus_pool_exit(void) +{ + gen_pool_for_each_chunk(ghes_estatus_pool, + ghes_estatus_pool_free_chunk_page, NULL); + gen_pool_destroy(ghes_estatus_pool); +} + +static int ghes_estatus_pool_expand(unsigned long len) +{ + unsigned long i, pages, size, addr; + int ret; + + ghes_estatus_pool_size_request += PAGE_ALIGN(len); + size = gen_pool_size(ghes_estatus_pool); + if (size >= ghes_estatus_pool_size_request) + return 0; + pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; + for (i = 0; i < pages; i++) { + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return -ENOMEM; + ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); + if (ret) + return ret; + } + + return 0; +} + +static void ghes_estatus_pool_shrink(unsigned long len) +{ + ghes_estatus_pool_size_request -= PAGE_ALIGN(len); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -344,13 +423,13 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(struct ghes *ghes) +static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) { int sev, processed = 0; struct acpi_hest_generic_data *gdata; - sev = ghes_severity(ghes->estatus->error_severity); - apei_estatus_for_each_section(ghes->estatus, gdata) { + sev = ghes_severity(estatus->error_severity); + 
apei_estatus_for_each_section(estatus, gdata) { #ifdef CONFIG_X86_MCE if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { @@ -363,27 +442,37 @@ static void ghes_do_proc(struct ghes *ghes) } } -static void __ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void __ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { if (pfx == NULL) { - if (ghes_severity(ghes->estatus->error_severity) <= + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) pfx = KERN_WARNING HW_ERR; else pfx = KERN_ERR HW_ERR; } printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, ghes->generic->header.source_id); - apei_estatus_print(pfx, ghes->estatus); + pfx, generic->header.source_id); + apei_estatus_print(pfx, estatus); } -static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); + struct ratelimit_state *ratelimit; - if (__ratelimit(&ratelimit)) - __ghes_print_estatus(pfx, ghes); + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) + ratelimit = &ratelimit_corrected; + else + ratelimit = &ratelimit_uncorrected; + if (__ratelimit(ratelimit)) + __ghes_print_estatus(pfx, generic, estatus); } static int ghes_proc(struct ghes *ghes) @@ -393,8 +482,8 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - ghes_print_estatus(NULL, ghes); - ghes_do_proc(ghes); + ghes_print_estatus(NULL, ghes->generic, ghes->estatus); + ghes_do_proc(ghes->estatus); out: ghes_clear_estatus(ghes); @@ -453,6 +542,40 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } +static void ghes_proc_in_irq(struct irq_work *irq_work) +{ + struct llist_node *llnode, *next, *tail = NULL; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; + u32 len, node_len; + + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_del_all(&ghes_estatus_llist); + while (llnode) { + next = llnode->next; + llnode->next = tail; + tail = llnode; + llnode = next; + } + llnode = tail; + while (llnode) { + next = llnode->next; + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + len = apei_estatus_len(estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + ghes_do_proc(estatus); + ghes_print_estatus(NULL, estatus_node->generic, estatus); + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + llnode = next; + } +} + static int ghes_notify_nmi(struct notifier_block *this, unsigned long cmd, void *data) { @@ -482,7 +605,8 @@ static int ghes_notify_nmi(struct notifier_block *this, if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); + __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, + ghes_global->estatus); /* reboot to log the error! 
*/ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; @@ -490,12 +614,31 @@ static int ghes_notify_nmi(struct notifier_block *this, } list_for_each_entry_rcu(ghes, &ghes_nmi, list) { +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + u32 len, node_len; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; +#endif if (!(ghes->flags & GHES_TO_CLEAR)) continue; - /* Do not print estatus because printk is not NMI safe */ - ghes_do_proc(ghes); +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + /* Save estatus for further processing in IRQ context */ + len = apei_estatus_len(ghes->estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, + node_len); + if (estatus_node) { + estatus_node->generic = ghes->generic; + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + memcpy(estatus, ghes->estatus, len); + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + } +#endif ghes_clear_estatus(ghes); } +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + irq_work_queue(&ghes_proc_irq_work); +#endif out: raw_spin_unlock(&ghes_nmi_lock); @@ -510,10 +653,26 @@ static struct notifier_block ghes_notifier_nmi = { .notifier_call = ghes_notify_nmi, }; +static unsigned long ghes_esource_prealloc_size( + const struct acpi_hest_generic *generic) +{ + unsigned long block_length, prealloc_records, prealloc_size; + + block_length = min_t(unsigned long, generic->error_block_length, + GHES_ESTATUS_MAX_SIZE); + prealloc_records = max_t(unsigned long, + generic->records_to_preallocate, 1); + prealloc_size = min_t(unsigned long, block_length * prealloc_records, + GHES_ESOURCE_PREALLOC_MAX_SIZE); + + return prealloc_size; +} + static int __devinit ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; + unsigned long len; int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -579,6 +738,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_die_notifier(&ghes_notifier_nmi); @@ -603,6 +764,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; + unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -633,6 +795,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) * freed after NMI handler finishes. 
*/ synchronize_rcu(); + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_shrink(len); break; default: BUG(); @@ -673,14 +837,20 @@ static int __init ghes_init(void) return -EINVAL; } + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); + rc = ghes_ioremap_init(); if (rc) goto err; - rc = platform_driver_register(&ghes_platform_driver); + rc = ghes_estatus_pool_init(); if (rc) goto err_ioremap_exit; + rc = platform_driver_register(&ghes_platform_driver); + if (rc) + goto err_pool_exit; + rc = apei_osc_setup(); if (rc == 0 && osc_sb_apei_support_acked) pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); @@ -692,6 +862,8 @@ static int __init ghes_init(void) pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); return 0; +err_pool_exit: + ghes_estatus_pool_exit(); err_ioremap_exit: ghes_ioremap_exit(); err: @@ -701,6 +873,7 @@ err: static void __exit ghes_exit(void) { platform_driver_unregister(&ghes_platform_driver); + ghes_estatus_pool_exit(); ghes_ioremap_exit(); } -- cgit v1.1 From 152cef40a808d3034e383465b3f7d6783613e458 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:26 +0800 Subject: ACPI, APEI, GHES, Error records content based throttle printk is used by GHES to report hardware errors. Ratelimit is enforced on the printk to avoid too many hardware error reports in kernel log. Because there may be thousands or even millions of corrected hardware errors during system running. Currently, a simple scheme is used. That is, the total number of hardware error reporting is ratelimited. This may cause some issues in practice. For example, there are two kinds of hardware errors occurred in system. One is corrected memory error, because the fault memory address is accessed frequently, there may be hundreds error report per-second. The other is corrected PCIe AER error, it will be reported once per-second. Because they share one ratelimit control structure, it is highly possible that only memory error is reported. To avoid the above issue, an error record content based throttle algorithm is implemented in the patch. Where after the first successful reporting, all error records that are same are throttled for some time, to let other kinds of error records have the opportunity to be reported. In above example, the memory errors will be throttled for some time, after being printked. Then the PCIe AER error will be printked successfully. 
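The scheme can be sketched independently of the GHES implementation in the diff below; none of the names here (report_cache, report_is_throttled, REPORT_WINDOW_NS) come from this patch. The idea is simply: keep a small cache of recently reported records, and suppress an incoming record that is byte-for-byte identical to a cached one while the cached copy is still inside its time window.

#include <linux/types.h>
#include <linux/string.h>
#include <linux/time.h>

#define REPORT_CACHE_SIZE	4
#define REPORT_WINDOW_NS	(10ULL * NSEC_PER_SEC)

struct report_cache_entry {
	u32 len;		/* 0 means the slot is unused */
	u64 time_in;		/* when this record was last reported */
	u8 data[512];		/* copy of the reported record */
};

static struct report_cache_entry report_cache[REPORT_CACHE_SIZE];

/* Return true if an identical record was reported within the window. */
static bool report_is_throttled(const void *rec, u32 len, u64 now)
{
	int i;

	for (i = 0; i < REPORT_CACHE_SIZE; i++) {
		struct report_cache_entry *e = &report_cache[i];

		if (e->len != len || len > sizeof(e->data))
			continue;
		if (memcmp(e->data, rec, len))
			continue;
		return now - e->time_in < REPORT_WINDOW_NS;
	}
	return false;	/* not cached: report it, then insert it */
}

The implementation below additionally allocates the cached copies from the lock-less pool, replaces cache slots RCU-safely, and picks the slot to evict by reporting frequency rather than by age alone.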
Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 184 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 177 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d1a4021..931410d 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -60,6 +60,21 @@ #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 +/* This is just an estimation for memory pool allocation */ +#define GHES_ESTATUS_CACHE_AVG_SIZE 512 + +#define GHES_ESTATUS_CACHES_SIZE 4 + +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC (10 * NSEC_PER_SEC) +/* Prevent too many caches are allocated because of RCU */ +#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) + +#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_cache) + (estatus_len)) +#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_cache *)(estatus_cache) + 1)) + #define GHES_ESTATUS_NODE_LEN(estatus_len) \ (sizeof(struct ghes_estatus_node) + (estatus_len)) #define GHES_ESTATUS_FROM_NODE(estatus_node) \ @@ -94,6 +109,14 @@ struct ghes_estatus_node { struct acpi_hest_generic *generic; }; +struct ghes_estatus_cache { + u32 estatus_len; + atomic_t count; + struct acpi_hest_generic *generic; + unsigned long long time_in; + struct rcu_head rcu; +}; + int ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -154,6 +177,9 @@ static unsigned long ghes_estatus_pool_size_request; static struct llist_head ghes_estatus_llist; static struct irq_work ghes_proc_irq_work; +struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static atomic_t ghes_estatus_cache_alloced; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -458,9 +484,9 @@ static void __ghes_print_estatus(const char *pfx, apei_estatus_print(pfx, estatus); } -static void ghes_print_estatus(const char *pfx, - const struct acpi_hest_generic *generic, - const struct acpi_hest_generic_status *estatus) +static int ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { /* Not more than 2 messages every 5 seconds */ static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); @@ -471,8 +497,137 @@ static void ghes_print_estatus(const char *pfx, ratelimit = &ratelimit_corrected; else ratelimit = &ratelimit_uncorrected; - if (__ratelimit(ratelimit)) + if (__ratelimit(ratelimit)) { __ghes_print_estatus(pfx, generic, estatus); + return 1; + } + return 0; +} + +/* + * GHES error status reporting throttle, to report more kinds of + * errors, instead of just most frequently occurred errors. 
+ */ +static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) +{ + u32 len; + int i, cached = 0; + unsigned long long now; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + len = apei_estatus_len(estatus); + rcu_read_lock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) + continue; + if (len != cache->estatus_len) + continue; + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + if (memcmp(estatus, cache_estatus, len)) + continue; + atomic_inc(&cache->count); + now = sched_clock(); + if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) + cached = 1; + break; + } + rcu_read_unlock(); + return cached; +} + +static struct ghes_estatus_cache *ghes_estatus_cache_alloc( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int alloced; + u32 len, cache_len; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); + if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + len = apei_estatus_len(estatus); + cache_len = GHES_ESTATUS_CACHE_LEN(len); + cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); + if (!cache) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + memcpy(cache_estatus, estatus, len); + cache->estatus_len = len; + atomic_set(&cache->count, 0); + cache->generic = generic; + cache->time_in = sched_clock(); + return cache; +} + +static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) +{ + u32 len; + + len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); + len = GHES_ESTATUS_CACHE_LEN(len); + gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); + atomic_dec(&ghes_estatus_cache_alloced); +} + +static void ghes_estatus_cache_rcu_free(struct rcu_head *head) +{ + struct ghes_estatus_cache *cache; + + cache = container_of(head, struct ghes_estatus_cache, rcu); + ghes_estatus_cache_free(cache); +} + +static void ghes_estatus_cache_add( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int i, slot = -1, count; + unsigned long long now, duration, period, max_period = 0; + struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; + + new_cache = ghes_estatus_cache_alloc(generic, estatus); + if (new_cache == NULL) + return; + rcu_read_lock(); + now = sched_clock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) { + slot = i; + slot_cache = NULL; + break; + } + duration = now - cache->time_in; + if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { + slot = i; + slot_cache = cache; + break; + } + count = atomic_read(&cache->count); + period = duration / (count + 1); + if (period > max_period) { + max_period = period; + slot = i; + slot_cache = cache; + } + } + /* new_cache must be put into array after its contents are written */ + smp_wmb(); + if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, + slot_cache, new_cache) == slot_cache) { + if (slot_cache) + call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); + } else + ghes_estatus_cache_free(new_cache); + rcu_read_unlock(); } static int ghes_proc(struct ghes *ghes) @@ -482,9 +637,11 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - ghes_print_estatus(NULL, ghes->generic, 
ghes->estatus); + if (!ghes_estatus_cached(ghes->estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) + ghes_estatus_cache_add(ghes->generic, ghes->estatus); + } ghes_do_proc(ghes->estatus); - out: ghes_clear_estatus(ghes); return 0; @@ -546,6 +703,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next, *tail = NULL; struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; u32 len, node_len; @@ -569,7 +727,11 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); ghes_do_proc(estatus); - ghes_print_estatus(NULL, estatus_node->generic, estatus); + if (!ghes_estatus_cached(estatus)) { + generic = estatus_node->generic; + if (ghes_print_estatus(NULL, generic, estatus)) + ghes_estatus_cache_add(generic, estatus); + } gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len); llnode = next; @@ -622,6 +784,8 @@ static int ghes_notify_nmi(struct notifier_block *this, if (!(ghes->flags & GHES_TO_CLEAR)) continue; #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + if (ghes_estatus_cached(ghes->estatus)) + goto next; /* Save estatus for further processing in IRQ context */ len = apei_estatus_len(ghes->estatus); node_len = GHES_ESTATUS_NODE_LEN(len); @@ -633,6 +797,7 @@ static int ghes_notify_nmi(struct notifier_block *this, memcpy(estatus, ghes->estatus, len); llist_add(&estatus_node->llnode, &ghes_estatus_llist); } +next: #endif ghes_clear_estatus(ghes); } @@ -847,6 +1012,11 @@ static int __init ghes_init(void) if (rc) goto err_ioremap_exit; + rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * + GHES_ESTATUS_CACHE_ALLOCED_MAX); + if (rc) + goto err_pool_exit; + rc = platform_driver_register(&ghes_platform_driver); if (rc) goto err_pool_exit; -- cgit v1.1 From ea8f5fb8a71fddaf5f3a17100d3247855701f732 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:27 +0800 Subject: HWPoison: add memory_failure_queue() memory_failure() is the entry point for HWPoison memory error recovery. It must be called in process context. But commonly hardware memory errors are notified via MCE or NMI, so some delayed execution mechanism must be used. In MCE handler, a work queue + ring buffer mechanism is used. In addition to MCE, now APEI (ACPI Platform Error Interface) GHES (Generic Hardware Error Source) can be used to report memory errors too. To add support to APEI GHES memory recovery, a mechanism similar to that of MCE is implemented. memory_failure_queue() is the new entry point that can be called in IRQ context. The next step is to make MCE handler uses this interface too. 
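As a usage illustration: an error handler that runs in IRQ context and knows the physical address of the corrupted page hands recovery off through the new entry point. Only memory_failure_queue() itself comes from this patch; the surrounding handler and its argument are hypothetical.

#include <linux/mm.h>

/* Hypothetical IRQ-context error handler; only the corrupted
 * physical address is assumed to be known. */
static void example_handle_mem_error(u64 physical_addr)
{
	unsigned long pfn = physical_addr >> PAGE_SHIFT;

	/* Safe in IRQ context: the pfn is pushed into a per-CPU FIFO
	 * and a work item later calls __memory_failure() on it. */
	memory_failure_queue(pfn, 0, 0);
}

This is essentially how the APEI GHES driver uses the interface in a later patch in this series.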
Signed-off-by: Huang Ying Cc: Andi Kleen Cc: Wu Fengguang Cc: Andrew Morton Signed-off-by: Len Brown --- include/linux/mm.h | 1 + mm/memory-failure.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9670f71..303108e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1633,6 +1633,7 @@ enum mf_flags { }; extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int flags); +extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 740c4f5..2b43ba0 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno) __memory_failure(pfn, trapno, 0); } +#define MEMORY_FAILURE_FIFO_ORDER 4 +#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) + +struct memory_failure_entry { + unsigned long pfn; + int trapno; + int flags; +}; + +struct memory_failure_cpu { + DECLARE_KFIFO(fifo, struct memory_failure_entry, + MEMORY_FAILURE_FIFO_SIZE); + spinlock_t lock; + struct work_struct work; +}; + +static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); + +/** + * memory_failure_queue - Schedule handling memory failure of a page. + * @pfn: Page Number of the corrupted page + * @trapno: Trap number reported in the signal to user space. + * @flags: Flags for memory failure handling + * + * This function is called by the low level hardware error handler + * when it detects hardware memory corruption of a page. It schedules + * the recovering of error page, including dropping pages, killing + * processes etc. + * + * The function is primarily of use for corruptions that + * happen outside the current execution context (e.g. when + * detected by a background scrubber) + * + * Can run in IRQ context. 
+ */ +void memory_failure_queue(unsigned long pfn, int trapno, int flags) +{ + struct memory_failure_cpu *mf_cpu; + unsigned long proc_flags; + struct memory_failure_entry entry = { + .pfn = pfn, + .trapno = trapno, + .flags = flags, + }; + + mf_cpu = &get_cpu_var(memory_failure_cpu); + spin_lock_irqsave(&mf_cpu->lock, proc_flags); + if (kfifo_put(&mf_cpu->fifo, &entry)) + schedule_work_on(smp_processor_id(), &mf_cpu->work); + else + pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n", + pfn); + spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); +} +EXPORT_SYMBOL_GPL(memory_failure_queue); + +static void memory_failure_work_func(struct work_struct *work) +{ + struct memory_failure_cpu *mf_cpu; + struct memory_failure_entry entry = { 0, }; + unsigned long proc_flags; + int gotten; + + mf_cpu = &__get_cpu_var(memory_failure_cpu); + for (;;) { + spin_lock_irqsave(&mf_cpu->lock, proc_flags); + gotten = kfifo_get(&mf_cpu->fifo, &entry); + spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + if (!gotten) + break; + __memory_failure(entry.pfn, entry.trapno, entry.flags); + } +} + +static int __init memory_failure_init(void) +{ + struct memory_failure_cpu *mf_cpu; + int cpu; + + for_each_possible_cpu(cpu) { + mf_cpu = &per_cpu(memory_failure_cpu, cpu); + spin_lock_init(&mf_cpu->lock); + INIT_KFIFO(mf_cpu->fifo); + INIT_WORK(&mf_cpu->work, memory_failure_work_func); + } + + return 0; +} +core_initcall(memory_failure_init); + /** * unpoison_memory - Unpoison a previously poisoned page * @pfn: Page number of the to be unpoisoned page -- cgit v1.1 From ba61ca4aab47441f1c6cec28a9a6aa0489fd1df3 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 13 Jul 2011 13:14:28 +0800 Subject: ACPI, APEI, GHES: Add hardware memory error recovery support memory_failure_queue() is called when recoverable memory errors are notified by firmware to do the recovery work. Signed-off-by: Huang Ying Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 7 +++++++ drivers/acpi/apei/ghes.c | 24 +++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 35596ea..c34aa51 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -32,6 +32,13 @@ config ACPI_APEI_PCIEAER PCIe AER errors may be reported via APEI firmware first mode. Turn on this option to enable the corresponding support. +config ACPI_APEI_MEMORY_FAILURE + bool "APEI memory error recovering support" + depends on ACPI_APEI && MEMORY_FAILURE + help + Memory errors may be reported via APEI firmware first mode. + Turn on this option to enable the memory recovering support. 
+ config ACPI_APEI_EINJ tristate "APEI Error INJection (EINJ)" depends on ACPI_APEI && DEBUG_FS diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 931410d..e92c47c 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -451,20 +451,30 @@ static void ghes_clear_estatus(struct ghes *ghes) static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) { - int sev, processed = 0; + int sev, sec_sev; struct acpi_hest_generic_data *gdata; sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { -#ifdef CONFIG_X86_MCE + sec_sev = ghes_severity(gdata->error_severity); if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { - apei_mce_report_mem_error( - sev == GHES_SEV_CORRECTED, - (struct cper_sec_mem_err *)(gdata+1)); - processed = 1; - } + struct cper_sec_mem_err *mem_err; + mem_err = (struct cper_sec_mem_err *)(gdata+1); +#ifdef CONFIG_X86_MCE + apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, + mem_err); #endif +#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + unsigned long pfn; + pfn = mem_err->physical_addr >> PAGE_SHIFT; + memory_failure_queue(pfn, 0, 0); + } +#endif + } } } -- cgit v1.1 From a7e09d450b2e0b068e850d103b6ee1af537d1910 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 16 Jul 2011 18:14:21 -0400 Subject: ACPI: APEI build fix as GHES is optional... When # CONFIG_ACPI_APEI_GHES is not set: (.init.text+0x4c22): undefined reference to `ghes_disable' Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Len Brown --- drivers/acpi/bus.c | 2 -- include/acpi/apei.h | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 43ce3b0..437ddbf 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -544,10 +544,8 @@ static void acpi_bus_osc_support(void) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; #endif -#ifdef CONFIG_ACPI_APEI_GHES if (!ghes_disable) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT; -#endif if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { diff --git a/include/acpi/apei.h b/include/acpi/apei.h index d40bc55..51a527d 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -18,7 +18,11 @@ extern int hest_disable; extern int erst_disable; +#ifdef CONFIG_ACPI_APEI_GHES extern int ghes_disable; +#else +#define ghes_disable 1 +#endif #ifdef CONFIG_ACPI_APEI void __init acpi_hest_init(void); -- cgit v1.1 From 70cb6e1da00db6c9212e6fd69bd96fd41c797077 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 2 Aug 2011 18:00:21 -0400 Subject: APEI GHES: 32-bit buildfix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/acpi/apei/ghes.c:542: warning: integer overflow in expression drivers/acpi/apei/ghes.c:619: warning: integer overflow in expression ghes.c:(.text+0x46289): undefined reference to `__udivdi3'   in function ghes_estatus_cache_add(). 
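For context on the fix that follows: dividing a u64 with the plain C '/' operator makes gcc emit a call to libgcc's __udivdi3 on 32-bit x86, and the kernel does not provide that helper, hence the undefined reference. do_div() is the usual replacement; it divides in place and returns the remainder. A minimal illustration (the wrapper function and its names are not from this patch):

#include <linux/types.h>
#include <asm/div64.h>

/* Average interval between events, safe on 32-bit builds. */
static u64 average_interval_ns(u64 duration_ns, u32 count)
{
	/* do_div() replaces duration_ns with the quotient and
	 * returns the remainder, so no __udivdi3 is needed. */
	do_div(duration_ns, count + 1);
	return duration_ns;
}

The companion hunk also replaces 10 * NSEC_PER_SEC with an explicit unsigned long long constant, since that product overflows a 32-bit long.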
Reported-by: Randy Dunlap Signed-off-by: Len Brown --- drivers/acpi/apei/ghes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e92c47c..0784f99 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -65,7 +65,7 @@ #define GHES_ESTATUS_CACHES_SIZE 4 -#define GHES_ESTATUS_IN_CACHE_MAX_NSEC (10 * NSEC_PER_SEC) +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL /* Prevent too many caches are allocated because of RCU */ #define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) @@ -622,7 +622,8 @@ static void ghes_estatus_cache_add( break; } count = atomic_read(&cache->count); - period = duration / (count + 1); + period = duration; + do_div(period, (count + 1)); if (period > max_period) { max_period = period; slot = i; -- cgit v1.1 From c3e6088e1036f8084bc7444b38437da136b7588b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 20 Jul 2011 16:09:29 +0800 Subject: ACPI, APEI, EINJ Param support is disabled by default EINJ parameter support is only usable for some specific BIOS. Originally, it is expected to have no harm for BIOS does not support it. But now, we found it will cause issue (memory overwriting) for some BIOS. So param support is disabled by default and only enabled when newly added module parameter named "param_extension" is explicitly specified. Signed-off-by: Huang Ying Cc: Matthew Garrett Acked-by: Don Zickus Acked-by: Tony Luck Signed-off-by: Len Brown --- Documentation/acpi/apei/einj.txt | 11 +++++++++-- drivers/acpi/apei/einj.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt index dfab718..5cc699b 100644 --- a/Documentation/acpi/apei/einj.txt +++ b/Documentation/acpi/apei/einj.txt @@ -48,12 +48,19 @@ directory apei/einj. The following files are provided. - param1 This file is used to set the first error parameter value. Effect of parameter depends on error_type specified. For memory error, this is - physical memory address. + physical memory address. Only available if param_extension module + parameter is specified. - param2 This file is used to set the second error parameter value. Effect of parameter depends on error_type specified. For memory error, this is - physical memory address mask. + physical memory address mask. Only available if param_extension + module parameter is specified. + +Injecting parameter support is a BIOS version specific extension, that +is, it only works on some BIOS version. If you want to use it, please +make sure your BIOS version has the proper support and specify +"param_extension=y" in module parameter. For more information about EINJ, please refer to ACPI specification version 4.0, section 17.5. diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index ab821f1..589b96c 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -46,7 +46,8 @@ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as * most will ignore the parameter and make their own choice of address - * for error injection. + * for error injection. This extension is used only if + * param_extension module parameter is specified. 
*/ struct einj_parameter { u64 type; @@ -65,6 +66,9 @@ struct einj_parameter { ((struct acpi_whea_header *)((char *)(tab) + \ sizeof(struct acpi_table_einj))) +static bool param_extension; +module_param(param_extension, bool, 0); + static struct acpi_table_einj *einj_tab; static struct apei_resources einj_resources; @@ -489,14 +493,6 @@ static int __init einj_init(void) einj_debug_dir, NULL, &error_type_fops); if (!fentry) goto err_cleanup; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_cleanup; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_cleanup; fentry = debugfs_create_file("error_inject", S_IWUSR, einj_debug_dir, NULL, &error_inject_fops); if (!fentry) @@ -513,12 +509,23 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; + if (param_extension) { + param_paddr = einj_get_parameter_address(); + if (param_paddr) { + einj_param = ioremap(param_paddr, sizeof(*einj_param)); + rc = -ENOMEM; + if (!einj_param) + goto err_unmap; + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_unmap; + } else + pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); } pr_info(EINJ_PFX "Error INJection is initialized.\n"); @@ -526,6 +533,8 @@ static int __init einj_init(void) return 0; err_unmap: + if (einj_param) + iounmap(einj_param); apei_exec_post_unmap_gars(&ctx); err_release: apei_resources_release(&einj_resources); -- cgit v1.1 From 4edd17a25c99f34bd7a75c1daf31afe840237da8 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 3 Aug 2011 10:34:58 -0700 Subject: Revert "drm/i915/dp: Zero the DPCD data before connection probe" This reverts commit 97cdd7101079adc3c626d159c62d43de949516c8. Clearing the dpcd data means that if the fetch fails, any previous data will be lost. On eDP, this is no fun as we only fetch dpcd at init time, so the memset will destroy that the next time through. --- drivers/gpu/drm/i915/intel_dp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ba72fbc..2f901c0 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1697,7 +1697,6 @@ intel_dp_detect(struct drm_connector *connector, bool force) struct edid *edid = NULL; intel_dp->has_audio = false; - memset(intel_dp->dpcd, 0, sizeof(intel_dp->dpcd)); if (HAS_PCH_SPLIT(dev)) status = ironlake_dp_detect(intel_dp); -- cgit v1.1 From 4e20fa65a3ea789510eed1a15deb9e8aab2b8202 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 3 Aug 2011 10:52:24 -0700 Subject: drm/i915: Try enabling RC6 by default (again) Jesse Barnes and I found a couple of issues where incorrect mode setting would cause problems with RC6 enabled. We're hopeful that fixing those will resolve the outstanding issues with a few machines that had trouble before 3.0 with rc6. 
Cc: Pekka Enberg Cc: Francesco Allertsen Cc: Ted Phelps Cc: Gu Rui Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38567 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=38332 Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ce045a8..60e4b9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -62,7 +62,7 @@ module_param_named(semaphores, i915_semaphores, int, 0600); MODULE_PARM_DESC(semaphores, "Use semaphores for inter-ring sync (default: false)"); -unsigned int i915_enable_rc6 __read_mostly = 0; +unsigned int i915_enable_rc6 __read_mostly = 1; module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); MODULE_PARM_DESC(i915_enable_rc6, "Enable power-saving render C-state 6 (default: true)"); -- cgit v1.1 From 27a26b775ab89ff65d307de180e466d4c1215186 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sat, 23 Jul 2011 16:21:57 +0800 Subject: xen/pciback: remove duplicated #include Remove duplicated #include('s) in drivers/xen/xen-pciback/xenbus.c Signed-off-by: Huang Weiyi Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/xenbus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 206c4ce0..978d2c6 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "pciback.h" #define DRV_NAME "xen-pciback" -- cgit v1.1 From f24144c0c397fe3697b5b563c4a86d4f1fb185cc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Jul 2011 14:00:06 -0400 Subject: xen/grant: Fix compile warning. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/xen/grant-table.c:85: warning: ‘rc’ may be used uninitialized in this function Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index fd725cd..4f44b34 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -82,7 +82,7 @@ static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) static int get_free_entries(unsigned count) { unsigned long flags; - int ref, rc; + int ref, rc = 0; grant_ref_t head; spin_lock_irqsave(&gnttab_list_lock, flags); -- cgit v1.1 From 61077b2c50476ac85ced8a56fc6b5aaa3aa9c980 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Jul 2011 14:01:16 -0400 Subject: xen/balloon: Fix compile errors - missing header files. With a specific enough .config file compile errors show for missing workqueue declarations. Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-selfballoon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 010937b..1b4afd8 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -70,10 +70,10 @@ #include #include #include - +#include #include - #include +#include /* Enable/disable with sysfs. */ static int xen_selfballooning_enabled __read_mostly; -- cgit v1.1 From 65d4b248114e18b3d805c7ecb88d9ea64dd978e4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sat, 30 Jul 2011 11:21:09 -0400 Subject: xen/self-balloon: Add dependency on tmem. 
Without enabling CONFIG_XEN_TMEM we get this: drivers/xen/xen-selfballoon.c:461: undefined reference to `tmem_enabled' Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index f815283..5f7ff8e 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -11,7 +11,7 @@ config XEN_BALLOON config XEN_SELFBALLOONING bool "Dynamically self-balloon kernel memory to target" - depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP + depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP && XEN_TMEM default n help Self-ballooning dynamically balloons available kernel memory driven -- cgit v1.1 From db9481c0476c6475d058ac7ecebb5a822b43cc99 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 3 Aug 2011 14:57:11 -0400 Subject: ext4: use kzalloc in ext4_kzalloc() Commit 9933fc0i (ext4: introduce ext4_kvmalloc(), ext4_kzalloc(), and ext4_kvfree()) intruduced wrappers around k*alloc/vmalloc but introduced a typo for ext4_kzalloc() by not using kzalloc() but kmalloc(). Signed-off-by: Mathias Krause Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e2d88ba..4687fea 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) { void *ret; - ret = kmalloc(size, flags); + ret = kzalloc(size, flags); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; -- cgit v1.1 From 0193e072268fe62c4b19ad4b05cd0d4b23c43bb9 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Wed, 3 Aug 2011 23:12:18 +0400 Subject: CIFS: Fix missing a decrement of inFlight value if we failed on getting mid entry in cifs_call_async. Cc: stable@kernel.org Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 147aa22..c1b9c4b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -362,6 +362,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid = AllocMidQEntry(hdr, server); if (mid == NULL) { mutex_unlock(&server->srv_mutex); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); return -ENOMEM; } -- cgit v1.1 From 6dccf9c508d5d773859df1cc2dce75c5b19e35a0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 12 Jul 2011 22:29:32 -0400 Subject: mrst_pmu: driver for Intel Moorestown Power Management Unit The Moorestown (MRST) Power Management Unit (PMU) driver directs the SOC power states in the "Langwell" south complex (SCU). It hooks pci_platform_pm_ops[] and thus observes all PCI ".set_state" requests. For devices in the SC, the pmu driver translates those PCI requests into the appropriate commands for the SCU. The PMU driver helps implement S0i3, a deep system idle power idle state. Entry into S0i3 is via cpuidle, just like regular processor c-states. S0i3 depends on pre-conditions including uni-processor, graphics off, and certain IO devices in the SC must be off. If those pre-conditions are met, then the PMU allows cpuidle to enter S0i3, otherwise such requests are demoted, either to Atom C4 or Atom C6. This driver is based on prototype work by Bruce Flemming, Illyas Mansoor, Rajeev D. Muralidhar, Vishwesh M. Rudramuni, Hari Seshadri and Sujith Thomas. The current driver also includes contributions from H. Peter Anvin, Arjan van de Ven, Kristen Accardi, and Yong Wang. 
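The demotion policy just described reduces to a small decision function; the sketch below is condensed from mrst_pmu_invalid_cstates() in the patch that follows (the state numbers are the platform's cpuidle table indices for Atom C6 and S0i3, and the other symbols are the driver's own globals).

/* Condensed from mrst_pmu_invalid_cstates() below: the returned bit
 * mask names the cpuidle states the governor must skip this time. */
static int s0i3_demotion_sketch(int cpu)
{
	if (pmu_read_busy_status())	/* SCU busy: demote to C4 */
		return (1 << 4) | (1 << 5);	/* block C6 and S0i3 */

	/* S0i3 needs the PMU initialized, only this CPU online, the
	 * device LSS targets reached, and the graphics unit off. */
	if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
	    !lss_s0i3_enabled || !graphics_is_off)
		return 1 << 5;			/* block S0i3 only */

	return 0;				/* C6 and S0i3 both allowed */
}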
Thanks for additional review feedback from Alan Cox and Randy Dunlap. Acked-by: Alan Cox Acked-by: H. Peter Anvin Signed-off-by: Len Brown --- MAINTAINERS | 6 + arch/x86/platform/mrst/Makefile | 1 + arch/x86/platform/mrst/pmu.c | 817 ++++++++++++++++++++++++++++++++++++++++ arch/x86/platform/mrst/pmu.h | 234 ++++++++++++ 4 files changed, 1058 insertions(+) create mode 100644 arch/x86/platform/mrst/pmu.c create mode 100644 arch/x86/platform/mrst/pmu.h diff --git a/MAINTAINERS b/MAINTAINERS index 187282d..d37b387 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3344,6 +3344,12 @@ F: drivers/net/ixgb/ F: drivers/net/ixgbe/ F: drivers/net/ixgbevf/ +INTEL MRST PMU DRIVER +M: Len Brown +L: linux-pm@lists.linux-foundation.org +S: Supported +F: arch/x86/platform/mrst/pmu.* + INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT L: linux-wireless@vger.kernel.org S: Orphan diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile index f61ccdd..1ea3877 100644 --- a/arch/x86/platform/mrst/Makefile +++ b/arch/x86/platform/mrst/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_X86_MRST) += mrst.o obj-$(CONFIG_X86_MRST) += vrtc.o obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o +obj-$(CONFIG_X86_MRST) += pmu.o diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c new file mode 100644 index 0000000..9281da7 --- /dev/null +++ b/arch/x86/platform/mrst/pmu.c @@ -0,0 +1,817 @@ +/* + * mrst/pmu.c - driver for MRST Power Management Unit + * + * Copyright (c) 2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pmu.h" + +#define IPCMSG_FW_REVISION 0xF4 + +struct mrst_device { + u16 pci_dev_num; /* DEBUG only */ + u16 lss; + u16 latest_request; + unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */ +}; + +/* + * comlete list of MRST PCI devices + */ +static struct mrst_device mrst_devs[] = { +/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */ +/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */ +/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */ +/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */ +/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */ +/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */ +/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */ +/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */ +/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */ +/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */ +/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */ +/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */ +/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */ +/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */ +/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */ +/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */ +/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */ +/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */ +/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */ +/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */ +/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */ +/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */ + +/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */ + +/* 23 */ { 0x4102, 0 }, /* Lincroft */ +/* 24 */ { 0x4110, 0 }, /* Lincroft */ +}; + +/* n.b. We ignore PCI-id 0x815 in LSS9 b/c MeeGo has no driver for it */ +static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0}; +static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803, + 0x0804, 0x0805, 0x080f, 0}; + +/* handle concurrent SMP invokations of pmu_pci_set_power_state() */ +static spinlock_t mrst_pmu_power_state_lock; + +static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */ +static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */ + +static int graphics_is_off; +static int lss_s0i3_enabled; +static bool mrst_pmu_s0i3_enable; + +/* debug counters */ +static u32 pmu_wait_ready_calls; +static u32 pmu_wait_ready_udelays; +static u32 pmu_wait_ready_udelays_max; +static u32 pmu_wait_done_calls; +static u32 pmu_wait_done_udelays; +static u32 pmu_wait_done_udelays_max; +static u32 pmu_set_power_state_entry; +static u32 pmu_set_power_state_send_cmd; + +static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num) +{ + int index = 0; + + if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815)) + index = pci_dev_num - 0x800; + else if (pci_dev_num == 0x084F) + index = 22; + else if (pci_dev_num == 0x4102) + index = 23; + else if (pci_dev_num == 0x4110) + index = 24; + + if (pci_dev_num != mrst_devs[index].pci_dev_num) { + WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num); + return 0; + } + + return &mrst_devs[index]; +} + +/** + * mrst_pmu_validate_cstates + * @dev: cpuidle_device + * + * Certain states are not appropriate for governor to pick in some cases. 
+ * This function will be called as cpuidle_device's prepare callback and + * thus tells governor to ignore such states when selecting the next state + * to enter. + */ + +#define IDLE_STATE4_IS_C6 4 +#define IDLE_STATE5_IS_S0I3 5 + +int mrst_pmu_invalid_cstates(void) +{ + int cpu = smp_processor_id(); + + /* + * Demote to C4 if the PMU is busy. + * Since LSS changes leave the busy bit clear... + * busy means either the PMU is waiting for an ACK-C6 that + * isn't coming due to an MWAIT that returned immediately; + * or we returned from S0i3 successfully, and the PMU + * is not done sending us interrupts. + */ + if (pmu_read_busy_status()) + return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3; + + /* + * Disallow S0i3 if: PMU is not initialized, or CPU1 is active, + * or if device LSS is insufficient, or the GPU is active, + * or if it has been explicitly disabled. + */ + if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) || + !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable) + return 1 << IDLE_STATE5_IS_S0I3; + else + return 0; +} + +/* + * pmu_update_wake_counters(): read PM_WKS, update wake_counters[] + * DEBUG only. + */ +static void pmu_update_wake_counters(void) +{ + int lss; + u32 wake_status; + + wake_status = pmu_read_wks(); + + for (lss = 0; lss < MRST_NUM_LSS; ++lss) { + if (wake_status & (1 << lss)) + wake_counters[lss]++; + } +} + +int mrst_pmu_s0i3_entry(void) +{ + int status; + + /* Clear any possible error conditions */ + pmu_write_ics(0x300); + + /* set wake control to current D-states */ + pmu_write_wssc(S0I3_SSS_TARGET); + + status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd); + pmu_update_wake_counters(); + return status; +} + +/* poll for maximum of 5ms for busy bit to clear */ +static int pmu_wait_ready(void) +{ + int udelays; + + pmu_wait_ready_calls++; + + for (udelays = 0; udelays < 500; ++udelays) { + if (udelays > pmu_wait_ready_udelays_max) + pmu_wait_ready_udelays_max = udelays; + + if (pmu_read_busy_status() == 0) + return 0; + + udelay(10); + pmu_wait_ready_udelays++; + } + + /* + * if this fires, observe + * /sys/kernel/debug/mrst_pmu_wait_ready_calls + * /sys/kernel/debug/mrst_pmu_wait_ready_udelays + */ + WARN_ONCE(1, "SCU not ready for 5ms"); + return -EBUSY; +} +/* poll for maximum of 50ms us for busy bit to clear */ +static int pmu_wait_done(void) +{ + int udelays; + + pmu_wait_done_calls++; + + for (udelays = 0; udelays < 500; ++udelays) { + if (udelays > pmu_wait_done_udelays_max) + pmu_wait_done_udelays_max = udelays; + + if (pmu_read_busy_status() == 0) + return 0; + + udelay(100); + pmu_wait_done_udelays++; + } + + /* + * if this fires, observe + * /sys/kernel/debug/mrst_pmu_wait_done_calls + * /sys/kernel/debug/mrst_pmu_wait_done_udelays + */ + WARN_ONCE(1, "SCU not done for 50ms"); + return -EBUSY; +} + +u32 mrst_pmu_msi_is_disabled(void) +{ + return pmu_msi_is_disabled(); +} + +void mrst_pmu_enable_msi(void) +{ + pmu_msi_enable(); +} + +/** + * pmu_irq - pmu driver interrupt handler + * Context: interrupt context + */ +static irqreturn_t pmu_irq(int irq, void *dummy) +{ + union pmu_pm_ics pmu_ics; + + pmu_ics.value = pmu_read_ics(); + + if (!pmu_ics.bits.pending) + return IRQ_NONE; + + switch (pmu_ics.bits.cause) { + case INT_SPURIOUS: + case INT_CMD_DONE: + case INT_CMD_ERR: + case INT_WAKE_RX: + case INT_SS_ERROR: + case INT_S0IX_MISS: + case INT_NO_ACKC6: + pmu_irq_stats[pmu_ics.bits.cause]++; + break; + default: + pmu_irq_stats[INT_INVALID]++; + } + + pmu_write_ics(pmu_ics.value); /* Clear pending 
interrupt */ + + return IRQ_HANDLED; +} + +/* + * Translate PCI power management to MRST LSS D-states + */ +static int pci_2_mrst_state(int lss, pci_power_t pci_state) +{ + switch (pci_state) { + case PCI_D0: + if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET) + return D0i1; + else + return D0; + case PCI_D1: + return D0i1; + case PCI_D2: + return D0i2; + case PCI_D3hot: + case PCI_D3cold: + return D0i3; + default: + WARN(1, "pci_state %d\n", pci_state); + return 0; + } +} + +static int pmu_issue_command(u32 pm_ssc) +{ + union pmu_pm_set_cfg_cmd_t command; + + if (pmu_read_busy_status()) { + pr_debug("pmu is busy, Operation not permitted\n"); + return -1; + } + + /* + * enable interrupts in PMU so that interrupts are + * propagated when ioc bit for a particular set + * command is set + */ + + pmu_irq_enable(); + + /* Configure the sub systems for pmu2 */ + + pmu_write_ssc(pm_ssc); + + /* + * Send the set config command for pmu its configured + * for mode CM_IMMEDIATE & hence with No Trigger + */ + + command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE; + command.pmu2_params.d_param.cfg_delay = 0; + command.pmu2_params.d_param.rsvd = 0; + + /* construct the command to send SET_CFG to particular PMU */ + command.pmu2_params.d_param.cmd = SET_CFG_CMD; + command.pmu2_params.d_param.ioc = 0; + command.pmu2_params.d_param.mode_id = 0; + command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0; + + /* write the value of PM_CMD into particular PMU */ + pr_debug("pmu command being written %x\n", + command.pmu_pm_set_cfg_cmd_value); + + pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value); + + return 0; +} + +static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state) +{ + u16 existing_request; + int i; + + for (i = 0; ids[i]; ++i) { + struct mrst_device *mrst_dev; + + mrst_dev = pci_id_2_mrst_dev(ids[i]); + if (unlikely(!mrst_dev)) + continue; + + existing_request = mrst_dev->latest_request; + if (existing_request < pci_state) + pci_state = existing_request; + } + return pci_state; +} + +/** + * pmu_pci_set_power_state - Callback function is used by all the PCI devices + * for a platform specific device power on/shutdown. + */ + +int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state) +{ + u32 old_sss, new_sss; + int status = 0; + struct mrst_device *mrst_dev; + + pmu_set_power_state_entry++; + + BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL); + BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold); + + mrst_dev = pci_id_2_mrst_dev(pdev->device); + if (unlikely(!mrst_dev)) + return -ENODEV; + + mrst_dev->pci_state_counts[pci_state]++; /* count invocations */ + + /* PMU driver calls self as part of PCI initialization, ignore */ + if (pdev->device == PCI_DEV_ID_MRST_PMU) + return 0; + + BUG_ON(!pmu_reg); /* SW bug if called before initialized */ + + spin_lock(&mrst_pmu_power_state_lock); + + if (pdev->d3_delay) { + dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n", + pdev->d3_delay); + pdev->d3_delay = 0; + } + /* + * If Lincroft graphics, simply remember state + */ + if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY + && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) { + if (pci_state == PCI_D0) + graphics_is_off = 0; + else + graphics_is_off = 1; + goto ret; + } + + if (!mrst_dev->lss) + goto ret; /* device with no LSS */ + + if (mrst_dev->latest_request == pci_state) + goto ret; /* no change */ + + mrst_dev->latest_request = pci_state; /* record latest request */ + + /* + * LSS9 and LSS10 contain multiple PCI devices. 
+ * Use the lowest numbered (highest power) state in the LSS + */ + if (mrst_dev->lss == 9) + pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state); + else if (mrst_dev->lss == 10) + pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state); + + status = pmu_wait_ready(); + if (status) + goto ret; + + old_sss = pmu_read_sss(); + new_sss = old_sss & ~SSMSK(3, mrst_dev->lss); + new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state), + mrst_dev->lss); + + if (new_sss == old_sss) + goto ret; /* nothing to do */ + + pmu_set_power_state_send_cmd++; + + status = pmu_issue_command(new_sss); + + if (unlikely(status != 0)) { + dev_err(&pdev->dev, "Failed to Issue a PM command\n"); + goto ret; + } + + if (pmu_wait_done()) + goto ret; + + lss_s0i3_enabled = + ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET); +ret: + spin_unlock(&mrst_pmu_power_state_lock); + return status; +} + +#ifdef CONFIG_DEBUG_FS +static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"}; + +static inline const char *d0ix_name(int state) +{ + return d0ix_names[(int) state]; +} + +static int debug_mrst_pmu_show(struct seq_file *s, void *unused) +{ + struct pci_dev *pdev = NULL; + u32 cur_pmsss; + int lss; + + seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET); + + cur_pmsss = pmu_read_sss(); + + seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET); + + seq_printf(s, "0x%08X Current SSS ", cur_pmsss); + seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n"); + + if (cpumask_equal(cpu_online_mask, cpumask_of(0))) + seq_printf(s, "cpu0 is only cpu online\n"); + else + seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n"); + + seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]"); + + + for_each_pci_dev(pdev) { + int pos; + u16 pmcsr; + struct mrst_device *mrst_dev; + int i; + + mrst_dev = pci_id_2_mrst_dev(pdev->device); + + seq_printf(s, "%s %04x/%04X %-16.16s ", + dev_name(&pdev->dev), + pdev->vendor, pdev->device, + dev_driver_string(&pdev->dev)); + + if (unlikely (!mrst_dev)) { + seq_printf(s, " UNKNOWN\n"); + continue; + } + + if (mrst_dev->lss) + seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss, + d0ix_name(((cur_pmsss >> + (mrst_dev->lss * 2)) & 0x3))); + else + seq_printf(s, " "); + + /* PCI PM config space setting */ + pos = pci_find_capability(pdev, PCI_CAP_ID_PM); + if (pos != 0) { + pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); + seq_printf(s, "PCI-%-4s", + pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK)); + } else { + seq_printf(s, " "); + } + + seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request)); + for (i = 0; i <= PCI_D3cold; ++i) + seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]); + + if (mrst_dev->lss) { + unsigned int lssmask; + + lssmask = SSMSK(D0i3, mrst_dev->lss); + + if ((lssmask & S0I3_SSS_TARGET) && + ((lssmask & cur_pmsss) != + (lssmask & S0I3_SSS_TARGET))) + seq_printf(s , "[BLOCKS s0i3]"); + } + + seq_printf(s, "\n"); + } + seq_printf(s, "Wake Counters:\n"); + for (lss = 0; lss < MRST_NUM_LSS; ++lss) + seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]); + + seq_printf(s, "Interrupt Counters:\n"); + seq_printf(s, + "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n" + "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n" + "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n" + "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n", + pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE], + pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX], + pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS], + pmu_irq_stats[INT_NO_ACKC6], 
pmu_irq_stats[INT_INVALID]); + + seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n", + pmu_wait_ready_calls); + seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n", + pmu_wait_ready_udelays); + seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n", + pmu_wait_ready_udelays_max); + seq_printf(s, "mrst_pmu_wait_done_calls %8d\n", + pmu_wait_done_calls); + seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n", + pmu_wait_done_udelays); + seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n", + pmu_wait_done_udelays_max); + seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n", + pmu_set_power_state_entry); + seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n", + pmu_set_power_state_send_cmd); + seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status()); + + return 0; +} + +static int debug_mrst_pmu_open(struct inode *inode, struct file *file) +{ + return single_open(file, debug_mrst_pmu_show, NULL); +} + +static const struct file_operations devices_state_operations = { + .open = debug_mrst_pmu_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* DEBUG_FS */ + +/* + * Validate SCU PCI shim PCI vendor capability byte + * against LSS hard-coded in mrst_devs[] above. + * DEBUG only. + */ +static void pmu_scu_firmware_debug(void) +{ + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + struct mrst_device *mrst_dev; + u8 pci_config_lss; + int pos; + + mrst_dev = pci_id_2_mrst_dev(pdev->device); + if (unlikely(!mrst_dev)) { + printk(KERN_ERR FW_BUG "pmu: Unknown " + "PCI device 0x%04X\n", pdev->device); + continue; + } + + if (mrst_dev->lss == 0) + continue; /* no LSS in our table */ + + pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR); + if (!pos != 0) { + printk(KERN_ERR FW_BUG "pmu: 0x%04X " + "missing PCI Vendor Capability\n", + pdev->device); + continue; + } + pci_read_config_byte(pdev, pos + 4, &pci_config_lss); + if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) { + printk(KERN_ERR FW_BUG "pmu: 0x%04X " + "invalid PCI Vendor Capability 0x%x " + " expected LSS 0x%X\n", + pdev->device, pci_config_lss, mrst_dev->lss); + continue; + } + pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK; + + if (mrst_dev->lss == pci_config_lss) + continue; + + printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n", + pdev->device, pci_config_lss, mrst_dev->lss); + } +} + +/** + * pmu_probe + */ +static int __devinit pmu_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + int ret; + struct mrst_pmu_reg *pmu; + + /* Init the device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Unable to Enable PCI device\n"); + return ret; + } + + ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME); + if (ret < 0) { + dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n"); + goto out_err1; + } + + /* Map the memory of PMU reg base */ + pmu = pci_iomap(pdev, 0, 0); + if (!pmu) { + dev_err(&pdev->dev, "Unable to map the PMU address space\n"); + ret = -ENOMEM; + goto out_err2; + } + +#ifdef CONFIG_DEBUG_FS + /* /sys/kernel/debug/mrst_pmu */ + (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO, + NULL, NULL, &devices_state_operations); +#endif + pmu_reg = pmu; /* success */ + + if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) { + dev_err(&pdev->dev, "Registering isr has failed\n"); + ret = -1; + goto out_err3; + } + + pmu_scu_firmware_debug(); + + pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */ + + pmu_wait_ready(); + + pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */ + 
pmu_write_cmd(0x201); + + spin_lock_init(&mrst_pmu_power_state_lock); + + /* Enable the hardware interrupt */ + pmu_irq_enable(); + return 0; + +out_err3: + free_irq(pdev->irq, NULL); + pci_iounmap(pdev, pmu_reg); + pmu_reg = NULL; +out_err2: + pci_release_region(pdev, 0); +out_err1: + pci_disable_device(pdev); + return ret; +} + +static void __devexit pmu_remove(struct pci_dev *pdev) +{ + dev_err(&pdev->dev, "Mid PM pmu_remove called\n"); + + /* Freeing up the irq */ + free_irq(pdev->irq, NULL); + + pci_iounmap(pdev, pmu_reg); + pmu_reg = NULL; + + /* disable the current PCI device */ + pci_release_region(pdev, 0); + pci_disable_device(pdev); +} + +static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = { + { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 }, + { } +}; + +MODULE_DEVICE_TABLE(pci, pmu_pci_ids); + +static struct pci_driver driver = { + .name = MRST_PMU_DRV_NAME, + .id_table = pmu_pci_ids, + .probe = pmu_probe, + .remove = __devexit_p(pmu_remove), +}; + +/** + * pmu_pci_register - register the PMU driver as PCI device + */ +static int __init pmu_pci_register(void) +{ + return pci_register_driver(&driver); +} + +/* Register and probe via fs_initcall() to preceed device_initcall() */ +fs_initcall(pmu_pci_register); + +static void __exit mid_pci_cleanup(void) +{ + pci_unregister_driver(&driver); +} + +static int ia_major; +static int ia_minor; + +static int pmu_sfi_parse_oem(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + + sb = (struct sfi_table_simple *)table; + ia_major = (sb->pentry[1] >> 0) & 0xFFFF; + ia_minor = (sb->pentry[1] >> 16) & 0xFFFF; + printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n", + ia_major, ia_minor); + + return 0; +} + +static int __init scu_fw_check(void) +{ + int ret; + u32 fw_version; + + if (!pmu_reg) + return 0; /* this driver didn't probe-out */ + + sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem); + + if (ia_major < 0x6005 || ia_minor < 0x1525) { + WARN(1, "mrst_pmu: IA FW version too old\n"); + return -1; + } + + ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0, + &fw_version, 1); + + if (ret) { + WARN(1, "mrst_pmu: IPC FW version? %d\n", ret); + } else { + int scu_major = (fw_version >> 8) & 0xFF; + int scu_minor = (fw_version >> 0) & 0xFF; + + printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version); + + if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) { + printk(KERN_INFO "mrst_pmu: enabling S0i3\n"); + mrst_pmu_s0i3_enable = true; + } else { + WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X", + scu_major, scu_minor); + } + } + return 0; +} +late_initcall(scu_fw_check); +module_exit(mid_pci_cleanup); diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h new file mode 100644 index 0000000..bfbfe64 --- /dev/null +++ b/arch/x86/platform/mrst/pmu.h @@ -0,0 +1,234 @@ +/* + * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c + * + * Copyright (c) 2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _MRST_PMU_H_ +#define _MRST_PMU_H_ + +#define PCI_DEV_ID_MRST_PMU 0x0810 +#define MRST_PMU_DRV_NAME "mrst_pmu" +#define PCI_SUB_CLASS_MASK 0xFF00 + +#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F +#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80 + +#define SUB_SYS_ALL_D0I1 0x01155555 +#define S0I3_WAKE_SOURCES 0x00001FFF + +#define PM_S0I3_COMMAND \ + ((0 << 31) | /* Reserved */ \ + (0 << 30) | /* Core must be idle */ \ + (0xc2 << 22) | /* ACK C6 trigger */ \ + (3 << 19) | /* Trigger on DMI message */ \ + (3 << 16) | /* Enter S0i3 */ \ + (0 << 13) | /* Numeric mode ID (sw) */ \ + (3 << 9) | /* Trigger mode */ \ + (0 << 8) | /* Do not interrupt */ \ + (1 << 0)) /* Set configuration */ + +#define LSS_DMI 0 +#define LSS_SD_HC0 1 +#define LSS_SD_HC1 2 +#define LSS_NAND 3 +#define LSS_IMAGING 4 +#define LSS_SECURITY 5 +#define LSS_DISPLAY 6 +#define LSS_USB_HC 7 +#define LSS_USB_OTG 8 +#define LSS_AUDIO 9 +#define LSS_AUDIO_LPE 9 +#define LSS_AUDIO_SSP 9 +#define LSS_I2C0 10 +#define LSS_I2C1 10 +#define LSS_I2C2 10 +#define LSS_KBD 10 +#define LSS_SPI0 10 +#define LSS_SPI1 10 +#define LSS_SPI2 10 +#define LSS_GPIO 10 +#define LSS_SRAM 11 /* used by SCU, do not touch */ +#define LSS_SD_HC2 12 +/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */ +#define MRST_NUM_LSS 13 + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +#define SSMSK(mask, lss) ((mask) << ((lss) * 2)) +#define D0 0 +#define D0i1 1 +#define D0i2 2 +#define D0i3 3 + +#define S0I3_SSS_TARGET ( \ + SSMSK(D0i1, LSS_DMI) | \ + SSMSK(D0i3, LSS_SD_HC0) | \ + SSMSK(D0i3, LSS_SD_HC1) | \ + SSMSK(D0i3, LSS_NAND) | \ + SSMSK(D0i3, LSS_SD_HC2) | \ + SSMSK(D0i3, LSS_IMAGING) | \ + SSMSK(D0i3, LSS_SECURITY) | \ + SSMSK(D0i3, LSS_DISPLAY) | \ + SSMSK(D0i3, LSS_USB_HC) | \ + SSMSK(D0i3, LSS_USB_OTG) | \ + SSMSK(D0i3, LSS_AUDIO) | \ + SSMSK(D0i1, LSS_I2C0)) + +/* + * D0i1 on Langwell is Autonomous Clock Gating (ACG). 
+ * Enable ACG on every LSS except camera and audio + */ +#define D0I1_ACG_SSS_TARGET \ + (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO)) + +enum cm_mode { + CM_NOP, /* ignore the config mode value */ + CM_IMMEDIATE, + CM_DELAY, + CM_TRIGGER, + CM_INVALID +}; + +enum sys_state { + SYS_STATE_S0I0, + SYS_STATE_S0I1, + SYS_STATE_S0I2, + SYS_STATE_S0I3, + SYS_STATE_S3, + SYS_STATE_S5 +}; + +#define SET_CFG_CMD 1 + +enum int_status { + INT_SPURIOUS = 0, + INT_CMD_DONE = 1, + INT_CMD_ERR = 2, + INT_WAKE_RX = 3, + INT_SS_ERROR = 4, + INT_S0IX_MISS = 5, + INT_NO_ACKC6 = 6, + INT_INVALID = 7, +}; + +/* PMU register interface */ +static struct mrst_pmu_reg { + u32 pm_sts; /* 0x00 */ + u32 pm_cmd; /* 0x04 */ + u32 pm_ics; /* 0x08 */ + u32 _resv1; /* 0x0C */ + u32 pm_wkc[2]; /* 0x10 */ + u32 pm_wks[2]; /* 0x18 */ + u32 pm_ssc[4]; /* 0x20 */ + u32 pm_sss[4]; /* 0x30 */ + u32 pm_wssc[4]; /* 0x40 */ + u32 pm_c3c4; /* 0x50 */ + u32 pm_c5c6; /* 0x54 */ + u32 pm_msi_disable; /* 0x58 */ +} *pmu_reg; + +static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); } +static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); } +static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); } +static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); } + +static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); } +static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); } +static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); } +static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); } +static inline void pmu_write_wssc(u32 arg) + { writel(arg, &pmu_reg->pm_wssc[0]); } + +static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); } +static inline u32 pmu_msi_is_disabled(void) + { return readl(&pmu_reg->pm_msi_disable); } + +union pmu_pm_ics { + struct { + u32 cause:8; + u32 enable:1; + u32 pending:1; + u32 reserved:22; + } bits; + u32 value; +}; + +static inline void pmu_irq_enable(void) +{ + union pmu_pm_ics pmu_ics; + + pmu_ics.value = pmu_read_ics(); + pmu_ics.bits.enable = 1; + pmu_write_ics(pmu_ics.value); +} + +union pmu_pm_status { + struct { + u32 pmu_rev:8; + u32 pmu_busy:1; + u32 mode_id:4; + u32 Reserved:19; + } pmu_status_parts; + u32 pmu_status_value; +}; + +static inline int pmu_read_busy_status(void) +{ + union pmu_pm_status result; + + result.pmu_status_value = pmu_read_sts(); + + return result.pmu_status_parts.pmu_busy; +} + +/* pmu set config parameters */ +struct cfg_delay_param_t { + u32 cmd:8; + u32 ioc:1; + u32 cfg_mode:4; + u32 mode_id:3; + u32 sys_state:3; + u32 cfg_delay:8; + u32 rsvd:5; +}; + +struct cfg_trig_param_t { + u32 cmd:8; + u32 ioc:1; + u32 cfg_mode:4; + u32 mode_id:3; + u32 sys_state:3; + u32 cfg_trig_type:3; + u32 cfg_trig_val:8; + u32 cmbi:1; + u32 rsvd1:1; +}; + +union pmu_pm_set_cfg_cmd_t { + union { + struct cfg_delay_param_t d_param; + struct cfg_trig_param_t t_param; + } pmu2_params; + u32 pmu_pm_set_cfg_cmd_value; +}; + +#ifdef FUTURE_PATCH +extern int mrst_s0i3_entry(u32 regval, u32 *regaddr); +#else +static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; } +#endif +#endif -- cgit v1.1 From 62027aea23fcd14478abdddd3b74a4e0f5fb2984 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 18:13:10 -0400 Subject: cpuidle: create bootparam "cpuidle.off=1" useful for disabling cpuidle to fall back to architecture-default idle loop cpuidle drivers and governors will fail to 
register. on x86 they'll say so: intel_idle: intel_idle yielding to (null) ACPI: acpi_idle yielding to (null) Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 3 +++ drivers/cpuidle/cpuidle.c | 10 ++++++++++ drivers/cpuidle/cpuidle.h | 1 + drivers/cpuidle/driver.c | 3 +++ drivers/cpuidle/governor.c | 3 +++ 5 files changed, 20 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be7..9b8e62d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -546,6 +546,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. /proc//coredump_filter. See also Documentation/filesystems/proc.txt. + cpuidle.off=1 [CPU_IDLE] + disable the cpuidle sub-system + cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver Format: ,,,[,] diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index bf50924..faae2c3 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -28,6 +28,12 @@ LIST_HEAD(cpuidle_detected_devices); static void (*pm_idle_old)(void); static int enabled_devices; +static int off __read_mostly; + +int cpuidle_disabled(void) +{ + return off; +} #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT) static void cpuidle_kick_cpus(void) @@ -427,6 +433,9 @@ static int __init cpuidle_init(void) { int ret; + if (cpuidle_disabled()) + return -ENODEV; + pm_idle_old = pm_idle; ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); @@ -438,4 +447,5 @@ static int __init cpuidle_init(void) return 0; } +module_param(off, int, 0444); core_initcall(cpuidle_init); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 33e50d5..38c3fd8 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; extern spinlock_t cpuidle_driver_lock; +extern int cpuidle_disabled(void); /* idle loop */ extern void cpuidle_install_idle_handler(void); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index fd1601e..3f7e3ce 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -26,6 +26,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (!drv) return -EINVAL; + if (cpuidle_disabled()) + return -ENODEV; + spin_lock(&cpuidle_driver_lock); if (cpuidle_curr_driver) { spin_unlock(&cpuidle_driver_lock); diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 724c164..ea2f8e7 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) if (!gov || !gov->select) return -EINVAL; + if (cpuidle_disabled()) + return -ENODEV; + mutex_lock(&cpuidle_lock); if (__cpuidle_find_governor(gov->name) == NULL) { ret = 0; -- cgit v1.1 From d91ee5863b71e8c90eaf6035bff3078a85e2e7b5 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 18:28:35 -0400 Subject: cpuidle: replace xen access to x86 pm_idle and default_idle When a Xen Dom0 kernel boots on a hypervisor, it gets access to the raw-hardware ACPI tables. While it parses the idle tables for the hypervisor's beneift, it uses HLT for its own idle. Rather than have xen scribble on pm_idle and access default_idle, have it simply disable_cpuidle() so acpi_idle will not load and architecture default HLT will be used. cc: xen-devel@lists.xensource.com Tested-by: Konrad Rzeszutek Wilk Acked-by: H. 
Peter Anvin Signed-off-by: Len Brown --- arch/x86/xen/setup.c | 3 ++- drivers/cpuidle/cpuidle.c | 4 ++++ include/linux/cpuidle.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 60aeeb5..a9627e2 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -426,7 +427,7 @@ void __init xen_arch_setup(void) #ifdef CONFIG_X86_32 boot_cpu_data.hlt_works_ok = 1; #endif - pm_idle = default_idle; + disable_cpuidle(); boot_option_idle_override = IDLE_HALT; fiddle_vdso(); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index faae2c3..041df0b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -34,6 +34,10 @@ int cpuidle_disabled(void) { return off; } +void disable_cpuidle(void) +{ + off = 1; +} #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT) static void cpuidle_kick_cpus(void) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36719ea..b89f67d 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -122,6 +122,7 @@ struct cpuidle_driver { }; #ifdef CONFIG_CPU_IDLE +extern void disable_cpuidle(void); extern int cpuidle_register_driver(struct cpuidle_driver *drv); struct cpuidle_driver *cpuidle_get_driver(void); @@ -135,6 +136,7 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); #else +static inline void disable_cpuidle(void) { } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } -- cgit v1.1 From 4bfc8288bc4a64529c5547d17349a2a1f4675507 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 30 Mar 2011 23:52:29 -0400 Subject: x86 idle: move mwait_idle_with_hints() to where it is used ...and make it static no functional change cc: x86@kernel.org Acked-by: H. Peter Anvin Signed-off-by: Len Brown --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/acpi/cstate.c | 23 +++++++++++++++++++++++ arch/x86/kernel/process.c | 23 ----------------------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2193715..0d1171c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -751,8 +751,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) :: "a" (eax), "c" (ecx)); } -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); - extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void init_amd_e400_c1e_mask(void); diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 5812404..f50e7fb 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. + * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. 
+ */ +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) +{ + if (!need_resched()) { + if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __mwait(ax, cx); + } +} + void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e1ba8cb2..e7e3b01 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -438,29 +438,6 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)¤t_thread_info()->flags); - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(ax, cx); - } -} - /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { -- cgit v1.1 From a0bfa1373859e9d11dc92561a8667588803e42d8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Apr 2011 19:34:59 -0400 Subject: cpuidle: stop depending on pm_idle cpuidle users should call cpuidle_call_idle() directly rather than via (pm_idle)() function pointer. Architecture may choose to continue using (pm_idle)(), but cpuidle need not depend on it: my_arch_cpu_idle() ... if(cpuidle_call_idle()) pm_idle(); cc: Kevin Hilman cc: Paul Mundt cc: x86@kernel.org Acked-by: H. 
Peter Anvin Signed-off-by: Len Brown --- arch/arm/kernel/process.c | 4 +++- arch/sh/kernel/idle.c | 6 ++++-- arch/x86/kernel/process_32.c | 4 +++- arch/x86/kernel/process_64.c | 4 +++- drivers/cpuidle/cpuidle.c | 38 ++++++++++++++++++-------------------- include/linux/cpuidle.h | 2 ++ 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 5e1e541..d7ee0d4 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -196,7 +197,8 @@ void cpu_idle(void) cpu_relax(); } else { stop_critical_timings(); - pm_idle(); + if (cpuidle_call_idle()) + pm_idle(); start_critical_timings(); /* * This will eventually be removed - pm_idle diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 425d604..9c7099e 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -16,12 +16,13 @@ #include #include #include +#include #include #include #include #include -void (*pm_idle)(void) = NULL; +static void (*pm_idle)(void); static int hlt_counter; @@ -100,7 +101,8 @@ void cpu_idle(void) local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); - pm_idle(); + if (cpuidle_call_idle()) + pm_idle(); /* * Sanity check to ensure that pm_idle() returns * with IRQs enabled diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3d0dc5..7a3b651 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -109,7 +110,8 @@ void cpu_idle(void) local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); - pm_idle(); + if (cpuidle_idle_call()) + pm_idle(); start_critical_timings(); } tick_nohz_restart_sched_tick(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca6f7ab..f693e44 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -136,7 +137,8 @@ void cpu_idle(void) enter_idle(); /* Don't trace irqs off for idle */ stop_critical_timings(); - pm_idle(); + if (cpuidle_idle_call()) + pm_idle(); start_critical_timings(); /* In many cases the interrupt that ended idle diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 041df0b..d4c5423 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -25,10 +25,10 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DEFINE_MUTEX(cpuidle_lock); LIST_HEAD(cpuidle_detected_devices); -static void (*pm_idle_old)(void); static int enabled_devices; static int off __read_mostly; +static int initialized __read_mostly; int cpuidle_disabled(void) { @@ -56,25 +56,23 @@ static int __cpuidle_register_device(struct cpuidle_device *dev); * cpuidle_idle_call - the main idle loop * * NOTE: no locks or semaphores should be used here + * return non-zero on failure */ -static void cpuidle_idle_call(void) +int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_state *target_state; int next_state; + if (off) + return -ENODEV; + + if (!initialized) + return -ENODEV; + /* check if the device is ready */ - if (!dev || !dev->enabled) { - if (pm_idle_old) - pm_idle_old(); - else -#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE) - default_idle(); -#else - local_irq_enable(); -#endif - return; - } + if (!dev || !dev->enabled) + return -EBUSY; #if 0 /* shows regressions, 
re-enable for 2.6.29 */ @@ -99,7 +97,7 @@ static void cpuidle_idle_call(void) next_state = cpuidle_curr_governor->select(dev); if (need_resched()) { local_irq_enable(); - return; + return 0; } target_state = &dev->states[next_state]; @@ -124,6 +122,8 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); + + return 0; } /** @@ -131,10 +131,10 @@ static void cpuidle_idle_call(void) */ void cpuidle_install_idle_handler(void) { - if (enabled_devices && (pm_idle != cpuidle_idle_call)) { + if (enabled_devices) { /* Make sure all changes finished before we switch to new idle */ smp_wmb(); - pm_idle = cpuidle_idle_call; + initialized = 1; } } @@ -143,8 +143,8 @@ void cpuidle_install_idle_handler(void) */ void cpuidle_uninstall_idle_handler(void) { - if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) { - pm_idle = pm_idle_old; + if (enabled_devices) { + initialized = 0; cpuidle_kick_cpus(); } } @@ -440,8 +440,6 @@ static int __init cpuidle_init(void) if (cpuidle_disabled()) return -ENODEV; - pm_idle_old = pm_idle; - ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); if (ret) return ret; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b89f67d..b51629e 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -123,6 +123,7 @@ struct cpuidle_driver { #ifdef CONFIG_CPU_IDLE extern void disable_cpuidle(void); +extern int cpuidle_idle_call(void); extern int cpuidle_register_driver(struct cpuidle_driver *drv); struct cpuidle_driver *cpuidle_get_driver(void); @@ -137,6 +138,7 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); #else static inline void disable_cpuidle(void) { } +static inline int cpuidle_idle_call(void) { return -ENODEV; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } -- cgit v1.1 From 06fa0a883a01a34a0449ec116c5288c1d196b4b0 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Wed, 3 Aug 2011 16:38:59 -0700 Subject: mlx4: Fixing Ethernet unicast packet steering For older FW versions, fixing the usage of per port Mac table. For each port we must define the base QP number, which is passed to the HW. Setting the correct value in SET_PORT FW command to enable the steering. Reported-by: Roland Dreier Tested-by: Roland Dreier Signed-off-by: Yevgeny Petrilin Signed-off-by: David S. 
Miller --- drivers/net/mlx4/en_port.c | 2 +- drivers/net/mlx4/main.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c index 5e71091..5ada5b46 100644 --- a/drivers/net/mlx4/en_port.c +++ b/drivers/net/mlx4/en_port.c @@ -128,7 +128,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, memset(context, 0, sizeof *context); context->base_qpn = cpu_to_be32(base_qpn); - context->n_mac = 0x7; + context->n_mac = 0x2; context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT | base_qpn); context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT | diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index c94b342..f0ee35d 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -1117,6 +1117,8 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port = port; mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); + info->base_qpn = dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] + + (port - 1) * (1 << log_num_mac); sprintf(info->dev_name, "mlx4_port%d", port); info->port_attr.attr.name = info->dev_name; -- cgit v1.1 From dd48c085c1cdf9446f92826f1fd451167fb6c2fd Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 3 Aug 2011 16:21:01 -0700 Subject: fault-injection: add ability to export fault_attr in arbitrary directory init_fault_attr_dentries() is used to export fault_attr via debugfs. But it can only export it in debugfs root directory. Per Forlin is working on mmc_fail_request which adds support to inject data errors after a completed host transfer in MMC subsystem. The fault_attr for mmc_fail_request should be defined per mmc host and export it in debugfs directory per mmc host like /sys/kernel/debug/mmc0/mmc_fail_request. init_fault_attr_dentries() doesn't help for mmc_fail_request. So this introduces fault_create_debugfs_attr() which is able to create a directory in the arbitrary directory and replace init_fault_attr_dentries(). [akpm@linux-foundation.org: extraneous semicolon, per Randy] Signed-off-by: Akinobu Mita Tested-by: Per Forlin Cc: Jens Axboe Cc: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: Randy Dunlap Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/fault-injection/fault-injection.txt | 3 +-- block/blk-core.c | 6 ++++-- block/blk-timeout.c | 5 ++++- include/linux/fault-inject.h | 18 +++++------------- lib/fault-inject.c | 20 +++++++------------- mm/failslab.c | 14 +++++++------- mm/page_alloc.c | 13 +++++-------- 7 files changed, 33 insertions(+), 46 deletions(-) diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 7be15e4..82a5d25 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -143,8 +143,7 @@ o provide a way to configure fault attributes failslab, fail_page_alloc, and fail_make_request use this way. 
Helper functions: - init_fault_attr_dentries(entries, attr, name); - void cleanup_fault_attr_dentries(entries); + fault_create_debugfs_attr(name, parent, attr); - module parameters diff --git a/block/blk-core.c b/block/blk-core.c index b850bed..b627558 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes) static int __init fail_make_request_debugfs(void) { - return init_fault_attr_dentries(&fail_make_request, - "fail_make_request"); + struct dentry *dir = fault_create_debugfs_attr("fail_make_request", + NULL, &fail_make_request); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_make_request_debugfs); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4f0c06c..7803548 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q) static int __init fail_io_timeout_debugfs(void) { - return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); + struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", + NULL, &fail_io_timeout); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_io_timeout_debugfs); diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3ff060a..c6f996f 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -25,10 +25,6 @@ struct fault_attr { unsigned long reject_end; unsigned long count; - -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - struct dentry *dir; -#endif }; #define FAULT_ATTR_INITIALIZER { \ @@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -int init_fault_attr_dentries(struct fault_attr *attr, const char *name); -void cleanup_fault_attr_dentries(struct fault_attr *attr); +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr); #else /* CONFIG_FAULT_INJECTION_DEBUG_FS */ -static inline int init_fault_attr_dentries(struct fault_attr *attr, - const char *name) -{ - return -ENODEV; -} - -static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) +static inline struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { + return ERR_PTR(-ENODEV); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 2577b12..f193b77 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -void cleanup_fault_attr_dentries(struct fault_attr *attr) -{ - debugfs_remove_recursive(attr->dir); -} - -int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - dir = debugfs_create_dir(name, NULL); + dir = debugfs_create_dir(name, parent); if (!dir) - return -ENOMEM; - - attr->dir = dir; + return ERR_PTR(-ENOMEM); if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) goto fail; @@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return 0; + return dir; fail: - debugfs_remove_recursive(attr->dir); + debugfs_remove_recursive(dir); - return 
-ENOMEM; + return ERR_PTR(-ENOMEM); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/mm/failslab.c b/mm/failslab.c index 1ce58c2..0dd7b8f 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init failslab_debugfs_init(void) { + struct dentry *dir; mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; - int err; - err = init_fault_attr_dentries(&failslab.attr, "failslab"); - if (err) - return err; + dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); - if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, + if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &failslab.ignore_gfp_wait)) goto fail; - if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, + if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) goto fail; return 0; fail: - cleanup_fault_attr_dentries(&failslab.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1dbcf88..6e8ecb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - int err; - err = init_fault_attr_dentries(&fail_page_alloc.attr, - "fail_page_alloc"); - if (err) - return err; - - dir = fail_page_alloc.attr.dir; + dir = fault_create_debugfs_attr("fail_page_alloc", NULL, + &fail_page_alloc.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &fail_page_alloc.ignore_gfp_wait)) @@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void) return 0; fail: - cleanup_fault_attr_dentries(&fail_page_alloc.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } -- cgit v1.1 From 12b3e038e5bb4860c17d001e92a6fa9964c0a7b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Wed, 3 Aug 2011 16:21:02 -0700 Subject: rtc-omap: fix initialization of control register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the comment explains, the intention of the code is to clear the OMAP_RTC_CTRL_MODE_12_24 bit, but instead it only clears the OMAP_RTC_CTRL_SPLIT and OMAP_RTC_CTRL_AUTO_COMP bits, which should be kept. OMAP_RTC_CTRL_DISABLE, OMAP_RTC_CTRL_SET_32_COUNTER, OMAP_RTC_CTRL_TEST, and OMAP_RTC_CTRL_ROUND_30S are also better off being cleared. Signed-off-by: Daniel Glöckner Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index bcae8dd..7789002 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev) pr_info("%s: already running\n", pdev->name); /* force to 24 hour mode */ - new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); + new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); new_ctrl |= OMAP_RTC_CTRL_STOP; /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: -- cgit v1.1 From dfc428b656c4693a2334a8d9865b430beddb562a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Aug 2011 16:21:04 -0700 Subject: taskstats: add_del_listener() shouldn't use the wrong node 1. 
Commit 26c4caea9d69 "don't allow duplicate entries in listener mode" changed add_del_listener(REGISTER) so that "next_cpu:" can reuse the listener allocated for the previous cpu, this doesn't look exactly right even if minor. Change the code to kfree() in the already-registered case, this case is unlikely anyway so the extra kmalloc_node() shouldn't hurt but looke more correct and clean. 2. use the plain list_for_each_entry() instead of _safe() to scan listeners->list. 3. Remove the unneeded INIT_LIST_HEAD(&s->list), we are going to list_add(&s->list). Signed-off-by: Oleg Nesterov Reviewed-by: Vasiliy Kulikov Cc: Balbir Singh Reviewed-by: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d1db288..a09a549 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; - s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - if (!s) - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; + s->pid = pid; - INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry_safe(s2, tmp, &listeners->list, list) { + list_for_each_entry(s2, &listeners->list, list) { if (s2->pid == pid) - goto next_cpu; + goto exists; } list_add(&s->list, &listeners->list); s = NULL; -next_cpu: +exists: up_write(&listeners->sem); + kfree(s); /* nop if NULL */ } - kfree(s); return 0; } -- cgit v1.1 From a7295898a1d2e501427f557111c2b4bdfc90b1ed Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 3 Aug 2011 16:21:05 -0700 Subject: taskstats: add_del_listener() should ignore !valid listeners When send_cpu_listeners() finds the orphaned listener it marks it as !valid and drops listeners->sem. Before it takes this sem for writing, s->pid can be reused and add_del_listener() can wrongly try to re-use this entry. Change add_del_listener() to check ->valid = T. Signed-off-by: Oleg Nesterov Reviewed-by: Vasiliy Kulikov Acked-by: Balbir Singh Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/taskstats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index a09a549..e19ce14 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -304,7 +304,7 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry(s2, &listeners->list, list) { - if (s2->pid == pid) + if (s2->pid == pid && s2->valid) goto exists; } list_add(&s->list, &listeners->list); -- cgit v1.1 From 88eca0207cf1574328c3ce8c3be537a9317261bb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 3 Aug 2011 16:21:06 -0700 Subject: ida: simplified functions for id allocation The current hyper-optimized functions are overkill if you simply want to allocate an id for a device. Create versions which use an internal lock. In followup patches, numerous drivers are converted to use this interface. Thanks to Tejun for feedback. 
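For illustration, a minimal consumer-side sketch of the new helpers (the ida instance and function names below are hypothetical, not taken from any in-tree driver):

	#include <linux/idr.h>
	#include <linux/gfp.h>

	static DEFINE_IDA(example_ida);		/* hypothetical ida, for illustration only */

	static int example_get_id(void)
	{
		int id;

		/* any id >= 0; locking and the -EAGAIN retry are handled internally */
		id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
		if (id < 0)
			return id;		/* -ENOMEM or -ENOSPC */

		/* ... use id, e.g. as a device instance number ... */

		ida_simple_remove(&example_ida, id);	/* release it when done */
		return 0;
	}
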
Signed-off-by: Rusty Russell Acked-by: Tejun Heo Acked-by: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 4 ++++ lib/idr.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index 13a801f..255491c 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask); +void ida_simple_remove(struct ida *ida, unsigned int id); + void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/lib/idr.c b/lib/idr.c index e15502e..db040ce 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -34,8 +34,10 @@ #include #include #include +#include static struct kmem_cache *idr_layer_cache; +static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida) EXPORT_SYMBOL(ida_destroy); /** + * ida_simple_get - get a new id. + * @ida: the (initialized) ida. + * @start: the minimum id (inclusive, < 0x8000000) + * @end: the maximum id (exclusive, < 0x8000000 or 0) + * @gfp_mask: memory allocation flags + * + * Allocates an id in the range start <= id < end, or returns -ENOSPC. + * On memory allocation failure, returns -ENOMEM. + * + * Use ida_simple_remove() to get rid of an id. + */ +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask) +{ + int ret, id; + unsigned int max; + + BUG_ON((int)start < 0); + BUG_ON((int)end < 0); + + if (end == 0) + max = 0x80000000; + else { + BUG_ON(end < start); + max = end - 1; + } + +again: + if (!ida_pre_get(ida, gfp_mask)) + return -ENOMEM; + + spin_lock(&simple_ida_lock); + ret = ida_get_new_above(ida, start, &id); + if (!ret) { + if (id > max) { + ida_remove(ida, id); + ret = -ENOSPC; + } else { + ret = id; + } + } + spin_unlock(&simple_ida_lock); + + if (unlikely(ret == -EAGAIN)) + goto again; + + return ret; +} +EXPORT_SYMBOL(ida_simple_get); + +/** + * ida_simple_remove - remove an allocated id. + * @ida: the (initialized) ida. + * @id: the id returned by ida_simple_get. + */ +void ida_simple_remove(struct ida *ida, unsigned int id) +{ + BUG_ON((int)id < 0); + spin_lock(&simple_ida_lock); + ida_remove(ida, id); + spin_unlock(&simple_ida_lock); +} +EXPORT_SYMBOL(ida_simple_remove); + +/** * ida_init - initialize ida handle * @ida: ida handle * -- cgit v1.1 From 2af14162656b81bea9e03e76d7c5f1787cc86ea6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 3 Aug 2011 16:21:07 -0700 Subject: fs/dcache.c: fix new kernel-doc warning Fix new kernel-doc warning in fs/dcache.c: Warning(fs/dcache.c:797): No description found for parameter 'sb' Signed-off-by: Randy Dunlap Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dcache.c b/fs/dcache.c index 2347cdb..c83cae1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -795,6 +795,7 @@ relock: /** * prune_dcache_sb - shrink the dcache + * @sb: superblock * @nr_to_scan: number of entries to try to free * * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. 
This is -- cgit v1.1 From f2d34fd9435c7e60cb5189d036efe9abfc911862 Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Wed, 3 Aug 2011 16:21:08 -0700 Subject: Documentation: add pointer to name_to_dev_t for root= values Update kernel-parameters.txt to point users to the authoritative comment for name_to_dev_t. In addition, updates other places where some name_to_dev_t behavior was discussed. All other references to root= appear to be for explicit sample usage or just side comments when discussing other kernel parameters. Signed-off-by: Will Drewry Cc: Kay Sievers Cc: Randy Dunlap Cc: Namhyung Kim Cc: Trond Myklebust Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/frv/booting.txt | 13 ++++++++++--- Documentation/kernel-parameters.txt | 1 + Documentation/m68k/kernel-options.txt | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt index ace200b..37c4d84 100644 --- a/Documentation/frv/booting.txt +++ b/Documentation/frv/booting.txt @@ -106,13 +106,20 @@ separated by spaces: To use the first on-chip serial port at baud rate 115200, no parity, 8 bits, and no flow control. - (*) root=/dev/ + (*) root= - This specifies the device upon which the root filesystem resides. For - example: + This specifies the device upon which the root filesystem resides. It + may be specified by major and minor number, device path, or even + partition uuid, if supported. For example: /dev/nfs NFS root filesystem /dev/mtdblock3 Fourth RedBoot partition on the System Flash + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1 + first partition after the partition with the given UUID + 253:0 Device with major 253 and minor 0 + + Authoritative information can be found in + "Documentation/kernel-parameters.txt". (*) rw diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d57f00d..865e39f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2245,6 +2245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ro [KNL] Mount root device read-only on boot root= [KNL] Root filesystem + See name_to_dev_t comment in init/do_mounts.c. rootdelay= [KNL] Delay (in seconds) to pause before attempting to mount the root filesystem diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt index c93bed6..97d45f2 100644 --- a/Documentation/m68k/kernel-options.txt +++ b/Documentation/m68k/kernel-options.txt @@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for the first of these. You can find out all valid major numbers by looking into include/linux/major.h. +In addition to major and minor numbers, if the device containing your +root partition uses a partition table format with unique partition +identifiers, then you may use them. For instance, +"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also +possible to reference another partition on the same device using a +known partition UUID as the starting point. For example, +if partition 5 of the device has the UUID of +00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as +follows: + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2 + +Authoritative information can be found in +"Documentation/kernel-parameters.txt". 
+ 2.2) ro, rw ----------- -- cgit v1.1 From f7b9fcbbc3b8593ff7dc587f90c2fe90a2fd1e6f Mon Sep 17 00:00:00 2001 From: Sergiu Iordache Date: Wed, 3 Aug 2011 16:21:09 -0700 Subject: ramoops: update module parameters Update the module parameters when platform data is used. This means that they can be read from /sys/module/ramoops/parameters in order to parse the memory area. Signed-off-by: Sergiu Iordache Cc: Marco Stornelli Cc: Seiji Aguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ramoops.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index fca0c51..810aff9 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; + /* + * Update the module parameter variables as well so they are visible + * through /sys/module/ramoops/parameters/ + */ + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + record_size = pdata->record_size; + dump_oops = pdata->dump_oops; if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { pr_err("request mem region failed\n"); -- cgit v1.1 From 3dab1bce8e840606e76f1bad5a009328e2aa35c7 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 Aug 2011 16:21:09 -0700 Subject: mm: page_alloc: increase __GFP_BITS_SHIFT to include __GFP_OTHER_NODE __GFP_OTHER_NODE is used for NUMA allocations on behalf of other nodes. It's supposed to be passed through from the page allocator to zone_statistics(), but it never gets there as gfp_allowed_mask is not wide enough and masks out the flag early in the allocation path. The result is an accounting glitch where successful NUMA allocations by-agent are not properly attributed as local. Increase __GFP_BITS_SHIFT so that it includes __GFP_OTHER_NODE. Signed-off-by: Johannes Weiner Acked-by: Andi Kleen Reviewed-by: Minchan Kim Acked-by: Mel Gorman Reviewed-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cb40892..3a76faf 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -92,7 +92,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ -- cgit v1.1 From 1560ffe62a9d53a51faeec7417becfba4f2a0d18 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 3 Aug 2011 16:21:11 -0700 Subject: tpm_tis: fix build when ACPI is not enabled Fix tpm_tis.c build when CONFIG_ACPI is not enabled by providing a stub function. Fixes many build errors/warnings: drivers/char/tpm/tpm_tis.c:89: error: dereferencing pointer to incomplete type drivers/char/tpm/tpm_tis.c:89: warning: type defaults to 'int' in declaration of 'type name' drivers/char/tpm/tpm_tis.c:89: error: request for member 'list' in something not a structure or union ... 
Signed-off-by: Randy Dunlap Cc: Leendert van Doorn Cc: James Morris Cc: Ingo Molnar Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm_tis.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 7fc2f10..3f4051a 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -80,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#ifdef CONFIG_PNP +#if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev) return 0; } +#else +static inline int is_itpm(struct pnp_dev *dev) +{ + return 0; +} #endif static int check_locality(struct tpm_chip *chip, int l) -- cgit v1.1 From d4969213f9e75ec1bfa6ea65c279c64cab7d1bd6 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 3 Aug 2011 16:21:12 -0700 Subject: cris: fix a build error in kernel/fork.c Fix this error: kernel/fork.c:267: error: implicit declaration of function 'alloc_thread_info_node' This is due to renaming alloc_thread_info() to alloc_thread_info_node(). [akpm@linux-foundation.org: coding-style fixes] Reported-by: Geert Uytterhoeven Signed-off-by: WANG Cong Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/include/asm/thread_info.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 29b74a1..332f19c 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -11,8 +11,6 @@ #ifdef __KERNEL__ -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR - #ifndef __ASSEMBLY__ #include #include @@ -67,8 +65,10 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR /* thread information allocation */ -#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) +#define alloc_thread_info_node(tsk, node) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1)) #define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* !__ASSEMBLY__ */ -- cgit v1.1 From 4b851d88192c22cf77418a0b4c45b5c789276837 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 3 Aug 2011 16:21:14 -0700 Subject: cris: fix a build error in sync_serial_open() Fix: arch/cris/arch-v10/drivers/sync_serial.c:628: error: 'ret' undeclared (first use in this function) 'ret' should be 'err'. 
Reported-by: Geert Uytterhoeven Signed-off-by: WANG Cong Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/drivers/sync_serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 85026537..d2ad9e7 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file) *R_IRQ_MASK1_SET = 1 << port->data_avail_bit; DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev)); } - ret = 0; + err = 0; out: mutex_unlock(&sync_serial_mutex); - return ret; + return err; } static int sync_serial_release(struct inode *inode, struct file *file) -- cgit v1.1 From b4bc281266e84e9a432b588ebdcef5fb94dc8ecb Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 3 Aug 2011 16:21:14 -0700 Subject: cris: fix the prototype of sync_serial_ioctl() Fix: arch/cris/arch-v10/drivers/sync_serial.c:961: error: conflicting types for 'sync_serial_ioctl' Reported-by: Geert Uytterhoeven Signed-off-by: WANG Cong Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/drivers/sync_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index d2ad9e7..466af40 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file); static int sync_serial_release(struct inode *inode, struct file *file); static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); -static int sync_serial_ioctl(struct file *file, +static long sync_serial_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static ssize_t sync_serial_write(struct file *file, const char *buf, size_t count, loff_t *ppos); -- cgit v1.1 From 1646ec9db75e151b0479dbfaf972f741d0476ec7 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 3 Aug 2011 16:21:15 -0700 Subject: cris: add missing declaration of kgdb_init() and breakpoint() Fix: arch/cris/arch-v10/kernel/irq.c:239: error: implicit declaration of function 'kgdb_init' arch/cris/arch-v10/kernel/irq.c:240: error: implicit declaration of function 'breakpoint' Declare these two functions. Reported-by: Geert Uytterhoeven Signed-off-by: WANG Cong Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/kernel/irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c index 907cfb5..ba0e596 100644 --- a/arch/cris/arch-v10/kernel/irq.c +++ b/arch/cris/arch-v10/kernel/irq.c @@ -20,6 +20,9 @@ #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr)); #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr)); +extern void kgdb_init(void); +extern void breakpoint(void); + /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is * global just so that the kernel gdb can use it. 
*/ -- cgit v1.1 From 4c4dd903e72507c853b8c2b04b22e0d0c721fd93 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Aug 2011 16:21:16 -0700 Subject: drivers/video/backlight/aat2870_bl.c: fix error checking for backlight_device_register backlight_device_register() returns ERR_PTR() on error. Signed-off-by: Axel Lin Cc: Richard Purdie Cc: Jin Park Cc: Samuel Ortiz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/aat2870_bl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c index 4952a61..f13a3f7 100644 --- a/drivers/video/backlight/aat2870_bl.c +++ b/drivers/video/backlight/aat2870_bl.c @@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev) props.type = BACKLIGHT_RAW; bd = backlight_device_register("aat2870-backlight", &pdev->dev, aat2870_bl, &aat2870_bl_ops, &props); - if (!bd) { + if (IS_ERR(bd)) { dev_err(&pdev->dev, "Failed allocate memory for backlight device\n"); - ret = -ENOMEM; + ret = PTR_ERR(bd); goto out_kfree; } -- cgit v1.1 From 5d6f921b42749d1a70441685b7a4f2801e12ebfb Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Aug 2011 16:21:17 -0700 Subject: drivers/video/backlight/aat2870_bl.c: fix setting max_current - Current implementation tests wrong value for setting aat2870_bl->max_current. - In the current implementation, we cannot differentiate between 2 cases: a) if pdata->max_current is not set , or b) pdata->max_current is set to AAT2870_CURRENT_0_45 (which is also 0). Fix it by setting AAT2870_CURRENT_0_45 to be 1 and adjust the equation in aat2870_brightness() accordingly. Signed-off-by: Axel Lin Cc: Richard Purdie Cc: Samuel Ortiz Tested-by: Jin Park Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/aat2870_bl.c | 4 ++-- include/linux/mfd/aat2870.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c index f13a3f7..331f1ef 100644 --- a/drivers/video/backlight/aat2870_bl.c +++ b/drivers/video/backlight/aat2870_bl.c @@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl, struct backlight_device *bd = aat2870_bl->bd; int val; - val = brightness * aat2870_bl->max_current; + val = brightness * (aat2870_bl->max_current - 1); val /= bd->props.max_brightness; return val; @@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev) else aat2870_bl->channels = AAT2870_BL_CH_ALL; - if (pdata->max_brightness > 0) + if (pdata->max_current > 0) aat2870_bl->max_current = pdata->max_current; else aat2870_bl->max_current = AAT2870_CURRENT_27_9; diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h index 89212df..f7316c2 100644 --- a/include/linux/mfd/aat2870.h +++ b/include/linux/mfd/aat2870.h @@ -89,7 +89,7 @@ enum aat2870_id { /* Backlight current magnitude (mA) */ enum aat2870_current { - AAT2870_CURRENT_0_45, + AAT2870_CURRENT_0_45 = 1, AAT2870_CURRENT_0_90, AAT2870_CURRENT_1_80, AAT2870_CURRENT_2_70, -- cgit v1.1 From 70d327198a434edb95b3d858bc8010b8add28e3e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Aug 2011 16:21:17 -0700 Subject: drivers/video/backlight/aat2870_bl.c: make it buildable as a module i386 allmodconfig: drivers/built-in.o: In function `aat2870_bl_remove': aat2870_bl.c:(.text+0x414f9): undefined reference to `backlight_device_unregister' drivers/built-in.o: In function `aat2870_bl_probe': 
aat2870_bl.c:(.text+0x418fc): undefined reference to `backlight_device_register' aat2870_bl.c:(.text+0x41a31): undefined reference to `backlight_device_unregiste Cc: Jin Park Cc: Samuel Ortiz Cc: Axel Lin Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 69407e7..278aeaa 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633 enable its driver. config BACKLIGHT_AAT2870 - bool "AnalogicTech AAT2870 Backlight" + tristate "AnalogicTech AAT2870 Backlight" depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE help If you have a AnalogicTech AAT2870 say Y to enable the -- cgit v1.1 From 6328650bb4d854a7dc1498d1c0048b838b0d340c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:18 -0700 Subject: radix_tree: exceptional entries and indices A patchset to extend tmpfs to MAX_LFS_FILESIZE by abandoning its peculiar swap vector, instead keeping a file's swap entries in the same radix tree as its struct page pointers: thus saving memory, and simplifying its code and locking. This patch: The radix_tree is used by several subsystems for different purposes. A major use is to store the struct page pointers of a file's pagecache for memory management. But what if mm wanted to store something other than page pointers there too? The low bit of a radix_tree entry is already used to denote an indirect pointer, for internal use, and the unlikely radix_tree_deref_retry() case. Define the next bit as denoting an exceptional entry, and supply inline functions radix_tree_exception() to return non-0 in either unlikely case, and radix_tree_exceptional_entry() to return non-0 in the second case. If a subsystem already uses radix_tree with that bit set, no problem: it does not affect internal workings at all, but is defined for the convenience of those storing well-aligned pointers in the radix_tree. The radix_tree_gang_lookups have an implicit assumption that the caller can deduce the offset of each entry returned e.g. by the page->index of a struct page. But that may not be feasible for some kinds of item to be stored there. radix_tree_gang_lookup_slot() allow for an optional indices argument, output array in which to return those offsets. The same could be added to other radix_tree_gang_lookups, but for now keep it to the only one for which we need it. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 36 +++++++++++++++++++++++++++++++++--- lib/radix-tree.c | 29 +++++++++++++++++++---------- mm/filemap.c | 4 ++-- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 23241c2..b7edf82 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -39,7 +39,15 @@ * when it is shrunk, before we rcu free the node. See shrink code for * details. */ -#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_INDIRECT_PTR 1 +/* + * A common use of the radix tree is to store pointers to struct pages; + * but shmem/tmpfs needs also to store swap entries in the same tree: + * those are marked as exceptional entries to distinguish them. + * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. 
+ */ +#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 +#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 #define radix_tree_indirect_to_ptr(ptr) \ radix_tree_indirect_to_ptr((void __force *)(ptr)) @@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg) } /** + * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. + */ +static inline int radix_tree_exceptional_entry(void *arg) +{ + /* Not unlikely because radix_tree_exception often tested first */ + return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; +} + +/** + * radix_tree_exception - radix_tree_deref_slot returned either exception? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. + */ +static inline int radix_tree_exception(void *arg) +{ + return unlikely((unsigned long)arg & + (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); +} + +/** * radix_tree_replace_slot - replace item in a slot * @pslot: pointer to slot, returned by radix_tree_lookup_slot * @item: new item to store in the slot. @@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7ea2e03..348eaef 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, + unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; if (slot->slots[i]) { - results[nr_found++] = &(slot->slots[i]); - if (nr_found == max_items) + results[nr_found] = &(slot->slots[i]); + if (indices) + indices[nr_found] = index; + if (++nr_found == max_items) { + index++; goto out; + } } + index++; } out: *next_index = index; @@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, NULL, + cur_index, max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the lookup are placed + * @indices: where their indices should be placed (but 
usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. */ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; + if (indices) + indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, + indices ? indices + ret : NULL, + cur_index, max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; diff --git a/mm/filemap.c b/mm/filemap.c index 867d402..b83aebf 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -840,7 +840,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, start, nr_pages); + (void ***)pages, NULL, start, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -903,7 +903,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, index, nr_pages); + (void ***)pages, NULL, index, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; -- cgit v1.1 From a2c16d6cb0e478812829ca84aeabd02e36af35eb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:19 -0700 Subject: mm: let swap use exceptional entries If swap entries are to be stored along with struct page pointers in a radix tree, they need to be distinguished as exceptional entries. Most of the handling of swap entries in radix tree will be contained in shmem.c, but a few functions in filemap.c's common code need to check for their appearance: find_get_page(), find_lock_page(), find_get_pages() and find_get_pages_contig(). So as not to slow their fast paths, tuck those checks inside the existing checks for unlikely radix_tree_deref_slot(); except for find_lock_page(), where it is an added test. And make it a BUG in find_get_pages_tag(), which is not applied to tmpfs files. A part of the reason for eliminating shmem_readpage() earlier, was to minimize the places where common code would need to allow for swap entries. The swp_entry_t known to swapfile.c must be massaged into a slightly different form when stored in the radix tree, just as it gets massaged into a pte_t when stored in page tables. In an i386 kernel this limits its information (type and page offset) to 30 bits: given 32 "types" of swapfile and 4kB pagesize, that's a maximum swapfile size of 128GB. Which is less than the 512GB we previously allowed with X86_PAE (where the swap entry can occupy the entire upper 32 bits of a pte_t), but not a new limitation on 32-bit without PAE; and there's not a new limitation on 64-bit (where swap filesize is already limited to 16TB by a 32-bit page offset). 
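As a rough cross-check of the 128GB figure (an illustrative calculation, not part of the patch): the two exceptional-entry bits leave 30 bits in a 32-bit slot, 32 swap "types" take 5 of them, and the remaining 25 bits of page offset at 4kB per page give 2^25 * 4kB = 128GB.

#include <stdio.h>

int main(void)
{
        unsigned int slot_bits   = 32;                       /* 32-bit radix slot        */
        unsigned int entry_bits  = slot_bits - 2;            /* exceptional shift eats 2 */
        unsigned int type_bits   = 5;                        /* 32 swap "types"          */
        unsigned int offset_bits = entry_bits - type_bits;   /* 25 bits of page offset   */
        unsigned long long bytes = (1ULL << offset_bits) * 4096;

        printf("max swapfile: %lluGB\n", bytes >> 30);       /* prints 128GB */
        return 0;
}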
Thirty areas of 128GB is probably still enough swap for a 64GB 32-bit machine. Provide swp_to_radix_entry() and radix_to_swp_entry() conversions, and enforce filesize limit in read_swap_header(), just as for ptes. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 23 +++++++++++++++++++++++ mm/filemap.c | 49 +++++++++++++++++++++++++++++++------------------ mm/swapfile.c | 20 ++++++++++++-------- 3 files changed, 66 insertions(+), 26 deletions(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cd42e30..2189d3f 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -1,3 +1,8 @@ +#ifndef _LINUX_SWAPOPS_H +#define _LINUX_SWAPOPS_H + +#include + /* * swapcache pages are stored in the swapper_space radix tree. We want to * get good packing density in that tree, so the index should be dense in @@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) return __swp_entry_to_pte(arch_entry); } +static inline swp_entry_t radix_to_swp_entry(void *arg) +{ + swp_entry_t entry; + + entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + return entry; +} + +static inline void *swp_to_radix_entry(swp_entry_t entry) +{ + unsigned long value; + + value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; + return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); +} + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { @@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry) return 0; } #endif + +#endif /* _LINUX_SWAPOPS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index b83aebf..76bfb64 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -714,9 +714,12 @@ repeat: page = radix_tree_deref_slot(pagep); if (unlikely(!page)) goto out; - if (radix_tree_deref_retry(page)) + if (radix_tree_exception(page)) { + if (radix_tree_exceptional_entry(page)) + goto out; + /* radix_tree_deref_retry(page) */ goto repeat; - + } if (!page_cache_get_speculative(page)) goto repeat; @@ -753,7 +756,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) repeat: page = find_get_page(mapping, offset); - if (page) { + if (page && !radix_tree_exception(page)) { lock_page(page); /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { @@ -849,11 +852,14 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) { + if (radix_tree_exception(page)) { + if (radix_tree_exceptional_entry(page)) + continue; + /* + * radix_tree_deref_retry(page): + * can only trigger when entry at index 0 moves out of + * or back to root: none yet gotten, safe to restart. + */ WARN_ON(start | i); goto restart; } @@ -912,12 +918,16 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) + if (radix_tree_exception(page)) { + if (radix_tree_exceptional_entry(page)) + break; + /* + * radix_tree_deref_retry(page): + * can only trigger when entry at index 0 moves out of + * or back to root: none yet gotten, safe to restart. 
+ */ goto restart; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -977,12 +987,15 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) + if (radix_tree_exception(page)) { + BUG_ON(radix_tree_exceptional_entry(page)); + /* + * radix_tree_deref_retry(page): + * can only trigger when entry at index 0 moves out of + * or back to root: none yet gotten, safe to restart. + */ goto restart; + } if (!page_cache_get_speculative(page)) goto repeat; diff --git a/mm/swapfile.c b/mm/swapfile.c index 1b8c339..17bc224 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are two limiting factors: 1) the number of - * bits for the swap offset in the swp_entry_t type and - * 2) the number of bits in the a swap pte as defined by - * the different architectures. In order to find the - * largest possible bit mask a swap entry with swap type 0 + * device. There are three limiting factors: 1) the number + * of bits for the swap offset in the swp_entry_t type, and + * 2) the number of bits in the swap pte as defined by the + * the different architectures, and 3) the number of free bits + * in an exceptional radix_tree entry. In order to find the + * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again and finally the swap + * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. + * swap pte. Then the same is done for a radix_tree entry. */ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; + swp_entry_to_pte(swp_entry(0, ~0UL)))); + maxpages = swp_offset(radix_to_swp_entry( + swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; + if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ -- cgit v1.1 From 285b2c4fdd69ea73b4762785d8c6be83b6c074a6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:20 -0700 Subject: tmpfs: demolish old swap vector support The maximum size of a shmem/tmpfs file has been limited by the maximum size of its triple-indirect swap vector. With 4kB page size, maximum filesize was just over 2TB on a 32-bit kernel, but sadly one eighth of that on a 64-bit kernel. (With 8kB page size, maximum filesize was just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, MAX_LFS_FILESIZE being then more restrictive than swap vector layout.) It's a shame that tmpfs should be more restrictive than ramfs, and this limitation has now been noticed. Add another level to the swap vector? No, it became obscure and hard to maintain, once I complicated it to make use of highmem pages nine years ago: better choose another way. Surely, if 2.4 had had the radix tree pagecache introduced in 2.5, then tmpfs would never have invented its own peculiar radix tree: we would have fitted swap entries into the common radix tree instead, in much the same way as we fit swap entries into page tables. And why should each file have a separate radix tree for its pages and for its swap entries? 
The swap entries are required precisely where and when the pages are not. We want to put them together in a single radix tree: which can then avoid much of the locking which was needed to prevent them from being exchanged underneath us. This also avoids the waste of memory devoted to swap vectors, first in the shmem_inode itself, then at least two more pages once a file grew beyond 16 data pages (pages accounted by df and du, but not by memcg). Allocated upfront, to avoid allocation when under swapping pressure, but pure waste when CONFIG_SWAP is not set - I have never spattered around the ifdefs to prevent that, preferring this move to sharing the common radix tree instead. There are three downsides to sharing the radix tree. One, that it binds tmpfs more tightly to the rest of mm, either requiring knowledge of swap entries in radix tree there, or duplication of its code here in shmem.c. I believe that the simplications and memory savings (and probable higher performance, not yet measured) justify that. Two, that on HIGHMEM systems with SWAP enabled, it's the lowmem radix nodes that cannot be freed under memory pressure - whereas before it was the less precious highmem swap vector pages that could not be freed. I'm hoping that 64-bit has now been accessible for long enough, that the highmem argument has grown much less persuasive. Three, that swapoff is slower than it used to be on tmpfs files, since it's using a simple generic mechanism not tailored to it: I find this noticeable, and shall want to improve, but maybe nobody else will notice. So... now remove most of the old swap vector code from shmem.c. But, for the moment, keep the simple i_direct vector of 16 pages, with simple accessors shmem_put_swap() and shmem_get_swap(), as a toy implementation to help mark where swap needs to be handled in subsequent patches. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 2 - mm/shmem.c | 782 +++++------------------------------------------ 2 files changed, 84 insertions(+), 700 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index aa08fa8..80b6952 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -17,9 +17,7 @@ struct shmem_inode_info { unsigned long flags; unsigned long alloced; /* data pages alloced to file */ unsigned long swapped; /* subtotal assigned to swap */ - unsigned long next_index; /* highest alloced index + 1 */ struct shared_policy policy; /* NUMA memory alloc policy */ - struct page *i_indirect; /* top indirect blocks page */ union { swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; diff --git a/mm/shmem.c b/mm/shmem.c index 5cc21f8..5574b00 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,37 +66,9 @@ static struct vfsmount *shm_mnt; #include #include -/* - * The maximum size of a shmem/tmpfs file is limited by the maximum size of - * its triple-indirect swap vector - see illustration at shmem_swp_entry(). - * - * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, - * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum - * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, - * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. - * - * We use / and * instead of shifts in the definitions below, so that the swap - * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. 
- */ -#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) - -#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) - -#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) -#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) - #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) -/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ -#define SHMEM_PAGEIN VM_READ -#define SHMEM_TRUNCATE VM_WRITE - -/* Definition to limit shmem_truncate's steps between cond_rescheds */ -#define LATENCY_LIMIT 64 - /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 @@ -107,7 +79,7 @@ struct shmem_xattr { char value[0]; }; -/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ +/* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ @@ -137,56 +109,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index, mapping_gfp_mask(inode->i_mapping), fault_type); } -static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) -{ - /* - * The above definition of ENTRIES_PER_PAGE, and the use of - * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. - * - * Mobility flags are masked out as swap vectors cannot move - */ - return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, - PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static inline void shmem_dir_free(struct page *page) -{ - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static struct page **shmem_dir_map(struct page *page) -{ - return (struct page **)kmap_atomic(page, KM_USER0); -} - -static inline void shmem_dir_unmap(struct page **dir) -{ - kunmap_atomic(dir, KM_USER0); -} - -static swp_entry_t *shmem_swp_map(struct page *page) -{ - return (swp_entry_t *)kmap_atomic(page, KM_USER1); -} - -static inline void shmem_swp_balance_unmap(void) -{ - /* - * When passing a pointer to an i_direct entry, to code which - * also handles indirect entries and so will shmem_swp_unmap, - * we must arrange for the preempt count to remain in balance. - * What kmap_atomic of a lowmem page does depends on config - * and architecture, so pretend to kmap_atomic some lowmem page. - */ - (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); -} - -static inline void shmem_swp_unmap(swp_entry_t *entry) -{ - kunmap_atomic(entry, KM_USER1); -} - static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { return sb->s_fs_info; @@ -303,468 +225,56 @@ static void shmem_recalc_inode(struct inode *inode) } } -/** - * shmem_swp_entry - find the swap vector position in the info structure - * @info: info structure for the inode - * @index: index of the page to find - * @page: optional page to add to the structure. Has to be preset to - * all zeros - * - * If there is no space allocated yet it will return NULL when - * page is NULL, else it will use the page for the needed block, - * setting it to NULL on return to indicate that it has been used. 
- * - * The swap vector is organized the following way: - * - * There are SHMEM_NR_DIRECT entries directly stored in the - * shmem_inode_info structure. So small files do not need an addional - * allocation. - * - * For pages with index > SHMEM_NR_DIRECT there is the pointer - * i_indirect which points to a page which holds in the first half - * doubly indirect blocks, in the second half triple indirect blocks: - * - * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the - * following layout (for SHMEM_NR_DIRECT == 16): - * - * i_indirect -> dir --> 16-19 - * | +-> 20-23 - * | - * +-->dir2 --> 24-27 - * | +-> 28-31 - * | +-> 32-35 - * | +-> 36-39 - * | - * +-->dir3 --> 40-43 - * +-> 44-47 - * +-> 48-51 - * +-> 52-55 - */ -static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) -{ - unsigned long offset; - struct page **dir; - struct page *subdir; - - if (index < SHMEM_NR_DIRECT) { - shmem_swp_balance_unmap(); - return info->i_direct+index; - } - if (!info->i_indirect) { - if (page) { - info->i_indirect = *page; - *page = NULL; - } - return NULL; /* need another page */ - } - - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index /= ENTRIES_PER_PAGE; - dir = shmem_dir_map(info->i_indirect); - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; - dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; - index %= ENTRIES_PER_PAGE; - subdir = *dir; - if (!subdir) { - if (page) { - *dir = *page; - *page = NULL; - } - shmem_dir_unmap(dir); - return NULL; /* need another page */ - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - - dir += index; - subdir = *dir; - if (!subdir) { - if (!page || !(subdir = *page)) { - shmem_dir_unmap(dir); - return NULL; /* need a page */ - } - *dir = subdir; - *page = NULL; - } - shmem_dir_unmap(dir); - return shmem_swp_map(subdir) + offset; -} - -static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) +static void shmem_put_swap(struct shmem_inode_info *info, pgoff_t index, + swp_entry_t swap) { - long incdec = value? 1: -1; - - entry->val = value; - info->swapped += incdec; - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { - struct page *page = kmap_atomic_to_page(entry); - set_page_private(page, page_private(page) + incdec); - } -} - -/** - * shmem_swp_alloc - get the position of the swap entry for the page. - * @info: info structure for the inode - * @index: index of the page to find - * @sgp: check and recheck i_size? skip allocation? - * @gfp: gfp mask to use for any page allocation - * - * If the entry does not exist, allocate it. - */ -static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, - unsigned long index, enum sgp_type sgp, gfp_t gfp) -{ - struct inode *inode = &info->vfs_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - struct page *page = NULL; - swp_entry_t *entry; - - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return ERR_PTR(-EINVAL); - - while (!(entry = shmem_swp_entry(info, index, &page))) { - if (sgp == SGP_READ) - return shmem_swp_map(ZERO_PAGE(0)); - /* - * Test used_blocks against 1 less max_blocks, since we have 1 data - * page (and perhaps indirect index pages) yet to allocate: - * a waste to allocate index if we cannot allocate data. 
- */ - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - 1) >= 0) - return ERR_PTR(-ENOSPC); - percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; - } - - spin_unlock(&info->lock); - page = shmem_dir_alloc(gfp); - spin_lock(&info->lock); - - if (!page) { - shmem_free_blocks(inode, 1); - return ERR_PTR(-ENOMEM); - } - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - entry = ERR_PTR(-EINVAL); - break; - } - if (info->next_index <= index) - info->next_index = index + 1; - } - if (page) { - /* another task gave its page, or truncated the file */ - shmem_free_blocks(inode, 1); - shmem_dir_free(page); - } - if (info->next_index <= index && !IS_ERR(entry)) - info->next_index = index + 1; - return entry; -} - -/** - * shmem_free_swp - free some swap entries in a directory - * @dir: pointer to the directory - * @edir: pointer after last entry of the directory - * @punch_lock: pointer to spinlock when needed for the holepunch case - */ -static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, - spinlock_t *punch_lock) -{ - spinlock_t *punch_unlock = NULL; - swp_entry_t *ptr; - int freed = 0; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val) { - if (unlikely(punch_lock)) { - punch_unlock = punch_lock; - punch_lock = NULL; - spin_lock(punch_unlock); - if (!ptr->val) - continue; - } - free_swap_and_cache(*ptr); - *ptr = (swp_entry_t){0}; - freed++; - } - } - if (punch_unlock) - spin_unlock(punch_unlock); - return freed; -} - -static int shmem_map_and_free_swp(struct page *subdir, int offset, - int limit, struct page ***dir, spinlock_t *punch_lock) -{ - swp_entry_t *ptr; - int freed = 0; - - ptr = shmem_swp_map(subdir); - for (; offset < limit; offset += LATENCY_LIMIT) { - int size = limit - offset; - if (size > LATENCY_LIMIT) - size = LATENCY_LIMIT; - freed += shmem_free_swp(ptr+offset, ptr+offset+size, - punch_lock); - if (need_resched()) { - shmem_swp_unmap(ptr); - if (*dir) { - shmem_dir_unmap(*dir); - *dir = NULL; - } - cond_resched(); - ptr = shmem_swp_map(subdir); - } - } - shmem_swp_unmap(ptr); - return freed; + if (index < SHMEM_NR_DIRECT) + info->i_direct[index] = swap; } -static void shmem_free_pages(struct list_head *next) +static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index) { - struct page *page; - int freed = 0; - - do { - page = container_of(next, struct page, lru); - next = next->next; - shmem_dir_free(page); - freed++; - if (freed >= LATENCY_LIMIT) { - cond_resched(); - freed = 0; - } - } while (next); + return (index < SHMEM_NR_DIRECT) ? 
+ info->i_direct[index] : (swp_entry_t){0}; } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { + struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - unsigned long diroff; - struct page **dir; - struct page *topdir; - struct page *middir; - struct page *subdir; - swp_entry_t *ptr; - LIST_HEAD(pages_to_free); - long nr_pages_to_free = 0; - long nr_swaps_freed = 0; - int offset; - int freed; - int punch_hole; - spinlock_t *needs_lock; - spinlock_t *punch_lock; - unsigned long upper_limit; + pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + pgoff_t end = (lend >> PAGE_CACHE_SHIFT); + pgoff_t index; + swp_entry_t swap; - truncate_inode_pages_range(inode->i_mapping, start, end); + truncate_inode_pages_range(mapping, lstart, lend); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (idx >= info->next_index) - return; + if (end > SHMEM_NR_DIRECT) + end = SHMEM_NR_DIRECT; spin_lock(&info->lock); - info->flags |= SHMEM_TRUNCATE; - if (likely(end == (loff_t) -1)) { - limit = info->next_index; - upper_limit = SHMEM_MAX_INDEX; - info->next_index = idx; - needs_lock = NULL; - punch_hole = 0; - } else { - if (end + 1 >= inode->i_size) { /* we may free a little more */ - limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - upper_limit = SHMEM_MAX_INDEX; - } else { - limit = (end + 1) >> PAGE_CACHE_SHIFT; - upper_limit = limit; - } - needs_lock = &info->lock; - punch_hole = 1; - } - - topdir = info->i_indirect; - if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { - info->i_indirect = NULL; - nr_pages_to_free++; - list_add(&topdir->lru, &pages_to_free); - } - spin_unlock(&info->lock); - - if (info->swapped && idx < SHMEM_NR_DIRECT) { - ptr = info->i_direct; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); - } - - /* - * If there are no indirect blocks or we are punching a hole - * below indirect blocks, nothing to be done. - */ - if (!topdir || limit <= SHMEM_NR_DIRECT) - goto done2; - - /* - * The truncation case has already dropped info->lock, and we're safe - * because i_size and next_index have already been lowered, preventing - * access beyond. But in the punch_hole case, we still need to take - * the lock when updating the swap directory, because there might be - * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or - * shmem_writepage. However, whenever we find we can remove a whole - * directory page (not at the misaligned start or end of the range), - * we first NULLify its pointer in the level above, and then have no - * need to take the lock when updating its contents: needs_lock and - * punch_lock (either pointing to info->lock or NULL) manage this. - */ - - upper_limit -= SHMEM_NR_DIRECT; - limit -= SHMEM_NR_DIRECT; - idx = (idx > SHMEM_NR_DIRECT)? 
(idx - SHMEM_NR_DIRECT): 0; - offset = idx % ENTRIES_PER_PAGE; - idx -= offset; - - dir = shmem_dir_map(topdir); - stage = ENTRIES_PER_PAGEPAGE/2; - if (idx < ENTRIES_PER_PAGEPAGE/2) { - middir = topdir; - diroff = idx/ENTRIES_PER_PAGE; - } else { - dir += ENTRIES_PER_PAGE/2; - dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; - while (stage <= idx) - stage += ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (*dir) { - diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % - ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; - if (!diroff && !offset && upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(middir); - } else { - diroff = 0; - offset = 0; - idx = stage; + for (index = start; index < end; index++) { + swap = shmem_get_swap(info, index); + if (swap.val) { + free_swap_and_cache(swap); + shmem_put_swap(info, index, (swp_entry_t){0}); + info->swapped--; } } - for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir); - dir = shmem_dir_map(topdir) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto done1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (punch_hole) - needs_lock = &info->lock; - if (upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); - } - shmem_dir_unmap(dir); - cond_resched(); - dir = shmem_dir_map(middir); - diroff = 0; - } - punch_lock = needs_lock; - subdir = dir[diroff]; - if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { - if (needs_lock) { - spin_lock(needs_lock); - dir[diroff] = NULL; - spin_unlock(needs_lock); - punch_lock = NULL; - } else - dir[diroff] = NULL; - nr_pages_to_free++; - list_add(&subdir->lru, &pages_to_free); - } - if (subdir && page_private(subdir) /* has swap entries */) { - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - freed = shmem_map_and_free_swp(subdir, - offset, size, &dir, punch_lock); - if (!dir) - dir = shmem_dir_map(middir); - nr_swaps_freed += freed; - if (offset || punch_lock) { - spin_lock(&info->lock); - set_page_private(subdir, - page_private(subdir) - freed); - spin_unlock(&info->lock); - } else - BUG_ON(page_private(subdir) != freed); - } - offset = 0; - } -done1: - shmem_dir_unmap(dir); -done2: - if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { + if (mapping->nrpages) { + spin_unlock(&info->lock); /* - * Call truncate_inode_pages again: racing shmem_unuse_inode - * may have swizzled a page in from swap since - * truncate_pagecache or generic_delete_inode did it, before we - * lowered next_index. Also, though shmem_getpage checks - * i_size before adding to cache, no recheck after: so fix the - * narrow window there too. + * A page may have meanwhile sneaked in from swap. 
*/ - truncate_inode_pages_range(inode->i_mapping, start, end); + truncate_inode_pages_range(mapping, lstart, lend); + spin_lock(&info->lock); } - spin_lock(&info->lock); - info->flags &= ~SHMEM_TRUNCATE; - info->swapped -= nr_swaps_freed; - if (nr_pages_to_free) - shmem_free_blocks(inode, nr_pages_to_free); shmem_recalc_inode(inode); spin_unlock(&info->lock); - /* - * Empty swap vector directory pages to be freed? - */ - if (!list_empty(&pages_to_free)) { - pages_to_free.prev->next = NULL; - shmem_free_pages(pages_to_free.next); - } + inode->i_ctime = inode->i_mtime = CURRENT_TIME; } EXPORT_SYMBOL_GPL(shmem_truncate_range); @@ -797,19 +307,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) if (page) unlock_page(page); } - /* - * Reset SHMEM_PAGEIN flag so that shmem_truncate can - * detect if any pages might have been added to cache - * after truncate_inode_pages. But we needn't bother - * if it's being fully truncated to zero-length: the - * nrpages check is efficient enough in that case. - */ - if (newsize) { - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - info->flags &= ~SHMEM_PAGEIN; - spin_unlock(&info->lock); - } } if (newsize != oldsize) { i_size_write(inode, newsize); @@ -859,106 +356,28 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } -static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) -{ - swp_entry_t *ptr; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val == entry.val) - return ptr - dir; - } - return -1; -} - static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) { - struct address_space *mapping; + struct address_space *mapping = info->vfs_inode.i_mapping; unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - struct page **dir; - struct page *subdir; - swp_entry_t *ptr; - int offset; int error; - idx = 0; - ptr = info->i_direct; - spin_lock(&info->lock); - if (!info->swapped) { - list_del_init(&info->swaplist); - goto lost2; - } - limit = info->next_index; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_swp_balance_unmap(); - goto found; - } - if (!info->i_indirect) - goto lost2; - - dir = shmem_dir_map(info->i_indirect); - stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; - - for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir-1); - if (cond_resched_lock(&info->lock)) { - /* check it has not been truncated */ - if (limit > info->next_index) { - limit = info->next_index; - if (idx >= limit) - goto lost2; - } - } - dir = shmem_dir_map(info->i_indirect) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto lost1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - subdir = *dir; - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - subdir = *dir; - if (subdir && page_private(subdir)) { - ptr = shmem_swp_map(subdir); - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - offset = shmem_find_swp(entry, ptr, ptr+size); - shmem_swp_unmap(ptr); - if (offset >= 0) { - shmem_dir_unmap(dir); - ptr = shmem_swp_map(subdir); - goto found; - } - } - } -lost1: - shmem_dir_unmap(dir-1); -lost2: - spin_unlock(&info->lock); + for (idx = 0; idx < SHMEM_NR_DIRECT; idx++) + if (shmem_get_swap(info, idx).val == 
entry.val) + goto found; return 0; found: - idx += offset; - ptr += offset; + spin_lock(&info->lock); + if (shmem_get_swap(info, idx).val != entry.val) { + spin_unlock(&info->lock); + return 0; + } /* * Move _head_ to start search for next from here. * But be careful: shmem_evict_inode checks list_empty without taking * mutex, and there's an instant in list_move_tail when info->swaplist - * would appear empty, if it were the only one on shmem_swaplist. We - * could avoid doing it if inode NULL; or use this minor optimization. + * would appear empty, if it were the only one on shmem_swaplist. */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); @@ -968,19 +387,17 @@ found: * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - mapping = info->vfs_inode.i_mapping; error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { delete_from_swap_cache(page); set_page_dirty(page); - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, ptr, 0); + shmem_put_swap(info, idx, (swp_entry_t){0}); + info->swapped--; swap_free(entry); error = 1; /* not an error, but entry was found */ } - shmem_swp_unmap(ptr); spin_unlock(&info->lock); return error; } @@ -1017,7 +434,14 @@ int shmem_unuse(swp_entry_t entry, struct page *page) mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(p, next, &shmem_swaplist) { info = list_entry(p, struct shmem_inode_info, swaplist); - found = shmem_unuse_inode(info, entry, page); + if (!info->swapped) { + spin_lock(&info->lock); + if (!info->swapped) + list_del_init(&info->swaplist); + spin_unlock(&info->lock); + } + if (info->swapped) + found = shmem_unuse_inode(info, entry, page); cond_resched(); if (found) break; @@ -1041,7 +465,7 @@ out: static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct shmem_inode_info *info; - swp_entry_t *entry, swap; + swp_entry_t swap, oswap; struct address_space *mapping; unsigned long index; struct inode *inode; @@ -1067,6 +491,15 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ goto redirty; } + + /* + * Just for this patch, we have a toy implementation, + * which can swap out only the first SHMEM_NR_DIRECT pages: + * for simple demonstration of where we need to think about swap. + */ + if (index >= SHMEM_NR_DIRECT) + goto redirty; + swap = get_swap_page(); if (!swap.val) goto redirty; @@ -1087,22 +520,19 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) spin_lock(&info->lock); mutex_unlock(&shmem_swaplist_mutex); - if (index >= info->next_index) { - BUG_ON(!(info->flags & SHMEM_TRUNCATE)); - goto unlock; - } - entry = shmem_swp_entry(info, index, NULL); - if (entry->val) { + oswap = shmem_get_swap(info, index); + if (oswap.val) { WARN_ON_ONCE(1); /* Still happens? Tell us about it! 
*/ - free_swap_and_cache(*entry); - shmem_swp_set(info, entry, 0); + free_swap_and_cache(oswap); + shmem_put_swap(info, index, (swp_entry_t){0}); + info->swapped--; } shmem_recalc_inode(inode); if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { delete_from_page_cache(page); - shmem_swp_set(info, entry, swap.val); - shmem_swp_unmap(entry); + shmem_put_swap(info, index, swap); + info->swapped++; swap_shmem_alloc(swap); spin_unlock(&info->lock); BUG_ON(page_mapped(page)); @@ -1110,13 +540,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) return 0; } - shmem_swp_unmap(entry); -unlock: spin_unlock(&info->lock); - /* - * add_to_swap_cache() doesn't return -EEXIST, so we can safely - * clear SWAP_HAS_CACHE flag. - */ swapcache_free(swap, NULL); redirty: set_page_dirty(page); @@ -1230,12 +654,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, struct shmem_sb_info *sbinfo; struct page *page; struct page *prealloc_page = NULL; - swp_entry_t *entry; swp_entry_t swap; int error; - int ret; - if (idx >= SHMEM_MAX_INDEX) + if (idx > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; repeat: page = find_lock_page(mapping, idx); @@ -1272,37 +694,22 @@ repeat: spin_lock(&info->lock); shmem_recalc_inode(inode); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) { - spin_unlock(&info->lock); - error = PTR_ERR(entry); - goto out; - } - swap = *entry; - + swap = shmem_get_swap(info, idx); if (swap.val) { /* Look it up and read it in.. */ page = lookup_swap_cache(swap); if (!page) { - shmem_swp_unmap(entry); spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; page = shmem_swapin(swap, gfp, info, idx); if (!page) { - spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - if (entry->val == swap.val) - error = -ENOMEM; - shmem_swp_unmap(entry); - } - spin_unlock(&info->lock); - if (error) + swp_entry_t nswap = shmem_get_swap(info, idx); + if (nswap.val == swap.val) { + error = -ENOMEM; goto out; + } goto repeat; } wait_on_page_locked(page); @@ -1312,14 +719,12 @@ repeat: /* We have to do this with page locked to prevent races */ if (!trylock_page(page)) { - shmem_swp_unmap(entry); spin_unlock(&info->lock); wait_on_page_locked(page); page_cache_release(page); goto repeat; } if (PageWriteback(page)) { - shmem_swp_unmap(entry); spin_unlock(&info->lock); wait_on_page_writeback(page); unlock_page(page); @@ -1327,7 +732,6 @@ repeat: goto repeat; } if (!PageUptodate(page)) { - shmem_swp_unmap(entry); spin_unlock(&info->lock); unlock_page(page); page_cache_release(page); @@ -1338,7 +742,6 @@ repeat: error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); if (error) { - shmem_swp_unmap(entry); spin_unlock(&info->lock); if (error == -ENOMEM) { /* @@ -1358,16 +761,14 @@ repeat: goto repeat; } - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); delete_from_swap_cache(page); + shmem_put_swap(info, idx, (swp_entry_t){0}); + info->swapped--; spin_unlock(&info->lock); set_page_dirty(page); swap_free(swap); } else if (sgp == SGP_READ) { - shmem_swp_unmap(entry); page = find_get_page(mapping, idx); if (page && !trylock_page(page)) { spin_unlock(&info->lock); @@ -1378,7 +779,6 @@ repeat: spin_unlock(&info->lock); } else if (prealloc_page) { - shmem_swp_unmap(entry); sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, @@ -1393,34 
+793,24 @@ repeat: page = prealloc_page; prealloc_page = NULL; - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - swap = *entry; - shmem_swp_unmap(entry); - } - ret = error || swap.val; - if (ret) + swap = shmem_get_swap(info, idx); + if (swap.val) mem_cgroup_uncharge_cache_page(page); else - ret = add_to_page_cache_lru(page, mapping, + error = add_to_page_cache_lru(page, mapping, idx, GFP_NOWAIT); /* * At add_to_page_cache_lru() failure, * uncharge will be done automatically. */ - if (ret) { + if (swap.val || error) { shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); spin_unlock(&info->lock); page_cache_release(page); - if (error) - goto out; goto repeat; } - info->flags |= SHMEM_PAGEIN; info->alloced++; spin_unlock(&info->lock); clear_highpage(page); @@ -2627,7 +2017,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; - sb->s_maxbytes = SHMEM_MAX_BYTES; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = TMPFS_MAGIC; @@ -2863,7 +2253,7 @@ out4: void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent) { - swp_entry_t entry = { .val = 0 }, *ptr; + swp_entry_t entry = { .val = 0 }; struct page *page = NULL; struct shmem_inode_info *info = SHMEM_I(inode); @@ -2871,16 +2261,13 @@ void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, goto out; spin_lock(&info->lock); - ptr = shmem_swp_entry(info, pgoff, NULL); #ifdef CONFIG_SWAP - if (ptr && ptr->val) { - entry.val = ptr->val; + entry = shmem_get_swap(info, pgoff); + if (entry.val) page = find_get_page(&swapper_space, entry.val); - } else + else #endif page = find_get_page(inode->i_mapping, pgoff); - if (ptr) - shmem_swp_unmap(ptr); spin_unlock(&info->lock); out: *pagep = page; @@ -2963,7 +2350,6 @@ out: #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) -#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE #endif /* CONFIG_SHMEM */ @@ -2987,7 +2373,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags if (IS_ERR(shm_mnt)) return (void *)shm_mnt; - if (size < 0 || size > SHMEM_MAX_BYTES) + if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); if (shmem_acct_size(flags, size)) -- cgit v1.1 From 41ffe5d5ceef7f7ff2ff18e320d88ca6d629efaf Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:21 -0700 Subject: tmpfs: miscellaneous trivial cleanups While it's at its least, make a number of boring nitpicky cleanups to shmem.c, mostly for consistency of variable naming. Things like "swap" instead of "entry", "pgoff_t index" instead of "unsigned long idx". And since everything else here is prefixed "shmem_", better change init_tmpfs() to shmem_init(). 
Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 2 +- init/main.c | 2 +- mm/shmem.c | 216 +++++++++++++++++++++++------------------------ 3 files changed, 109 insertions(+), 111 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 80b6952..3f05795 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -47,7 +47,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) /* * Functions in mm/shmem.c called directly from elsewhere: */ -extern int init_tmpfs(void); +extern int shmem_init(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); diff --git a/init/main.c b/init/main.c index d7211fa..1952d37 100644 --- a/init/main.c +++ b/init/main.c @@ -715,7 +715,7 @@ static void __init do_basic_setup(void) { cpuset_init_smp(); usermodehelper_init(); - init_tmpfs(); + shmem_init(); driver_init(); init_irq_proc(); do_ctors(); diff --git a/mm/shmem.c b/mm/shmem.c index 5574b00..24e95ac 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -28,7 +28,6 @@ #include #include #include -#include #include static struct vfsmount *shm_mnt; @@ -51,6 +50,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -63,7 +63,6 @@ static struct vfsmount *shm_mnt; #include #include -#include #include #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) @@ -201,7 +200,7 @@ static void shmem_free_inode(struct super_block *sb) } /** - * shmem_recalc_inode - recalculate the size of an inode + * shmem_recalc_inode - recalculate the block usage of an inode * @inode: inode to recalc * * We have to calculate the free blocks since the mm can drop @@ -356,19 +355,20 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } -static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +static int shmem_unuse_inode(struct shmem_inode_info *info, + swp_entry_t swap, struct page *page) { struct address_space *mapping = info->vfs_inode.i_mapping; - unsigned long idx; + pgoff_t index; int error; - for (idx = 0; idx < SHMEM_NR_DIRECT; idx++) - if (shmem_get_swap(info, idx).val == entry.val) + for (index = 0; index < SHMEM_NR_DIRECT; index++) + if (shmem_get_swap(info, index).val == swap.val) goto found; return 0; found: spin_lock(&info->lock); - if (shmem_get_swap(info, idx).val != entry.val) { + if (shmem_get_swap(info, index).val != swap.val) { spin_unlock(&info->lock); return 0; } @@ -387,15 +387,15 @@ found: * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { delete_from_swap_cache(page); set_page_dirty(page); - shmem_put_swap(info, idx, (swp_entry_t){0}); + shmem_put_swap(info, index, (swp_entry_t){0}); info->swapped--; - swap_free(entry); + swap_free(swap); error = 1; /* not an error, but entry was found */ } spin_unlock(&info->lock); @@ -405,9 +405,9 @@ found: /* * shmem_unuse() search for an eventually swapped out shmem page. 
*/ -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { - struct list_head *p, *next; + struct list_head *this, *next; struct shmem_inode_info *info; int found = 0; int error; @@ -432,8 +432,8 @@ int shmem_unuse(swp_entry_t entry, struct page *page) radix_tree_preload_end(); mutex_lock(&shmem_swaplist_mutex); - list_for_each_safe(p, next, &shmem_swaplist) { - info = list_entry(p, struct shmem_inode_info, swaplist); + list_for_each_safe(this, next, &shmem_swaplist) { + info = list_entry(this, struct shmem_inode_info, swaplist); if (!info->swapped) { spin_lock(&info->lock); if (!info->swapped) @@ -441,7 +441,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) spin_unlock(&info->lock); } if (info->swapped) - found = shmem_unuse_inode(info, entry, page); + found = shmem_unuse_inode(info, swap, page); cond_resched(); if (found) break; @@ -467,7 +467,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) struct shmem_inode_info *info; swp_entry_t swap, oswap; struct address_space *mapping; - unsigned long index; + pgoff_t index; struct inode *inode; BUG_ON(!PageLocked(page)); @@ -577,35 +577,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) } #endif /* CONFIG_TMPFS */ -static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { struct mempolicy mpol, *spol; struct vm_area_struct pvma; - struct page *page; spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, idx)); + mpol_shared_policy_lookup(&info->policy, index)); /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; pvma.vm_policy = spol; - page = swapin_readahead(entry, gfp, &pvma, 0); - return page; + return swapin_readahead(swap, gfp, &pvma, 0); } static struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); /* * alloc_page_vma() will drop the shared policy reference @@ -614,19 +612,19 @@ static struct page *shmem_alloc_page(gfp_t gfp, } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS -static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) +static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { } #endif /* CONFIG_TMPFS */ -static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { - return swapin_readahead(entry, gfp, NULL, 0); + return swapin_readahead(swap, gfp, NULL, 0); } static inline struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { return alloc_page(gfp); } @@ -646,7 +644,7 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) * vm. 
If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, +static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; @@ -657,10 +655,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, swp_entry_t swap; int error; - if (idx > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) + if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; repeat: - page = find_lock_page(mapping, idx); + page = find_lock_page(mapping, index); if (page) { /* * Once we can get the page lock, it must be uptodate: @@ -681,7 +679,7 @@ repeat: radix_tree_preload_end(); if (sgp != SGP_READ && !prealloc_page) { - prealloc_page = shmem_alloc_page(gfp, info, idx); + prealloc_page = shmem_alloc_page(gfp, info, index); if (prealloc_page) { SetPageSwapBacked(prealloc_page); if (mem_cgroup_cache_charge(prealloc_page, @@ -694,7 +692,7 @@ repeat: spin_lock(&info->lock); shmem_recalc_inode(inode); - swap = shmem_get_swap(info, idx); + swap = shmem_get_swap(info, index); if (swap.val) { /* Look it up and read it in.. */ page = lookup_swap_cache(swap); @@ -703,9 +701,9 @@ repeat: /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; - page = shmem_swapin(swap, gfp, info, idx); + page = shmem_swapin(swap, gfp, info, index); if (!page) { - swp_entry_t nswap = shmem_get_swap(info, idx); + swp_entry_t nswap = shmem_get_swap(info, index); if (nswap.val == swap.val) { error = -ENOMEM; goto out; @@ -740,7 +738,7 @@ repeat: } error = add_to_page_cache_locked(page, mapping, - idx, GFP_NOWAIT); + index, GFP_NOWAIT); if (error) { spin_unlock(&info->lock); if (error == -ENOMEM) { @@ -762,14 +760,14 @@ repeat: } delete_from_swap_cache(page); - shmem_put_swap(info, idx, (swp_entry_t){0}); + shmem_put_swap(info, index, (swp_entry_t){0}); info->swapped--; spin_unlock(&info->lock); set_page_dirty(page); swap_free(swap); } else if (sgp == SGP_READ) { - page = find_get_page(mapping, idx); + page = find_get_page(mapping, index); if (page && !trylock_page(page)) { spin_unlock(&info->lock); wait_on_page_locked(page); @@ -793,12 +791,12 @@ repeat: page = prealloc_page; prealloc_page = NULL; - swap = shmem_get_swap(info, idx); + swap = shmem_get_swap(info, index); if (swap.val) mem_cgroup_uncharge_cache_page(page); else error = add_to_page_cache_lru(page, mapping, - idx, GFP_NOWAIT); + index, GFP_NOWAIT); /* * At add_to_page_cache_lru() failure, * uncharge will be done automatically. @@ -841,7 +839,7 @@ nospace: * but must also avoid reporting a spurious ENOSPC while working on a * full tmpfs. 
*/ - page = find_get_page(mapping, idx); + page = find_get_page(mapping, index); spin_unlock(&info->lock); if (page) { page_cache_release(page); @@ -872,20 +870,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } #ifdef CONFIG_NUMA -static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - unsigned long idx; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + pgoff_t index; - idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); + index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); } #endif @@ -1016,7 +1014,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ { struct inode *inode = filp->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; - unsigned long index, offset; + pgoff_t index; + unsigned long offset; enum sgp_type sgp = SGP_READ; /* @@ -1032,7 +1031,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ for (;;) { struct page *page = NULL; - unsigned long end_index, nr, ret; + pgoff_t end_index; + unsigned long nr, ret; loff_t i_size = i_size_read(inode); end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1270,8 +1270,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = - sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); + buf->f_bavail = + buf->f_bfree = sbinfo->max_blocks - + percpu_counter_sum(&sbinfo->used_blocks); } if (sbinfo->max_inodes) { buf->f_files = sbinfo->max_inodes; @@ -1480,8 +1481,8 @@ static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *n static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; - int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); - nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); + int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); + nd_set_link(nd, error ? 
ERR_PTR(error) : kmap(page)); if (page) unlock_page(page); return page; @@ -1592,7 +1593,6 @@ out: return err; } - static const struct xattr_handler *shmem_xattr_handlers[] = { #ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, @@ -2052,14 +2052,14 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { - struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); - if (!p) + struct shmem_inode_info *info; + info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); + if (!info) return NULL; - return &p->vfs_inode; + return &info->vfs_inode; } -static void shmem_i_callback(struct rcu_head *head) +static void shmem_destroy_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); INIT_LIST_HEAD(&inode->i_dentry); @@ -2072,25 +2072,24 @@ static void shmem_destroy_inode(struct inode *inode) /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } - call_rcu(&inode->i_rcu, shmem_i_callback); + call_rcu(&inode->i_rcu, shmem_destroy_callback); } -static void init_once(void *foo) +static void shmem_init_inode(void *foo) { - struct shmem_inode_info *p = (struct shmem_inode_info *) foo; - - inode_init_once(&p->vfs_inode); + struct shmem_inode_info *info = foo; + inode_init_once(&info->vfs_inode); } -static int init_inodecache(void) +static int shmem_init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, SLAB_PANIC, init_once); + 0, SLAB_PANIC, shmem_init_inode); return 0; } -static void destroy_inodecache(void) +static void shmem_destroy_inodecache(void) { kmem_cache_destroy(shmem_inode_cachep); } @@ -2187,21 +2186,20 @@ static const struct vm_operations_struct shmem_vm_ops = { #endif }; - static struct dentry *shmem_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, shmem_fill_super); } -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", .mount = shmem_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { int error; @@ -2209,18 +2207,18 @@ int __init init_tmpfs(void) if (error) goto out4; - error = init_inodecache(); + error = shmem_init_inodecache(); if (error) goto out3; - error = register_filesystem(&tmpfs_fs_type); + error = register_filesystem(&shmem_fs_type); if (error) { printk(KERN_ERR "Could not register tmpfs\n"); goto out2; } - shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, - tmpfs_fs_type.name, NULL); + shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, + shmem_fs_type.name, NULL); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); printk(KERN_ERR "Could not kern_mount tmpfs\n"); @@ -2229,9 +2227,9 @@ int __init init_tmpfs(void) return 0; out1: - unregister_filesystem(&tmpfs_fs_type); + unregister_filesystem(&shmem_fs_type); out2: - destroy_inodecache(); + shmem_destroy_inodecache(); out3: bdi_destroy(&shmem_backing_dev_info); out4: @@ -2241,37 +2239,37 @@ out4: #ifdef CONFIG_CGROUP_MEM_RES_CTLR /** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file + * mem_cgroup_get_shmem_target - find page or swap assigned to the shmem file * @inode: the inode to be searched - * @pgoff: the offset to be searched + * @index: the page offset to be searched * @pagep: the pointer for the found 
page to be stored - * @ent: the pointer for the found swap entry to be stored + * @swapp: the pointer for the found swap entry to be stored * * If a page is found, refcount of it is incremented. Callers should handle * these refcount. */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent) +void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t index, + struct page **pagep, swp_entry_t *swapp) { - swp_entry_t entry = { .val = 0 }; - struct page *page = NULL; struct shmem_inode_info *info = SHMEM_I(inode); + struct page *page = NULL; + swp_entry_t swap = {0}; - if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + if ((index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) goto out; spin_lock(&info->lock); #ifdef CONFIG_SWAP - entry = shmem_get_swap(info, pgoff); - if (entry.val) - page = find_get_page(&swapper_space, entry.val); + swap = shmem_get_swap(info, index); + if (swap.val) + page = find_get_page(&swapper_space, swap.val); else #endif - page = find_get_page(inode->i_mapping, pgoff); + page = find_get_page(inode->i_mapping, index); spin_unlock(&info->lock); out: *pagep = page; - *ent = entry; + *swapp = swap; } #endif @@ -2288,23 +2286,23 @@ out: #include -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .name = "tmpfs", .mount = ramfs_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { - BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); + BUG_ON(register_filesystem(&shmem_fs_type) != 0); - shm_mnt = kern_mount(&tmpfs_fs_type); + shm_mnt = kern_mount(&shmem_fs_type); BUG_ON(IS_ERR(shm_mnt)); return 0; } -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { return 0; } @@ -2314,34 +2312,34 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { - truncate_inode_pages_range(inode->i_mapping, start, end); + truncate_inode_pages_range(inode->i_mapping, lstart, lend); } EXPORT_SYMBOL_GPL(shmem_truncate_range); #ifdef CONFIG_CGROUP_MEM_RES_CTLR /** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file + * mem_cgroup_get_shmem_target - find page or swap assigned to the shmem file * @inode: the inode to be searched - * @pgoff: the offset to be searched + * @index: the page offset to be searched * @pagep: the pointer for the found page to be stored - * @ent: the pointer for the found swap entry to be stored + * @swapp: the pointer for the found swap entry to be stored * * If a page is found, refcount of it is incremented. Callers should handle * these refcount. 
*/ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent) +void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t index, + struct page **pagep, swp_entry_t *swapp) { struct page *page = NULL; - if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) + if ((index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) goto out; - page = find_get_page(inode->i_mapping, pgoff); + page = find_get_page(inode->i_mapping, index); out: *pagep = page; - *ent = (swp_entry_t){ .val = 0 }; + *swapp = (swp_entry_t){0}; } #endif -- cgit v1.1 From bda97eab0cc9c6385b9f26abdda6459f630f4513 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:21 -0700 Subject: tmpfs: copy truncate_inode_pages_range Bring truncate.c's code for truncate_inode_pages_range() inline into shmem_truncate_range(), replacing its first call (there's a followup call below, but leave that one, it will disappear next). Don't play with it yet, apart from leaving out the cleancache flush, and (importantly) the nrpages == 0 skip, and moving shmem_setattr()'s partial page preparation into its partial page handling. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 24e95ac..e101c21 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -50,6 +50,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -242,11 +243,88 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); pgoff_t end = (lend >> PAGE_CACHE_SHIFT); + struct pagevec pvec; pgoff_t index; swp_entry_t swap; + int i; + + BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); + + pagevec_init(&pvec, 0); + index = start; + while (index <= end && pagevec_lookup(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + + /* We rely upon deletion not changing page->index */ + index = page->index; + if (index > end) + break; + + if (!trylock_page(page)) + continue; + WARN_ON(page->index != index); + if (PageWriteback(page)) { + unlock_page(page); + continue; + } + truncate_inode_page(mapping, page); + unlock_page(page); + } + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); + index++; + } + + if (partial) { + struct page *page = NULL; + shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); + if (page) { + zero_user_segment(page, partial, PAGE_CACHE_SIZE); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } + } + + index = start; + for ( ; ; ) { + cond_resched(); + if (!pagevec_lookup(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + if (index == start) + break; + index = start; + continue; + } + if (index == start && pvec.pages[0]->index > end) { + pagevec_release(&pvec); + break; + } + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - truncate_inode_pages_range(mapping, lstart, lend); + /* We rely upon deletion not changing 
page->index */ + index = page->index; + if (index > end) + break; + + lock_page(page); + WARN_ON(page->index != index); + wait_on_page_writeback(page); + truncate_inode_page(mapping, page); + unlock_page(page); + } + pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; + } if (end > SHMEM_NR_DIRECT) end = SHMEM_NR_DIRECT; @@ -289,24 +367,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; - struct page *page = NULL; - if (newsize < oldsize) { - /* - * If truncating down to a partial page, then - * if that page is already allocated, hold it - * in memory until the truncation is over, so - * truncate_partial_page cannot miss it were - * it assigned to swap. - */ - if (newsize & (PAGE_CACHE_SIZE-1)) { - (void) shmem_getpage(inode, - newsize >> PAGE_CACHE_SHIFT, - &page, SGP_READ, NULL); - if (page) - unlock_page(page); - } - } if (newsize != oldsize) { i_size_write(inode, newsize); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -318,8 +379,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) /* unmap again to remove racily COWed private pages */ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); } - if (page) - page_cache_release(page); } setattr_copy(inode, attr); -- cgit v1.1 From 7a5d0fbb29936fad7f17b1cb001b0c33a5f13328 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:22 -0700 Subject: tmpfs: convert shmem_truncate_range to radix-swap Disable the toy swapping implementation in shmem_writepage() - it's hard to support two schemes at once - and convert shmem_truncate_range() to a lockless gang lookup of swap entries along with pages, freeing both. Since the second loop tightens its noose until all entries of either kind have been squeezed out (and we shall make sure that there's not an instant when neither is visible), there is no longer a need for yet another pass below. shmem_radix_tree_replace() compensates for the lockless lookup by checking that the expected entry is in place, under lock, before replacing it. Here it just deletes, but will be used in later patches to substitute swap entry for page or page for swap entry. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 146 insertions(+), 46 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index e101c21..4439b7d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -238,6 +238,111 @@ static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index) info->i_direct[index] : (swp_entry_t){0}; } +/* + * Replace item expected in radix tree by a new item, while holding tree lock. + */ +static int shmem_radix_tree_replace(struct address_space *mapping, + pgoff_t index, void *expected, void *replacement) +{ + void **pslot; + void *item = NULL; + + VM_BUG_ON(!expected); + pslot = radix_tree_lookup_slot(&mapping->page_tree, index); + if (pslot) + item = radix_tree_deref_slot_protected(pslot, + &mapping->tree_lock); + if (item != expected) + return -ENOENT; + if (replacement) + radix_tree_replace_slot(pslot, replacement); + else + radix_tree_delete(&mapping->page_tree, index); + return 0; +} + +/* + * Like find_get_pages, but collecting swap entries as well as pages. 
+ */ +static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, + pgoff_t start, unsigned int nr_pages, + struct page **pages, pgoff_t *indices) +{ + unsigned int i; + unsigned int ret; + unsigned int nr_found; + + rcu_read_lock(); +restart: + nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, + (void ***)pages, indices, start, nr_pages); + ret = 0; + for (i = 0; i < nr_found; i++) { + struct page *page; +repeat: + page = radix_tree_deref_slot((void **)pages[i]); + if (unlikely(!page)) + continue; + if (radix_tree_exception(page)) { + if (radix_tree_exceptional_entry(page)) + goto export; + /* radix_tree_deref_retry(page) */ + goto restart; + } + if (!page_cache_get_speculative(page)) + goto repeat; + + /* Has the page moved? */ + if (unlikely(page != *((void **)pages[i]))) { + page_cache_release(page); + goto repeat; + } +export: + indices[ret] = indices[i]; + pages[ret] = page; + ret++; + } + if (unlikely(!ret && nr_found)) + goto restart; + rcu_read_unlock(); + return ret; +} + +/* + * Remove swap entry from radix tree, free the swap and its page cache. + */ +static int shmem_free_swap(struct address_space *mapping, + pgoff_t index, void *radswap) +{ + int error; + + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, index, radswap, NULL); + spin_unlock_irq(&mapping->tree_lock); + if (!error) + free_swap_and_cache(radix_to_swp_entry(radswap)); + return error; +} + +/* + * Pagevec may contain swap entries, so shuffle up pages before releasing. + */ +static void shmem_pagevec_release(struct pagevec *pvec) +{ + int i, j; + + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (!radix_tree_exceptional_entry(page)) + pvec->pages[j++] = page; + } + pvec->nr = j; + pagevec_release(pvec); +} + +/* + * Remove range of pages and swap entries from radix tree, and free them. 
+ */ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { struct address_space *mapping = inode->i_mapping; @@ -246,36 +351,44 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); pgoff_t end = (lend >> PAGE_CACHE_SHIFT); struct pagevec pvec; + pgoff_t indices[PAGEVEC_SIZE]; + long nr_swaps_freed = 0; pgoff_t index; - swp_entry_t swap; int i; BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); pagevec_init(&pvec, 0); index = start; - while (index <= end && pagevec_lookup(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + while (index <= end) { + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) + break; mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - /* We rely upon deletion not changing page->index */ - index = page->index; + index = indices[i]; if (index > end) break; - if (!trylock_page(page)) + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); continue; - WARN_ON(page->index != index); - if (PageWriteback(page)) { - unlock_page(page); + } + + if (!trylock_page(page)) continue; + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); } - truncate_inode_page(mapping, page); unlock_page(page); } - pagevec_release(&pvec); + shmem_pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); index++; @@ -295,59 +408,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) index = start; for ( ; ; ) { cond_resched(); - if (!pagevec_lookup(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) { if (index == start) break; index = start; continue; } - if (index == start && pvec.pages[0]->index > end) { - pagevec_release(&pvec); + if (index == start && indices[0] > end) { + shmem_pagevec_release(&pvec); break; } mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - /* We rely upon deletion not changing page->index */ - index = page->index; + index = indices[i]; if (index > end) break; + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; + } + lock_page(page); - WARN_ON(page->index != index); - wait_on_page_writeback(page); - truncate_inode_page(mapping, page); + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); + } unlock_page(page); } - pagevec_release(&pvec); + shmem_pagevec_release(&pvec); mem_cgroup_uncharge_end(); index++; } - if (end > SHMEM_NR_DIRECT) - end = SHMEM_NR_DIRECT; - spin_lock(&info->lock); - for (index = start; index < end; index++) { - swap = shmem_get_swap(info, index); - if (swap.val) { - free_swap_and_cache(swap); - shmem_put_swap(info, index, (swp_entry_t){0}); - info->swapped--; - } - } - - if (mapping->nrpages) { - spin_unlock(&info->lock); - /* - * A page may have meanwhile sneaked in from swap. 
- */ - truncate_inode_pages_range(mapping, lstart, lend); - spin_lock(&info->lock); - } - + info->swapped -= nr_swaps_freed; shmem_recalc_inode(inode); spin_unlock(&info->lock); @@ -552,11 +653,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) } /* - * Just for this patch, we have a toy implementation, - * which can swap out only the first SHMEM_NR_DIRECT pages: - * for simple demonstration of where we need to think about swap. + * Disable even the toy swapping implementation, while we convert + * functions one by one to having swap entries in the radix tree. */ - if (index >= SHMEM_NR_DIRECT) + if (index < ULONG_MAX) goto redirty; swap = get_swap_page(); -- cgit v1.1 From 46f65ec15c6878a2b4a49f6e01b20b201b46a9e4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:23 -0700 Subject: tmpfs: convert shmem_unuse_inode to radix-swap Convert shmem_unuse_inode() to use a lockless gang lookup of the radix tree, searching for matching swap. This is somewhat slower than the old method: because of repeated radix tree descents, because of copying entries up, but probably most because the old method noted and skipped once a vector page was cleared of swap. Perhaps we can devise a use of radix tree tagging to achieve that later. shmem_add_to_page_cache() uses shmem_radix_tree_replace() to compensate for the lockless lookup by checking that the expected entry is in place, under lock. It is not very satisfactory to be copying this much from add_to_page_cache_locked(), but I think easier to sell than insisting that every caller of add_to_page_cache*() go through the extras. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 107 insertions(+), 26 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 4439b7d..174f971 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -262,6 +262,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping, } /* + * Like add_to_page_cache_locked, but error if expected item has gone. + */ +static int shmem_add_to_page_cache(struct page *page, + struct address_space *mapping, + pgoff_t index, gfp_t gfp, void *expected) +{ + int error; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageSwapBacked(page)); + + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (error) + goto out; + if (!expected) + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); + if (!error) { + page_cache_get(page); + page->mapping = mapping; + page->index = index; + + spin_lock_irq(&mapping->tree_lock); + if (!expected) + error = radix_tree_insert(&mapping->page_tree, + index, page); + else + error = shmem_radix_tree_replace(mapping, index, + expected, page); + if (!error) { + mapping->nrpages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + __inc_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + } else { + page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); + } + if (!expected) + radix_tree_preload_end(); + } + if (error) + mem_cgroup_uncharge_cache_page(page); +out: + return error; +} + +/* * Like find_get_pages, but collecting swap entries as well as pages. */ static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, @@ -309,6 +358,42 @@ export: } /* + * Lockless lookup of swap entry in radix tree, avoiding refcount on pages. 
+ */ +static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap) +{ + void **slots[PAGEVEC_SIZE]; + pgoff_t indices[PAGEVEC_SIZE]; + unsigned int nr_found; + +restart: + nr_found = 1; + indices[0] = -1; + while (nr_found) { + pgoff_t index = indices[nr_found - 1] + 1; + unsigned int i; + + rcu_read_lock(); + nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, + slots, indices, index, PAGEVEC_SIZE); + for (i = 0; i < nr_found; i++) { + void *item = radix_tree_deref_slot(slots[i]); + if (radix_tree_deref_retry(item)) { + rcu_read_unlock(); + goto restart; + } + if (item == radswap) { + rcu_read_unlock(); + return indices[i]; + } + } + rcu_read_unlock(); + cond_resched(); + } + return -1; +} + +/* * Remove swap entry from radix tree, free the swap and its page cache. */ static int shmem_free_swap(struct address_space *mapping, @@ -515,23 +600,21 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } +/* + * If swap found in inode, free it and move page from swapcache to filecache. + */ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t swap, struct page *page) { struct address_space *mapping = info->vfs_inode.i_mapping; + void *radswap; pgoff_t index; int error; - for (index = 0; index < SHMEM_NR_DIRECT; index++) - if (shmem_get_swap(info, index).val == swap.val) - goto found; - return 0; -found: - spin_lock(&info->lock); - if (shmem_get_swap(info, index).val != swap.val) { - spin_unlock(&info->lock); + radswap = swp_to_radix_entry(swap); + index = shmem_find_swap(mapping, radswap); + if (index == -1) return 0; - } /* * Move _head_ to start search for next from here. @@ -547,23 +630,30 @@ found: * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - error = add_to_page_cache_locked(page, mapping, index, GFP_NOWAIT); + error = shmem_add_to_page_cache(page, mapping, index, + GFP_NOWAIT, radswap); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { + /* + * Truncation and eviction use free_swap_and_cache(), which + * only does trylock page: if we raced, best clean up here. + */ delete_from_swap_cache(page); set_page_dirty(page); - shmem_put_swap(info, index, (swp_entry_t){0}); - info->swapped--; - swap_free(swap); + if (!error) { + spin_lock(&info->lock); + info->swapped--; + spin_unlock(&info->lock); + swap_free(swap); + } error = 1; /* not an error, but entry was found */ } - spin_unlock(&info->lock); return error; } /* - * shmem_unuse() search for an eventually swapped out shmem page. + * Search through swapped inodes to find and replace swap by page. */ int shmem_unuse(swp_entry_t swap, struct page *page) { @@ -576,20 +666,12 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * Charge page using GFP_KERNEL while we can wait, before taking * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. + * shmem_add_to_page_cache() will be called with GFP_NOWAIT. */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; - /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu, - * it's okay if sometimes we get rescheduled after this. 
- */ - error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; - radix_tree_preload_end(); + /* No radix_tree_preload: swap entry keeps a place for page in tree */ mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(this, next, &shmem_swaplist) { @@ -608,7 +690,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page) } mutex_unlock(&shmem_swaplist_mutex); -uncharge: if (!found) mem_cgroup_uncharge_cache_page(page); if (found < 0) -- cgit v1.1 From 54af60421822bb9cb664dd5cd7aac46c01ccfcf8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:24 -0700 Subject: tmpfs: convert shmem_getpage_gfp to radix-swap Convert shmem_getpage_gfp(), the engine-room of shmem, to expect page or swap entry returned from radix tree by find_lock_page(). Whereas the repetitive old method proceeded mainly under info->lock, dropping and repeating whenever one of the conditions needed was not met, now we can proceed without it, leaving shmem_add_to_page_cache() to check for a race. This way there is no need to preallocate a page, no need for an early radix_tree_preload(), no need for mem_cgroup_shmem_charge_fallback(). Move the error unwinding down to the bottom instead of repeating it throughout. ENOSPC handling is a little different from before: there is no longer any race between find_lock_page() and finding swap, but we can arrive at ENOSPC before calling shmem_recalc_inode(), which might occasionally discover freed space. Be stricter to check i_size before returning. info->lock is used for little but alloced, swapped, i_blocks updates. Move i_blocks updates out from under the max_blocks check, so even an unlimited size=0 mount can show accurate du. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 259 ++++++++++++++++++++++++++----------------------------------- 1 file changed, 112 insertions(+), 147 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 174f971..92f01d7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -166,15 +166,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = { static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); -static void shmem_free_blocks(struct inode *inode, long pages) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) { - percpu_counter_add(&sbinfo->used_blocks, -pages); - inode->i_blocks -= pages*BLOCKS_PER_PAGE; - } -} - static int shmem_reserve_inode(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); @@ -219,9 +210,12 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced -= freed; + inode->i_blocks -= freed * BLOCKS_PER_PAGE; shmem_unacct_blocks(info->flags, freed); - shmem_free_blocks(inode, freed); } } @@ -888,205 +882,180 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_inode_info *info; struct shmem_sb_info *sbinfo; struct page *page; - struct page *prealloc_page = NULL; swp_entry_t swap; int error; + int once = 0; if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; repeat: + swap.val = 0; page = find_lock_page(mapping, 
index); - if (page) { + if (radix_tree_exceptional_entry(page)) { + swap = radix_to_swp_entry(page); + page = NULL; + } + + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto failed; + } + + if (page || (sgp == SGP_READ && !swap.val)) { /* * Once we can get the page lock, it must be uptodate: * if there were an error in reading back from swap, * the page would not be inserted into the filecache. */ - BUG_ON(!PageUptodate(page)); - goto done; + BUG_ON(page && !PageUptodate(page)); + *pagep = page; + return 0; } /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu. + * Fast cache lookup did not find it: + * bring it back from swap or allocate. */ - error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); - if (error) - goto out; - radix_tree_preload_end(); - - if (sgp != SGP_READ && !prealloc_page) { - prealloc_page = shmem_alloc_page(gfp, info, index); - if (prealloc_page) { - SetPageSwapBacked(prealloc_page); - if (mem_cgroup_cache_charge(prealloc_page, - current->mm, GFP_KERNEL)) { - page_cache_release(prealloc_page); - prealloc_page = NULL; - } - } - } + info = SHMEM_I(inode); + sbinfo = SHMEM_SB(inode->i_sb); - spin_lock(&info->lock); - shmem_recalc_inode(inode); - swap = shmem_get_swap(info, index); if (swap.val) { /* Look it up and read it in.. */ page = lookup_swap_cache(swap); if (!page) { - spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; page = shmem_swapin(swap, gfp, info, index); if (!page) { - swp_entry_t nswap = shmem_get_swap(info, index); - if (nswap.val == swap.val) { - error = -ENOMEM; - goto out; - } - goto repeat; + error = -ENOMEM; + goto failed; } - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; } /* We have to do this with page locked to prevent races */ - if (!trylock_page(page)) { - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; - } - if (PageWriteback(page)) { - spin_unlock(&info->lock); - wait_on_page_writeback(page); - unlock_page(page); - page_cache_release(page); - goto repeat; - } + lock_page(page); if (!PageUptodate(page)) { - spin_unlock(&info->lock); - unlock_page(page); - page_cache_release(page); error = -EIO; - goto out; + goto failed; } - - error = add_to_page_cache_locked(page, mapping, - index, GFP_NOWAIT); - if (error) { - spin_unlock(&info->lock); - if (error == -ENOMEM) { - /* - * reclaim from proper memory cgroup and - * call memcg's OOM if needed. 
- */ - error = mem_cgroup_shmem_charge_fallback( - page, current->mm, gfp); - if (error) { - unlock_page(page); - page_cache_release(page); - goto out; - } - } - unlock_page(page); - page_cache_release(page); - goto repeat; + wait_on_page_writeback(page); + + /* Someone may have already done it for us */ + if (page->mapping) { + if (page->mapping == mapping && + page->index == index) + goto done; + error = -EEXIST; + goto failed; } - delete_from_swap_cache(page); - shmem_put_swap(info, index, (swp_entry_t){0}); + error = shmem_add_to_page_cache(page, mapping, index, + gfp, swp_to_radix_entry(swap)); + if (error) + goto failed; + + spin_lock(&info->lock); info->swapped--; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + delete_from_swap_cache(page); set_page_dirty(page); swap_free(swap); - } else if (sgp == SGP_READ) { - page = find_get_page(mapping, index); - if (page && !trylock_page(page)) { - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; + } else { + if (shmem_acct_block(info->flags)) { + error = -ENOSPC; + goto failed; } - spin_unlock(&info->lock); - - } else if (prealloc_page) { - sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) - goto nospace; + sbinfo->max_blocks) >= 0) { + error = -ENOSPC; + goto unacct; + } percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; - } else if (shmem_acct_block(info->flags)) - goto nospace; - - page = prealloc_page; - prealloc_page = NULL; + } - swap = shmem_get_swap(info, index); - if (swap.val) - mem_cgroup_uncharge_cache_page(page); - else - error = add_to_page_cache_lru(page, mapping, - index, GFP_NOWAIT); - /* - * At add_to_page_cache_lru() failure, - * uncharge will be done automatically. - */ - if (swap.val || error) { - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - page_cache_release(page); - goto repeat; + page = shmem_alloc_page(gfp, info, index); + if (!page) { + error = -ENOMEM; + goto decused; } + SetPageSwapBacked(page); + __set_page_locked(page); + error = shmem_add_to_page_cache(page, mapping, index, + gfp, NULL); + if (error) + goto decused; + lru_cache_add_anon(page); + + spin_lock(&info->lock); info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); if (sgp == SGP_DIRTY) set_page_dirty(page); - - } else { - spin_unlock(&info->lock); - error = -ENOMEM; - goto out; } done: - *pagep = page; - error = 0; -out: - if (prealloc_page) { - mem_cgroup_uncharge_cache_page(prealloc_page); - page_cache_release(prealloc_page); + /* Perhaps the file has been truncated since we checked */ + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto trunc; } - return error; + *pagep = page; + return 0; -nospace: /* - * Perhaps the page was brought in from swap between find_lock_page - * and taking info->lock? We allow for that at add_to_page_cache_lru, - * but must also avoid reporting a spurious ENOSPC while working on a - * full tmpfs. + * Error recovery. 
*/ - page = find_get_page(mapping, index); +trunc: + ClearPageDirty(page); + delete_from_page_cache(page); + spin_lock(&info->lock); + info->alloced--; + inode->i_blocks -= BLOCKS_PER_PAGE; spin_unlock(&info->lock); +decused: + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); +unacct: + shmem_unacct_blocks(info->flags, 1); +failed: + if (swap.val && error != -EINVAL) { + struct page *test = find_get_page(mapping, index); + if (test && !radix_tree_exceptional_entry(test)) + page_cache_release(test); + /* Have another try if the entry has changed */ + if (test != swp_to_radix_entry(swap)) + error = -EEXIST; + } if (page) { + unlock_page(page); page_cache_release(page); + } + if (error == -ENOSPC && !once++) { + info = SHMEM_I(inode); + spin_lock(&info->lock); + shmem_recalc_inode(inode); + spin_unlock(&info->lock); goto repeat; } - error = -ENOSPC; - goto out; + if (error == -EEXIST) + goto repeat; + return error; } static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -1095,9 +1064,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int error; int ret = VM_FAULT_LOCKED; - if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return VM_FAULT_SIGBUS; - error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -2164,8 +2130,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) if (config.max_inodes < inodes) goto out; /* - * Those tests also disallow limited->unlimited while any are in - * use, so i_blocks will always be zero when max_blocks is zero; + * Those tests disallow limited->unlimited while any are in use; * but we must separately disallow unlimited->limited, because * in that case we have no record of how much is already in use. */ -- cgit v1.1 From aa3b189551ad8e5cc1d9c663735c131650238278 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:24 -0700 Subject: tmpfs: convert mem_cgroup shmem to radix-swap Remove mem_cgroup_shmem_charge_fallback(): it was only required when we had to move swappage to filecache with GFP_NOWAIT. Remove the GFP_NOWAIT special case from mem_cgroup_cache_charge(), by moving its call out from shmem_add_to_page_cache() to two of thats three callers. But leave it doing mem_cgroup_uncharge_cache_page() on error: although asymmetrical, it's easier for all 3 callers to handle. These two changes would also be appropriate if anyone were to start using shmem_read_mapping_page_gfp() with GFP_NOWAIT. Remove mem_cgroup_get_shmem_target(): mc_handle_file_pte() can test radix_tree_exceptional_entry() to get what it needs for itself. 
Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ----- include/linux/shmem_fs.h | 2 -- mm/memcontrol.c | 66 +++++------------------------------- mm/shmem.c | 83 ++++++---------------------------------------- 4 files changed, 20 insertions(+), 139 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b966007..3b535db 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); @@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline void mem_cgroup_add_lru_list(struct page *page, int lru) { } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 3f05795..0c8e952 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -57,8 +57,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5f84d23..f4ec4e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, return 0; if (PageCompound(page)) return 0; - /* - * Corner case handling. This is called from add_to_page_cache() - * in usual. But some FS (shmem) precharges this page before calling it - * and call add_to_page_cache() with GFP_NOWAIT. - * - * For GFP_NOWAIT case, the page may be pre-charged before calling - * add_to_page_cache(). (See shmem.c) check it here and avoid to call - * charge twice. (It works but has to pay a bit larger cost.) - * And when the page is SwapCache, it should take swap information - * into account. This is under lock_page() now. - */ - if (!(gfp_mask & __GFP_WAIT)) { - struct page_cgroup *pc; - - pc = lookup_page_cgroup(page); - if (!pc) - return 0; - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - unlock_page_cgroup(pc); - return 0; - } - unlock_page_cgroup(pc); - } if (unlikely(!mm)) mm = &init_mm; @@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, cgroup_release_and_wakeup_rmdir(&mem->css); } -/* - * A call to try to shrink memory usage on charge failure at shmem's swapin. - * Calling hierarchical_reclaim is not enough because we should update - * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. - * Moreover considering hierarchy, we should reclaim from the mem_over_limit, - * not from the memcg which this page would be charged to. 
- * try_charge_swapin does all of these works properly. - */ -int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - struct mem_cgroup *mem; - int ret; - - if (mem_cgroup_disabled()) - return 0; - - ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); - if (!ret) - mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - - return ret; -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - if (!mapping_cap_swap_backed(mapping)) { /* normal file */ - page = find_get_page(mapping, pgoff); - } else { /* shmem/tmpfs file. we should take account of swap too. */ - swp_entry_t ent; - mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); + page = find_get_page(mapping, pgoff); + +#ifdef CONFIG_SWAP + /* shmem/tmpfs may report page out on swap: account for that too. */ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) - entry->val = ent.val; + *entry = swap; + page = find_get_page(&swapper_space, swap.val); } - +#endif return page; } diff --git a/mm/shmem.c b/mm/shmem.c index 92f01d7..13ef2d7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -262,15 +262,11 @@ static int shmem_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp, void *expected) { - int error; + int error = 0; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageSwapBacked(page)); - error = mem_cgroup_cache_charge(page, current->mm, - gfp & GFP_RECLAIM_MASK); - if (error) - goto out; if (!expected) error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); if (!error) { @@ -300,7 +296,6 @@ static int shmem_add_to_page_cache(struct page *page, } if (error) mem_cgroup_uncharge_cache_page(page); -out: return error; } @@ -660,7 +655,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * Charge page using GFP_KERNEL while we can wait, before taking * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. - * shmem_add_to_page_cache() will be called with GFP_NOWAIT. */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) @@ -954,8 +948,11 @@ repeat: goto failed; } - error = shmem_add_to_page_cache(page, mapping, index, - gfp, swp_to_radix_entry(swap)); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, swp_to_radix_entry(swap)); if (error) goto failed; @@ -990,8 +987,11 @@ repeat: SetPageSwapBacked(page); __set_page_locked(page); - error = shmem_add_to_page_cache(page, mapping, index, - gfp, NULL); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, NULL); if (error) goto decused; lru_cache_add_anon(page); @@ -2442,42 +2442,6 @@ out4: return error; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find page or swap assigned to the shmem file - * @inode: the inode to be searched - * @index: the page offset to be searched - * @pagep: the pointer for the found page to be stored - * @swapp: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. 
Callers should handle - * these refcount. - */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t index, - struct page **pagep, swp_entry_t *swapp) -{ - struct shmem_inode_info *info = SHMEM_I(inode); - struct page *page = NULL; - swp_entry_t swap = {0}; - - if ((index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - goto out; - - spin_lock(&info->lock); -#ifdef CONFIG_SWAP - swap = shmem_get_swap(info, index); - if (swap.val) - page = find_get_page(&swapper_space, swap.val); - else -#endif - page = find_get_page(inode->i_mapping, index); - spin_unlock(&info->lock); -out: - *pagep = page; - *swapp = swap; -} -#endif - #else /* !CONFIG_SHMEM */ /* @@ -2523,31 +2487,6 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) } EXPORT_SYMBOL_GPL(shmem_truncate_range); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find page or swap assigned to the shmem file - * @inode: the inode to be searched - * @index: the page offset to be searched - * @pagep: the pointer for the found page to be stored - * @swapp: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. Callers should handle - * these refcount. - */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t index, - struct page **pagep, swp_entry_t *swapp) -{ - struct page *page = NULL; - - if ((index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - goto out; - page = find_get_page(inode->i_mapping, index); -out: - *pagep = page; - *swapp = (swp_entry_t){0}; -} -#endif - #define shmem_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) -- cgit v1.1 From 6922c0c7abd387374255801f7739624867e8acad Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:25 -0700 Subject: tmpfs: convert shmem_writepage and enable swap Convert shmem_writepage() to use shmem_delete_from_page_cache() to use shmem_radix_tree_replace() to substitute swap entry for page pointer atomically in the radix tree. As with shmem_add_to_page_cache(), it's not entirely satisfactory to be copying such code from delete_from_swap_cache, but again judged easier to sell than making its other callers go through the extras. Remove the toy implementation's shmem_put_swap() and shmem_get_swap(), now unreferenced, and the hack to disable swap: it's now good to go. The way things have worked out, info->lock no longer helps to guard the shmem_swaplist: we increment swapped under shmem_swaplist_mutex only. That global mutex exclusion between shmem_writepage() and shmem_unuse() is not pretty, and we ought to find another way; but it's been forced on us by recent race discoveries, not a consequence of this patchset. And what has become of the WARN_ON_ONCE(1) free_swap_and_cache() if a swap entry was found already present? That's no longer possible, the (unknown) one inserting this page into filecache would hit the swap entry occupying that slot. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 88 ++++++++++++++++++++++++++------------------------------------ 1 file changed, 37 insertions(+), 51 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 13ef2d7..0f094a2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -6,7 +6,8 @@ * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. - * Copyright (C) 2002-2005 Hugh Dickins. + * Copyright (C) 2002-2011 Hugh Dickins. 
+ * Copyright (C) 2011 Google Inc. * Copyright (C) 2002-2005 VERITAS Software Corporation. * Copyright (C) 2004 Andi Kleen, SuSE Labs * @@ -219,19 +220,6 @@ static void shmem_recalc_inode(struct inode *inode) } } -static void shmem_put_swap(struct shmem_inode_info *info, pgoff_t index, - swp_entry_t swap) -{ - if (index < SHMEM_NR_DIRECT) - info->i_direct[index] = swap; -} - -static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index) -{ - return (index < SHMEM_NR_DIRECT) ? - info->i_direct[index] : (swp_entry_t){0}; -} - /* * Replace item expected in radix tree by a new item, while holding tree lock. */ @@ -300,6 +288,25 @@ static int shmem_add_to_page_cache(struct page *page, } /* + * Like delete_from_page_cache, but substitutes swap for page. + */ +static void shmem_delete_from_page_cache(struct page *page, void *radswap) +{ + struct address_space *mapping = page->mapping; + int error; + + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, page->index, page, radswap); + page->mapping = NULL; + mapping->nrpages--; + __dec_zone_page_state(page, NR_FILE_PAGES); + __dec_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); + BUG_ON(error); +} + +/* * Like find_get_pages, but collecting swap entries as well as pages. */ static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, @@ -664,14 +671,10 @@ int shmem_unuse(swp_entry_t swap, struct page *page) mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(this, next, &shmem_swaplist) { info = list_entry(this, struct shmem_inode_info, swaplist); - if (!info->swapped) { - spin_lock(&info->lock); - if (!info->swapped) - list_del_init(&info->swaplist); - spin_unlock(&info->lock); - } if (info->swapped) found = shmem_unuse_inode(info, swap, page); + else + list_del_init(&info->swaplist); cond_resched(); if (found) break; @@ -694,10 +697,10 @@ out: static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct shmem_inode_info *info; - swp_entry_t swap, oswap; struct address_space *mapping; - pgoff_t index; struct inode *inode; + swp_entry_t swap; + pgoff_t index; BUG_ON(!PageLocked(page)); mapping = page->mapping; @@ -720,55 +723,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ goto redirty; } - - /* - * Disable even the toy swapping implementation, while we convert - * functions one by one to having swap entries in the radix tree. - */ - if (index < ULONG_MAX) - goto redirty; - swap = get_swap_page(); if (!swap.val) goto redirty; /* * Add inode to shmem_unuse()'s list of swapped-out inodes, - * if it's not already there. Do it now because we cannot take - * mutex while holding spinlock, and must do so before the page - * is moved to swap cache, when its pagelock no longer protects + * if it's not already there. Do it now before the page is + * moved to swap cache, when its pagelock no longer protects * the inode from eviction. But don't unlock the mutex until - * we've taken the spinlock, because shmem_unuse_inode() will - * prune a !swapped inode from the swaplist under both locks. + * we've incremented swapped, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under this mutex. 
*/ mutex_lock(&shmem_swaplist_mutex); if (list_empty(&info->swaplist)) list_add_tail(&info->swaplist, &shmem_swaplist); - spin_lock(&info->lock); - mutex_unlock(&shmem_swaplist_mutex); - - oswap = shmem_get_swap(info, index); - if (oswap.val) { - WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ - free_swap_and_cache(oswap); - shmem_put_swap(info, index, (swp_entry_t){0}); - info->swapped--; - } - shmem_recalc_inode(inode); - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { - delete_from_page_cache(page); - shmem_put_swap(info, index, swap); - info->swapped++; swap_shmem_alloc(swap); + shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); + + spin_lock(&info->lock); + info->swapped++; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + mutex_unlock(&shmem_swaplist_mutex); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; } - spin_unlock(&info->lock); + mutex_unlock(&shmem_swaplist_mutex); swapcache_free(swap, NULL); redirty: set_page_dirty(page); -- cgit v1.1 From 69f07ec938712b58755add82dd3d0b35f01317cc Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:26 -0700 Subject: tmpfs: use kmemdup for short symlinks But we've not yet removed the old swp_entry_t i_direct[16] from shmem_inode_info. That's because it was still being shared with the inline symlink. Remove it now (saving 64 or 128 bytes from shmem inode size), and use kmemdup() for short symlinks, say, those up to 128 bytes. I wonder why mpol_free_shared_policy() is done in shmem_destroy_inode() rather than shmem_evict_inode(), where we usually do such freeing? I guess it doesn't matter, and I'm not into NUMA mpol testing right now. Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Reviewed-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 11 +++-------- mm/shmem.c | 31 ++++++++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 0c8e952..9291ac3 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -8,20 +8,15 @@ /* inode in-kernel data */ -#define SHMEM_NR_DIRECT 16 - -#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) - struct shmem_inode_info { spinlock_t lock; unsigned long flags; unsigned long alloced; /* data pages alloced to file */ - unsigned long swapped; /* subtotal assigned to swap */ - struct shared_policy policy; /* NUMA memory alloc policy */ union { - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ - char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; + unsigned long swapped; /* subtotal assigned to swap */ + char *symlink; /* unswappable short symlink */ }; + struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ struct list_head xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; diff --git a/mm/shmem.c b/mm/shmem.c index 0f094a2..3a5be0f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -73,6 +73,9 @@ static struct vfsmount *shm_mnt; /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 +/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ +#define SHORT_SYMLINK_LEN 128 + struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; /* xattr name */ @@ -585,7 +588,8 @@ static void shmem_evict_inode(struct inode *inode) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } - } 
+ } else + kfree(info->symlink); list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { kfree(xattr->name); @@ -1173,7 +1177,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; -static const struct inode_operations shmem_symlink_inline_operations; +static const struct inode_operations shmem_short_symlink_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, @@ -1638,10 +1642,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= SHMEM_SYMLINK_INLINE_LEN) { - /* do it inline */ - memcpy(info->inline_symlink, symname, len); - inode->i_op = &shmem_symlink_inline_operations; + if (len <= SHORT_SYMLINK_LEN) { + info->symlink = kmemdup(symname, len, GFP_KERNEL); + if (!info->symlink) { + iput(inode); + return -ENOMEM; + } + inode->i_op = &shmem_short_symlink_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); if (error) { @@ -1664,9 +1671,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return 0; } -static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); return NULL; } @@ -1914,9 +1921,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) } #endif /* CONFIG_TMPFS_XATTR */ -static const struct inode_operations shmem_symlink_inline_operations = { +static const struct inode_operations shmem_short_symlink_operations = { .readlink = generic_readlink, - .follow_link = shmem_follow_link_inline, + .follow_link = shmem_follow_short_symlink, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, .getxattr = shmem_getxattr, @@ -2259,10 +2266,8 @@ static void shmem_destroy_callback(struct rcu_head *head) static void shmem_destroy_inode(struct inode *inode) { - if ((inode->i_mode & S_IFMT) == S_IFREG) { - /* only struct inode is valid if it's an inline symlink */ + if ((inode->i_mode & S_IFMT) == S_IFREG) mpol_free_shared_policy(&SHMEM_I(inode)->policy); - } call_rcu(&inode->i_rcu, shmem_destroy_callback); } -- cgit v1.1 From 31475dd611209413bace21651a400afb91d0bd9d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:27 -0700 Subject: mm: a few small updates for radix-swap Remove PageSwapBacked (!page_is_file_cache) cases from add_to_page_cache_locked() and add_to_page_cache_lru(): those pages now go through shmem_add_to_page_cache(). Remove a comment on maximum tmpfs size from fsstack_copy_inode_size(), and add a comment on swap entries to invalidate_mapping_pages(). And mincore_page() uses find_get_page() on what might be shmem or a tmpfs file: allow for a radix_tree_exceptional_entry(), and proceed to find_get_page() on swapper_space if so (oh, swapper_space needs #ifdef). 
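As an aside, the lookup pattern that mincore_page() and mc_handle_file_pte() now share can be sketched on its own. This is an illustration only, not part of the patch, and the helper name is invented here:

static struct page *lookup_page_or_swapcache(struct address_space *mapping,
                                             pgoff_t index)
{
        struct page *page;

        page = find_get_page(mapping, index);
#ifdef CONFIG_SWAP
        /* shmem/tmpfs may store a swap entry here as an exceptional entry */
        if (radix_tree_exceptional_entry(page)) {
                swp_entry_t swap = radix_to_swp_entry(page);

                /* no reference was taken on the exceptional entry itself */
                page = find_get_page(&swapper_space, swap.val);
        }
#endif
        return page;    /* NULL, or a page with an elevated refcount */
}

The two callers then differ only in what they do with the page they get back.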
Signed-off-by: Hugh Dickins Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/stack.c | 5 +---- mm/filemap.c | 21 +++------------------ mm/mincore.c | 10 ++++++---- mm/truncate.c | 8 ++++++++ 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/fs/stack.c b/fs/stack.c index 4a6f7f4..b4f2ab48 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) * * We don't actually know what locking is used at the lower level; * but if it's a filesystem that supports quotas, it will be using - * i_lock as in inode_add_bytes(). tmpfs uses other locking, and - * its 32-bit is (just) able to exceed 2TB i_size with the aid of - * holes; but its i_blocks cannot carry into the upper long without - * almost 2TB swap - let's ignore that case. + * i_lock as in inode_add_bytes(). */ if (sizeof(i_blocks) > sizeof(long)) spin_lock(&src->i_lock); diff --git a/mm/filemap.c b/mm/filemap.c index 76bfb64..96778fa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -33,7 +33,6 @@ #include #include /* for BUG_ON(!in_atomic()) only */ #include -#include /* for page_is_file_cache() */ #include #include "internal.h" @@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int error; VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageSwapBacked(page)); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); - if (PageSwapBacked(page)) - __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; @@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, { int ret; - /* - * Splice_read and readahead add shmem/tmpfs pages into the page cache - * before shmem_readpage has a chance to mark them as SwapBacked: they - * need to go on the anon lru below, and mem_cgroup_cache_charge - * (called in add_to_page_cache) needs to know where they're going too. - */ - if (mapping_cap_swap_backed(mapping)) - SetPageSwapBacked(page); - ret = add_to_page_cache(page, mapping, offset, gfp_mask); - if (ret == 0) { - if (page_is_file_cache(page)) - lru_cache_add_file(page); - else - lru_cache_add_anon(page); - } + if (ret == 0) + lru_cache_add_file(page); return ret; } EXPORT_SYMBOL_GPL(add_to_page_cache_lru); diff --git a/mm/mincore.c b/mm/mincore.c index a4e6b9d..733f182 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -69,12 +69,14 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * file will not get a swp_entry_t in its pte, but rather it is like * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. - * - * However when tmpfs moves the page from pagecache and into swapcache, - * it is still in core, but the find_get_page below won't find it. - * No big deal, but make a note of it. 
*/ page = find_get_page(mapping, pgoff); +#ifdef CONFIG_SWAP + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); + page = find_get_page(&swapper_space, swap.val); + } +#endif if (page) { present = PageUptodate(page); page_cache_release(page); diff --git a/mm/truncate.c b/mm/truncate.c index 232eb27..b40ac6d 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unsigned long count = 0; int i; + /* + * Note: this function may get called on a shmem/tmpfs mapping: + * pagevec_lookup() might then return 0 prematurely (because it + * got a gangful of swap entries); but it's hardly worth worrying + * about - it can rarely have anything to free from such a mapping + * (most pages are dirty), and already skips over any difficulties. + */ + pagevec_init(&pvec, 0); while (index <= end && pagevec_lookup(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { -- cgit v1.1 From e504f3fdd63d486d45b18009e5a65f2e329acb0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:27 -0700 Subject: tmpfs radix_tree: locate_item to speed up swapoff We have already acknowledged that swapoff of a tmpfs file is slower than it was before conversion to the generic radix_tree: a little slower there will be acceptable, if the hotter paths are faster. But it was a shock to find swapoff of a 500MB file 20 times slower on my laptop, taking 10 minutes; and at that rate it significantly slows down my testing. Now, most of that turned out to be overhead from PROVE_LOCKING and PROVE_RCU: without those it was only 4 times slower than before; and more realistic tests on other machines don't fare as badly. I've tried a number of things to improve it, including tagging the swap entries, then doing lookup by tag: I'd expected that to halve the time, but in practice it's erratic, and often counter-productive. The only change I've so far found to make a consistent improvement, is to short-circuit the way we go back and forth, gang lookup packing entries into the array supplied, then shmem scanning that array for the target entry. Scanning in place doubles the speed, so it's now only twice as slow as before (or three times slower when the PROVEs are on). So, add radix_tree_locate_item() as an expedient, once-off, single-caller hack to do the lookup directly in place. #ifdef it on CONFIG_SHMEM and CONFIG_SWAP, as much to document its limited applicability as save space in other configurations. And, sadly, #include sched.h for cond_resched(). 
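As an illustration of the calling convention (a sketch only, with an invented helper name, not part of the patch): the scan runs with no locks held, and because the tree can change underneath it, whoever acts on the returned index must re-verify the slot under lock, for example by passing the swap entry as the 'expected' argument to shmem_add_to_page_cache().

/*
 * Sketch: find where @swap is currently stored in @mapping.
 * May sleep; returns (pgoff_t)-1 if the entry is not present.
 */
static pgoff_t shmem_locate_swap(struct address_space *mapping,
                                 swp_entry_t swap)
{
        void *radswap = swp_to_radix_entry(swap);

        /* lockless, preemptible scan of the radix tree in place */
        return (pgoff_t)radix_tree_locate_item(&mapping->page_tree, radswap);
}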
Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 1 + lib/radix-tree.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ mm/shmem.c | 38 +------------------ 3 files changed, 94 insertions(+), 37 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b7edf82..9d4539c 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -252,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 348eaef..a2f9da5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1203,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); +#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) +#include /* for cond_resched() */ + +/* + * This linear search is at present only useful to shmem_unuse_inode(). + */ +static unsigned long __locate(struct radix_tree_node *slot, void *item, + unsigned long index, unsigned long *found_index) +{ + unsigned int shift, height; + unsigned long i; + + height = slot->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: check items */ + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] == item) { + *found_index = index + i; + index = 0; + goto out; + } + } + index += RADIX_TREE_MAP_SIZE; +out: + return index; +} + +/** + * radix_tree_locate_item - search through radix tree for item + * @root: radix tree root + * @item: item to be found + * + * Returns index where item was found, or -1 if not found. + * Caller must hold no lock (since this time-consuming function needs + * to be preemptible), and must check afterwards if item is still there. 
+ */ +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = 0; + unsigned long found_index = -1; + + do { + rcu_read_lock(); + node = rcu_dereference_raw(root->rnode); + if (!radix_tree_is_indirect_ptr(node)) { + rcu_read_unlock(); + if (node == item) + found_index = 0; + break; + } + + node = indirect_to_ptr(node); + max_index = radix_tree_maxindex(node->height); + if (cur_index > max_index) + break; + + cur_index = __locate(node, item, cur_index, &found_index); + rcu_read_unlock(); + cond_resched(); + } while (cur_index != 0 && cur_index <= max_index); + + return found_index; +} +#else +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + return -1; +} +#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal diff --git a/mm/shmem.c b/mm/shmem.c index 3a5be0f..1c702f6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -357,42 +357,6 @@ export: } /* - * Lockless lookup of swap entry in radix tree, avoiding refcount on pages. - */ -static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap) -{ - void **slots[PAGEVEC_SIZE]; - pgoff_t indices[PAGEVEC_SIZE]; - unsigned int nr_found; - -restart: - nr_found = 1; - indices[0] = -1; - while (nr_found) { - pgoff_t index = indices[nr_found - 1] + 1; - unsigned int i; - - rcu_read_lock(); - nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - slots, indices, index, PAGEVEC_SIZE); - for (i = 0; i < nr_found; i++) { - void *item = radix_tree_deref_slot(slots[i]); - if (radix_tree_deref_retry(item)) { - rcu_read_unlock(); - goto restart; - } - if (item == radswap) { - rcu_read_unlock(); - return indices[i]; - } - } - rcu_read_unlock(); - cond_resched(); - } - return -1; -} - -/* * Remove swap entry from radix tree, free the swap and its page cache. */ static int shmem_free_swap(struct address_space *mapping, @@ -612,7 +576,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, int error; radswap = swp_to_radix_entry(swap); - index = shmem_find_swap(mapping, radswap); + index = radix_tree_locate_item(&mapping->page_tree, radswap); if (index == -1) return 0; -- cgit v1.1 From 8079b1c859c44f27d63da4951f5038a16589a563 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 3 Aug 2011 16:21:28 -0700 Subject: mm: clarify the radix_tree exceptional cases Make the radix_tree exceptional cases, mostly in filemap.c, clearer. It's hard to devise a suitable snappy name that illuminates the use by shmem/tmpfs for swap, while keeping filemap/pagecache/radix_tree generality. And akpm points out that /* radix_tree_deref_retry(page) */ comments look like calls that have been commented out for unknown reason. Skirt the naming difficulty by rearranging these blocks to handle the transient radix_tree_deref_retry(page) case first; then just explain the remaining shmem/tmpfs swap case in a comment. 
Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 66 ++++++++++++++++++++++++++++++++++++++++-------------------- mm/mincore.c | 1 + mm/shmem.c | 12 +++++++---- 3 files changed, 53 insertions(+), 26 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 96778fa..645a080 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -700,10 +700,14 @@ repeat: if (unlikely(!page)) goto out; if (radix_tree_exception(page)) { - if (radix_tree_exceptional_entry(page)) - goto out; - /* radix_tree_deref_retry(page) */ - goto repeat; + if (radix_tree_deref_retry(page)) + goto repeat; + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so return it without + * attempting to raise page count. + */ + goto out; } if (!page_cache_get_speculative(page)) goto repeat; @@ -838,15 +842,21 @@ repeat: continue; if (radix_tree_exception(page)) { - if (radix_tree_exceptional_entry(page)) - continue; + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + WARN_ON(start | i); + goto restart; + } /* - * radix_tree_deref_retry(page): - * can only trigger when entry at index 0 moves out of - * or back to root: none yet gotten, safe to restart. + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so skip over it - + * we only reach this from invalidate_mapping_pages(). */ - WARN_ON(start | i); - goto restart; + continue; } if (!page_cache_get_speculative(page)) @@ -904,14 +914,20 @@ repeat: continue; if (radix_tree_exception(page)) { - if (radix_tree_exceptional_entry(page)) - break; + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } /* - * radix_tree_deref_retry(page): - * can only trigger when entry at index 0 moves out of - * or back to root: none yet gotten, safe to restart. + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so stop looking for + * contiguous pages. */ - goto restart; + break; } if (!page_cache_get_speculative(page)) @@ -973,13 +989,19 @@ repeat: continue; if (radix_tree_exception(page)) { - BUG_ON(radix_tree_exceptional_entry(page)); + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } /* - * radix_tree_deref_retry(page): - * can only trigger when entry at index 0 moves out of - * or back to root: none yet gotten, safe to restart. + * This function is never used on a shmem/tmpfs + * mapping, so a swap entry won't be found here. */ - goto restart; + BUG(); } if (!page_cache_get_speculative(page)) diff --git a/mm/mincore.c b/mm/mincore.c index 733f182..636a868 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -72,6 +72,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) */ page = find_get_page(mapping, pgoff); #ifdef CONFIG_SWAP + /* shmem/tmpfs may return swap: account for swapcache page too. 
*/ if (radix_tree_exceptional_entry(page)) { swp_entry_t swap = radix_to_swp_entry(page); page = find_get_page(&swapper_space, swap.val); diff --git a/mm/shmem.c b/mm/shmem.c index 1c702f6..32f6763 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -332,10 +332,14 @@ repeat: if (unlikely(!page)) continue; if (radix_tree_exception(page)) { - if (radix_tree_exceptional_entry(page)) - goto export; - /* radix_tree_deref_retry(page) */ - goto restart; + if (radix_tree_deref_retry(page)) + goto restart; + /* + * Otherwise, we must be storing a swap entry + * here as an exceptional entry: so return it + * without attempting to raise page count. + */ + goto export; } if (!page_cache_get_speculative(page)) goto repeat; -- cgit v1.1 From 206506ccf04b6790d11553a0c8595d1bf65790fe Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 3 Aug 2011 16:21:29 -0700 Subject: tmpfs: expand "help" to explain value of TMPFS_POSIX_ACL Expand the fs/Kconfig "help" info to clarify why it's a bad idea to deselect the TMPFS_POSIX_ACL config variable. Signed-off-by: Robert P. J. Day Acked-by: Randy Dunlap Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 19891aa..9fe0b34 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL select TMPFS_XATTR select GENERIC_ACL help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for tmpfs + filesystems. + + If you've selected TMPFS, it's possible that you'll also need + this option as there are a number of Linux distros that require + POSIX ACL support under /dev for certain features to work properly. + For example, some distros need this feature for ALSA-related /dev + files for sound to work properly. In short, if you're not sure, + say Y. To learn more about Access Control Lists, visit the POSIX ACLs for Linux website . - If you don't know what Access Control Lists are, say N. - config TMPFS_XATTR bool "Tmpfs extended attributes" depends on TMPFS -- cgit v1.1 From ebec9a7bf11f843b0602b06c402f04bf4213b35a Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 3 Aug 2011 09:22:54 -0700 Subject: drm: track CEA version number if present Drivers need to know the CEA version number in addition to other display info (like whether the display is an HDMI sink) before enabling certain features. So track the CEA version number in the display info structure. 
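By way of illustration (not part of the patch, and the revision threshold shown is only an example), a driver could then gate a feature on the sink's CEA revision like this:

static bool sink_has_recent_cea(struct drm_connector *connector)
{
        struct drm_display_info *info = &connector->display_info;

        /* cea_rev is left at 0 when the EDID has no CEA extension block */
        if (!info->cea_rev)
                return false;

        /* example policy: require CEA-861 revision 3 or newer */
        return info->cea_rev >= 3;
}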
Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/drm_edid.c | 9 +++++++++ include/drm/drm_crtc.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 0929219..e12f8b0 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1439,6 +1439,8 @@ EXPORT_SYMBOL(drm_detect_monitor_audio); static void drm_add_display_info(struct edid *edid, struct drm_display_info *info) { + u8 *edid_ext; + info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; @@ -1483,6 +1485,13 @@ static void drm_add_display_info(struct edid *edid, info->color_formats = DRM_COLOR_FORMAT_YCRCB444; if (info->color_formats & DRM_EDID_FEATURE_RGB_YCRCB422) info->color_formats = DRM_COLOR_FORMAT_YCRCB422; + + /* Get data from CEA blocks if present */ + edid_ext = drm_find_cea_extension(edid); + if (!edid_ext) + return; + + info->cea_rev = edid_ext[1]; } /** diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 33d12f8..d515bc8 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -205,6 +205,8 @@ struct drm_display_info { enum subpixel_order subpixel_order; u32 color_formats; + u8 cea_rev; + char *raw_edid; /* if any */ }; -- cgit v1.1 From 45187ace97f7b3deb559b25348ccb7e301c158c9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 3 Aug 2011 09:22:55 -0700 Subject: drm/i915/hdmi: split infoframe setting from infoframe type code This makes it easier to add support for other infoframes (e.g. SPD, vendor specific). Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_hdmi.c | 149 ++++++++++++++++++++++++++------------ 3 files changed, 106 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index be67a59..56cae72 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1506,6 +1506,7 @@ #define VIDEO_DIP_SELECT_AVI (0 << 19) #define VIDEO_DIP_SELECT_VENDOR (1 << 19) #define VIDEO_DIP_SELECT_SPD (3 << 19) +#define VIDEO_DIP_SELECT_MASK (3 << 19) #define VIDEO_DIP_FREQ_ONCE (0 << 16) #define VIDEO_DIP_FREQ_VSYNC (1 << 16) #define VIDEO_DIP_FREQ_2VSYNC (2 << 16) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6e990f9..2f47dcf 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -178,6 +178,8 @@ struct intel_crtc { #define to_intel_encoder(x) container_of(x, struct intel_encoder, base) #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base) +#define DIP_HEADER_SIZE 5 + #define DIP_TYPE_AVI 0x82 #define DIP_VERSION_AVI 0x2 #define DIP_LEN_AVI 13 diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 105e2b8..d43cebd 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -45,6 +45,8 @@ struct intel_hdmi { bool has_hdmi_sink; bool has_audio; int force_audio; + void (*write_infoframe)(struct drm_encoder *encoder, + struct dip_infoframe *frame); }; static struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder) @@ -58,37 +60,70 @@ static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector) struct intel_hdmi, base); } -void intel_dip_infoframe_csum(struct dip_infoframe *avi_if) +void intel_dip_infoframe_csum(struct dip_infoframe *frame) { - uint8_t *data = (uint8_t *)avi_if; + uint8_t *data = 
(uint8_t *)frame; uint8_t sum = 0; unsigned i; - avi_if->checksum = 0; - avi_if->ecc = 0; + frame->checksum = 0; + frame->ecc = 0; - for (i = 0; i < sizeof(*avi_if); i++) + /* Header isn't part of the checksum */ + for (i = 5; i < frame->len; i++) sum += data[i]; - avi_if->checksum = 0x100 - sum; + frame->checksum = 0x100 - sum; } -static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder) +static u32 intel_infoframe_index(struct dip_infoframe *frame) { - struct dip_infoframe avi_if = { - .type = DIP_TYPE_AVI, - .ver = DIP_VERSION_AVI, - .len = DIP_LEN_AVI, - }; - uint32_t *data = (uint32_t *)&avi_if; + u32 flags = 0; + + switch (frame->type) { + case DIP_TYPE_AVI: + flags |= VIDEO_DIP_SELECT_AVI; + break; + case DIP_TYPE_SPD: + flags |= VIDEO_DIP_SELECT_SPD; + break; + default: + DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type); + break; + } + + return flags; +} + +static u32 intel_infoframe_flags(struct dip_infoframe *frame) +{ + u32 flags = 0; + + switch (frame->type) { + case DIP_TYPE_AVI: + flags |= VIDEO_DIP_ENABLE_AVI | VIDEO_DIP_FREQ_VSYNC; + break; + case DIP_TYPE_SPD: + flags |= VIDEO_DIP_ENABLE_SPD | VIDEO_DIP_FREQ_2VSYNC; + break; + default: + DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type); + break; + } + + return flags; +} + +static void i9xx_write_infoframe(struct drm_encoder *encoder, + struct dip_infoframe *frame) +{ + uint32_t *data = (uint32_t *)frame; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); - u32 port; - unsigned i; + u32 port, flags, val = I915_READ(VIDEO_DIP_CTL); + unsigned i, len = DIP_HEADER_SIZE + frame->len; - if (!intel_hdmi->has_hdmi_sink) - return; /* XXX first guess at handling video port, is this corrent? 
*/ if (intel_hdmi->sdvox_reg == SDVOB) @@ -98,52 +133,72 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder) else return; - I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | port | - VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC); + flags = intel_infoframe_index(frame); + + val &= ~VIDEO_DIP_SELECT_MASK; + + I915_WRITE(VIDEO_DIP_CTL, val | port | flags); - intel_dip_infoframe_csum(&avi_if); - for (i = 0; i < sizeof(avi_if); i += 4) { + for (i = 0; i < len; i += 4) { I915_WRITE(VIDEO_DIP_DATA, *data); data++; } - I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | port | - VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC | - VIDEO_DIP_ENABLE_AVI); + flags |= intel_infoframe_flags(frame); + + I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | val | port | flags); } -static void intel_ironlake_hdmi_set_avi_infoframe(struct drm_encoder *encoder) +static void ironlake_write_infoframe(struct drm_encoder *encoder, + struct dip_infoframe *frame) { - struct dip_infoframe avi_if = { - .type = DIP_TYPE_AVI, - .ver = DIP_VERSION_AVI, - .len = DIP_LEN_AVI, - }; - uint32_t *data = (uint32_t *)&avi_if; + uint32_t *data = (uint32_t *)frame; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); struct drm_crtc *crtc = encoder->crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int reg = TVIDEO_DIP_CTL(intel_crtc->pipe); - unsigned i; - - if (!intel_hdmi->has_hdmi_sink) - return; + unsigned i, len = DIP_HEADER_SIZE + frame->len; + u32 flags, val = I915_READ(reg); intel_wait_for_vblank(dev, intel_crtc->pipe); - I915_WRITE(reg, VIDEO_DIP_SELECT_AVI); + flags = intel_infoframe_index(frame); - intel_dip_infoframe_csum(&avi_if); - for (i = 0; i < sizeof(avi_if); i += 4) { + val &= ~VIDEO_DIP_SELECT_MASK; + + I915_WRITE(reg, val | flags); + + for (i = 0; i < len; i += 4) { I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data); data++; } - I915_WRITE(reg, VIDEO_DIP_ENABLE | VIDEO_DIP_SELECT_AVI | - VIDEO_DIP_FREQ_VSYNC | (DIP_LEN_AVI << 8) | - VIDEO_DIP_ENABLE_AVI); + flags |= intel_infoframe_flags(frame); + + I915_WRITE(reg, VIDEO_DIP_ENABLE | val | flags); +} +static void intel_set_infoframe(struct drm_encoder *encoder, + struct dip_infoframe *frame) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + + if (!intel_hdmi->has_hdmi_sink) + return; + + intel_dip_infoframe_csum(frame); + intel_hdmi->write_infoframe(encoder, frame); +} + +static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder) +{ + struct dip_infoframe avi_if = { + .type = DIP_TYPE_AVI, + .ver = DIP_VERSION_AVI, + .len = DIP_LEN_AVI, + }; + + intel_set_infoframe(encoder, &avi_if); } static void intel_hdmi_mode_set(struct drm_encoder *encoder, @@ -189,10 +244,7 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder, I915_WRITE(intel_hdmi->sdvox_reg, sdvox); POSTING_READ(intel_hdmi->sdvox_reg); - if (HAS_PCH_SPLIT(dev)) - intel_ironlake_hdmi_set_avi_infoframe(encoder); - else - intel_hdmi_set_avi_infoframe(encoder); + intel_hdmi_set_avi_infoframe(encoder); } static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) @@ -470,6 +522,11 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg) intel_hdmi->sdvox_reg = sdvox_reg; + if (!HAS_PCH_SPLIT(dev)) + intel_hdmi->write_infoframe = i9xx_write_infoframe; + else + intel_hdmi->write_infoframe = ironlake_write_infoframe; + drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs); intel_hdmi_add_properties(intel_hdmi, connector); -- cgit v1.1 
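The shape of the refactoring above, one common entry point doing the shared work (sink check, checksumming) and then dispatching to a per-platform register writer chosen once at init, can be sketched in isolation. All names below are hypothetical stand-ins, not the driver's own:

#include <stdbool.h>

struct frame;                           /* stand-in for struct dip_infoframe */

struct hdmi_out {
        bool has_hdmi_sink;
        /* platform-specific register programming, picked once at init time */
        void (*write_infoframe)(struct hdmi_out *out, struct frame *frame);
};

static void set_infoframe(struct hdmi_out *out, struct frame *frame)
{
        if (!out->has_hdmi_sink)
                return;                 /* DVI sinks get no infoframes */

        /* shared work (e.g. checksumming) happens once, here ... */
        out->write_infoframe(out, frame);
}

Adding another infoframe type then only means filling in a frame and calling the common entry point, which is what the SPD patch below does.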
From c0864cb39c68696e80657360eba63da5e743b7aa Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 3 Aug 2011 09:22:56 -0700 Subject: drm/i915/hdmi: HDMI source product description infoframe support Set an SPD infoframe if the sink supports it. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_drv.h | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_hdmi.c | 16 ++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2f47dcf..7b330e7 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -184,6 +184,22 @@ struct intel_crtc { #define DIP_VERSION_AVI 0x2 #define DIP_LEN_AVI 13 +#define DIP_TYPE_SPD 0x3 +#define DIP_VERSION_SPD 0x1 +#define DIP_LEN_SPD 25 +#define DIP_SPD_UNKNOWN 0 +#define DIP_SPD_DSTB 0x1 +#define DIP_SPD_DVDP 0x2 +#define DIP_SPD_DVHS 0x3 +#define DIP_SPD_HDDVR 0x4 +#define DIP_SPD_DVC 0x5 +#define DIP_SPD_DSC 0x6 +#define DIP_SPD_VCD 0x7 +#define DIP_SPD_GAME 0x8 +#define DIP_SPD_PC 0x9 +#define DIP_SPD_BD 0xa +#define DIP_SPD_SCD 0xb + struct dip_infoframe { uint8_t type; /* HB0 */ uint8_t ver; /* HB1 */ @@ -208,6 +224,11 @@ struct dip_infoframe { uint16_t left_bar_end; uint16_t right_bar_start; } avi; + struct { + uint8_t vn[8]; + uint8_t pd[16]; + uint8_t sdi; + } spd; uint8_t payload[27]; } __attribute__ ((packed)) body; } __attribute__((packed)); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index d43cebd..226ba83 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -201,6 +201,21 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder) intel_set_infoframe(encoder, &avi_if); } +static void intel_hdmi_set_spd_infoframe(struct drm_encoder *encoder) +{ + struct dip_infoframe spd_if; + + memset(&spd_if, 0, sizeof(spd_if)); + spd_if.type = DIP_TYPE_SPD; + spd_if.ver = DIP_VERSION_SPD; + spd_if.len = DIP_LEN_SPD; + strcpy(spd_if.body.spd.vn, "Intel"); + strcpy(spd_if.body.spd.pd, "Integrated gfx"); + spd_if.body.spd.sdi = DIP_SPD_PC; + + intel_set_infoframe(encoder, &spd_if); +} + static void intel_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -245,6 +260,7 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder, POSTING_READ(intel_hdmi->sdvox_reg); intel_hdmi_set_avi_infoframe(encoder); + intel_hdmi_set_spd_infoframe(encoder); } static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) -- cgit v1.1 From 07b7ddd9b7f17a567e3ac2b33a4dffcb2a4524e0 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 3 Aug 2011 11:28:44 -0700 Subject: drm/i915: allow cache sharing policy control Expose the SNB+ cache sharing policy register in debugfs. The new file, i915_cache_sharing, has 4 values, 0-3, with 0 being "max uncore resources" and 3 being the minimum. Exposing this control should make benchmarking easier and help us choose a good default. 
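For example (an illustration only: the debugfs mount point and DRM minor number are assumptions about the local setup, and writing requires root), the new file can be exercised from userspace like so:

#include <stdio.h>

int main(void)
{
        /* assumes debugfs mounted at /sys/kernel/debug and i915 on DRM minor 0 */
        const char *path = "/sys/kernel/debug/dri/0/i915_cache_sharing";
        FILE *f;
        int val;

        f = fopen(path, "r");
        if (f && fscanf(f, "%d", &val) == 1)
                printf("cache sharing policy: %d (0 = max uncore resources)\n", val);
        if (f)
                fclose(f);

        f = fopen(path, "w");
        if (f) {
                fprintf(f, "2\n");      /* try a more conservative setting */
                fclose(f);
        }
        return 0;
}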
Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_debugfs.c | 99 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 8 +++ 2 files changed, 107 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5bf7bf5..a8ab626 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1408,6 +1408,85 @@ static const struct file_operations i915_max_freq_fops = { .llseek = default_llseek, }; +static int +i915_cache_sharing_open(struct inode *inode, + struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +static ssize_t +i915_cache_sharing_read(struct file *filp, + char __user *ubuf, + size_t max, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + drm_i915_private_t *dev_priv = dev->dev_private; + char buf[80]; + u32 snpcr; + int len; + + mutex_lock(&dev_priv->dev->struct_mutex); + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + mutex_unlock(&dev_priv->dev->struct_mutex); + + len = snprintf(buf, sizeof (buf), + "%d\n", (snpcr & GEN6_MBC_SNPCR_MASK) >> + GEN6_MBC_SNPCR_SHIFT); + + if (len > sizeof (buf)) + len = sizeof (buf); + + return simple_read_from_buffer(ubuf, max, ppos, buf, len); +} + +static ssize_t +i915_cache_sharing_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + struct drm_i915_private *dev_priv = dev->dev_private; + char buf[20]; + u32 snpcr; + int val = 1; + + if (cnt > 0) { + if (cnt > sizeof (buf) - 1) + return -EINVAL; + + if (copy_from_user(buf, ubuf, cnt)) + return -EFAULT; + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 0); + } + + if (val < 0 || val > 3) + return -EINVAL; + + DRM_DEBUG_DRIVER("Manually setting uncore sharing to %d\n", val); + + /* Update the cache sharing policy here as well */ + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= (val << GEN6_MBC_SNPCR_SHIFT); + I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + + return cnt; +} + +static const struct file_operations i915_cache_sharing_fops = { + .owner = THIS_MODULE, + .open = i915_cache_sharing_open, + .read = i915_cache_sharing_read, + .write = i915_cache_sharing_write, + .llseek = default_llseek, +}; + /* As the drm_debugfs_init() routines are called before dev->dev_private is * allocated we need to hook into the minor for release. 
*/ static int @@ -1522,6 +1601,21 @@ static int i915_max_freq_create(struct dentry *root, struct drm_minor *minor) return drm_add_fake_info_node(minor, ent, &i915_max_freq_fops); } +static int i915_cache_sharing_create(struct dentry *root, struct drm_minor *minor) +{ + struct drm_device *dev = minor->dev; + struct dentry *ent; + + ent = debugfs_create_file("i915_cache_sharing", + S_IRUGO | S_IWUSR, + root, dev, + &i915_cache_sharing_fops); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + return drm_add_fake_info_node(minor, ent, &i915_cache_sharing_fops); +} + static struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -1578,6 +1672,9 @@ int i915_debugfs_init(struct drm_minor *minor) ret = i915_max_freq_create(minor->debugfs_root, minor); if (ret) return ret; + ret = i915_cache_sharing_create(minor->debugfs_root, minor); + if (ret) + return ret; return drm_debugfs_create_files(i915_debugfs_list, I915_DEBUGFS_ENTRIES, @@ -1594,6 +1691,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor) 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_max_freq_fops, 1, minor); + drm_debugfs_remove_files((struct drm_info_list *) &i915_cache_sharing_fops, + 1, minor); } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 56cae72..d1331f7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -78,6 +78,14 @@ #define GRDOM_RENDER (1<<2) #define GRDOM_MEDIA (3<<2) +#define GEN6_MBCUNIT_SNPCR 0x900c /* for LLC config */ +#define GEN6_MBC_SNPCR_SHIFT 21 +#define GEN6_MBC_SNPCR_MASK (3<<21) +#define GEN6_MBC_SNPCR_MAX (0<<21) +#define GEN6_MBC_SNPCR_MED (1<<21) +#define GEN6_MBC_SNPCR_LOW (2<<21) +#define GEN6_MBC_SNPCR_MIN (3<<21) /* only 1/16th of the cache is shared */ + #define GEN6_GDRST 0x941c #define GEN6_GRDOM_FULL (1 << 0) #define GEN6_GRDOM_RENDER (1 << 1) -- cgit v1.1 From 33a30ed4bdccd95ed84a1a20c1fef8ac89788ce5 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Wed, 3 Aug 2011 22:26:55 +0400 Subject: shm: fix wrong tests Commit 4c677e2eefdb ("shm: optimize locking and ipc_namespace getting") introduced a copy-paste bug. Due to the bug cycle optimizations were disabled. Signed-off-by: Vasiliy Kulikov Signed-off-by: Linus Torvalds --- ipc/shm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index 9fb044f3b..7efff04 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -306,7 +306,7 @@ void exit_shm(struct task_struct *task) /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } -- cgit v1.1 From 298507d4d2cff2248e84afcf646b697301294442 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Wed, 3 Aug 2011 22:28:26 +0400 Subject: shm: optimize exit_shm() We may optimistically check .in_use == 0 without holding the rw_mutex: it's the common case, and if it's zero, there certainly won't be any segments associated with us. 
After taking the lock, the idr_for_each() will do the right thing, so we could now drop the re-check inside the lock without any real cost. But it won't hurt. Signed-off-by: Vasiliy Kulikov Signed-off-by: Linus Torvalds --- ipc/shm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ipc/shm.c b/ipc/shm.c index 7efff04..b5bae9d 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -304,6 +304,9 @@ void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; + if (shm_ids(ns).in_use == 0) + return; + /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rw_mutex); if (shm_ids(ns).in_use) -- cgit v1.1 From 5ee5a07ce3a54de3d1192f8c9c2378d51a51e3bd Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 3 Aug 2011 06:42:42 +0000 Subject: irda: use PCI_VENDOR_ID_* Use PCI_VENDOR_ID_* from pci_ids.h instead of creating #define locally. Signed-off-by: Jon Mason Signed-off-by: David S. Miller --- drivers/net/irda/smsc-ircc2.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index 954f6e93..8b1c348 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -2405,8 +2405,6 @@ static int __init smsc_superio_lpc(unsigned short cfg_base) * addresses making a subsystem device table necessary. */ #ifdef CONFIG_PCI -#define PCIID_VENDOR_INTEL 0x8086 -#define PCIID_VENDOR_ALI 0x10b9 static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __initdata = { /* * Subsystems needing entries: @@ -2416,7 +2414,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini */ { /* Guessed entry */ - .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ + .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */ .device = 0x24cc, .subvendor = 0x103c, .subdevice = 0x08bc, @@ -2429,7 +2427,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini .name = "HP nx5000 family", }, { - .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ + .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */ .device = 0x24cc, .subvendor = 0x103c, .subdevice = 0x088c, @@ -2443,7 +2441,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini .name = "HP nc8000 family", }, { - .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ + .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */ .device = 0x24cc, .subvendor = 0x103c, .subdevice = 0x0890, @@ -2456,7 +2454,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini .name = "HP nc6000 family", }, { - .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ + .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */ .device = 0x24cc, .subvendor = 0x0e11, .subdevice = 0x0860, @@ -2471,7 +2469,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini }, { /* Intel 82801DB/DBL (ICH4/ICH4-L) LPC Interface Bridge */ - .vendor = PCIID_VENDOR_INTEL, + .vendor = PCI_VENDOR_ID_INTEL, .device = 0x24c0, .subvendor = 0x1179, .subdevice = 0xffff, /* 0xffff is "any" */ @@ -2484,7 +2482,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini .name = "Toshiba laptop with Intel 82801DB/DBL LPC bridge", }, { - .vendor = PCIID_VENDOR_INTEL, /* Intel 82801CAM ISA bridge */ + .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801CAM ISA bridge */ .device = 0x248c, .subvendor = 0x1179, .subdevice = 0xffff, /* 0xffff 
is "any" */ @@ -2498,7 +2496,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini }, { /* 82801DBM (ICH4-M) LPC Interface Bridge */ - .vendor = PCIID_VENDOR_INTEL, + .vendor = PCI_VENDOR_ID_INTEL, .device = 0x24cc, .subvendor = 0x1179, .subdevice = 0xffff, /* 0xffff is "any" */ @@ -2512,7 +2510,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini }, { /* ALi M1533/M1535 PCI to ISA Bridge [Aladdin IV/V/V+] */ - .vendor = PCIID_VENDOR_ALI, + .vendor = PCI_VENDOR_ID_AL, .device = 0x1533, .subvendor = 0x1179, .subdevice = 0xffff, /* 0xffff is "any" */ -- cgit v1.1 From d0b0c8c79b26a1f48e1e92a2c073655159e72b7c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 22 Jul 2011 14:20:46 -0700 Subject: ioat: fix xor_idx_to_desc For versions of the device that implement operation-types 0x87, 0x88 (IOAT_OP_XOR, IOAT_OP_XOR_VAL) this map determines whether a given source is located in the base or extended descriptor. Source addresses 6 through 8 require an extended descriptor, hence 0xe0, not 0xd0. No shipping hardware currently implements these operation types. Reported-by: Evgueni Smogailov Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index fec8664..f519c93 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -73,7 +73,7 @@ /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor */ -static const u8 xor_idx_to_desc = 0xd0; +static const u8 xor_idx_to_desc = 0xe0; static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; static const u8 pq_idx_to_desc = 0xf8; static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; -- cgit v1.1 From 21ef4b8b7a7d59a995bf44382de38c95587767d4 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 20 Jul 2011 11:32:28 +0800 Subject: dmaengine: use DEFINE_IDR for static initialization We could use DEFINE_IDR for statically allocated idr that allow us to save a few lines of code. And also remove unneeded mutex_init() for dma_list_mutex, as dma_list_mutex is initialized automatically by DEFINE_MUTEX(). Signed-off-by: Axel Lin Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 8bcb15f..bc11eeb 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -61,9 +61,9 @@ #include static DEFINE_MUTEX(dma_list_mutex); +static DEFINE_IDR(dma_idr); static LIST_HEAD(dma_device_list); static long dmaengine_ref_count; -static struct idr dma_idr; /* --- sysfs implementation --- */ @@ -1049,8 +1049,6 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies); static int __init dma_bus_init(void) { - idr_init(&dma_idr); - mutex_init(&dma_list_mutex); return class_register(&dma_devclass); } arch_initcall(dma_bus_init); -- cgit v1.1 From 33f35f2a4ee3abfc0f87990058aa1b6b5092f725 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Aug 2011 22:00:38 -1000 Subject: x86: don't include xen/xen.h in unless XEN is enabled Dmitry Kasatkin reports: "kernel-devel package with kernel headers have no directory if XEN is disabled. Modules which inclide asm/io.h won't compile. XEN related content is behind the CONFIG_XEN flag in the io.h. And should be also behind CONFIG_XEN flag." So move the include of down into the section that is conditional on CONFIG_XEN. 
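Schematically, the guarded tail of asm/io.h after this change looks as follows (a sketch only; see the hunk below for the exact declarations):

#ifdef CONFIG_XEN
#include <xen/xen.h>    /* only pulled in when Xen support is configured */

/* ... Xen-specific declarations such as xen_biovec_phys_mergeable() ... */
#endif  /* CONFIG_XEN */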
Reported-by: Dmitry Kasatkin Signed-off-by: Linus Torvalds --- arch/x86/include/asm/io.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d02804d..d8e8eef 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -40,8 +40,6 @@ #include #include -#include - #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ @@ -334,6 +332,7 @@ extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN +#include struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, -- cgit v1.1 From 288d5abec8314ae50fe6692f324b0444acae8486 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 3 Aug 2011 22:03:29 -1000 Subject: Boot up with usermodehelper disabled The core device layer sends tons of uevent notifications for each device it finds, and if the kernel has been built with a non-empty CONFIG_UEVENT_HELPER_PATH that will make us try to execute the usermode helper binary for all these events very early in the boot. Not only won't the root filesystem even be mounted at that point, we literally won't have necessarily even initialized all the process handling data structures at that point, which causes no end of silly problems even when the usermode helper doesn't actually succeed in executing. So just use our existing infrastructure to disable the usermodehelpers to make the kernel start out with them disabled. We enable them when we've at least initialized stuff a bit. Problems related to an uninitialized init_ipc_ns.ids[IPC_SHM_IDS].rw_mutex reported by various people. Reported-by: Manuel Lauss Reported-by: Richard Weinberger Reported-by: Marc Zyngier Acked-by: Kay Sievers Cc: Andrew Morton Cc: Vasiliy Kulikov Cc: Greg KH Signed-off-by: Linus Torvalds --- init/main.c | 5 ++++- kernel/kmod.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 1952d37..9c51ee7 100644 --- a/init/main.c +++ b/init/main.c @@ -369,9 +369,12 @@ static noinline void __init_refok rest_init(void) init_idle_bootup_task(current); preempt_enable_no_resched(); schedule(); - preempt_disable(); + + /* At this point, we can enable user mode helper functionality */ + usermodehelper_enable(); /* Call into cpu_idle with preempt disabled */ + preempt_disable(); cpu_idle(); } diff --git a/kernel/kmod.c b/kernel/kmod.c index 47613df..ddc7644 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work) * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). */ -static int usermodehelper_disabled; +static int usermodehelper_disabled = 1; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); -- cgit v1.1 From 7d36b26be0f3c6b86e3ab7e1539e42f3a3bc79ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Jul 2011 13:13:44 +0200 Subject: lockdep: Fix trace_hardirqs_on_caller() Commit dd4e5d3ac4a ("lockdep: Fix trace_[soft,hard]irqs_[on,off]() recursion") made a bit of a mess of the various checks and error conditions. In particular it moved the check for !irqs_disabled() before the spurious enable test, resulting in some warnings. 
Reported-by: Arnaud Lacombe Reported-by: Dave Jones Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1311679697.24752.28.camel@twins Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3956f51..74ca247 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2485,23 +2485,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip) { struct task_struct *curr = current; - if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) - return; - - if (unlikely(curr->hardirqs_enabled)) { - /* - * Neither irq nor preemption are disabled here - * so this is racy by nature but losing one hit - * in a stat is not a big deal. - */ - __debug_atomic_inc(redundant_hardirqs_on); - return; - } /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) - return; /* * We are going to turn hardirqs on, so set the * usage bit for all held locks: @@ -2529,9 +2515,25 @@ void trace_hardirqs_on_caller(unsigned long ip) if (unlikely(!debug_locks || current->lockdep_recursion)) return; + if (unlikely(current->hardirqs_enabled)) { + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but losing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); + return; + } + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) + return; + + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; + current->lockdep_recursion = 1; __trace_hardirqs_on_caller(ip); current->lockdep_recursion = 0; -- cgit v1.1 From 70a0686a72c7a7e554b404ca11406ceec709d425 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jul 2011 12:09:59 +0200 Subject: lockdep: Fix up warning On Sun, 2011-07-24 at 21:06 -0400, Arnaud Lacombe wrote: > /src/linux/linux/kernel/lockdep.c: In function 'mark_held_locks': > /src/linux/linux/kernel/lockdep.c:2471:31: warning: comparison of > distinct pointer types lacks a cast The warning is harmless in this case, but the below makes it go away. Reported-by: Arnaud Lacombe Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1311588599.2617.56.camel@laptop Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 74ca247..5903586 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2468,7 +2468,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) BUG_ON(usage_bit >= LOCK_USAGE_STATES); - if (hlock_class(hlock)->key == &__lockdep_no_validate__) + if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) continue; if (!mark_lock(curr, hlock, usage_bit)) -- cgit v1.1 From 83835b3d9aec8e9f666d8223d8a386814f756266 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jul 2011 15:26:05 +0200 Subject: slab, lockdep: Annotate slab -> rcu -> debug_object -> slab Lockdep thinks there's lock recursion through: kmem_cache_free() cache_flusharray() spin_lock(&l3->list_lock) <----------------. free_block() | slab_destroy() | call_rcu() | debug_object_activate() | debug_object_init() | __debug_object_init() | kmem_cache_alloc() | cache_alloc_refill() | spin_lock(&l3->list_lock) --' Now debug objects doesn't use SLAB_DESTROY_BY_RCU and hence there is no actual possibility of recursing. 
Luckily debug objects marks it slab with SLAB_DEBUG_OBJECTS so we can identify the thing. Mark all SLAB_DEBUG_OBJECTS (all one!) slab caches with a special lockdep key so that lockdep sees its a different cachep. Also add a WARN on trying to create a SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS cache, to avoid possible future trouble. Reported-and-tested-by: Sebastian Siewior [ fixes to the initial patch ] Reported-by: Thomas Gleixner Acked-by: Pekka Enberg Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1311341165.27400.58.camel@twins Signed-off-by: Ingo Molnar --- mm/slab.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 9594740..0703578 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -622,6 +622,51 @@ int slab_is_available(void) static struct lock_class_key on_slab_l3_key; static struct lock_class_key on_slab_alc_key; +static struct lock_class_key debugobj_l3_key; +static struct lock_class_key debugobj_alc_key; + +static void slab_set_lock_classes(struct kmem_cache *cachep, + struct lock_class_key *l3_key, struct lock_class_key *alc_key, + int q) +{ + struct array_cache **alc; + struct kmem_list3 *l3; + int r; + + l3 = cachep->nodelists[q]; + if (!l3) + return; + + lockdep_set_class(&l3->list_lock, l3_key); + alc = l3->alien; + /* + * FIXME: This check for BAD_ALIEN_MAGIC + * should go away when common slab code is taught to + * work even without alien caches. + * Currently, non NUMA code returns BAD_ALIEN_MAGIC + * for alloc_alien_cache, + */ + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) + return; + for_each_node(r) { + if (alc[r]) + lockdep_set_class(&alc[r]->lock, alc_key); + } +} + +static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) +{ + slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node); +} + +static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) +{ + int node; + + for_each_online_node(node) + slab_set_debugobj_lock_classes_node(cachep, node); +} + static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; @@ -630,29 +675,14 @@ static void init_node_lock_keys(int q) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { - struct array_cache **alc; struct kmem_list3 *l3; - int r; l3 = s->cs_cachep->nodelists[q]; if (!l3 || OFF_SLAB(s->cs_cachep)) continue; - lockdep_set_class(&l3->list_lock, &on_slab_l3_key); - alc = l3->alien; - /* - * FIXME: This check for BAD_ALIEN_MAGIC - * should go away when common slab code is taught to - * work even without alien caches. 
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC - * for alloc_alien_cache, - */ - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) - continue; - for_each_node(r) { - if (alc[r]) - lockdep_set_class(&alc[r]->lock, - &on_slab_alc_key); - } + + slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, + &on_slab_alc_key, q); } } @@ -671,6 +701,14 @@ static void init_node_lock_keys(int q) static inline void init_lock_keys(void) { } + +static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) +{ +} + +static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) +{ +} #endif /* @@ -1264,6 +1302,8 @@ static int __cpuinit cpuup_prepare(long cpu) spin_unlock_irq(&l3->list_lock); kfree(shared); free_alien_cache(alien); + if (cachep->flags & SLAB_DEBUG_OBJECTS) + slab_set_debugobj_lock_classes_node(cachep, node); } init_node_lock_keys(node); @@ -2426,6 +2466,16 @@ kmem_cache_create (const char *name, size_t size, size_t align, goto oops; } + if (flags & SLAB_DEBUG_OBJECTS) { + /* + * Would deadlock through slab_destroy()->call_rcu()-> + * debug_object_activate()->kmem_cache_alloc(). + */ + WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU); + + slab_set_debugobj_lock_classes(cachep); + } + /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); oops: -- cgit v1.1 From f59de8992aa6dc85e81aadc26b0f69e17809721d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 15:19:09 +0200 Subject: lockdep: Clear whole lockdep_map on initialization lockdep_init_map() only initializes parts of lockdep_map and triggers kmemcheck warning when it is copied as a whole. There isn't anything to be gained by clearing selectively. memset() the whole structure and remove loop for ->class_cache[] clearing. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=35532 Signed-off-by: Tejun Heo Reported-and-tested-by: Christian Casteyde Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=35532 Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110714131909.GJ3455@htj.dyndns.org Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 5903586..8c24294 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2874,10 +2874,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - int i; - - for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) - lock->class_cache[i] = NULL; + memset(lock, 0, sizeof(*lock)); #ifdef CONFIG_LOCK_STAT lock->cpu = raw_smp_processor_id(); -- cgit v1.1 From 30765b92ada267c5395fc788623cb15233276f5c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Jul 2011 23:22:56 +0200 Subject: slab, lockdep: Annotate the locks before using them Fernando found we hit the regular OFF_SLAB 'recursion' before we annotate the locks, cure this. 
The relevant portion of the stack-trace: > [ 0.000000] [] rt_spin_lock+0x50/0x56 > [ 0.000000] [] __cache_free+0x43/0xc3 > [ 0.000000] [] kmem_cache_free+0x6c/0xdc > [ 0.000000] [] slab_destroy+0x4f/0x53 > [ 0.000000] [] free_block+0x94/0xc1 > [ 0.000000] [] do_tune_cpucache+0x10b/0x2bb > [ 0.000000] [] enable_cpucache+0x7b/0xa7 > [ 0.000000] [] kmem_cache_init_late+0x1f/0x61 > [ 0.000000] [] start_kernel+0x24c/0x363 > [ 0.000000] [] i386_start_kernel+0xa9/0xaf Reported-by: Fernando Lopez-Lezcano Acked-by: Pekka Enberg Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1311888176.2617.379.camel@laptop Signed-off-by: Ingo Molnar --- mm/slab.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 0703578..6d90a09 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1666,6 +1666,9 @@ void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + /* 6) resize the head arrays to their final sizes */ mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) @@ -1676,9 +1679,6 @@ void __init kmem_cache_init_late(void) /* Done! */ g_cpucache_up = FULL; - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); - /* * Register a cpu startup notifier callback that initializes * cpu_cache_get for all new cpus -- cgit v1.1 From 89e9aad65ffad96c3a35ff979a61a65761873951 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Aug 2011 01:29:51 -0700 Subject: ide: Fix irq flags madness commit ec1a123 (IDE: pass IRQ flags to the IDE core) introduced the bogosity of passing unfiltered resource->flags to the irq_flags which are used for request_irq. It results in random bits set (especially IORESOURCE_IRQ which maps to IRQF_PER_CPU). Filter the bits proper. Signed-off-by: Thomas Gleixner Signed-off-by: David S. Miller --- drivers/ide/ide_platform.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c index 542603b..962693b 100644 --- a/drivers/ide/ide_platform.c +++ b/drivers/ide/ide_platform.c @@ -19,6 +19,7 @@ #include #include #include +#include #include static void __devinit plat_ide_setup_ports(struct ide_hw *hw, @@ -95,7 +96,10 @@ static int __devinit plat_ide_probe(struct platform_device *pdev) plat_ide_setup_ports(&hw, base, alt_base, pdata, res_irq->start); hw.dev = &pdev->dev; - d.irq_flags = res_irq->flags; + d.irq_flags = res_irq->flags & IRQF_TRIGGER_MASK; + if (res_irq->flags & IORESOURCE_IRQ_SHAREABLE) + d.irq_flags |= IRQF_SHARED; + if (mmio) d.host_flags |= IDE_HFLAG_MMIO; -- cgit v1.1 From 0302899e144296d6ce8cb3679a9a42d5c6436910 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 4 Aug 2011 01:30:34 -0700 Subject: drivers/ide/cy82c693.c: Add missing pci_dev_put Pci_get_slot calls pci_dev_get, so pci_dev_put is needed before leaving the function in the case where pci_get_slot is locally used. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ local idexpression x; expression e; @@ *x = pci_get_slot(...) ... when != true x == NULL when != pci_dev_put(x) when != e = x when != if (x != NULL) {<+... pci_dev_put(x); ...+>} *return ...; // Signed-off-by: Julia Lawall Signed-off-by: David S. 
Miller --- drivers/ide/cy82c693.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c index 3be60da..67cbcfa 100644 --- a/drivers/ide/cy82c693.c +++ b/drivers/ide/cy82c693.c @@ -141,6 +141,8 @@ static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) pci_write_config_byte(dev, CY82_IDE_SLAVE_IOW, time_16); pci_write_config_byte(dev, CY82_IDE_SLAVE_8BIT, time_8); } + if (hwif->index > 0) + pci_dev_put(dev); } static void __devinit init_iops_cy82c693(ide_hwif_t *hwif) -- cgit v1.1 From fabb5bd96d060da961d74531a0900da4da5ad2d4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 4 Aug 2011 01:35:12 -0700 Subject: sparc: use kbuild-generic support for true asm-generic header files Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 5 +++++ arch/sparc/include/asm/div64.h | 1 - arch/sparc/include/asm/irq_regs.h | 1 - arch/sparc/include/asm/local.h | 6 ------ arch/sparc/include/asm/local64.h | 1 - 5 files changed, 5 insertions(+), 9 deletions(-) delete mode 100644 arch/sparc/include/asm/div64.h delete mode 100644 arch/sparc/include/asm/irq_regs.h delete mode 100644 arch/sparc/include/asm/local.h delete mode 100644 arch/sparc/include/asm/local64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 3c93f08..2c2e388 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -16,3 +16,8 @@ header-y += traps.h header-y += uctx.h header-y += utrap.h header-y += watchdog.h + +generic-y += div64.h +generic-y += local64.h +generic-y += irq_regs.h +generic-y += local.h diff --git a/arch/sparc/include/asm/div64.h b/arch/sparc/include/asm/div64.h deleted file mode 100644 index 6cd978c..0000000 --- a/arch/sparc/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sparc/include/asm/irq_regs.h b/arch/sparc/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b..0000000 --- a/arch/sparc/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/sparc/include/asm/local.h b/arch/sparc/include/asm/local.h deleted file mode 100644 index bc80815..0000000 --- a/arch/sparc/include/asm/local.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _SPARC_LOCAL_H -#define _SPARC_LOCAL_H - -#include - -#endif diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h deleted file mode 100644 index 36c93b5..0000000 --- a/arch/sparc/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v1.1 From 36c35416a94f5632c3addad05217ff02c39b3b61 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 3 Aug 2011 22:10:29 +0000 Subject: cdc_ncm: fix endianness problem. Fix a misusage of the struct usb_cdc_notification to pass arguments to the usb_control_msg function. The usb_control_msg function expects host endian arguments but usb_cdc_notification stores these values as little endian. Now usb_control_msg is directly invoked with host endian values. Signed-off-by: Giuseppe Scrivano Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_ncm.c | 156 +++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 100 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index fd622a6..a03336e 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -53,7 +53,7 @@ #include #include -#define DRIVER_VERSION "01-June-2011" +#define DRIVER_VERSION "04-Aug-2011" /* CDC NCM subclass 3.2.1 */ #define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 @@ -163,35 +163,8 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); } -static int -cdc_ncm_do_request(struct cdc_ncm_ctx *ctx, struct usb_cdc_notification *req, - void *data, u16 flags, u16 *actlen, u16 timeout) -{ - int err; - - err = usb_control_msg(ctx->udev, (req->bmRequestType & USB_DIR_IN) ? - usb_rcvctrlpipe(ctx->udev, 0) : - usb_sndctrlpipe(ctx->udev, 0), - req->bNotificationType, req->bmRequestType, - req->wValue, - req->wIndex, data, - req->wLength, timeout); - - if (err < 0) { - if (actlen) - *actlen = 0; - return err; - } - - if (actlen) - *actlen = err; - - return 0; -} - static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) { - struct usb_cdc_notification req; u32 val; u8 flags; u8 iface_no; @@ -200,14 +173,14 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; - req.bmRequestType = USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE; - req.bNotificationType = USB_CDC_GET_NTB_PARAMETERS; - req.wValue = 0; - req.wIndex = cpu_to_le16(iface_no); - req.wLength = cpu_to_le16(sizeof(ctx->ncm_parm)); - - err = cdc_ncm_do_request(ctx, &req, &ctx->ncm_parm, 0, NULL, 1000); - if (err) { + err = usb_control_msg(ctx->udev, + usb_rcvctrlpipe(ctx->udev, 0), + USB_CDC_GET_NTB_PARAMETERS, + USB_TYPE_CLASS | USB_DIR_IN + | USB_RECIP_INTERFACE, + 0, iface_no, &ctx->ncm_parm, + sizeof(ctx->ncm_parm), 10000); + if (err < 0) { pr_debug("failed GET_NTB_PARAMETERS\n"); return 1; } @@ -253,31 +226,26 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) /* inform device about NTB input size changes */ if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { - req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | - USB_RECIP_INTERFACE; - req.bNotificationType = USB_CDC_SET_NTB_INPUT_SIZE; - req.wValue = 0; - req.wIndex = cpu_to_le16(iface_no); if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) { struct usb_cdc_ncm_ndp_input_size ndp_in_sz; - - req.wLength = 8; - ndp_in_sz.dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); - ndp_in_sz.wNtbInMaxDatagrams = - cpu_to_le16(CDC_NCM_DPT_DATAGRAMS_MAX); - ndp_in_sz.wReserved = 0; - err = cdc_ncm_do_request(ctx, &req, &ndp_in_sz, 0, NULL, - 1000); + err = usb_control_msg(ctx->udev, + usb_sndctrlpipe(ctx->udev, 0), + USB_CDC_SET_NTB_INPUT_SIZE, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + 0, iface_no, &ndp_in_sz, 8, 1000); } else { __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); - - req.wLength = 4; - err = cdc_ncm_do_request(ctx, &req, &dwNtbInMaxSize, 0, - NULL, 1000); + err = usb_control_msg(ctx->udev, + usb_sndctrlpipe(ctx->udev, 0), + USB_CDC_SET_NTB_INPUT_SIZE, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + 0, iface_no, &dwNtbInMaxSize, 4, 1000); } - if (err) + if (err < 0) pr_debug("Setting NTB Input Size failed\n"); } @@ -332,29 +300,24 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) /* set CRC Mode */ if (flags & USB_CDC_NCM_NCAP_CRC_MODE) { - req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | - 
USB_RECIP_INTERFACE; - req.bNotificationType = USB_CDC_SET_CRC_MODE; - req.wValue = cpu_to_le16(USB_CDC_NCM_CRC_NOT_APPENDED); - req.wIndex = cpu_to_le16(iface_no); - req.wLength = 0; - - err = cdc_ncm_do_request(ctx, &req, NULL, 0, NULL, 1000); - if (err) + err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0), + USB_CDC_SET_CRC_MODE, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + USB_CDC_NCM_CRC_NOT_APPENDED, + iface_no, NULL, 0, 1000); + if (err < 0) pr_debug("Setting CRC mode off failed\n"); } /* set NTB format, if both formats are supported */ if (ntb_fmt_supported & USB_CDC_NCM_NTH32_SIGN) { - req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | - USB_RECIP_INTERFACE; - req.bNotificationType = USB_CDC_SET_NTB_FORMAT; - req.wValue = cpu_to_le16(USB_CDC_NCM_NTB16_FORMAT); - req.wIndex = cpu_to_le16(iface_no); - req.wLength = 0; - - err = cdc_ncm_do_request(ctx, &req, NULL, 0, NULL, 1000); - if (err) + err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0), + USB_CDC_SET_NTB_FORMAT, USB_TYPE_CLASS + | USB_DIR_OUT | USB_RECIP_INTERFACE, + USB_CDC_NCM_NTB16_FORMAT, + iface_no, NULL, 0, 1000); + if (err < 0) pr_debug("Setting NTB format to 16-bit failed\n"); } @@ -364,17 +327,13 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) { __le16 max_datagram_size; u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); - - req.bmRequestType = USB_TYPE_CLASS | USB_DIR_IN | - USB_RECIP_INTERFACE; - req.bNotificationType = USB_CDC_GET_MAX_DATAGRAM_SIZE; - req.wValue = 0; - req.wIndex = cpu_to_le16(iface_no); - req.wLength = cpu_to_le16(2); - - err = cdc_ncm_do_request(ctx, &req, &max_datagram_size, 0, NULL, - 1000); - if (err) { + err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0), + USB_CDC_GET_MAX_DATAGRAM_SIZE, + USB_TYPE_CLASS | USB_DIR_IN + | USB_RECIP_INTERFACE, + 0, iface_no, &max_datagram_size, + 2, 1000); + if (err < 0) { pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n", CDC_NCM_MIN_DATAGRAM_SIZE); } else { @@ -395,17 +354,15 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) CDC_NCM_MIN_DATAGRAM_SIZE; /* if value changed, update device */ - req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | - USB_RECIP_INTERFACE; - req.bNotificationType = USB_CDC_SET_MAX_DATAGRAM_SIZE; - req.wValue = 0; - req.wIndex = cpu_to_le16(iface_no); - req.wLength = 2; - max_datagram_size = cpu_to_le16(ctx->max_datagram_size); - - err = cdc_ncm_do_request(ctx, &req, &max_datagram_size, - 0, NULL, 1000); - if (err) + err = usb_control_msg(ctx->udev, + usb_sndctrlpipe(ctx->udev, 0), + USB_CDC_SET_MAX_DATAGRAM_SIZE, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + 0, + iface_no, &max_datagram_size, + 2, 1000); + if (err < 0) pr_debug("SET_MAX_DATAGRAM_SIZE failed\n"); } @@ -671,7 +628,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) u32 rem; u32 offset; u32 last_offset; - u16 n = 0; + u16 n = 0, index; u8 ready2send = 0; /* if there is a remaining skb, it gets priority */ @@ -859,8 +816,8 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) cpu_to_le16(sizeof(ctx->tx_ncm.nth16)); ctx->tx_ncm.nth16.wSequence = cpu_to_le16(ctx->tx_seq); ctx->tx_ncm.nth16.wBlockLength = cpu_to_le16(last_offset); - ctx->tx_ncm.nth16.wNdpIndex = ALIGN(sizeof(struct usb_cdc_ncm_nth16), - ctx->tx_ndp_modulus); + index = ALIGN(sizeof(struct usb_cdc_ncm_nth16), ctx->tx_ndp_modulus); + ctx->tx_ncm.nth16.wNdpIndex = cpu_to_le16(index); memcpy(skb_out->data, &(ctx->tx_ncm.nth16), 
sizeof(ctx->tx_ncm.nth16)); ctx->tx_seq++; @@ -873,12 +830,11 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) ctx->tx_ncm.ndp16.wLength = cpu_to_le16(rem); ctx->tx_ncm.ndp16.wNextNdpIndex = 0; /* reserved */ - memcpy(((u8 *)skb_out->data) + ctx->tx_ncm.nth16.wNdpIndex, + memcpy(((u8 *)skb_out->data) + index, &(ctx->tx_ncm.ndp16), sizeof(ctx->tx_ncm.ndp16)); - memcpy(((u8 *)skb_out->data) + ctx->tx_ncm.nth16.wNdpIndex + - sizeof(ctx->tx_ncm.ndp16), + memcpy(((u8 *)skb_out->data) + index + sizeof(ctx->tx_ncm.ndp16), &(ctx->tx_ncm.dpe16), (ctx->tx_curr_frame_num + 1) * sizeof(struct usb_cdc_ncm_dpe16)); -- cgit v1.1 From d3e614577198757d5854caa912e88f2d4296479b Mon Sep 17 00:00:00 2001 From: Tord Andersson Date: Wed, 3 Aug 2011 22:11:47 +0000 Subject: macb: restore wrap bit when performing underrun cleanup When TX underrun occurs, a cleanup is performed that marks all buffers as used. As a side effect it also clears the wrap bit in the last buffer. This patch will restore the wrap bit. Signed-off-by: Tord Andersson Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/macb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/macb.c b/drivers/net/macb.c index 0fcdc25..dc4e305 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -322,6 +322,9 @@ static void macb_tx(struct macb *bp) for (i = 0; i < TX_RING_SIZE; i++) bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); + /* Add wrap bit */ + bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); + /* free transmit buffer in upper layer*/ for (tail = bp->tx_tail; tail != head; tail = NEXT_TX(tail)) { struct ring_info *rp = &bp->tx_skb[tail]; -- cgit v1.1 From a61b582954183e93a3dc3a5cf6bfd2e2c3b40aba Mon Sep 17 00:00:00 2001 From: Josip Rodin Date: Thu, 4 Aug 2011 02:47:40 -0700 Subject: sparc: Fix __atomic_add_unless() return value. Signed-off-by: David S. Miller --- arch/sparc/lib/atomic32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 8600eb2..1d32b54 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -65,7 +65,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u) if (ret != u) v->counter += a; spin_unlock_irqrestore(ATOMIC_HASH(v), flags); - return ret != u; + return ret; } EXPORT_SYMBOL(__atomic_add_unless); -- cgit v1.1 From 9e191b22c91873c09c722d7f956ab9f3276f9a37 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 4 Aug 2011 10:47:40 +0100 Subject: dt: remove of_alias_get_id() reference of_alias_get_id() is broken and being reverted. Remove the reference to it and replace with a single incrementing id number. There is no risk of regression here on the imx driver since the imx change to use of_alias_get_id() is commit 22698aa2, "serial/imx: add device tree probe support" which is new for v3.1, and it won't get used unless CONFIG_OF is enabled and the board is booted using a device tree. A single incrementing integer is sufficient for now. 
Signed-off-by: Grant Likely Acked-by: Shawn Guo --- drivers/tty/serial/imx.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 827db76..7e91b3d 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1286,22 +1286,17 @@ static int serial_imx_resume(struct platform_device *dev) static int serial_imx_probe_dt(struct imx_port *sport, struct platform_device *pdev) { + static int portnum = 0; struct device_node *np = pdev->dev.of_node; const struct of_device_id *of_id = of_match_device(imx_uart_dt_ids, &pdev->dev); - int ret; if (!np) return -ENODEV; - ret = of_alias_get_id(np, "serial"); - if (ret < 0) { - pr_err("%s: failed to get alias id, errno %d\n", - __func__, ret); - return -ENODEV; - } else { - sport->port.line = ret; - } + sport->port.line = portnum++; + if (sport->port.line >= UART_NR) + return -EINVAL; if (of_get_property(np, "fsl,uart-has-rtscts", NULL)) sport->have_rtscts = 1; -- cgit v1.1 From fe55c1844a1c106e9d9d3dd27cbfcf8caeb9e77e Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 4 Aug 2011 10:27:32 +0100 Subject: Revert "dt: add of_alias_scan and of_alias_get_id" This reverts commit 750f463a749e28464151ad26938d11b07b1c43cb. of_alias_* still needs work to be generalized for 'promtree' dt platforms, and to no implicitly create entries for available ids. Signed-off-by: Grant Likely --- drivers/of/base.c | 130 ------------------------------------------------- drivers/of/fdt.c | 4 +- include/linux/of.h | 8 --- include/linux/of_fdt.h | 1 + 4 files changed, 2 insertions(+), 141 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index fb28b5a..3ff22e3 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,39 +17,14 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include #include #include #include #include #include -/** - * struct alias_prop - Alias property in 'aliases' node - * @link: List node to link the structure in aliases_lookup list - * @alias: Alias property name - * @np: Pointer to device_node that the alias stands for - * @id: Index value from end of alias name - * @stem: Alias string without the index - * - * The structure represents one alias property of 'aliases' node as - * an entry in aliases_lookup list. - */ -struct alias_prop { - struct list_head link; - const char *alias; - struct device_node *np; - int id; - char stem[0]; -}; - -static LIST_HEAD(aliases_lookup); - struct device_node *allnodes; struct device_node *of_chosen; -struct device_node *of_aliases; - -static DEFINE_MUTEX(of_aliases_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. @@ -1013,108 +988,3 @@ out_unlock: } #endif /* defined(CONFIG_OF_DYNAMIC) */ -static void of_alias_add(struct alias_prop *ap, struct device_node *np, - int id, const char *stem, int stem_len) -{ - ap->id = id; - ap->np = np; - strncpy(ap->stem, stem, stem_len); - ap->stem[stem_len] = 0; - list_add_tail(&ap->link, &aliases_lookup); - pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n", - ap->alias, ap->stem, ap->id, np ? np->full_name : NULL); -} - -/** - * of_alias_scan() - Scan all properties of 'aliases' node - * - * The function scans all the properties of 'aliases' node and populate - * the global lookup table with the properties. It returns the - * number of alias_prop found, or error code in error case. 
- */ -__init void of_alias_scan(void) -{ - struct property *pp; - - if (!of_aliases) - return; - - for_each_property(pp, of_aliases->properties) { - const char *start = pp->name; - const char *end = start + strlen(start); - struct device_node *np; - struct alias_prop *ap; - int id, len; - - /* Skip those we do not want to proceed */ - if (!strcmp(pp->name, "name") || - !strcmp(pp->name, "phandle") || - !strcmp(pp->name, "linux,phandle")) - continue; - - np = of_find_node_by_path(pp->value); - if (!np) - continue; - - /* walk alias backwards to extract the id and 'stem' string */ - while (isdigit(*(end-1)) && end > start) - end--; - len = end - start; - id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1; - - /* Allocate an alias_prop with enough space for the stem */ - ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4); - if (!ap) - continue; - ap->alias = start; - of_alias_add(ap, np, id, start, len); - } -} - -/** - * of_alias_get_id() - Get alias id for the given device_node - * @np: Pointer to the given device_node - * @stem: Alias stem of the given device_node - * - * The function travels the lookup table to get alias id for the given - * device_node and alias stem. It returns the alias id if find it. - * If not, dynamically creates one in the lookup table and returns it, - * or returns error code if fail to create. - */ -int of_alias_get_id(struct device_node *np, const char *stem) -{ - struct alias_prop *app; - int id = 0; - bool found = false; - - mutex_lock(&of_aliases_mutex); - list_for_each_entry(app, &aliases_lookup, link) { - if (strcmp(app->stem, stem) != 0) - continue; - - if (np == app->np) { - found = true; - id = app->id; - break; - } - - if (id <= app->id) - id = app->id + 1; - } - - /* If an id is not found, then allocate a new one */ - if (!found) { - app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4); - if (!app) { - id = -ENODEV; - goto out; - } - of_alias_add(app, np, id, stem, strlen(stem)); - } - - out: - mutex_unlock(&of_aliases_mutex); - - return id; -} -EXPORT_SYMBOL_GPL(of_alias_get_id); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 13d6d3a..65200af 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -707,12 +707,10 @@ void __init unflatten_device_tree(void) __unflatten_device_tree(initial_boot_params, &allnodes, early_init_dt_alloc_memory_arch); - /* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */ + /* Get pointer to OF "/chosen" node for use everywhere */ of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); - of_aliases = of_find_node_by_path("/aliases"); - of_alias_scan(); } #endif /* CONFIG_OF_EARLY_FLATTREE */ diff --git a/include/linux/of.h b/include/linux/of.h index bc3dc63..0085bb0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -68,7 +68,6 @@ struct device_node { /* Pointer for first entry in chain of all nodes. 
*/ extern struct device_node *allnodes; extern struct device_node *of_chosen; -extern struct device_node *of_aliases; extern rwlock_t devtree_lock; static inline bool of_have_populated_dt(void) @@ -210,9 +209,6 @@ extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); -#define for_each_property(pp, properties) \ - for (pp = properties; pp != NULL; pp = pp->next) - extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -225,10 +221,6 @@ extern int of_parse_phandles_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, struct device_node **out_node, const void **out_args); -extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align); -extern void of_alias_scan(void); -extern int of_alias_get_id(struct device_node *np, const char *stem); - extern int of_machine_is_compatible(const char *compat); extern int prom_add_property(struct device_node* np, struct property* prop); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index b74b74f..c84d900 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -97,6 +97,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_add_memory_arch(u64 base, u64 size); +extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); /* -- cgit v1.1 From 945a51517cc0bd9e461f2018624dfc1faef9ddee Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 20 Jul 2011 00:56:21 +0000 Subject: intel drivers: repair missing flush operations after review of all intel drivers, found several instances where drivers had the incorrect pattern of: memory mapped write(); delay(); which should always be: memory mapped write(); write flush(); /* aka memory mapped read */ delay(); explanation: The reason for including the flush is that writes can be held (posted) in PCI/PCIe bridges, but the read always has to complete synchronously and therefore has to flush all pending writes to a device. 
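In driver terms, a minimal sketch of the corrected pattern looks like this; writel(), readl() and udelay() are the stock kernel MMIO and delay helpers, while the EXAMPLE_REG_* offsets are placeholders rather than real e1000/ixgbe registers:

#include <linux/io.h>
#include <linux/delay.h>

#define EXAMPLE_REG_IMC		0x00D8	/* placeholder interrupt-mask register */
#define EXAMPLE_REG_STATUS	0x0008	/* placeholder read-only status register */

static void example_mask_irqs(void __iomem *hw_addr)
{
	writel(0xFFFFFFFF, hw_addr + EXAMPLE_REG_IMC);	/* posted MMIO write   */
	readl(hw_addr + EXAMPLE_REG_STATUS);		/* write flush: the read
							 * cannot complete until
							 * the write has landed */
	udelay(10);					/* delay now starts from
							 * the hardware's view  */
}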
If a write is held and followed by a delay, the delay means nothing because the write may not have reached hardware (maybe even not until the next read) Signed-off-by: Jesse Brandeburg Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/e1000/e1000_ethtool.c | 6 ++++++ drivers/net/e1000/e1000_hw.c | 3 +++ drivers/net/e1000e/es2lan.c | 2 ++ drivers/net/e1000e/ethtool.c | 9 +++++++++ drivers/net/e1000e/ich8lan.c | 4 ++++ drivers/net/e1000e/lib.c | 1 + drivers/net/e1000e/phy.c | 2 ++ drivers/net/igb/e1000_nvm.c | 1 + drivers/net/igb/igb_ethtool.c | 5 +++++ drivers/net/igb/igb_main.c | 2 ++ drivers/net/igbvf/netdev.c | 2 ++ drivers/net/ixgb/ixgb_ee.c | 9 +++++++++ drivers/net/ixgb/ixgb_hw.c | 2 ++ drivers/net/ixgbe/ixgbe_common.c | 1 + drivers/net/ixgbe/ixgbe_ethtool.c | 5 +++++ drivers/net/ixgbe/ixgbe_main.c | 1 + drivers/net/ixgbe/ixgbe_phy.c | 3 +++ drivers/net/ixgbe/ixgbe_x540.c | 1 + 18 files changed, 59 insertions(+) diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index c5f0f04..5548d46 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -838,6 +838,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); + E1000_WRITE_FLUSH(); msleep(10); /* Test each interrupt */ @@ -856,6 +857,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) adapter->test_icr = 0; ew32(IMC, mask); ew32(ICS, mask); + E1000_WRITE_FLUSH(); msleep(10); if (adapter->test_icr & mask) { @@ -873,6 +875,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) adapter->test_icr = 0; ew32(IMS, mask); ew32(ICS, mask); + E1000_WRITE_FLUSH(); msleep(10); if (!(adapter->test_icr & mask)) { @@ -890,6 +893,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) adapter->test_icr = 0; ew32(IMC, ~mask & 0x00007FFF); ew32(ICS, ~mask & 0x00007FFF); + E1000_WRITE_FLUSH(); msleep(10); if (adapter->test_icr) { @@ -901,6 +905,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); + E1000_WRITE_FLUSH(); msleep(10); /* Unhook test interrupt handler */ @@ -1394,6 +1399,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) if (unlikely(++k == txdr->count)) k = 0; } ew32(TDT, k); + E1000_WRITE_FLUSH(); msleep(200); time = jiffies; /* set the start time for the receive */ good_cnt = 0; diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index 1698622..8545c7a 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -446,6 +446,7 @@ s32 e1000_reset_hw(struct e1000_hw *hw) /* Must reset the PHY before resetting the MAC */ if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { ew32(CTRL, (ctrl | E1000_CTRL_PHY_RST)); + E1000_WRITE_FLUSH(); msleep(5); } @@ -3752,6 +3753,7 @@ static s32 e1000_acquire_eeprom(struct e1000_hw *hw) /* Clear SK and CS */ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ew32(EECD, eecd); + E1000_WRITE_FLUSH(); udelay(1); } @@ -3824,6 +3826,7 @@ static void e1000_release_eeprom(struct e1000_hw *hw) eecd &= ~E1000_EECD_SK; /* Lower SCK */ ew32(EECD, eecd); + E1000_WRITE_FLUSH(); udelay(hw->eeprom.delay_usec); } else if (hw->eeprom.type == e1000_eeprom_microwire) { diff --git a/drivers/net/e1000e/es2lan.c b/drivers/net/e1000e/es2lan.c index c0ecb2d..e4f4225 100644 --- a/drivers/net/e1000e/es2lan.c +++ b/drivers/net/e1000e/es2lan.c @@ -1313,6 +1313,7 @@ static s32 
e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ew32(KMRNCTRLSTA, kmrnctrlsta); + e1e_flush(); udelay(2); @@ -1347,6 +1348,7 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | data; ew32(KMRNCTRLSTA, kmrnctrlsta); + e1e_flush(); udelay(2); diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index cb1a362..72756e4 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c @@ -964,6 +964,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); + e1e_flush(); usleep_range(10000, 20000); /* Test each interrupt */ @@ -996,6 +997,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) adapter->test_icr = 0; ew32(IMC, mask); ew32(ICS, mask); + e1e_flush(); usleep_range(10000, 20000); if (adapter->test_icr & mask) { @@ -1014,6 +1016,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) adapter->test_icr = 0; ew32(IMS, mask); ew32(ICS, mask); + e1e_flush(); usleep_range(10000, 20000); if (!(adapter->test_icr & mask)) { @@ -1032,6 +1035,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) adapter->test_icr = 0; ew32(IMC, ~mask & 0x00007FFF); ew32(ICS, ~mask & 0x00007FFF); + e1e_flush(); usleep_range(10000, 20000); if (adapter->test_icr) { @@ -1043,6 +1047,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); + e1e_flush(); usleep_range(10000, 20000); /* Unhook test interrupt handler */ @@ -1276,6 +1281,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter) E1000_CTRL_FD); /* Force Duplex to FULL */ ew32(CTRL, ctrl_reg); + e1e_flush(); udelay(500); return 0; @@ -1418,6 +1424,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) */ #define E1000_SERDES_LB_ON 0x410 ew32(SCTL, E1000_SERDES_LB_ON); + e1e_flush(); usleep_range(10000, 20000); return 0; @@ -1513,6 +1520,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) hw->phy.media_type == e1000_media_type_internal_serdes) { #define E1000_SERDES_LB_OFF 0x400 ew32(SCTL, E1000_SERDES_LB_OFF); + e1e_flush(); usleep_range(10000, 20000); break; } @@ -1592,6 +1600,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) k = 0; } ew32(TDT, k); + e1e_flush(); msleep(200); time = jiffies; /* set the start time for the receive */ good_cnt = 0; diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index c175212..f7a75c1 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -283,6 +283,7 @@ static void e1000_toggle_lanphypc_value_ich8lan(struct e1000_hw *hw) ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; ew32(CTRL, ctrl); + e1e_flush(); udelay(10); ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; ew32(CTRL, ctrl); @@ -1230,9 +1231,11 @@ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) ew32(CTRL, reg); ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); + e1e_flush(); udelay(20); ew32(CTRL, ctrl_reg); ew32(CTRL_EXT, ctrl_ext); + e1e_flush(); udelay(20); out: @@ -3090,6 +3093,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ret_val = e1000_acquire_swflag_ich8lan(hw); e_dbg("Issuing a global reset to ich8lan\n"); 
ew32(CTRL, (ctrl | E1000_CTRL_RST)); + /* cannot issue a flush here because it hangs the hardware */ msleep(20); if (!ret_val) diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c index 65580b40..7898a67 100644 --- a/drivers/net/e1000e/lib.c +++ b/drivers/net/e1000e/lib.c @@ -1986,6 +1986,7 @@ static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) /* Clear SK and CS */ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); ew32(EECD, eecd); + e1e_flush(); udelay(1); /* diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c index 2a6ee13..8666476 100644 --- a/drivers/net/e1000e/phy.c +++ b/drivers/net/e1000e/phy.c @@ -537,6 +537,7 @@ static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; ew32(KMRNCTRLSTA, kmrnctrlsta); + e1e_flush(); udelay(2); @@ -609,6 +610,7 @@ static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | data; ew32(KMRNCTRLSTA, kmrnctrlsta); + e1e_flush(); udelay(2); diff --git a/drivers/net/igb/e1000_nvm.c b/drivers/net/igb/e1000_nvm.c index 7dcd65c..4040712 100644 --- a/drivers/net/igb/e1000_nvm.c +++ b/drivers/net/igb/e1000_nvm.c @@ -285,6 +285,7 @@ static s32 igb_ready_nvm_eeprom(struct e1000_hw *hw) /* Clear SK and CS */ eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); wr32(E1000_EECD, eecd); + wrfl(); udelay(1); timeout = NVM_MAX_RETRY_SPI; diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c index ff244ce..414b022 100644 --- a/drivers/net/igb/igb_ethtool.c +++ b/drivers/net/igb/igb_ethtool.c @@ -1225,6 +1225,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) /* Disable all the interrupts */ wr32(E1000_IMC, ~0); + wrfl(); msleep(10); /* Define all writable bits for ICS */ @@ -1268,6 +1269,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) wr32(E1000_IMC, mask); wr32(E1000_ICS, mask); + wrfl(); msleep(10); if (adapter->test_icr & mask) { @@ -1289,6 +1291,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) wr32(E1000_IMS, mask); wr32(E1000_ICS, mask); + wrfl(); msleep(10); if (!(adapter->test_icr & mask)) { @@ -1310,6 +1313,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) wr32(E1000_IMC, ~mask); wr32(E1000_ICS, ~mask); + wrfl(); msleep(10); if (adapter->test_icr & mask) { @@ -1321,6 +1325,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) /* Disable all the interrupts */ wr32(E1000_IMC, ~0); + wrfl(); msleep(10); /* Unhook test interrupt handler */ diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index dc59905..ae3937e 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -1052,6 +1052,7 @@ msi_only: kfree(adapter->vf_data); adapter->vf_data = NULL; wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + wrfl(); msleep(100); dev_info(&adapter->pdev->dev, "IOV Disabled\n"); } @@ -2198,6 +2199,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) kfree(adapter->vf_data); adapter->vf_data = NULL; wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + wrfl(); msleep(100); dev_info(&pdev->dev, "IOV Disabled\n"); } diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c index 1330c8e..40ed066 100644 --- a/drivers/net/igbvf/netdev.c +++ b/drivers/net/igbvf/netdev.c @@ -1226,6 +1226,7 @@ static void igbvf_configure_tx(struct igbvf_adapter *adapter) /* disable transmits */ txdctl = 
er32(TXDCTL(0)); ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE); + e1e_flush(); msleep(10); /* Setup the HW Tx Head and Tail descriptor pointers */ @@ -1306,6 +1307,7 @@ static void igbvf_configure_rx(struct igbvf_adapter *adapter) /* disable receives */ rxdctl = er32(RXDCTL(0)); ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE); + e1e_flush(); msleep(10); rdlen = rx_ring->count * sizeof(union e1000_adv_rx_desc); diff --git a/drivers/net/ixgb/ixgb_ee.c b/drivers/net/ixgb/ixgb_ee.c index c982ab9..38b362b 100644 --- a/drivers/net/ixgb/ixgb_ee.c +++ b/drivers/net/ixgb/ixgb_ee.c @@ -57,6 +57,7 @@ ixgb_raise_clock(struct ixgb_hw *hw, */ *eecd_reg = *eecd_reg | IXGB_EECD_SK; IXGB_WRITE_REG(hw, EECD, *eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); } @@ -75,6 +76,7 @@ ixgb_lower_clock(struct ixgb_hw *hw, */ *eecd_reg = *eecd_reg & ~IXGB_EECD_SK; IXGB_WRITE_REG(hw, EECD, *eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); } @@ -112,6 +114,7 @@ ixgb_shift_out_bits(struct ixgb_hw *hw, eecd_reg |= IXGB_EECD_DI; IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); @@ -206,21 +209,25 @@ ixgb_standby_eeprom(struct ixgb_hw *hw) /* Deselect EEPROM */ eecd_reg &= ~(IXGB_EECD_CS | IXGB_EECD_SK); IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); /* Clock high */ eecd_reg |= IXGB_EECD_SK; IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); /* Select EEPROM */ eecd_reg |= IXGB_EECD_CS; IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); /* Clock low */ eecd_reg &= ~IXGB_EECD_SK; IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); } @@ -239,11 +246,13 @@ ixgb_clock_eeprom(struct ixgb_hw *hw) /* Rising edge of clock */ eecd_reg |= IXGB_EECD_SK; IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); /* Falling edge of clock */ eecd_reg &= ~IXGB_EECD_SK; IXGB_WRITE_REG(hw, EECD, eecd_reg); + IXGB_WRITE_FLUSH(hw); udelay(50); } diff --git a/drivers/net/ixgb/ixgb_hw.c b/drivers/net/ixgb/ixgb_hw.c index 6cb2e42..3d61a9e 100644 --- a/drivers/net/ixgb/ixgb_hw.c +++ b/drivers/net/ixgb/ixgb_hw.c @@ -149,6 +149,7 @@ ixgb_adapter_stop(struct ixgb_hw *hw) */ IXGB_WRITE_REG(hw, RCTL, IXGB_READ_REG(hw, RCTL) & ~IXGB_RCTL_RXEN); IXGB_WRITE_REG(hw, TCTL, IXGB_READ_REG(hw, TCTL) & ~IXGB_TCTL_TXEN); + IXGB_WRITE_FLUSH(hw); msleep(IXGB_DELAY_BEFORE_RESET); /* Issue a global reset to the MAC. 
This will reset the chip's @@ -1220,6 +1221,7 @@ ixgb_optics_reset_bcm(struct ixgb_hw *hw) ctrl &= ~IXGB_CTRL0_SDP2; ctrl |= IXGB_CTRL0_SDP3; IXGB_WRITE_REG(hw, CTRL0, ctrl); + IXGB_WRITE_FLUSH(hw); /* SerDes needs extra delay */ msleep(IXGB_SUN_PHY_RESET_DELAY); diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 777051f..fc1375f 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -2632,6 +2632,7 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) autoc_reg |= IXGBE_AUTOC_AN_RESTART; autoc_reg |= IXGBE_AUTOC_FLU; IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + IXGBE_WRITE_FLUSH(hw); usleep_range(10000, 20000); } diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index dc64955..82d4244 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -1378,6 +1378,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) /* Disable all the interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); + IXGBE_WRITE_FLUSH(&adapter->hw); usleep_range(10000, 20000); /* Test each interrupt */ @@ -1398,6 +1399,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) ~mask & 0x00007FFF); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, ~mask & 0x00007FFF); + IXGBE_WRITE_FLUSH(&adapter->hw); usleep_range(10000, 20000); if (adapter->test_icr & mask) { @@ -1415,6 +1417,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) adapter->test_icr = 0; IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); + IXGBE_WRITE_FLUSH(&adapter->hw); usleep_range(10000, 20000); if (!(adapter->test_icr &mask)) { @@ -1435,6 +1438,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) ~mask & 0x00007FFF); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, ~mask & 0x00007FFF); + IXGBE_WRITE_FLUSH(&adapter->hw); usleep_range(10000, 20000); if (adapter->test_icr) { @@ -1446,6 +1450,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) /* Disable all the interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); + IXGBE_WRITE_FLUSH(&adapter->hw); usleep_range(10000, 20000); /* Unhook test interrupt handler */ diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 1be6175..26b132b 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -184,6 +184,7 @@ static inline void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); vmdctl &= ~IXGBE_VT_CTL_POOL_MASK; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl); + IXGBE_WRITE_FLUSH(hw); /* take a breather then clean up driver data */ msleep(100); diff --git a/drivers/net/ixgbe/ixgbe_phy.c b/drivers/net/ixgbe/ixgbe_phy.c index 735f686..f7ca351 100644 --- a/drivers/net/ixgbe/ixgbe_phy.c +++ b/drivers/net/ixgbe/ixgbe_phy.c @@ -1585,6 +1585,7 @@ static s32 ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) *i2cctl |= IXGBE_I2C_CLK_OUT; IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); /* SCL rise time (1000ns) */ udelay(IXGBE_I2C_T_RISE); @@ -1605,6 +1606,7 @@ static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) *i2cctl &= ~IXGBE_I2C_CLK_OUT; IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); /* SCL fall time (300ns) */ udelay(IXGBE_I2C_T_FALL); @@ -1628,6 +1630,7 @@ static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) *i2cctl &= 
~IXGBE_I2C_DATA_OUT; IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA); diff --git a/drivers/net/ixgbe/ixgbe_x540.c b/drivers/net/ixgbe/ixgbe_x540.c index bec30ed..2696c78 100644 --- a/drivers/net/ixgbe/ixgbe_x540.c +++ b/drivers/net/ixgbe/ixgbe_x540.c @@ -162,6 +162,7 @@ mac_reset_top: ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + IXGBE_WRITE_FLUSH(hw); msleep(50); -- cgit v1.1 From b9e06f70dc186f8353cc593f2b4609383b3be7a9 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 22 Jul 2011 06:21:41 +0000 Subject: e1000e: remove unnecessary check for NULL pointer The array shadow_ram is never NULL. Signed-off-by: Bruce Allan Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/e1000e/ich8lan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index f7a75c1..4e36978 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -2137,8 +2137,7 @@ static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, ret_val = 0; for (i = 0; i < words; i++) { - if ((dev_spec->shadow_ram) && - (dev_spec->shadow_ram[offset+i].modified)) { + if (dev_spec->shadow_ram[offset+i].modified) { data[i] = dev_spec->shadow_ram[offset+i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, -- cgit v1.1 From 9fb7a5f77b26dedfcfa4e3a36fe207f818662bee Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 29 Jul 2011 05:52:51 +0000 Subject: e1000e: minor re-order of #include files The recent commit a6b7a407 when back-ported to the out-of-tree e1000e driver caused a compilation error on older kernels which required a re-ordering of the #include files. This cosmetic patch syncs the two drivers for easier maintainability. Signed-off-by: Bruce Allan Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/e1000e/ethtool.c | 2 +- drivers/net/e1000e/netdev.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c index 72756e4..06d88f3 100644 --- a/drivers/net/e1000e/ethtool.c +++ b/drivers/net/e1000e/ethtool.c @@ -28,8 +28,8 @@ /* ethtool support for e1000 */ -#include #include +#include #include #include #include diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 4353ad5..ab4be80 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -31,12 +31,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include #include -- cgit v1.1 From 6d337dce664b6872ddf1655f6b1fcab76ce35b08 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Thu, 7 Jul 2011 00:24:56 +0000 Subject: igb: fix WOL on second port of i350 device This patch fixes a problem where WOL would fail on second port of i350 device. 
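The one-character change below works because igb's e1000_mac_type enum is ordered from oldest to newest part, so a ">=" test covers the i350 (and anything newer that reuses the 82580-style per-function NVM layout) where "==" matched the 82580 only. A rough sketch of the resulting selection, condensed from the hunk that follows (the pre-existing fallback branch for older parts is not shown):

	/* pick which NVM word carries this port's wake-up settings */
	if (hw->bus.func == 0)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type >= e1000_82580)	/* 82580, i350 and newer */
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func),
				 1, &eeprom_data);
	/* older parts fall through to the legacy per-port word (unchanged) */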
Reported-by: Martin Wilck Reported-by: Stefan Assmann Signed-off-by: Carolyn Wyborny Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ae3937e..40d4c40 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -2023,7 +2023,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, if (hw->bus.func == 0) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); - else if (hw->mac.type == e1000_82580) + else if (hw->mac.type >= e1000_82580) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, &eeprom_data); -- cgit v1.1 From 2a72c31ee4aa31b6a762390e4811a0edf5eefcef Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Wed, 20 Jul 2011 02:27:05 +0000 Subject: ixgbe: fix __ixgbe_notify_dca() bail out code The way __ixgbe_notify_dca() was currently set up it would not be possible to add a requester. Both cases of the IXGBE_FLAG_DCA_ENABLED bit being on and off would lead to the function exiting for a DCA_PROVIDER_ADD. Signed-off-by: Don Skidmore Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 26b132b..e86297b 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1006,7 +1006,7 @@ static int __ixgbe_notify_dca(struct device *dev, void *data) struct ixgbe_adapter *adapter = dev_get_drvdata(dev); unsigned long event = *(unsigned long *)data; - if (!(adapter->flags & IXGBE_FLAG_DCA_ENABLED)) + if (!(adapter->flags & IXGBE_FLAG_DCA_CAPABLE)) return 0; switch (event) { -- cgit v1.1 From b57e35bd0e545181c94405ce35b89000aed56cc5 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 28 Jul 2011 06:17:04 +0000 Subject: ixgbe: fix PHY link setup for 82599 Fix pointer to setup_link for 82599. This resolves some link issues when advertising modes unsupported by the link partner. Signed-off-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ixgbe/ixgbe_82599.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c index 3b3dd4d..34f30ec 100644 --- a/drivers/net/ixgbe/ixgbe_82599.c +++ b/drivers/net/ixgbe/ixgbe_82599.c @@ -213,6 +213,7 @@ static s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw) switch (hw->phy.type) { case ixgbe_phy_tn: phy->ops.check_link = &ixgbe_check_phy_link_tnx; + phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; phy->ops.get_firmware_version = &ixgbe_get_phy_firmware_version_tnx; break; -- cgit v1.1 From b8709894f248b569d7a0f32cc5e84fa14f7585f0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Jul 2011 04:17:25 +0000 Subject: drm/radeon/kms: fix version comment due to merge timing Compute cs support was actually added in 2.11.0 rather than 2.10.0, but the patch was written prior. Update comment to match. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 85f033f..e71d2ed 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -50,8 +50,8 @@ * 2.7.0 - fixups for r600 2D tiling support. 
(no external ABI change), add eg dyn gpr regs * 2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK, clock crystal query * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query - * 2.10.0 - fusion 2D tiling, initial compute support for the CS checker - * 2.11.0 - backend map + * 2.10.0 - fusion 2D tiling + * 2.11.0 - backend map, initial compute support for the CS checker */ #define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MINOR 11 -- cgit v1.1 From 884988398f43378a170026184648ce8ff14dcc9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Jul 2011 09:53:40 +0000 Subject: drm/radeon: off by one in check_reg() functions This off by one range check was copy and pasted a couple places. It's not really harmful, but we should fix it anyway. Signed-off-by: Dan Carpenter Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen_cs.c | 2 +- drivers/gpu/drm/radeon/r600_cs.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 189e865..a134790 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -428,7 +428,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); i = (reg >> 7); - if (i > last_reg) { + if (i >= last_reg) { dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index db8ef19..cf83aa0 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -915,12 +915,11 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx { struct r600_cs_track *track = (struct r600_cs_track *)p->track; struct radeon_cs_reloc *reloc; - u32 last_reg = ARRAY_SIZE(r600_reg_safe_bm); u32 m, i, tmp, *ib; int r; i = (reg >> 7); - if (i > last_reg) { + if (i >= ARRAY_SIZE(r600_reg_safe_bm)) { dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } -- cgit v1.1 From c41b9ee901bb2c7e3eacfa7e171de50c15d61c0b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 30 Jul 2011 18:12:24 +0000 Subject: drm/radeon/kms: add thermal chip quirk for asus 9600xt The board has an lm63 compatible thermal chip, but no thermal chip entry in the vbios tables. 
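The shape of the quirk, condensed from the diff below (0x1043 is the ASUS PCI subsystem vendor ID; the other numbers pin down this exact board), is to match on the subsystem IDs and then register the sensor with the i2c layer so the corresponding hwmon driver can bind to it:

    /* Asus 9600xt: thermal sensor on the MONID i2c bus, no vbios entry */
    if ((dev->pdev->device == 0x4152) &&
        (dev->pdev->subsystem_vendor == 0x1043) &&
        (dev->pdev->subsystem_device == 0xc002)) {
        struct i2c_board_info info = { };

        strlcpy(info.type, "f75375", sizeof(info.type));
        info.addr = 0x28;
        i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
    }

The actual patch also looks up the MONID DDC line with combios_setup_i2c_bus() before registering the device; see the full diff below.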
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39513 Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_combios.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index a74217c..e0138b6 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -2557,6 +2557,7 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev) u16 offset, misc, misc2 = 0; u8 rev, blocks, tmp; int state_index = 0; + struct radeon_i2c_bus_rec i2c_bus; rdev->pm.default_power_state_index = -1; @@ -2575,7 +2576,6 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev) offset = combios_get_table_offset(dev, COMBIOS_OVERDRIVE_INFO_TABLE); if (offset) { u8 thermal_controller = 0, gpio = 0, i2c_addr = 0, clk_bit = 0, data_bit = 0; - struct radeon_i2c_bus_rec i2c_bus; rev = RBIOS8(offset); @@ -2617,6 +2617,25 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev) i2c_new_device(&rdev->pm.i2c_bus->adapter, &info); } } + } else { + /* boards with a thermal chip, but no overdrive table */ + + /* Asus 9600xt has an f75375 on the monid bus */ + if ((dev->pdev->device == 0x4152) && + (dev->pdev->subsystem_vendor == 0x1043) && + (dev->pdev->subsystem_device == 0xc002)) { + i2c_bus = combios_setup_i2c_bus(rdev, DDC_MONID, 0, 0); + rdev->pm.i2c_bus = radeon_i2c_lookup(rdev, &i2c_bus); + if (rdev->pm.i2c_bus) { + struct i2c_board_info info = { }; + const char *name = "f75375"; + info.addr = 0x28; + strlcpy(info.type, name, sizeof(info.type)); + i2c_new_device(&rdev->pm.i2c_bus->adapter, &info); + DRM_INFO("Possible %s thermal controller at 0x%02x\n", + name, info.addr); + } + } } if (rdev->flags & RADEON_IS_MOBILITY) { -- cgit v1.1 From 4d5cb60d3f6c396899a515e0e667d0f855ed66cc Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Sat, 30 Jul 2011 14:26:01 +0000 Subject: drm/debugfs: Initialise empty variable [airlied: move char declaration] Signed-off-by: Emil Velikov Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 9d8c892..9d2668a 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -90,7 +90,6 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count, struct drm_device *dev = minor->dev; struct dentry *ent; struct drm_info_node *tmp; - char name[64]; int i, ret; for (i = 0; i < count; i++) { @@ -108,6 +107,9 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count, ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO, root, tmp, &drm_debugfs_fops); if (!ent) { + char name[64]; + strncpy(name, root->d_name.name, + min(root->d_name.len, 64U)); DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n", name, files[i].name); kfree(tmp); -- cgit v1.1 From 69ad2ffe57a81c7b700e337f066b8b39c3655910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=83=C6=92=C3=86=E2=80=99=C3=83?= =?UTF-8?q?=E2=80=9A=C3=82=C2=A1zquez=20Cao?= Date: Thu, 28 Jul 2011 05:44:04 +0000 Subject: drm/radeon: clean reg header files Reg header files are generated so they are not cleaned automagically. They need to be added to the clean-files list. 
Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 3896ef8..9f363e0 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -5,6 +5,7 @@ ccflags-y := -Iinclude/drm hostprogs-y := mkregtable +clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h quiet_cmd_mkregtable = MKREGTABLE $@ cmd_mkregtable = $(obj)/mkregtable $< > $@ -- cgit v1.1 From 816985d4f96cda5159b6b65f3442964725030ae9 Mon Sep 17 00:00:00 2001 From: Bojan Prtvar Date: Wed, 3 Aug 2011 19:51:19 +0000 Subject: drm/radeon: fix potential NULL dereference in drivers/gpu/drm/radeon/atom.c kzalloc() can return NULL, so I added check for it Signed-off-by: Bojan Prtvar Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index ebdb0fd..e88c644 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -1245,6 +1245,9 @@ struct atom_context *atom_parse(struct card_info *card, void *bios) char name[512]; int i; + if (!ctx) + return NULL; + ctx->card = card; ctx->bios = bios; -- cgit v1.1 From e1c44acc8cabda6b38864f25e809b306c4d2d790 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Thu, 4 Aug 2011 05:41:00 +0000 Subject: drm: Fix irq install error handling The registered irq should be unregistered by free_irq() if irq_postinstall() returns the error after request_irq() is called successfully. [airlied: add vga switcheroo disable] Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 2022a5c..9be574f 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -368,6 +368,9 @@ int drm_irq_install(struct drm_device *dev) mutex_lock(&dev->struct_mutex); dev->irq_enabled = 0; mutex_unlock(&dev->struct_mutex); + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + vga_client_register(dev->pdev, NULL, NULL, NULL); + free_irq(drm_dev_to_irq(dev), dev); } return ret; -- cgit v1.1 From 5037f8acf448dd0de9868dc7410f45879d3d1a1b Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Thu, 4 Aug 2011 05:41:01 +0000 Subject: drm: Add NULL check about irq functions The struct drm_driver has some function pointers for irq. They are gpu specific and some functions aren't essential things. This can prevents creation of unnecessary dummy function for irq. 
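With these checks in place a KMS driver that needs no special IRQ setup or teardown can simply leave the optional hooks unset. A hypothetical example (illustrative only, not part of this patch) of what such a driver struct could now look like:

    static irqreturn_t foo_irq_handler(int irq, void *arg)
    {
        /* ... ack and handle the interrupt ... */
        return IRQ_HANDLED;
    }

    static struct drm_driver foo_driver = {
        .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED,
        .irq_handler     = foo_irq_handler,
        /* .irq_preinstall, .irq_postinstall and .irq_uninstall are
         * left NULL; the core now skips them instead of requiring
         * empty stubs. */
    };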
Signed-off-by: Joonyoung Shim Signed-off-by: Kyungmin Park Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 9be574f..3830e9e 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -291,11 +291,14 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state) if (!dev->irq_enabled) return; - if (state) - dev->driver->irq_uninstall(dev); - else { - dev->driver->irq_preinstall(dev); - dev->driver->irq_postinstall(dev); + if (state) { + if (dev->driver->irq_uninstall) + dev->driver->irq_uninstall(dev); + } else { + if (dev->driver->irq_preinstall) + dev->driver->irq_preinstall(dev); + if (dev->driver->irq_postinstall) + dev->driver->irq_postinstall(dev); } } @@ -338,7 +341,8 @@ int drm_irq_install(struct drm_device *dev) DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev)); /* Before installing handler */ - dev->driver->irq_preinstall(dev); + if (dev->driver->irq_preinstall) + dev->driver->irq_preinstall(dev); /* Install handler */ if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED)) @@ -363,7 +367,9 @@ int drm_irq_install(struct drm_device *dev) vga_client_register(dev->pdev, (void *)dev, drm_irq_vgaarb_nokms, NULL); /* After installing handler */ - ret = dev->driver->irq_postinstall(dev); + if (dev->driver->irq_postinstall) + ret = dev->driver->irq_postinstall(dev); + if (ret < 0) { mutex_lock(&dev->struct_mutex); dev->irq_enabled = 0; @@ -416,7 +422,8 @@ int drm_irq_uninstall(struct drm_device *dev) if (!drm_core_check_feature(dev, DRIVER_MODESET)) vga_client_register(dev->pdev, NULL, NULL, NULL); - dev->driver->irq_uninstall(dev); + if (dev->driver->irq_uninstall) + dev->driver->irq_uninstall(dev); free_irq(drm_dev_to_irq(dev), dev); -- cgit v1.1 From 051963d4832ed61e5ae74f5330b0a94489e101b9 Mon Sep 17 00:00:00 2001 From: Thomas Reim Date: Fri, 29 Jul 2011 14:28:57 +0000 Subject: drm: Separate EDID Header Check from EDID Block Check Provides function drm_edid_header_is_valid() for EDID header check and replaces EDID header check part of function drm_edid_block_valid() by a call of drm_edid_header_is_valid(). This is a prerequisite to extend DDC probing, e. g. in function radeon_ddc_probe() for Radeon devices, by a central EDID header check. Tested for kernel 2.6.35, 2.6.38 and 3.0 Cc: Signed-off-by: Thomas Reim Reviewed-by: Alex Deucher Acked-by: Stephen Michaels Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 24 ++++++++++++++++++------ include/drm/drm_crtc.h | 1 + 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7496d24..7425e5c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -127,6 +127,23 @@ static const u8 edid_header[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; + /* + * Sanity check the header of the base EDID block. Return 8 if the header + * is perfect, down to 0 if it's totally wrong. + */ +int drm_edid_header_is_valid(const u8 *raw_edid) +{ + int i, score = 0; + + for (i = 0; i < sizeof(edid_header); i++) + if (raw_edid[i] == edid_header[i]) + score++; + + return score; +} +EXPORT_SYMBOL(drm_edid_header_is_valid); + + /* * Sanity check the EDID block (base or extension). Return 0 if the block * doesn't check out, or 1 if it's valid. 
@@ -139,12 +156,7 @@ drm_edid_block_valid(u8 *raw_edid) struct edid *edid = (struct edid *)raw_edid; if (raw_edid[0] == 0x00) { - int score = 0; - - for (i = 0; i < sizeof(edid_header); i++) - if (raw_edid[i] == edid_header[i]) - score++; - + int score = drm_edid_header_is_valid(raw_edid); if (score == 8) ; else if (score >= 6) { DRM_DEBUG("Fixing EDID header, your hardware may be failing\n"); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index d515bc8..44335e5 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -804,6 +804,7 @@ extern struct drm_display_mode *drm_gtf_mode_complex(struct drm_device *dev, extern int drm_add_modes_noedid(struct drm_connector *connector, int hdisplay, int vdisplay); +extern int drm_edid_header_is_valid(const u8 *raw_edid); extern bool drm_edid_is_valid(struct edid *edid); struct drm_display_mode *drm_mode_find_dmt(struct drm_device *dev, int hsize, int vsize, int fresh); -- cgit v1.1 From e384fab8c6f3ca88600bcb2ebdf0eb2f90864fab Mon Sep 17 00:00:00 2001 From: Thomas Reim Date: Fri, 29 Jul 2011 14:28:58 +0000 Subject: drm/radeon: Extended DDC Probing for Connectors with Improperly Wired DDC Lines (here: Asus M2A-VM HDMI) Some integrated ATI Radeon chipset implementations with add-on HDMI card (e. g. Asus M2A-VM HDMI) indicate the availability of a DDC even when the add-on card is not plugged in or HDMI is disabled in BIOS setup. In this case, drm_get_edid() and drm_edid_block_valid() periodically dump data and kernel errors into system log files and onto terminals. For these connectors DDC probing is extended by a check for a correct EDID header. Only in case a valid EDID header is also found, the (HDMI or DVI) connector will be used by the Radeon driver. This prevents the kernel driver from useless flooding of logs and terminal sessions with EDID dumps and error messages. This patch adds a flag 'requires_extended_probe' to the radeon_connector structure. In function radeon_connector_needs_extended_probe() this flag can be set on a chipset family/vendor/connector type specific basis. In addition, function radeon_ddc_probe() has been adapted to perform extended DDC probing if required by the connector's flag. Requires function drm_edid_header_is_valid() in DRM module provided by [PATCH] drm: Separate EDID Header Check from EDID Block Check. Tested for kernel 2.6.35, 2.6.38 and 3.0 on Asus M2A-VM HDMI board BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=668196 BugLink: http://bugs.launchpad.net/bugs/7228066 Cc: Signed-off-by: Thomas Reim Reviewed-by: Alex Deucher Acked-by: Stephen Michaels Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 45 ++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon_display.c | 9 ++++++ drivers/gpu/drm/radeon/radeon_i2c.c | 32 +++++++++++++++------ drivers/gpu/drm/radeon/radeon_mode.h | 6 +++- 4 files changed, 80 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 9792d4f..6a0f1be 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -430,6 +430,36 @@ int radeon_connector_set_property(struct drm_connector *connector, struct drm_pr return 0; } +/* + * Some integrated ATI Radeon chipset implementations (e. g. + * Asus M2A-VM HDMI) may indicate the availability of a DDC, + * even when there's no monitor connected. 
For these connectors + * following DDC probe extension will be applied: check also for the + * availability of EDID with at least a correct EDID header. Only then, + * DDC is assumed to be available. This prevents drm_get_edid() and + * drm_edid_block_valid() from periodically dumping data and kernel + * errors into the logs and onto the terminal. + */ +static bool radeon_connector_needs_extended_probe(struct radeon_device *dev, + uint32_t supported_device, + int connector_type) +{ + /* Asus M2A-VM HDMI board sends data to i2c bus even, + * if HDMI add-on card is not plugged in or HDMI is disabled in + * BIOS. Valid DDC can only be assumed, if also a valid EDID header + * can be retrieved via i2c bus during DDC probe */ + if ((dev->pdev->device == 0x791e) && + (dev->pdev->subsystem_vendor == 0x1043) && + (dev->pdev->subsystem_device == 0x826d)) { + if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) && + (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) + return true; + } + + /* Default: no EDID header probe required for DDC probing */ + return false; +} + static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder, struct drm_connector *connector) { @@ -661,7 +691,8 @@ radeon_vga_detect(struct drm_connector *connector, bool force) ret = connector_status_disconnected; if (radeon_connector->ddc_bus) - dret = radeon_ddc_probe(radeon_connector); + dret = radeon_ddc_probe(radeon_connector, + radeon_connector->requires_extended_probe); if (dret) { if (radeon_connector->edid) { kfree(radeon_connector->edid); @@ -833,7 +864,8 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) bool dret = false; if (radeon_connector->ddc_bus) - dret = radeon_ddc_probe(radeon_connector); + dret = radeon_ddc_probe(radeon_connector, + radeon_connector->requires_extended_probe); if (dret) { if (radeon_connector->edid) { kfree(radeon_connector->edid); @@ -1251,7 +1283,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force) if (radeon_dp_getdpcd(radeon_connector)) ret = connector_status_connected; } else { - if (radeon_ddc_probe(radeon_connector)) + if (radeon_ddc_probe(radeon_connector, + radeon_connector->requires_extended_probe)) ret = connector_status_connected; } } @@ -1406,6 +1439,9 @@ radeon_add_atom_connector(struct drm_device *dev, radeon_connector->shared_ddc = shared_ddc; radeon_connector->connector_object_id = connector_object_id; radeon_connector->hpd = *hpd; + radeon_connector->requires_extended_probe = + radeon_connector_needs_extended_probe(rdev, supported_device, + connector_type); radeon_connector->router = *router; if (router->ddc_valid || router->cd_valid) { radeon_connector->router_bus = radeon_i2c_lookup(rdev, &router->i2c_info); @@ -1752,6 +1788,9 @@ radeon_add_legacy_connector(struct drm_device *dev, radeon_connector->devices = supported_device; radeon_connector->connector_object_id = connector_object_id; radeon_connector->hpd = *hpd; + radeon_connector->requires_extended_probe = + radeon_connector_needs_extended_probe(rdev, supported_device, + connector_type); switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 28f4655..1a85894 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -751,8 +751,17 @@ static int radeon_ddc_dump(struct drm_connector *connector) if (!radeon_connector->ddc_bus) return -1; edid = 
drm_get_edid(connector, &radeon_connector->ddc_bus->adapter); + /* Log EDID retrieval status here. In particular with regard to + * connectors with requires_extended_probe flag set, that will prevent + * function radeon_dvi_detect() to fetch EDID on this connector, + * as long as there is no valid EDID header found */ if (edid) { + DRM_INFO("Radeon display connector %s: Found valid EDID", + drm_get_connector_name(connector)); kfree(edid); + } else { + DRM_INFO("Radeon display connector %s: No monitor connected or invalid EDID", + drm_get_connector_name(connector)); } return ret; } diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 781196d..6c111c1 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -32,17 +32,17 @@ * radeon_ddc_probe * */ -bool radeon_ddc_probe(struct radeon_connector *radeon_connector) +bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_extended_probe) { - u8 out_buf[] = { 0x0, 0x0}; - u8 buf[2]; + u8 out = 0x0; + u8 buf[8]; int ret; struct i2c_msg msgs[] = { { .addr = 0x50, .flags = 0, .len = 1, - .buf = out_buf, + .buf = &out, }, { .addr = 0x50, @@ -52,15 +52,31 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector) } }; + /* Read 8 bytes from i2c for extended probe of EDID header */ + if (requires_extended_probe) + msgs[1].len = 8; + /* on hw with routers, select right port */ if (radeon_connector->router.ddc_valid) radeon_router_select_ddc_port(radeon_connector); ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2); - if (ret == 2) - return true; - - return false; + if (ret != 2) + /* Couldn't find an accessible DDC on this connector */ + return false; + if (requires_extended_probe) { + /* Probe also for valid EDID header + * EDID header starts with: + * 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00. 
+ * Only the first 6 bytes must be valid as + * drm_edid_block_valid() can fix the last 2 bytes */ + if (drm_edid_header_is_valid(buf) < 6) { + /* Couldn't find an accessible EDID on this + * connector */ + return false; + } + } + return true; } /* bit banging i2c */ diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 6df4e3c..d09031c 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -438,6 +438,9 @@ struct radeon_connector { struct radeon_i2c_chan *ddc_bus; /* some systems have an hdmi and vga port with a shared ddc line */ bool shared_ddc; + /* for some Radeon chip families we apply an additional EDID header + check as part of the DDC probe */ + bool requires_extended_probe; bool use_digital; /* we need to mind the EDID between detect and get modes due to analog/digital/tvencoder */ @@ -514,7 +517,8 @@ extern void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c, u8 val); extern void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector); extern void radeon_router_select_cd_port(struct radeon_connector *radeon_connector); -extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector); +extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector, + bool requires_extended_probe); extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector); extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector); -- cgit v1.1 From d522d9cc5bdd41214084383fc3e6d882f6916a78 Mon Sep 17 00:00:00 2001 From: Thomas Reim Date: Fri, 29 Jul 2011 14:28:59 +0000 Subject: drm/radeon: Log Subsystem Vendor and Device Information Log PCI subsystem vendor and subsystem device ID in addition to PCI vendor and device ID during kernel mode initialisation. This helps to better identify radeon devices of third-party vendors, e. g. for bug analysis. Tested for kernel 2.6.35, 2.6.38 and 3.0 on Asus M2A-VM HDMI board Cc: Signed-off-by: Thomas Reim Reviewed-by: Alex Deucher Acked-by: Stephen Michaels Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 7cfaa7e..440e6ec 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -704,8 +704,9 @@ int radeon_device_init(struct radeon_device *rdev, rdev->gpu_lockup = false; rdev->accel_working = false; - DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X).\n", - radeon_family_name[rdev->family], pdev->vendor, pdev->device); + DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", + radeon_family_name[rdev->family], pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); /* mutex initialization are all done here so we * can recall function without having locking issues */ -- cgit v1.1 From a81b31e9fc98e067b7e7f1244861c97e44268e2d Mon Sep 17 00:00:00 2001 From: Thomas Reim Date: Fri, 29 Jul 2011 14:29:00 +0000 Subject: drm/radeon: Extended DDC Probing for ECS A740GM-M DVI-D Connector ECS A740GM-M with ATI RADEON 2100 sends data to i2c bus for a DVI connector that is not implemented/existent on the board. Fix by applying extented DDC probing for this connector. 
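As a reminder of how the extended probe from the previous patch makes that decision (recap only, no new code): drm_edid_header_is_valid() returns how many of the 8 EDID header bytes match, and the callers apply different thresholds to that score:

    int score = drm_edid_header_is_valid(raw_edid);

    if (score == 8) {
        /* perfect header */
    } else if (score >= 6) {
        /* header damaged but fixable: drm_edid_block_valid() rewrites
         * it and logs a debug message */
    } else {
        /* below 6 matching bytes there is no plausible EDID, so the
         * extended radeon_ddc_probe() treats the connector as having
         * no usable DDC at all */
    }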
Requires [PATCH] drm/radeon: Extended DDC Probing for Connectors with Improperly Wired DDC Lines Tested for kernel 2.6.38 on Asus ECS A740GM-M board BugLink: http://bugs.launchpad.net/bugs/810926 Cc: Signed-off-by: Thomas Reim Reviewed-by: Alex Deucher Acked-by: Stephen Michaels Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 6a0f1be..6d6b5f1 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -455,6 +455,15 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev, (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) return true; } + /* ECS A740GM-M with ATI RADEON 2100 sends data to i2c bus + * for a DVI connector that is not implemented */ + if ((dev->pdev->device == 0x796e) && + (dev->pdev->subsystem_vendor == 0x1019) && + (dev->pdev->subsystem_device == 0x2615)) { + if ((connector_type == DRM_MODE_CONNECTOR_DVID) && + (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) + return true; + } /* Default: no EDID header probe required for DDC probing */ return false; -- cgit v1.1 From 82de9a0cc34bc1640c4f133f13d62a765596b2b9 Mon Sep 17 00:00:00 2001 From: Arnaud Lacombe Date: Thu, 4 Aug 2011 10:39:44 -0400 Subject: eisa/pci_eisa.c: fix BUG introduced by 005bdad7b80 While `pci_eisa_driver' still refer `pci_eisa_init', the .probe() function should not be called after init memory release, as pointed out by commit 74b9a297. The structure is still referenced in the drivers subsystem, and can be accesseed through sysfs, so the modpost warning is a false positive. Mark it as such. In the same time, the warning referenced in 005bdad7b80 did only mention `pci_eisa_driver', not `pci_eisa_pci_tbl', so remove its marking. Broken-by: Arnaud Lacombe (in 005bdad7b80) Reported-by: Tetsuo Handa Signed-off-by: Arnaud Lacombe Signed-off-by: Linus Torvalds --- drivers/eisa/pci_eisa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index 30da70d..cdae207 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id __initdata pci_eisa_pci_tbl[] = { +static struct pci_device_id pci_eisa_pci_tbl[] = { { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, { 0, } }; -static struct pci_driver __initdata pci_eisa_driver = { +static struct pci_driver __refdata pci_eisa_driver = { .name = "pci_eisa", .id_table = pci_eisa_pci_tbl, .probe = pci_eisa_init, -- cgit v1.1 From 1e9ea2656b656edd3c8de98675bbc0340211b5bd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 3 Aug 2011 09:43:44 -0700 Subject: xen/tracing: it looks like we wanted CONFIG_FTRACE Apparently we wanted CONFIG_FTRACE rather the CONFIG_FUNCTION_TRACER. 
Reported-by: Sander Eikelenboom Tested-by: Sander Eikelenboom Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 45e94ac..3326204 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ grant-table.o suspend.o platform-pci-unplug.o \ p2m.o -obj-$(CONFIG_FUNCTION_TRACER) += trace.o +obj-$(CONFIG_FTRACE) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o -- cgit v1.1 From 8f3c5883d840d079a5d2db20893b5ac8ba453b40 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 2 Aug 2011 11:45:24 +0200 Subject: xen: Fix printk() format in xen/setup.c Use correct format specifier for unsigned long. Signed-off-by: Igor Mammedov Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 60aeeb5..e405632 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -113,7 +113,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, len++; } } - printk(KERN_CONT "%ld pages freed\n", len); + printk(KERN_CONT "%lu pages freed\n", len); return len; } @@ -139,7 +139,7 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, if (last_end < max_addr) released += xen_release_chunk(last_end, max_addr); - printk(KERN_INFO "released %ld pages of unused memory\n", released); + printk(KERN_INFO "released %lu pages of unused memory\n", released); return released; } -- cgit v1.1 From 98f531da8479eec3d828adc7f0865baaab99a543 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 2 Aug 2011 11:45:25 +0200 Subject: xen: Fix misleading WARN message at xen_release_chunk WARN message should not complain "Failed to release memory %lx-%lx err=%d\n" ^^^^^^^ about range when it fails to release just one page, instead it should say what pfn is not freed. In addition line: printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: " ... printk(KERN_CONT "%lu pages freed\n", len); will be broken if WARN in between this line is fired. So fix it by using a single printk for this. 
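For reference, the pattern being fixed looks roughly like this (sketch only; the real change is in the diff below):

    /* Fragile: a KERN_CONT continuation is torn apart if anything else,
     * such as a WARN() backtrace, prints between the two halves. */
    printk(KERN_INFO "looking at area pfn %lx-%lx: ", start, end);
    /* ... loop that may WARN() on failure ... */
    printk(KERN_CONT "%lu pages freed\n", len);

    /* Robust: emit one self-contained line once the count is known. */
    printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
           start, end, len);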
Signed-off-by: Igor Mammedov Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e405632..02ffd9e 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -92,8 +92,6 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, if (end <= start) return 0; - printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ", - start, end); for(pfn = start; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); @@ -106,14 +104,14 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); - WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", - start, end, ret); + WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); if (ret == 1) { __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); len++; } } - printk(KERN_CONT "%lu pages freed\n", len); + printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", + start, end, len); return len; } -- cgit v1.1 From 655b16128482fd12808f77a6799eea5419c93709 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 29 May 2011 10:57:47 +0300 Subject: nfs_xdr: Move nfs4_string definition out of #ifdef CONFIG_NFS_V4 exofs file system wants to use pnfs_osd_xdr.h file instead of redefining pnfs-objects types in it's private "pnfs.h" headr. Before we do the switch we must make sure pnfs_osd_xdr.h is compilable also under NFS versions smaller than 4.1. Since now it is needed regardless of version, by the exofs code. nfs4_string is not the only nfs4 type out in the global scope. Ack-by: Trond Myklebust Signed-off-by: Boaz Harrosh --- include/linux/nfs_xdr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 00848d8..d9f5e8d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -759,6 +759,11 @@ struct nfs3_getaclres { struct posix_acl * acl_default; }; +struct nfs4_string { + unsigned int len; + char *data; +}; + #ifdef CONFIG_NFS_V4 typedef u64 clientid4; @@ -949,11 +954,6 @@ struct nfs4_server_caps_res { struct nfs4_sequence_res seq_res; }; -struct nfs4_string { - unsigned int len; - char *data; -}; - #define NFS4_PATHNAME_MAXCOMPONENTS 512 struct nfs4_pathname { unsigned int ncomponents; -- cgit v1.1 From 26ae93c2dc7152463d319c28768f242a11a54620 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 2 Feb 2010 15:56:53 +0200 Subject: exofs: Remove pnfs-osd private definitions Now that pnfs-osd has hit mainline we can remove exofs's private header. (And the FIXME comment) Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 6 +----- fs/exofs/pnfs.h | 45 --------------------------------------------- 2 files changed, 1 insertion(+), 50 deletions(-) delete mode 100644 fs/exofs/pnfs.h diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index c965806..e103dbd 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -36,13 +36,9 @@ #include #include #include +#include #include "common.h" -/* FIXME: Remove once pnfs hits mainline - * #include - */ -#include "pnfs.h" - #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) #ifdef CONFIG_EXOFS_DEBUG diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h deleted file mode 100644 index c52e988..0000000 --- a/fs/exofs/pnfs.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2008, 2009 - * Boaz Harrosh - * - * This file is part of exofs. 
- * - * exofs is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License version 2 as published by the Free - * Software Foundation. - * - */ - -/* FIXME: Remove this file once pnfs hits mainline */ - -#ifndef __EXOFS_PNFS_H__ -#define __EXOFS_PNFS_H__ - -#if ! defined(__PNFS_OSD_XDR_H__) - -enum pnfs_iomode { - IOMODE_READ = 1, - IOMODE_RW = 2, - IOMODE_ANY = 3, -}; - -/* Layout Structure */ -enum pnfs_osd_raid_algorithm4 { - PNFS_OSD_RAID_0 = 1, - PNFS_OSD_RAID_4 = 2, - PNFS_OSD_RAID_5 = 3, - PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ -}; - -struct pnfs_osd_data_map { - u32 odm_num_comps; - u64 odm_stripe_unit; - u32 odm_group_width; - u32 odm_group_depth; - u32 odm_mirror_cnt; - u32 odm_raid_algorithm; -}; - -#endif /* ! defined(__PNFS_OSD_XDR_H__) */ - -#endif /* __EXOFS_PNFS_H__ */ -- cgit v1.1 From 6d4073e88132259485ef1b2c88daa5e50c95789c Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 27 Jul 2011 17:51:53 -0700 Subject: exofs: BUG: Avoid sbi realloc Since the beginning we realloced the sbi structure when a bigger then one device table was specified. (I know that was really stupid). Then much later when "register bdi" was added (By Jens) it was registering the pointer to sbi->bdi before the realloc. We never saw this problem because up till now the realloc did not do anything since the device table was small enough to fit in the original allocation. But once we starting testing with large device tables (Bigger then 28) we noticed the crash of writeback operating on a deallocated pointer. * Avoid the all mess by allocating the device-table as a second array and get rid of the variable-sized structure and the rest of this mess. * Take the chance to clean near by structures and comments. * Add a needed dprint on startup to indicate the loaded layout. * Also move the bdi registration to the very end because it will only fail in a low memory, which will probably fail before hand. There are many more likely causes to not load before that. This way the error handling is made simpler. (Just doing this would be enough to fix the BUG) Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 9 +++------ fs/exofs/super.c | 45 +++++++++++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index e103dbd..9f62349 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -66,13 +66,14 @@ struct exofs_layout { enum exofs_inode_layout_gen_functions lay_func; unsigned s_numdevs; /* Num of devices in array */ - struct osd_dev *s_ods[0]; /* Variable length */ + struct osd_dev **s_ods; /* osd_dev array */ }; /* * our extension to the in-memory superblock */ struct exofs_sb_info { + struct backing_dev_info bdi; /* register our bdi with VFS */ struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ int s_timeout; /* timeout for OSD operations */ uint64_t s_nextid; /* highest object ID used */ @@ -81,15 +82,11 @@ struct exofs_sb_info { u32 s_next_generation; /* next gen # to use */ atomic_t s_curr_pending; /* number of pending commands */ uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ - struct backing_dev_info bdi; /* register our bdi with VFS */ struct pnfs_osd_data_map data_map; /* Default raid to use * FIXME: Needed ? */ -/* struct exofs_layout dir_layout;*/ /* Default dir layout */ - struct exofs_layout layout; /* Default files layout, - * contains the variable osd_dev - * array. 
Keep last */ + struct exofs_layout layout; /* Default files layout */ struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ }; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index c57bedd..a747c87 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -393,6 +393,8 @@ void exofs_free_sbi(struct exofs_sb_info *sbi) osduld_put_device(od); } } + if (sbi->layout.s_ods != sbi->_min_one_dev) + kfree(sbi->layout.s_ods); kfree(sbi); } @@ -501,6 +503,15 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, return -EINVAL; } + EXOFS_DBGMSG("exofs: layout: " + "num_comps=%u stripe_unit=0x%x group_width=%u " + "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n", + numdevs, + sbi->layout.stripe_unit, + sbi->layout.group_width, + _LLU(sbi->layout.group_depth), + sbi->layout.mirrors_p1, + sbi->data_map.odm_raid_algorithm); return 0; } @@ -547,11 +558,10 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, return !(odi->systemid_len || odi->osdname_len); } -static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, +static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, + struct osd_dev *fscb_od, unsigned table_count) { - struct exofs_sb_info *sbi = *psbi; - struct osd_dev *fscb_od; struct osd_obj_id obj = {.partition = sbi->layout.s_pid, .id = EXOFS_DEVTABLE_ID}; struct exofs_device_table *dt; @@ -567,8 +577,6 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, return -ENOMEM; } - fscb_od = sbi->layout.s_ods[0]; - sbi->layout.s_ods[0] = NULL; sbi->layout.s_numdevs = 0; ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); if (unlikely(ret)) { @@ -590,14 +598,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, if (likely(numdevs > 1)) { unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); - sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); - if (unlikely(!sbi)) { + sbi->layout.s_ods = kzalloc(size, GFP_KERNEL); + if (unlikely(!sbi->layout.s_ods)) { + EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", + numdevs); ret = -ENOMEM; goto out; } - memset(&sbi->layout.s_ods[1], 0, - size - sizeof(sbi->layout.s_ods[0])); - *psbi = sbi; } for (i = 0; i < numdevs; i++) { @@ -684,10 +691,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; - ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); - if (ret) - goto free_bdi; - /* use mount options to fill superblock */ if (opts->is_osdname) { struct osd_dev_info odi = {.systemid_len = 0}; @@ -709,7 +712,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sbi->layout.group_width = 1; sbi->layout.group_depth = -1; sbi->layout.group_count = 1; - sbi->layout.s_ods[0] = od; + sbi->layout.s_ods = sbi->_min_one_dev; sbi->layout.s_numdevs = 1; sbi->layout.s_pid = opts->pid; sbi->s_timeout = opts->timeout; @@ -757,9 +760,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) table_count = le64_to_cpu(fscb.s_dev_table_count); if (table_count) { - ret = exofs_read_lookup_dev_table(&sbi, table_count); + ret = exofs_read_lookup_dev_table(sbi, od, table_count); if (unlikely(ret)) goto free_sbi; + } else { + sbi->layout.s_ods[0] = od; } __sbi_read_stats(sbi); @@ -793,6 +798,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } + ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); + if (ret) { + EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); + goto 
free_sbi; + } + _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], sbi->layout.s_pid); if (opts->is_osdname) @@ -800,8 +811,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) return 0; free_sbi: - bdi_destroy(&sbi->bdi); -free_bdi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", opts->dev_name, sbi->layout.s_pid, ret); exofs_free_sbi(sbi); -- cgit v1.1 From 9ce730475e1b950d78a69c1be3410109c103ac98 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 3 Aug 2011 20:18:01 -0700 Subject: exofs: Small cleanup of exofs_fill_super Small cleanup that unifies duplicated code used in both the error and success cases Signed-off-by: Boaz Harrosh --- fs/exofs/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/exofs/super.c b/fs/exofs/super.c index a747c87..65fe5de 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -698,6 +698,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) odi.osdname_len = strlen(opts->dev_name); odi.osdname = (u8 *)opts->dev_name; od = osduld_info_lookup(&odi); + kfree(opts->dev_name); + opts->dev_name = NULL; } else { od = osduld_path_lookup(opts->dev_name); } @@ -806,16 +808,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], sbi->layout.s_pid); - if (opts->is_osdname) - kfree(opts->dev_name); return 0; free_sbi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", opts->dev_name, sbi->layout.s_pid, ret); exofs_free_sbi(sbi); - if (opts->is_osdname) - kfree(opts->dev_name); return ret; } -- cgit v1.1 From 16f75bb35d54b44356f496272c013f7ace5fa698 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 3 Aug 2011 20:44:16 -0700 Subject: exofs: Fix truncate for the raid-groups case In the general raid-group case the truncate was wrong in that it did not also fix the object length of the neighboring groups. There are two bad cases in the old code: 1. Space that should be freed was not. 2. If a file That was big is truncated small, then made bigger again, the holes would not contain zeros but could expose old data. 
(If the growing of the file expands to more than a full groups cycle + group size (> S + T)) Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 73 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index f74a2ec..fbb47ba 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -305,20 +305,21 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) struct _striping_info { u64 obj_offset; u64 group_length; + u64 M; /* for truncate */ unsigned dev; unsigned unit_off; }; -static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, +static void _calc_stripe_info(struct exofs_layout *layout, u64 file_offset, struct _striping_info *si) { - u32 stripe_unit = ios->layout->stripe_unit; - u32 group_width = ios->layout->group_width; - u64 group_depth = ios->layout->group_depth; + u32 stripe_unit = layout->stripe_unit; + u32 group_width = layout->group_width; + u64 group_depth = layout->group_depth; u32 U = stripe_unit * group_width; u64 T = U * group_depth; - u64 S = T * ios->layout->group_count; + u64 S = T * layout->group_count; u64 M = div64_u64(file_offset, S); /* @@ -333,7 +334,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, /* "H - (N * U)" is just "H % U" so it's bound to u32 */ si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; - si->dev *= ios->layout->mirrors_p1; + si->dev *= layout->mirrors_p1; div_u64_rem(file_offset, stripe_unit, &si->unit_off); @@ -341,6 +342,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, (M * group_depth * stripe_unit); si->group_length = T - H; + si->M = M; } static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, @@ -454,7 +456,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios) if (ios->kern_buff) { struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; - _calc_stripe_info(ios, ios->offset, &si); + _calc_stripe_info(ios->layout, ios->offset, &si); per_dev->offset = si.obj_offset; per_dev->dev = si.dev; @@ -468,7 +470,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios) } while (length) { - _calc_stripe_info(ios, offset, &si); + _calc_stripe_info(ios->layout, offset, &si); if (length < si.group_length) si.group_length = length; @@ -745,6 +747,31 @@ static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, return 0; } +struct _trunc_info { + struct _striping_info si; + u64 prev_group_obj_off; + u64 next_group_obj_off; + + unsigned first_group_dev; + unsigned nex_group_dev; + unsigned max_devs; +}; + +void _calc_trunk_info(struct exofs_layout *layout, u64 file_offset, + struct _trunc_info *ti) +{ + unsigned stripe_unit = layout->stripe_unit; + + _calc_stripe_info(layout, file_offset, &ti->si); + + ti->prev_group_obj_off = ti->si.M * stripe_unit; + ti->next_group_obj_off = ti->si.M ? 
(ti->si.M - 1) * stripe_unit : 0; + + ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); + ti->nex_group_dev = ti->first_group_dev + layout->group_width; + ti->max_devs = layout->group_width * layout->group_count; +} + int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) { struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; @@ -753,14 +780,16 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) struct osd_attr attr; __be64 newsize; } *size_attrs; - struct _striping_info si; + struct _trunc_info ti; int i, ret; ret = exofs_get_io_state(&sbi->layout, &ios); if (unlikely(ret)) return ret; - size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), + _calc_trunk_info(ios->layout, size, &ti); + + size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), GFP_KERNEL); if (unlikely(!size_attrs)) { ret = -ENOMEM; @@ -769,26 +798,30 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) ios->obj.id = exofs_oi_objno(oi); ios->cred = oi->i_cred; - ios->numdevs = ios->layout->s_numdevs; - _calc_stripe_info(ios, size, &si); - for (i = 0; i < ios->layout->group_width; ++i) { + for (i = 0; i < ti.max_devs; ++i) { struct exofs_trunc_attr *size_attr = &size_attrs[i]; u64 obj_size; - if (i < si.dev) - obj_size = si.obj_offset + - ios->layout->stripe_unit - si.unit_off; - else if (i == si.dev) - obj_size = si.obj_offset; - else /* i > si.dev */ - obj_size = si.obj_offset - si.unit_off; + if (i < ti.first_group_dev) + obj_size = ti.prev_group_obj_off; + else if (i >= ti.nex_group_dev) + obj_size = ti.next_group_obj_off; + else if (i < ti.si.dev) /* dev within this group */ + obj_size = ti.si.obj_offset + + ios->layout->stripe_unit - ti.si.unit_off; + else if (i == ti.si.dev) + obj_size = ti.si.obj_offset; + else /* i > ti.dev */ + obj_size = ti.si.obj_offset - ti.si.unit_off; size_attr->newsize = cpu_to_be64(obj_size); size_attr->attr = g_attr_logical_length; size_attr->attr.val_ptr = &size_attr->newsize; + EXOFS_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", + _LLU(ios->obj.id), _LLU(obj_size), i); ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, &size_attr->attr); if (unlikely(ret)) -- cgit v1.1 From 1eb9a4b8a3c8a141cf2ab27309f089923b69c707 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 6 Jul 2011 16:48:49 -0400 Subject: efi: Fix argument types for SetVariable() for ia64 The spec says this takes uint32 for attributes, not uintn. Signed-off-by: Matthew Garrett Signed-off-by: Tony Luck --- arch/ia64/kernel/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 6fc03af..c38d22e 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -156,7 +156,7 @@ prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \ #define STUB_SET_VARIABLE(prefix, adjust_arg) \ static efi_status_t \ prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \ - unsigned long attr, unsigned long data_size, \ + u32 attr, unsigned long data_size, \ void *data) \ { \ struct ia64_fpreg fr[6]; \ -- cgit v1.1 From cbc158d6bfa1990f7869717bb5270867c66068d1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 4 Aug 2011 09:24:31 -0700 Subject: cpuidle: Consistent spelling of cpuidle_idle_call() Commit a0bfa1373859e9d11dc92561a8667588803e42d8 mispells cpuidle_idle_call() on ARM and SH code. Fix this to be consistent. 
Cc: Kevin Hilman Cc: Paul Mundt Cc: x86@kernel.org Cc: Len Brown Signed-off-by: David Brown [ Also done by Mark Brown - th ebug has been around forever, and was noticed in -next, but the idle tree never picked it up. Bad bad bad ] Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 2 +- arch/sh/kernel/idle.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d7ee0d4..1a347f4 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -197,7 +197,7 @@ void cpu_idle(void) cpu_relax(); } else { stop_critical_timings(); - if (cpuidle_call_idle()) + if (cpuidle_idle_call()) pm_idle(); start_critical_timings(); /* diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 3c45de1..32114e0 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -101,7 +101,7 @@ void cpu_idle(void) local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); - if (cpuidle_call_idle()) + if (cpuidle_idle_call()) pm_idle(); /* * Sanity check to ensure that pm_idle() returns -- cgit v1.1 From 140d0b2108faebc77c6523296e211e509cb9f5f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Aug 2011 19:35:59 -1000 Subject: Do 'shm_init_ns()' in an early pure_initcall This isn't really critical any more, since other patches (commit 298507d4d2cf: "shm: optimize exit_shm()") have caused us to not actually need to touch the rw_mutex unless there are actual shm segments associated with the namespace, but we really should do tne shm_init_ns() earlier than we do now. This, together with commit 288d5abec831 ("Boot up with usermodehelper disabled") will mean that we really do initialize the initial ipc namespace data structure before we run any tasks. Tested-by: Marc Zyngier Signed-off-by: Linus Torvalds --- ipc/shm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ipc/shm.c b/ipc/shm.c index b5bae9d..02ecf2c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -105,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns) } #endif -void __init shm_init (void) +static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); + return 0; +} + +pure_initcall(ipc_ns_init); + +void __init shm_init (void) +{ ipc_init_proc_interface("sysvipc/shm", #if BITS_PER_LONG <= 32 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", -- cgit v1.1 From 9076d0e7e02b98f7a65df10d1956326c8d8ba61a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 5 Aug 2011 00:53:57 -0700 Subject: sparc: Access kernel TSB using physical addressing when possible. On sun4v this is basically required since we point the hypervisor and the TSB walking hardware at these tables using physical addressing too. Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/tsb.h | 51 +++++++++++++++++++---------------------- arch/sparc/kernel/ktlb.S | 24 +++++++++---------- arch/sparc/kernel/vmlinux.lds.S | 10 ++++++++ arch/sparc/mm/init_64.c | 40 +++++++++++++++++++++++++++++++- 4 files changed, 85 insertions(+), 40 deletions(-) diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 83c571d..1a8afd1 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -133,29 +133,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sub TSB, 0x8, TSB; \ TSB_STORE(TSB, TAG); -#define KTSB_LOAD_QUAD(TSB, REG) \ - ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; - -#define KTSB_STORE(ADDR, VAL) \ - stxa VAL, [ADDR] ASI_N; - -#define KTSB_LOCK_TAG(TSB, REG1, REG2) \ -99: lduwa [TSB] ASI_N, REG1; \ - sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ - andcc REG1, REG2, %g0; \ - bne,pn %icc, 99b; \ - nop; \ - casa [TSB] ASI_N, REG1, REG2;\ - cmp REG1, REG2; \ - bne,pn %icc, 99b; \ - nop; \ - -#define KTSB_WRITE(TSB, TTE, TAG) \ - add TSB, 0x8, TSB; \ - stxa TTE, [TSB] ASI_N; \ - sub TSB, 0x8, TSB; \ - stxa TAG, [TSB] ASI_N; - /* Do a kernel page table walk. Leaves physical PTE pointer in * REG1. Jumps to FAIL_LABEL on early page table walk termination. * VADDR will not be clobbered, but REG2 will. @@ -239,6 +216,8 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; (KERNEL_TSB_SIZE_BYTES / 16) #define KERNEL_TSB4M_NENTRIES 4096 +#define KTSB_PHYS_SHIFT 15 + /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -247,13 +226,22 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * VADDR and TAG will be preserved and not clobbered by this macro. */ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ - sethi %hi(swapper_tsb), REG1; \ +661: sethi %hi(swapper_tsb), REG1; \ or REG1, %lo(swapper_tsb), REG1; \ + .section .swapper_tsb_phys_patch, "ax"; \ + .word 661b; \ + .previous; \ +661: nop; \ + .section .tsb_ldquad_phys_patch, "ax"; \ + .word 661b; \ + sllx REG1, KTSB_PHYS_SHIFT, REG1; \ + sllx REG1, KTSB_PHYS_SHIFT, REG1; \ + .previous; \ srlx VADDR, PAGE_SHIFT, REG2; \ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ - KTSB_LOAD_QUAD(REG2, REG3); \ + TSB_LOAD_QUAD(REG2, REG3); \ cmp REG3, TAG; \ be,a,pt %xcc, OK_LABEL; \ mov REG4, REG1; @@ -263,12 +251,21 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * we can make use of that for the index computation. 
*/ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ - sethi %hi(swapper_4m_tsb), REG1; \ +661: sethi %hi(swapper_4m_tsb), REG1; \ or REG1, %lo(swapper_4m_tsb), REG1; \ + .section .swapper_4m_tsb_phys_patch, "ax"; \ + .word 661b; \ + .previous; \ +661: nop; \ + .section .tsb_ldquad_phys_patch, "ax"; \ + .word 661b; \ + sllx REG1, KTSB_PHYS_SHIFT, REG1; \ + sllx REG1, KTSB_PHYS_SHIFT, REG1; \ + .previous; \ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ - KTSB_LOAD_QUAD(REG2, REG3); \ + TSB_LOAD_QUAD(REG2, REG3); \ cmp REG3, TAG; \ be,a,pt %xcc, OK_LABEL; \ mov REG4, REG1; diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 1d36147..79f3103 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -47,16 +47,16 @@ kvmap_itlb_tsb_miss: kvmap_itlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) - KTSB_LOCK_TAG(%g1, %g2, %g7) + TSB_LOCK_TAG(%g1, %g2, %g7) /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 mov 1, %g7 sllx %g7, TSB_TAG_INVALID_BIT, %g7 brgez,a,pn %g5, kvmap_itlb_longpath - KTSB_STORE(%g1, %g7) + TSB_STORE(%g1, %g7) - KTSB_WRITE(%g1, %g5, %g6) + TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -102,9 +102,9 @@ kvmap_itlb_longpath: kvmap_itlb_obp: OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath) - KTSB_LOCK_TAG(%g1, %g2, %g7) + TSB_LOCK_TAG(%g1, %g2, %g7) - KTSB_WRITE(%g1, %g5, %g6) + TSB_WRITE(%g1, %g5, %g6) ba,pt %xcc, kvmap_itlb_load nop @@ -112,17 +112,17 @@ kvmap_itlb_obp: kvmap_dtlb_obp: OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath) - KTSB_LOCK_TAG(%g1, %g2, %g7) + TSB_LOCK_TAG(%g1, %g2, %g7) - KTSB_WRITE(%g1, %g5, %g6) + TSB_WRITE(%g1, %g5, %g6) ba,pt %xcc, kvmap_dtlb_load nop .align 32 kvmap_dtlb_tsb4m_load: - KTSB_LOCK_TAG(%g1, %g2, %g7) - KTSB_WRITE(%g1, %g5, %g6) + TSB_LOCK_TAG(%g1, %g2, %g7) + TSB_WRITE(%g1, %g5, %g6) ba,pt %xcc, kvmap_dtlb_load nop @@ -222,16 +222,16 @@ kvmap_linear_patch: kvmap_dtlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) - KTSB_LOCK_TAG(%g1, %g2, %g7) + TSB_LOCK_TAG(%g1, %g2, %g7) /* Load and check PTE. 
*/ ldxa [%g5] ASI_PHYS_USE_EC, %g5 mov 1, %g7 sllx %g7, TSB_TAG_INVALID_BIT, %g7 brgez,a,pn %g5, kvmap_dtlb_longpath - KTSB_STORE(%g1, %g7) + TSB_STORE(%g1, %g7) - KTSB_WRITE(%g1, %g5, %g6) + TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 94a9548..0e16056 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -107,6 +107,16 @@ SECTIONS *(.sun4v_2insn_patch) __sun4v_2insn_patch_end = .; } + .swapper_tsb_phys_patch : { + __swapper_tsb_phys_patch = .; + *(.swapper_tsb_phys_patch) + __swapper_tsb_phys_patch_end = .; + } + .swapper_4m_tsb_phys_patch : { + __swapper_4m_tsb_phys_patch = .; + *(.swapper_4m_tsb_phys_patch) + __swapper_4m_tsb_phys_patch_end = .; + } .popc_3insn_patch : { __popc_3insn_patch = .; *(.popc_3insn_patch) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3fd8e18..adfac23 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1597,6 +1597,42 @@ static void __init tsb_phys_patch(void) static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; +static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) +{ + pa >>= KTSB_PHYS_SHIFT; + + while (start < end) { + unsigned int *ia = (unsigned int *)(unsigned long)*start; + + ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); + __asm__ __volatile__("flush %0" : : "r" (ia)); + + ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + + start++; + } +} + +static void ktsb_phys_patch(void) +{ + extern unsigned int __swapper_tsb_phys_patch; + extern unsigned int __swapper_tsb_phys_patch_end; + extern unsigned int __swapper_4m_tsb_phys_patch; + extern unsigned int __swapper_4m_tsb_phys_patch_end; + unsigned long ktsb_pa; + + ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); + patch_one_ktsb_phys(&__swapper_tsb_phys_patch, + &__swapper_tsb_phys_patch_end, ktsb_pa); +#ifndef CONFIG_DEBUG_PAGEALLOC + ktsb_pa = (kern_base + + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); + patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch, + &__swapper_4m_tsb_phys_patch_end, ktsb_pa); +#endif +} + static void __init sun4v_ktsb_init(void) { unsigned long ktsb_pa; @@ -1716,8 +1752,10 @@ void __init paging_init(void) sun4u_pgprot_init(); if (tlb_type == cheetah_plus || - tlb_type == hypervisor) + tlb_type == hypervisor) { tsb_phys_patch(); + ktsb_phys_patch(); + } if (tlb_type == hypervisor) { sun4v_patch_tlb_handlers(); -- cgit v1.1 From 961f65fc41cdc1f9099a6075258816c0db98e390 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 5 Aug 2011 02:38:27 -0700 Subject: sparc: Size mondo queues more sanely. There is currently no upper limit on the mondo queue sizes we'll use, which guarentees that we'll eventually his page allocation limits, and thus allocation failures, due to MAX_ORDER. Cap the sizes sanely, current limits are: CPU MONDO 2 * max_possible_cpus DEV MONDO 256 (basically NR_IRQS) RES MONDO 128 NRES MONDO 4 Signed-off-by: David S. 
Miller --- arch/sparc/kernel/mdesc.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 42f28c7..acaebb6 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -508,6 +508,8 @@ const char *mdesc_node_name(struct mdesc_handle *hp, u64 node) } EXPORT_SYMBOL(mdesc_node_name); +static u64 max_cpus = 64; + static void __init report_platform_properties(void) { struct mdesc_handle *hp = mdesc_grab(); @@ -543,8 +545,10 @@ static void __init report_platform_properties(void) if (v) printk("PLATFORM: watchdog-max-timeout [%llu ms]\n", *v); v = mdesc_get_property(hp, pn, "max-cpus", NULL); - if (v) - printk("PLATFORM: max-cpus [%llu]\n", *v); + if (v) { + max_cpus = *v; + printk("PLATFORM: max-cpus [%llu]\n", max_cpus); + } #ifdef CONFIG_SMP { @@ -715,7 +719,7 @@ static void __cpuinit set_proc_ids(struct mdesc_handle *hp) } static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask, - unsigned char def) + unsigned long def, unsigned long max) { u64 val; @@ -726,6 +730,9 @@ static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask, if (!val || val >= 64) goto use_default; + if (val > max) + val = max; + *mask = ((1U << val) * 64U) - 1U; return; @@ -736,19 +743,28 @@ use_default: static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp, struct trap_per_cpu *tb) { + static int printed; const u64 *val; val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL); - get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); + get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7, ilog2(max_cpus * 2)); val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL); - get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); + get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7, 8); val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL); - get_one_mondo_bits(val, &tb->resum_qmask, 6); + get_one_mondo_bits(val, &tb->resum_qmask, 6, 7); val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL); - get_one_mondo_bits(val, &tb->nonresum_qmask, 2); + get_one_mondo_bits(val, &tb->nonresum_qmask, 2, 2); + if (!printed++) { + pr_info("SUN4V: Mondo queue sizes " + "[cpu(%u) dev(%u) r(%u) nr(%u)]\n", + tb->cpu_mondo_qmask + 1, + tb->dev_mondo_qmask + 1, + tb->resum_qmask + 1, + tb->nonresum_qmask + 1); + } } static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask) -- cgit v1.1 From 39060a07781b4930656752943cf5d66376d0533c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Aug 2011 10:56:29 +0100 Subject: Revert "drm/i915: Try enabling RC6 by default (again)" This reverts commit 4e20fa65a3ea789510eed1a15deb9e8aab2b8202. Francesco Allertsen still has a broken configuration. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 60e4b9e..ce045a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -62,7 +62,7 @@ module_param_named(semaphores, i915_semaphores, int, 0600); MODULE_PARM_DESC(semaphores, "Use semaphores for inter-ring sync (default: false)"); -unsigned int i915_enable_rc6 __read_mostly = 1; +unsigned int i915_enable_rc6 __read_mostly = 0; module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); MODULE_PARM_DESC(i915_enable_rc6, "Enable power-saving render C-state 6 (default: true)"); -- cgit v1.1 From 728ffb86f10873aaf4abd26dde691ee40ae731fe Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 4 Aug 2011 14:07:38 +0000 Subject: net: sendmmsg should only return an error if no messages were sent sendmmsg uses a similar error return strategy as recvmmsg but it turns out to be a confusing way to communicate errors. The current code stores the error code away and returns it on the next sendmmsg call. This means a call with completely valid arguments could get an error from a previous call. Change things so we only return an error if no datagrams could be sent. If less than the requested number of messages were sent, the application must retry starting at the first failed one and if the problem is persistent the error will be returned. This matches the behaviour of other syscalls like read/write - it is not an error if less than the requested number of elements are sent. Signed-off-by: Anton Blanchard Cc: stable [3.0+] Signed-off-by: David S. Miller --- net/socket.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/net/socket.c b/net/socket.c index b1cbbcd..e4ed235 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2005,12 +2005,9 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; - err = sock_error(sock->sk); - if (err) - goto out_put; - entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; + err = 0; while (datagrams < vlen) { /* @@ -2037,29 +2034,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, ++datagrams; } -out_put: fput_light(sock->file, fput_needed); - if (err == 0) - return datagrams; - - if (datagrams != 0) { - /* - * We may send less entries than requested (vlen) if the - * sock is non blocking... - */ - if (err != -EAGAIN) { - /* - * ... or if sendmsg returns an error after we - * send some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - + /* We only return an error if no datagrams were able to be sent */ + if (datagrams != 0) return datagrams; - } return err; } -- cgit v1.1 From 98382f419f32d2c12d021943b87dea555677144b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 4 Aug 2011 14:07:39 +0000 Subject: net: Cap number of elements for sendmmsg To limit the amount of time we can spend in sendmmsg, cap the number of elements to UIO_MAXIOV (currently 1024). For error handling an application using sendmmsg needs to retry at the first unsent message, so capping is simpler and requires less application logic than returning EINVAL. Signed-off-by: Anton Blanchard Cc: stable [3.0+] Signed-off-by: David S. 
Miller --- net/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/socket.c b/net/socket.c index e4ed235..b5c6de4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1999,6 +1999,9 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; + if (vlen > UIO_MAXIOV) + vlen = UIO_MAXIOV; + datagrams = 0; sock = sockfd_lookup_light(fd, &err, &fput_needed); -- cgit v1.1 From c71d8ebe7a4496fb7231151cb70a6baa0cb56f9a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 4 Aug 2011 14:07:40 +0000 Subject: net: Fix security_socket_sendmsg() bypass problem. The sendmmsg() introduced by commit 228e548e "net: Add sendmmsg socket system call" is capable of sending to multiple different destination addresses. SMACK is using destination's address for checking sendmsg() permission. However, security_socket_sendmsg() is called for only once even if multiple different destination addresses are passed to sendmmsg(). Therefore, we need to call security_socket_sendmsg() for each destination address rather than only the first destination address. Since calling security_socket_sendmsg() every time when only single destination address was passed to sendmmsg() is a waste of time, omit calling security_socket_sendmsg() unless destination address of previous datagram and that of current datagram differs. Signed-off-by: Tetsuo Handa Acked-by: Anton Blanchard Cc: stable [3.0+] Signed-off-by: David S. Miller --- net/socket.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/net/socket.c b/net/socket.c index b5c6de4..24a7740 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) +struct used_address { + struct sockaddr_storage name; + unsigned int name_len; +}; + static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned flags, int nosec) + struct msghdr *msg_sys, unsigned flags, + struct used_address *used_address) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1953,8 +1959,28 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) msg_sys->msg_flags |= MSG_DONTWAIT; - err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, - total_len); + /* + * If this is sendmmsg() and current destination address is same as + * previously succeeded address, omit asking LSM's decision. + * used_address->name_len is initialized to UINT_MAX so that the first + * destination address never matches. + */ + if (used_address && used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg->msg_name, + used_address->name_len)) { + err = sock_sendmsg_nosec(sock, msg_sys, total_len); + goto out_freectl; + } + err = sock_sendmsg(sock, msg_sys, total_len); + /* + * If this is sendmmsg() and sending to current destination address was + * successful, remember it. 
+ */ + if (used_address && err >= 0) { + used_address->name_len = msg_sys->msg_namelen; + memcpy(&used_address->name, msg->msg_name, + used_address->name_len); + } out_freectl: if (ctl_buf != ctl) @@ -1979,7 +2005,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (!sock) goto out; - err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); + err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); fput_light(sock->file, fput_needed); out: @@ -1998,6 +2024,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct mmsghdr __user *entry; struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; + struct used_address used_address; if (vlen > UIO_MAXIOV) vlen = UIO_MAXIOV; @@ -2008,24 +2035,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; + used_address.name_len = UINT_MAX; entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; err = 0; while (datagrams < vlen) { - /* - * No need to ask LSM for more than the first datagram. - */ if (MSG_CMSG_COMPAT & flags) { err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { err = __sys_sendmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = put_user(err, &entry->msg_len); -- cgit v1.1 From 20e72a44098641f0c4de34a31287a93e006afb5b Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 4 Aug 2011 01:05:12 +0000 Subject: mlx4: decreasing ref count when removing mac For older FW versions, when a Mac address removed from Mac table, we should set 0 for reference count for the corresponding Mac index. Fixes a bug where removing Mac from the table still left that entry as invalid. Signed-off-by: Yevgeny Petrilin Tested-by: Roland Dreier Signed-off-by: David S. Miller --- drivers/net/mlx4/port.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 1f95afd..609e0ec 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -258,9 +258,12 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn) if (validate_index(dev, table, index)) goto out; - table->entries[index] = 0; - mlx4_set_port_mac_table(dev, port, table->entries); - --table->total; + /* Check whether this address has reference count */ + if (!(--table->refs[index])) { + table->entries[index] = 0; + mlx4_set_port_mac_table(dev, port, table->entries); + --table->total; + } out: mutex_unlock(&table->mutex); } -- cgit v1.1 From c15fea2d8ca834dae491339c47e4fb3c81428190 Mon Sep 17 00:00:00 2001 From: Max Matveev Date: Fri, 5 Aug 2011 03:56:30 -0700 Subject: ipv6: check for IPv4 mapped addresses when connecting IPv6 sockets When support for binding to 'mapped INADDR_ANY (::ffff.0.0.0.0)' was added in 0f8d3c7ac3693d7b6c731bf2159273a59bf70e12 the rest of the code wasn't told so now it's possible to bind IPv6 datagram socket to ::ffff.0.0.0.0, connect it to another IPv4 address and it will all work except for getsockhame() which does not return the local address as expected. To give getsockname() something to work with check for 'mapped INADDR_ANY' when connecting and update the in-core source addresses appropriately. Signed-off-by: Max Matveev Signed-off-by: David S. 
Miller --- net/ipv6/datagram.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 1656033..9ef1831 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -33,6 +33,11 @@ #include #include +static inline int ipv6_mapped_addr_any(const struct in6_addr *a) +{ + return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0)); +} + int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; @@ -102,10 +107,12 @@ ipv4_connected: ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); - if (ipv6_addr_any(&np->saddr)) + if (ipv6_addr_any(&np->saddr) || + ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&np->rcv_saddr) || + ipv6_mapped_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); if (sk->sk_prot->rehash) -- cgit v1.1 From c00c8aa2d976e9ed1d12a57b42d6e9b27efb7abe Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 4 Aug 2011 18:42:10 -0400 Subject: xen/trace: Fix compile error when CONFIG_XEN_PRIVILEGED_GUEST is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit with CONFIG_XEN and CONFIG_FTRACE set we get this: arch/x86/xen/trace.c:22: error: ‘__HYPERVISOR_console_io’ undeclared here (not in a function) arch/x86/xen/trace.c:22: error: array index in initializer not of integer type arch/x86/xen/trace.c:22: error: (near initialization for ‘xen_hypercall_names’) arch/x86/xen/trace.c:23: error: ‘__HYPERVISOR_physdev_op_compat’ undeclared here (not in a function) Issue was that the definitions of __HYPERVISOR were not pulled if CONFIG_XEN_PRIVILEGED_GUEST was not set. Reported-by: Randy Dunlap Acked-by: Randy Dunlap Acked-by: Ingo Molnar Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c index 734beba..520022d 100644 --- a/arch/x86/xen/trace.c +++ b/arch/x86/xen/trace.c @@ -1,4 +1,5 @@ #include +#include #define N(x) [__HYPERVISOR_##x] = "("#x")" static const char *xen_hypercall_names[] = { -- cgit v1.1 From f9e8c45002cacad536b338dfa9e910e341a49c31 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Aug 2011 10:28:01 -0400 Subject: cifs: convert prefixpath delimiters in cifs_build_path_to_root Regression from 2.6.39... The delimiters in the prefixpath are not being converted based on whether posix paths are in effect. 
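In the old code removed below, only the DFS tree name had its delimiters switched and the prepath was copied in untouched; the fix assembles both pieces first and then rewrites the whole path with a single convert_delimiter() call. As a rough user-space sketch of what that delimiter normalization amounts to (illustrative only, not the cifs helper itself):

#include <stdio.h>

/* Sketch: rewrite every separator in the path to the style the mount
 * uses, '/' for POSIX-style paths, '\' otherwise. */
static void convert_delimiter_sketch(char *path, char delim)
{
	char old = (delim == '/') ? '\\' : '/';

	for (; *path; path++)
		if (*path == old)
			*path = delim;
}

int main(void)
{
	char full_path[] = "\\share\\prefix\\dir";

	convert_delimiter_sketch(full_path, '/');	/* POSIX-style mount */
	printf("%s\n", full_path);			/* prints /share/prefix/dir */
	return 0;
}

With POSIX paths disabled the same walk simply runs in the other direction, mapping '/' back to '\'.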
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=727834 Reported-and-Tested-by: Iain Arnell Reported-by: Patrick Oltmann Cc: Pavel Shilovsky Cc: stable@kernel.org Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9b018c8..a7b2dcd 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, if (full_path == NULL) return full_path; - if (dfsplen) { + if (dfsplen) strncpy(full_path, tcon->treeName, dfsplen); - /* switch slash direction in prepath depending on whether - * windows or posix style path names - */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { - int i; - for (i = 0; i < dfsplen; i++) { - if (full_path[i] == '\\') - full_path[i] = '/'; - } - } - } strncpy(full_path + dfsplen, vol->prepath, pplen); + convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); full_path[dfsplen + pplen] = 0; /* add trailing null */ return full_path; } -- cgit v1.1 From 80975d21aae2136ccae1ce914a1602dc1d8b0795 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 5 Aug 2011 09:02:40 -0400 Subject: cifs: cope with negative dentries in cifs_get_root The loop around lookup_one_len doesn't handle the case where it might return a negative dentry, which can cause an oops on the next pass through the loop. Check for that and break out of the loop with an error of -ENOENT if there is one. Fixes the panic reported here: https://bugzilla.redhat.com/show_bug.cgi?id=727927 Reported-by: TR Bentley Reported-by: Iain Arnell Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 212e562..f93eb94 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -563,6 +563,10 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) mutex_unlock(&dir->i_mutex); dput(dentry); dentry = child; + if (!dentry->d_inode) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + } } while (!IS_ERR(dentry)); _FreeXid(xid); kfree(full_path); -- cgit v1.1 From 7b8aca65db5dd1aaa6dc1e11f6bfcc0ecd6bc8a4 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Tue, 31 May 2011 14:52:22 +0800 Subject: acer-wmi: schedule threeg and interface sysfs for feature removal we can now autodetect internal 3G device and already have the threeg rfkill device. So, we plan to remove threeg sysfs support for it's no longer necessary. We also plan to remove interface sysfs file that exposed which ACPI-WMI interface that was used by acer-wmi driver. It will replaced by information log when acer-wmi initial. We keep it around for userspace compatibility reasons, schedule removal in 2012. Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Acked-by: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- Documentation/feature-removal-schedule.txt | 11 +++++++++++ drivers/platform/x86/acer-wmi.c | 11 +++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 43f4809..c4a6e14 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -581,3 +581,14 @@ Why: This driver has been superseded by g_mass_storage. 
Who: Alan Stern ---------------------------- + +What: threeg and interface sysfs files in /sys/devices/platform/acer-wmi +When: 2012 +Why: In 3.0, we can now autodetect internal 3G device and already have + the threeg rfkill device. So, we plan to remove threeg sysfs support + for it's no longer necessary. + + We also plan to remove interface sysfs file that exposed which ACPI-WMI + interface that was used by acer-wmi driver. It will replaced by + information log when acer-wmi initial. +Who: Lee, Chun-Yi diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index e1c4938..089db86 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1400,6 +1400,9 @@ static ssize_t show_bool_threeg(struct device *dev, { u32 result; \ acpi_status status; + + pr_info("This threeg sysfs will be removed in 2012" + " - used by: %s\n", current->comm); if (wmi_has_guid(WMID_GUID3)) status = wmid3_get_device_status(&result, ACER_WMID3_GDS_THREEG); @@ -1415,8 +1418,10 @@ static ssize_t set_bool_threeg(struct device *dev, { u32 tmp = simple_strtoul(buf, NULL, 10); acpi_status status = set_u32(tmp, ACER_CAP_THREEG); - if (ACPI_FAILURE(status)) - return -EINVAL; + pr_info("This threeg sysfs will be removed in 2012" + " - used by: %s\n", current->comm); + if (ACPI_FAILURE(status)) + return -EINVAL; return count; } static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg, @@ -1425,6 +1430,8 @@ static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg, static ssize_t show_interface(struct device *dev, struct device_attribute *attr, char *buf) { + pr_info("This interface sysfs will be removed in 2012" + " - used by: %s\n", current->comm); switch (interface->type) { case ACER_AMW0: return sprintf(buf, "AMW0\n"); -- cgit v1.1 From 2605d753e488330f61000f1b2dc72d1668fba4ac Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 8 Jun 2011 14:56:42 +0800 Subject: platform-drivers-x86: dell-laptop: Remove unneeded mutex_init() for buffer_mutex DEFINE_MUTEX() will automatically initialize buffer_mutex, no need to call mutex_init() in dell_init(). Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/dell-laptop.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index e39ab1d..f31fa4e 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -612,7 +612,6 @@ static int __init dell_init(void) if (!bufferpage) goto fail_buffer; buffer = page_address(bufferpage); - mutex_init(&buffer_mutex); ret = dell_setup_rfkill(); -- cgit v1.1 From b06862ba6b98bf05f92772bbe36971a483e35fdf Mon Sep 17 00:00:00 2001 From: Julien Valroff Date: Fri, 24 Jun 2011 08:47:17 -0400 Subject: acerhdf: add support for Aspire 1810TZ BIOS v1.3314 Would you please consider applying the following patch adding support for the Aspire 1810TZ BIOS v.1.3314 version to the acerhdf module and avoids the following error: acerhdf: unknown (unsupported) BIOS version Acer/Aspire 1810TZ/v1.3314, ple= ase report, aborting! Not sure about the other Aspire models, but it seems at least 1810T should also be updated. 
Signed-off-by: Julien Valroff --- drivers/platform/x86/acerhdf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index fca3489..d339756 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -182,6 +182,7 @@ static const struct bios_settings_t bios_tbl[] = { {"Acer", "Aspire 1810T", "v1.3308", 0x55, 0x58, {0x9e, 0x00} }, {"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00} }, {"Acer", "Aspire 1810T", "v1.3310", 0x55, 0x58, {0x9e, 0x00} }, + {"Acer", "Aspire 1810TZ", "v1.3314", 0x55, 0x58, {0x9e, 0x00} }, /* Acer 531 */ {"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00} }, /* Gateway */ -- cgit v1.1 From 1a04d8ffc04c10fc50124f311d4c8c391f9a04ca Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 21 Jun 2011 12:00:32 -0500 Subject: acer-wmi: Add support for Aspire 1830 wlan hotkey Signed-off-by: Seth Forshee Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 089db86..fa715302 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -99,6 +99,7 @@ enum acer_wmi_event_ids { static const struct key_entry acer_wmi_keymap[] = { {KE_KEY, 0x01, {KEY_WLAN} }, /* WiFi */ {KE_KEY, 0x03, {KEY_WLAN} }, /* WiFi */ + {KE_KEY, 0x04, {KEY_WLAN} }, /* WiFi */ {KE_KEY, 0x12, {KEY_BLUETOOTH} }, /* BT */ {KE_KEY, 0x21, {KEY_PROG1} }, /* Backup */ {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */ -- cgit v1.1 From 38803141bf6ccf8f20d05c6f48e6e12c3650ce9a Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Fri, 10 Jun 2011 15:24:26 +0800 Subject: msi-laptop: add MSI U270 netbook to module alias and scm list After test, msi-laptop driver also can support MSI U270 netbook. So, add MSI U270's dmi information to module alias and scm table for support this machine. Tested on MSI U270 netbook. Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/msi-laptop.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 3ff629d..f204643 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -538,6 +538,15 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { }, .callback = dmi_check_cb }, + { + .ident = "MSI U270", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "U270 series"), + }, + .callback = dmi_check_cb + }, { } }; @@ -996,3 +1005,4 @@ MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N034:*"); MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N051:*"); MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N014:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnCR620:*"); +MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnU270series:*"); -- cgit v1.1 From 33009557bd9397c446a59e4cc91059a8e84c046b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2011 15:16:43 -0400 Subject: Add KEY_MICMUTE and enable it on Lenovo X220 I suspect that this works on T410. 
Signed-off-by: Andy Lutomirski Signed-off-by: Matthew Garrett --- drivers/platform/x86/thinkpad_acpi.c | 11 ++++++++++- include/linux/input.h | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 26c5b11..7bd829f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3186,8 +3186,17 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */ /* (assignments unknown, please report if found) */ + KEY_UNKNOWN, KEY_UNKNOWN, + + /* + * The mic mute button only sends 0x1a. It does not + * automatically mute the mic or change the mute light. + */ + KEY_MICMUTE, /* 0x1a: Mic mute (since ?400 or so) */ + + /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, - KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, + KEY_UNKNOWN, }, }; diff --git a/include/linux/input.h b/include/linux/input.h index 068784e..a637e78 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -438,6 +438,8 @@ struct input_keymap_entry { #define KEY_WIMAX 246 #define KEY_RFKILL 247 /* Key that controls all radios */ +#define KEY_MICMUTE 248 /* Mute / unmute the microphone */ + /* Code 255 is reserved for special needs of AT keyboard driver */ #define BTN_MISC 0x100 -- cgit v1.1 From 3371f48167e04017125dd08cc1f70fa93d2f2e17 Mon Sep 17 00:00:00 2001 From: Ike Panhc Date: Thu, 30 Jun 2011 19:50:40 +0800 Subject: ideapad: define cfg bits and create sysfs node for cfg Create /sys/devices/platform/ideapad/cfg for showing cfg value. Signed-off-by: Ike Panhc Signed-off-by: Matthew Garrett --- .../ABI/testing/sysfs-platform-ideapad-laptop | 17 ++++++++ drivers/platform/x86/ideapad-laptop.c | 51 +++++++++++++++------- 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop index 807fca2..ff53183 100644 --- a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop +++ b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop @@ -4,3 +4,20 @@ KernelVersion: 2.6.37 Contact: "Ike Panhc " Description: Control the power of camera module. 1 means on, 0 means off. + +What: /sys/devices/platform/ideapad/cfg +Date: Jun 2011 +KernelVersion: 3.1 +Contact: "Ike Panhc " +Description: + Ideapad capability bits. 
+ Bit 8-10: 1 - Intel graphic only + 2 - ATI graphic only + 3 - Nvidia graphic only + 4 - Intel and ATI graphic + 5 - Intel and Nvidia graphic + Bit 16: Bluetooth exist (1 for exist) + Bit 17: 3G exist (1 for exist) + Bit 18: Wifi exist (1 for exist) + Bit 19: Camera exist (1 for exist) + diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index bfdda33..42b9ba7 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -35,10 +35,15 @@ #define IDEAPAD_RFKILL_DEV_NUM (3) +#define CFG_BT_BIT (16) +#define CFG_3G_BIT (17) +#define CFG_WIFI_BIT (18) + struct ideapad_private { struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; struct platform_device *platform_device; struct input_dev *inputdev; + unsigned long cfg; }; static acpi_handle ideapad_handle; @@ -155,7 +160,7 @@ static int write_ec_cmd(acpi_handle handle, int cmd, unsigned long data) } /* - * camera power + * sysfs */ static ssize_t show_ideapad_cam(struct device *dev, struct device_attribute *attr, @@ -186,6 +191,27 @@ static ssize_t store_ideapad_cam(struct device *dev, static DEVICE_ATTR(camera_power, 0644, show_ideapad_cam, store_ideapad_cam); +static ssize_t show_ideapad_cfg(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ideapad_private *priv = dev_get_drvdata(dev); + + return sprintf(buf, "0x%.8lX\n", priv->cfg); +} + +static DEVICE_ATTR(cfg, 0444, show_ideapad_cfg, NULL); + +static struct attribute *ideapad_attributes[] = { + &dev_attr_camera_power.attr, + &dev_attr_cfg.attr, + NULL +}; + +static struct attribute_group ideapad_attribute_group = { + .attrs = ideapad_attributes +}; + /* * Rfkill */ @@ -197,9 +223,9 @@ struct ideapad_rfk_data { }; const struct ideapad_rfk_data ideapad_rfk_data[] = { - { "ideapad_wlan", 18, 0x15, RFKILL_TYPE_WLAN }, - { "ideapad_bluetooth", 16, 0x17, RFKILL_TYPE_BLUETOOTH }, - { "ideapad_3g", 17, 0x20, RFKILL_TYPE_WWAN }, + { "ideapad_wlan", CFG_WIFI_BIT, 0x15, RFKILL_TYPE_WLAN }, + { "ideapad_bluetooth", CFG_BT_BIT, 0x17, RFKILL_TYPE_BLUETOOTH }, + { "ideapad_3g", CFG_3G_BIT, 0x20, RFKILL_TYPE_WWAN }, }; static int ideapad_rfk_set(void *data, bool blocked) @@ -280,15 +306,6 @@ static void __devexit ideapad_unregister_rfkill(struct acpi_device *adevice, /* * Platform device */ -static struct attribute *ideapad_attributes[] = { - &dev_attr_camera_power.attr, - NULL -}; - -static struct attribute_group ideapad_attribute_group = { - .attrs = ideapad_attributes -}; - static int __devinit ideapad_platform_init(struct ideapad_private *priv) { int result; @@ -393,10 +410,11 @@ MODULE_DEVICE_TABLE(acpi, ideapad_device_ids); static int __devinit ideapad_acpi_add(struct acpi_device *adevice) { - int ret, i, cfg; + int ret, i; + unsigned long cfg; struct ideapad_private *priv; - if (read_method_int(adevice->handle, "_CFG", &cfg)) + if (read_method_int(adevice->handle, "_CFG", (int *)&cfg)) return -ENODEV; priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -404,6 +422,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice) return -ENOMEM; dev_set_drvdata(&adevice->dev, priv); ideapad_handle = adevice->handle; + priv->cfg = cfg; ret = ideapad_platform_init(priv); if (ret) @@ -414,7 +433,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice) goto input_failed; for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) { - if (test_bit(ideapad_rfk_data[i].cfgbit, (unsigned long *)&cfg)) + if (test_bit(ideapad_rfk_data[i].cfgbit, &cfg)) ideapad_register_rfkill(adevice, i); else priv->rfk[i] = NULL; 
-- cgit v1.1 From a84511f7fbeb37e26aacb9c72f5a21ffc24e909e Mon Sep 17 00:00:00 2001 From: Ike Panhc Date: Thu, 30 Jun 2011 19:50:47 +0800 Subject: ideapad: let camera_power node invisiable if no camera Signed-off-by: Ike Panhc Signed-off-by: Matthew Garrett --- drivers/platform/x86/ideapad-laptop.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 42b9ba7..1612abd 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -38,6 +38,7 @@ #define CFG_BT_BIT (16) #define CFG_3G_BIT (17) #define CFG_WIFI_BIT (18) +#define CFG_CAMERA_BIT (19) struct ideapad_private { struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; @@ -208,7 +209,24 @@ static struct attribute *ideapad_attributes[] = { NULL }; +static mode_t ideapad_is_visible(struct kobject *kobj, + struct attribute *attr, + int idx) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct ideapad_private *priv = dev_get_drvdata(dev); + bool supported; + + if (attr == &dev_attr_camera_power.attr) + supported = test_bit(CFG_CAMERA_BIT, &(priv->cfg)); + else + supported = true; + + return supported ? attr->mode : 0; +} + static struct attribute_group ideapad_attribute_group = { + .is_visible = ideapad_is_visible, .attrs = ideapad_attributes }; -- cgit v1.1 From a4ecbb8ae7be16497db2f984ee7a3ffec0f164c3 Mon Sep 17 00:00:00 2001 From: Ike Panhc Date: Thu, 30 Jun 2011 19:50:52 +0800 Subject: ideapad: add backlight driver When acpi_backlight=vendor in cmdline or no backlight support in acpi video device, ideapad-laptop will register backlight device and control brightness and backlight power via the command in VPC2004. Signed-off-by: Ike Panhc Signed-off-by: Matthew Garrett --- drivers/platform/x86/ideapad-laptop.c | 123 ++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 1612abd..8811c68 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #define IDEAPAD_RFKILL_DEV_NUM (3) @@ -44,6 +46,7 @@ struct ideapad_private { struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; struct platform_device *platform_device; struct input_dev *inputdev; + struct backlight_device *blightdev; unsigned long cfg; }; @@ -309,8 +312,7 @@ static int __devinit ideapad_register_rfkill(struct acpi_device *adevice, return 0; } -static void __devexit ideapad_unregister_rfkill(struct acpi_device *adevice, - int dev) +static void ideapad_unregister_rfkill(struct acpi_device *adevice, int dev) { struct ideapad_private *priv = dev_get_drvdata(&adevice->dev); @@ -418,6 +420,98 @@ static void ideapad_input_report(struct ideapad_private *priv, } /* + * backlight + */ +static int ideapad_backlight_get_brightness(struct backlight_device *blightdev) +{ + unsigned long now; + + if (read_ec_data(ideapad_handle, 0x12, &now)) + return -EIO; + return now; +} + +static int ideapad_backlight_update_status(struct backlight_device *blightdev) +{ + if (write_ec_cmd(ideapad_handle, 0x13, blightdev->props.brightness)) + return -EIO; + if (write_ec_cmd(ideapad_handle, 0x33, + blightdev->props.power == FB_BLANK_POWERDOWN ? 
0 : 1)) + return -EIO; + + return 0; +} + +static const struct backlight_ops ideapad_backlight_ops = { + .get_brightness = ideapad_backlight_get_brightness, + .update_status = ideapad_backlight_update_status, +}; + +static int ideapad_backlight_init(struct ideapad_private *priv) +{ + struct backlight_device *blightdev; + struct backlight_properties props; + unsigned long max, now, power; + + if (read_ec_data(ideapad_handle, 0x11, &max)) + return -EIO; + if (read_ec_data(ideapad_handle, 0x12, &now)) + return -EIO; + if (read_ec_data(ideapad_handle, 0x18, &power)) + return -EIO; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.max_brightness = max; + props.type = BACKLIGHT_PLATFORM; + blightdev = backlight_device_register("ideapad", + &priv->platform_device->dev, + priv, + &ideapad_backlight_ops, + &props); + if (IS_ERR(blightdev)) { + pr_err("Could not register backlight device\n"); + return PTR_ERR(blightdev); + } + + priv->blightdev = blightdev; + blightdev->props.brightness = now; + blightdev->props.power = power ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; + backlight_update_status(blightdev); + + return 0; +} + +static void ideapad_backlight_exit(struct ideapad_private *priv) +{ + if (priv->blightdev) + backlight_device_unregister(priv->blightdev); + priv->blightdev = NULL; +} + +static void ideapad_backlight_notify_power(struct ideapad_private *priv) +{ + unsigned long power; + struct backlight_device *blightdev = priv->blightdev; + + if (read_ec_data(ideapad_handle, 0x18, &power)) + return; + blightdev->props.power = power ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN; +} + +static void ideapad_backlight_notify_brightness(struct ideapad_private *priv) +{ + unsigned long now; + + /* if we control brightness via acpi video driver */ + if (priv->blightdev == NULL) { + read_ec_data(ideapad_handle, 0x12, &now); + return; + } + + backlight_force_update(priv->blightdev, BACKLIGHT_UPDATE_HOTKEY); +} + +/* * module init/exit */ static const struct acpi_device_id ideapad_device_ids[] = { @@ -458,8 +552,17 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice) } ideapad_sync_rfk_state(adevice); + if (!acpi_video_backlight_support()) { + ret = ideapad_backlight_init(priv); + if (ret && ret != -ENODEV) + goto backlight_failed; + } + return 0; +backlight_failed: + for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) + ideapad_unregister_rfkill(adevice, i); input_failed: ideapad_platform_exit(priv); platform_failed: @@ -472,6 +575,7 @@ static int __devexit ideapad_acpi_remove(struct acpi_device *adevice, int type) struct ideapad_private *priv = dev_get_drvdata(&adevice->dev); int i; + ideapad_backlight_exit(priv); for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) ideapad_unregister_rfkill(adevice, i); ideapad_input_exit(priv); @@ -496,12 +600,19 @@ static void ideapad_acpi_notify(struct acpi_device *adevice, u32 event) vpc1 = (vpc2 << 8) | vpc1; for (vpc_bit = 0; vpc_bit < 16; vpc_bit++) { if (test_bit(vpc_bit, &vpc1)) { - if (vpc_bit == 9) + switch (vpc_bit) { + case 9: ideapad_sync_rfk_state(adevice); - else if (vpc_bit == 4) - read_ec_data(handle, 0x12, &vpc2); - else + break; + case 4: + ideapad_backlight_notify_brightness(priv); + break; + case 2: + ideapad_backlight_notify_power(priv); + break; + default: ideapad_input_report(priv, vpc_bit); + } } } } -- cgit v1.1 From 49979d091d1847823c064301da1ec173619ddd92 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:26 +0200 Subject: asus-wmi: fix hwmon/pwm1 The code was completly broken, and should never had been 
sent to the kernel. That's what happens when you write code without hardware to test it. Cc: stable@kernel.org Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 65b66aa..bcb9fb4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -797,8 +797,8 @@ exit: * Hwmon device */ static ssize_t asus_hwmon_pwm1(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct asus_wmi *asus = dev_get_drvdata(dev); u32 value; @@ -809,7 +809,7 @@ static ssize_t asus_hwmon_pwm1(struct device *dev, if (err < 0) return err; - value |= 0xFF; + value &= 0xFF; if (value == 1) /* Low Speed */ value = 85; @@ -869,7 +869,7 @@ static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj, * - reverved bits are non-zero * - sfun and presence bit are not set */ - if (value != ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000 + if (value == ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000 || (!asus->sfun && !(value & ASUS_WMI_DSTS_PRESENCE_BIT))) ok = false; } @@ -904,6 +904,7 @@ static int asus_wmi_hwmon_init(struct asus_wmi *asus) pr_err("Could not register asus hwmon device\n"); return PTR_ERR(hwmon); } + dev_set_drvdata(hwmon, asus); asus->hwmon_device = hwmon; result = sysfs_create_group(&hwmon->kobj, &hwmon_attribute_group); if (result) -- cgit v1.1 From c4453f6a7ceff330ff37a9712ee8305fc3f8e9b6 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 1 Jul 2011 11:34:27 +0200 Subject: asus-wmi: Add callback for hotkey filtering This is required for the T101MT home key, which behaves differently than other hotkeys. 
Signed-off-by: Seth Forshee Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 12 +++++++++++- drivers/platform/x86/asus-wmi.h | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index bcb9fb4..cea9fdc 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1061,6 +1061,8 @@ static void asus_wmi_notify(u32 value, void *context) acpi_status status; int code; int orig_code; + unsigned int key_value = 1; + bool autorelease = 1; status = wmi_get_event_data(value, &response); if (status != AE_OK) { @@ -1076,6 +1078,13 @@ static void asus_wmi_notify(u32 value, void *context) code = obj->integer.value; orig_code = code; + if (asus->driver->key_filter) { + asus->driver->key_filter(asus->driver, &code, &key_value, + &autorelease); + if (code == ASUS_WMI_KEY_IGNORE) + goto exit; + } + if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) code = NOTIFY_BRNUP_MIN; else if (code >= NOTIFY_BRNDOWN_MIN && @@ -1085,7 +1094,8 @@ static void asus_wmi_notify(u32 value, void *context) if (code == NOTIFY_BRNUP_MIN || code == NOTIFY_BRNDOWN_MIN) { if (!acpi_video_backlight_support()) asus_wmi_backlight_notify(asus, orig_code); - } else if (!sparse_keymap_report_event(asus->inputdev, code, 1, true)) + } else if (!sparse_keymap_report_event(asus->inputdev, code, + key_value, autorelease)) pr_info("Unknown key %x pressed\n", code); exit: diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index c044522..4da6103 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -29,6 +29,8 @@ #include +#define ASUS_WMI_KEY_IGNORE (-1) + struct module; struct key_entry; struct asus_wmi; @@ -44,6 +46,10 @@ struct asus_wmi_driver { const struct key_entry *keymap; const char *input_name; const char *input_phys; + /* Returns new code, value, and autorelease values in arguments. + * Return ASUS_WMI_KEY_IGNORE in code if event should be ignored. */ + void (*key_filter) (struct asus_wmi_driver *driver, int *code, + unsigned int *value, bool *autorelease); int (*probe) (struct platform_device *device); void (*quirks) (struct asus_wmi_driver *driver); -- cgit v1.1 From 6ae8b807371ba8d5b42648721c528582295136f7 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Fri, 1 Jul 2011 11:34:28 +0200 Subject: eeepc-wmi: Add support for T101MT Home/Express Gate key This key is different than other hotkeys, having seperate scan codes for press, release, and hold, so it requires some special filtering. Press and release events are passed on, and hold events are ignored since sparse-keymap does not support hardware autorepeat. Note that "Home" in the context of this button doesn't mean the same thing as the usual Home key, and it really isn't clear at all what is meant by "Home". The manufacurer's description of the button indicates that it should launch some sort of touch screen settings interface on short press and apply a desktop rotation on long press. 
Signed-off-by: Seth Forshee Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/eeepc-wmi.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c index 4aa867a..f41a977 100644 --- a/drivers/platform/x86/eeepc-wmi.c +++ b/drivers/platform/x86/eeepc-wmi.c @@ -56,6 +56,11 @@ MODULE_PARM_DESC(hotplug_wireless, "If your laptop needs that, please report to " "acpi4asus-user@lists.sourceforge.net."); +/* Values for T101MT "Home" key */ +#define HOME_PRESS 0xe4 +#define HOME_HOLD 0xea +#define HOME_RELEASE 0xe5 + static const struct key_entry eeepc_wmi_keymap[] = { /* Sleep already handled via generic ACPI code */ { KE_KEY, 0x30, { KEY_VOLUMEUP } }, @@ -71,6 +76,7 @@ static const struct key_entry eeepc_wmi_keymap[] = { { KE_KEY, 0xcc, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0xe0, { KEY_PROG1 } }, /* Task Manager */ { KE_KEY, 0xe1, { KEY_F14 } }, /* Change Resolution */ + { KE_KEY, HOME_PRESS, { KEY_CONFIG } }, /* Home/Express gate key */ { KE_KEY, 0xe8, { KEY_SCREENLOCK } }, { KE_KEY, 0xe9, { KEY_BRIGHTNESS_ZERO } }, { KE_KEY, 0xeb, { KEY_CAMERA_ZOOMOUT } }, @@ -81,6 +87,25 @@ static const struct key_entry eeepc_wmi_keymap[] = { { KE_END, 0}, }; +static void eeepc_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code, + unsigned int *value, bool *autorelease) +{ + switch (*code) { + case HOME_PRESS: + *value = 1; + *autorelease = 0; + break; + case HOME_HOLD: + *code = ASUS_WMI_KEY_IGNORE; + break; + case HOME_RELEASE: + *code = HOME_PRESS; + *value = 0; + *autorelease = 0; + break; + } +} + static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level, void *context, void **retval) { @@ -151,6 +176,7 @@ static struct asus_wmi_driver asus_wmi_driver = { .keymap = eeepc_wmi_keymap, .input_name = "Eee PC WMI hotkeys", .input_phys = EEEPC_WMI_FILE "/input0", + .key_filter = eeepc_wmi_key_filter, .probe = eeepc_wmi_probe, .quirks = eeepc_wmi_quirks, }; -- cgit v1.1 From 39bbde059973d3337ae5f2fae5097d52599415e4 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Mon, 4 Jul 2011 09:49:20 +0200 Subject: asus-wmi: Enable autorepeat for hotkey input device The T101MT Home/Express Gate key autorepeats in hardware, but sparse-keymap does not support hardware autorepeat. Enable the input core's software autorepeat to emulate the hardware behavior. Normal hotkeys are autoreleased, so the behavior of these keys will not be affected. 
Signed-off-by: Seth Forshee Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cea9fdc..0fce7de 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -205,6 +205,7 @@ static int asus_wmi_input_init(struct asus_wmi *asus) asus->inputdev->phys = asus->driver->input_phys; asus->inputdev->id.bustype = BUS_HOST; asus->inputdev->dev.parent = &asus->platform_device->dev; + set_bit(EV_REP, asus->inputdev->evbit); err = sparse_keymap_setup(asus->inputdev, asus->driver->keymap, NULL); if (err) -- cgit v1.1 From 57d5c8e742dbb8cdc794d9e0fee769c9533072af Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:30 +0200 Subject: asus-wmi: second part of the version starts at 16 not 8 Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 0fce7de..fa2b395 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1246,7 +1246,7 @@ static int asus_wmi_platform_init(struct asus_wmi *asus) /* We don't know yet what to do with this version... */ if (!asus_wmi_evaluate_method(ASUS_WMI_METHODID_SPEC, 0, 0x9, &rv)) { - pr_info("BIOS WMI version: %d.%d", rv >> 8, rv & 0xFF); + pr_info("BIOS WMI version: %d.%d", rv >> 16, rv & 0xFF); asus->spec = rv; } -- cgit v1.1 From e9809c0b9670656855655d8ed6dc33718ec12ba2 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:31 +0200 Subject: asus-wmi: add keyboard backlight support Based on a patch from Nate Weibley. . 
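The keyboard backlight added below is registered as an ordinary LED class device ("asus::kbd_backlight", brightness range 0 to 3 per the driver's max_brightness), so it should surface through the standard LED sysfs attributes. A small sketch of setting the level from user space, assuming that node name exists on the target machine:

#include <stdio.h>

/* Illustrative only: write a brightness level (0-3) to the LED class
 * node the driver registers; the exact path depends on the system. */
int main(void)
{
	const char *node = "/sys/class/leds/asus::kbd_backlight/brightness";
	FILE *f = fopen(node, "w");

	if (!f) {
		perror(node);
		return 1;
	}
	fprintf(f, "2\n");	/* medium level */
	fclose(f);
	return 0;
}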
Cc: Nate Weibley Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 130 ++++++++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 17 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index fa2b395..83a7a81 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -66,6 +66,8 @@ MODULE_LICENSE("GPL"); #define NOTIFY_BRNUP_MAX 0x1f #define NOTIFY_BRNDOWN_MIN 0x20 #define NOTIFY_BRNDOWN_MAX 0x2e +#define NOTIFY_KBD_BRTUP 0xc4 +#define NOTIFY_KBD_BRTDWN 0xc5 /* WMI Methods */ #define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */ @@ -174,8 +176,11 @@ struct asus_wmi { struct led_classdev tpd_led; int tpd_led_wk; + struct led_classdev kbd_led; + int kbd_led_wk; struct workqueue_struct *led_workqueue; struct work_struct tpd_led_work; + struct work_struct kbd_led_work; struct asus_rfkill wlan; struct asus_rfkill bluetooth; @@ -360,30 +365,79 @@ static enum led_brightness tpd_led_get(struct led_classdev *led_cdev) return read_tpd_led_state(asus); } -static int asus_wmi_led_init(struct asus_wmi *asus) +static void kbd_led_update(struct work_struct *work) { - int rv; + int ctrl_param = 0; + struct asus_wmi *asus; - if (read_tpd_led_state(asus) < 0) - return 0; + asus = container_of(work, struct asus_wmi, kbd_led_work); - asus->led_workqueue = create_singlethread_workqueue("led_workqueue"); - if (!asus->led_workqueue) - return -ENOMEM; - INIT_WORK(&asus->tpd_led_work, tpd_led_update); + /* + * bits 0-2: level + * bit 7: light on/off + */ + if (asus->kbd_led_wk > 0) + ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F); - asus->tpd_led.name = "asus::touchpad"; - asus->tpd_led.brightness_set = tpd_led_set; - asus->tpd_led.brightness_get = tpd_led_get; - asus->tpd_led.max_brightness = 1; + asus_wmi_set_devstate(ASUS_WMI_DEVID_KBD_BACKLIGHT, ctrl_param, NULL); +} - rv = led_classdev_register(&asus->platform_device->dev, &asus->tpd_led); - if (rv) { - destroy_workqueue(asus->led_workqueue); - return rv; +static int kbd_led_read(struct asus_wmi *asus, int *level, int *env) +{ + int retval; + + /* + * bits 0-2: level + * bit 7: light on/off + * bit 8-10: environment (0: dark, 1: normal, 2: light) + * bit 17: status unknown + */ + retval = asus_wmi_get_devstate_bits(asus, ASUS_WMI_DEVID_KBD_BACKLIGHT, + 0xFFFF); + + if (retval == 0x8000) + retval = -ENODEV; + + if (retval >= 0) { + if (level) + *level = retval & 0x80 ? 
retval & 0x7F : 0; + if (env) + *env = (retval >> 8) & 0x7F; + retval = 0; } - return 0; + return retval; +} + +static void kbd_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct asus_wmi *asus; + + asus = container_of(led_cdev, struct asus_wmi, kbd_led); + + if (value > asus->kbd_led.max_brightness) + value = asus->kbd_led.max_brightness; + else if (value < 0) + value = 0; + + asus->kbd_led_wk = value; + queue_work(asus->led_workqueue, &asus->kbd_led_work); +} + +static enum led_brightness kbd_led_get(struct led_classdev *led_cdev) +{ + struct asus_wmi *asus; + int retval, value; + + asus = container_of(led_cdev, struct asus_wmi, kbd_led); + + retval = kbd_led_read(asus, &value, NULL); + + if (retval < 0) + return retval; + + return value; } static void asus_wmi_led_exit(struct asus_wmi *asus) @@ -394,6 +448,48 @@ static void asus_wmi_led_exit(struct asus_wmi *asus) destroy_workqueue(asus->led_workqueue); } +static int asus_wmi_led_init(struct asus_wmi *asus) +{ + int rv = 0; + + asus->led_workqueue = create_singlethread_workqueue("led_workqueue"); + if (!asus->led_workqueue) + return -ENOMEM; + + if (read_tpd_led_state(asus) >= 0) { + INIT_WORK(&asus->tpd_led_work, tpd_led_update); + + asus->tpd_led.name = "asus::touchpad"; + asus->tpd_led.brightness_set = tpd_led_set; + asus->tpd_led.brightness_get = tpd_led_get; + asus->tpd_led.max_brightness = 1; + + rv = led_classdev_register(&asus->platform_device->dev, + &asus->tpd_led); + if (rv) + goto error; + } + + if (kbd_led_read(asus, NULL, NULL) >= 0) { + INIT_WORK(&asus->kbd_led_work, kbd_led_update); + + asus->kbd_led.name = "asus::kbd_backlight"; + asus->kbd_led.brightness_set = kbd_led_set; + asus->kbd_led.brightness_get = kbd_led_get; + asus->kbd_led.max_brightness = 3; + + rv = led_classdev_register(&asus->platform_device->dev, + &asus->kbd_led); + } + +error: + if (rv) + asus_wmi_led_exit(asus); + + return rv; +} + + /* * PCI hotplug (for wlan rfkill) */ -- cgit v1.1 From 8fe8c25ef92ffc773a49c4b0dab2938a6fd64489 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:32 +0200 Subject: asus-wmi: fix section mismatch Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 83a7a81..41a96f2 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1712,7 +1712,7 @@ static int asus_wmi_probe(struct platform_device *pdev) static bool used; -int asus_wmi_register_driver(struct asus_wmi_driver *driver) +int __init_or_module asus_wmi_register_driver(struct asus_wmi_driver *driver) { struct platform_driver *platform_driver; struct platform_device *platform_device; -- cgit v1.1 From af965e973276b015d1b7d9ee78ed8d8df9d6bc98 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:34 +0200 Subject: asus-wmi: fix keyboard backlight detection Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 41a96f2..4b0384e 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -395,8 +395,9 @@ static int kbd_led_read(struct asus_wmi *asus, int *level, int *env) retval = asus_wmi_get_devstate_bits(asus, ASUS_WMI_DEVID_KBD_BACKLIGHT, 0xFFFF); + /* Unknown status is considered as off 
*/ if (retval == 0x8000) - retval = -ENODEV; + retval = 0; if (retval >= 0) { if (level) -- cgit v1.1 From 79ec1172c29ed521f1652b0b44dceb84ba115541 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:35 +0200 Subject: asus-wmi: add some device ids Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 4b0384e..e60385d 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -95,6 +95,7 @@ MODULE_LICENSE("GPL"); /* Wireless */ #define ASUS_WMI_DEVID_HW_SWITCH 0x00010001 #define ASUS_WMI_DEVID_WIRELESS_LED 0x00010002 +#define ASUS_WMI_DEVID_CWAP 0x00010003 #define ASUS_WMI_DEVID_WLAN 0x00010011 #define ASUS_WMI_DEVID_BLUETOOTH 0x00010013 #define ASUS_WMI_DEVID_GPS 0x00010015 @@ -104,6 +105,12 @@ MODULE_LICENSE("GPL"); /* Leds */ /* 0x000200XX and 0x000400XX */ +#define ASUS_WMI_DEVID_LED1 0x00020011 +#define ASUS_WMI_DEVID_LED2 0x00020012 +#define ASUS_WMI_DEVID_LED3 0x00020013 +#define ASUS_WMI_DEVID_LED4 0x00020014 +#define ASUS_WMI_DEVID_LED5 0x00020015 +#define ASUS_WMI_DEVID_LED6 0x00020016 /* Backlight and Brightness */ #define ASUS_WMI_DEVID_BACKLIGHT 0x00050011 -- cgit v1.1 From 6118b8adb50c23714d5be089965082bbbb48d831 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:36 +0200 Subject: asus-wmi: add thermal sensor Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index e60385d..d01f767 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -930,7 +931,26 @@ static ssize_t asus_hwmon_pwm1(struct device *dev, return sprintf(buf, "%d\n", value); } +static ssize_t asus_hwmon_temp1(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct asus_wmi *asus = dev_get_drvdata(dev); + u32 value; + int err; + + err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_THERMAL_CTRL, &value); + + if (err < 0) + return err; + + value = KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000; + + return sprintf(buf, "%d\n", value); +} + static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO, asus_hwmon_pwm1, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, asus_hwmon_temp1, NULL, 0); static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) @@ -941,6 +961,7 @@ static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, 0); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_name.dev_attr.attr, NULL }; -- cgit v1.1 From e02431d6f5e8f1023ce0cfbaf70ddf4afae924d8 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:37 +0200 Subject: asus-wmi: check for temp1 presence Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index d01f767..e508726 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -967,7 +967,7 @@ static struct attribute *hwmon_attributes[] = { }; static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj, - struct 
attribute *attr, int idx) + struct attribute *attr, int idx) { struct device *dev = container_of(kobj, struct device, kobj); struct platform_device *pdev = to_platform_device(dev->parent); @@ -978,6 +978,8 @@ static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj, if (attr == &sensor_dev_attr_pwm1.dev_attr.attr) dev_id = ASUS_WMI_DEVID_FAN_CTRL; + else if (attr == &sensor_dev_attr_temp1_input.dev_attr.attr) + dev_id = ASUS_WMI_DEVID_THERMAL_CTRL; if (dev_id != -1) { int err = asus_wmi_get_devstate(asus, dev_id, &value); @@ -998,6 +1000,10 @@ static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj, if (value == ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000 || (!asus->sfun && !(value & ASUS_WMI_DSTS_PRESENCE_BIT))) ok = false; + } else if (dev_id == ASUS_WMI_DEVID_THERMAL_CTRL) { + /* If value is zero, something is clearly wrong */ + if (value == 0) + ok = false; } return ok ? attr->mode : 0; -- cgit v1.1 From 3df5fdadf6400373a696bb14e27d4771e5f6afb3 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:38 +0200 Subject: asus-wmi: return proper value in store_cpufv() Cc: stable@kernel.org Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index e508726..3b9fb91 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1307,14 +1307,18 @@ ASUS_WMI_CREATE_DEVICE_ATTR(cardr, 0644, ASUS_WMI_DEVID_CARDREADER); static ssize_t store_cpufv(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int value; + int value, rv; if (!count || sscanf(buf, "%i", &value) != 1) return -EINVAL; if (value < 0 || value > 2) return -EINVAL; - return asus_wmi_evaluate_method(ASUS_WMI_METHODID_CFVS, value, 0, NULL); + rv = asus_wmi_evaluate_method(ASUS_WMI_METHODID_CFVS, value, 0, NULL); + if (rv < 0) + return rv; + + return count; } static DEVICE_ATTR(cpufv, S_IRUGO | S_IWUSR, NULL, store_cpufv); -- cgit v1.1 From fddbfed595b307f9dddc7a86ddfbcbcb80141e28 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:39 +0200 Subject: asus-wmi: add CWAP support and clarify the meaning of WAPF bits ref: http://dev.iksaif.net/projects/3/wiki/Asus-laptop_WAPF Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-laptop.c | 9 ++++----- drivers/platform/x86/asus-nb-wmi.c | 27 +++++++++++++++++++++++---- drivers/platform/x86/asus-wmi.c | 6 ++++++ drivers/platform/x86/asus-wmi.h | 1 + drivers/platform/x86/eeepc-wmi.c | 1 + 5 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index d65df92..fa6d7ec 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -70,11 +70,10 @@ MODULE_LICENSE("GPL"); * WAPF defines the behavior of the Fn+Fx wlan key * The significance of values is yet to be found, but * most of the time: - * 0x0 will do nothing - * 0x1 will allow to control the device with Fn+Fx key. 
- * 0x4 will send an ACPI event (0x88) while pressing the Fn+Fx key - * 0x5 like 0x1 or 0x4 - * So, if something doesn't work as you want, just try other values =) + * Bit | Bluetooth | WLAN + * 0 | Hardware | Hardware + * 1 | Hardware | Software + * 4 | Software | Software */ static uint wapf = 1; module_param(wapf, uint, 0444); diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 0580d99..b0859d4 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -38,6 +38,24 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("wmi:"ASUS_NB_WMI_EVENT_GUID); +/* + * WAPF defines the behavior of the Fn+Fx wlan key + * The significance of values is yet to be found, but + * most of the time: + * Bit | Bluetooth | WLAN + * 0 | Hardware | Hardware + * 1 | Hardware | Software + * 4 | Software | Software + */ +static uint wapf; +module_param(wapf, uint, 0444); +MODULE_PARM_DESC(wapf, "WAPF value"); + +static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) +{ + driver->wapf = wapf; +} + static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, @@ -53,16 +71,16 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x51, { KEY_WWW } }, { KE_KEY, 0x55, { KEY_CALC } }, { KE_KEY, 0x5C, { KEY_F15 } }, /* Power Gear key */ - { KE_KEY, 0x5D, { KEY_WLAN } }, - { KE_KEY, 0x5E, { KEY_WLAN } }, - { KE_KEY, 0x5F, { KEY_WLAN } }, + { KE_KEY, 0x5D, { KEY_WLAN } }, /* Wireless console Toggle */ + { KE_KEY, 0x5E, { KEY_WLAN } }, /* Wireless console Enable */ + { KE_KEY, 0x5F, { KEY_WLAN } }, /* Wireless console Disable */ { KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, { KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, - { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, + { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, { KE_KEY, 0x82, { KEY_CAMERA } }, { KE_KEY, 0x88, { KEY_RFKILL } }, { KE_KEY, 0x8A, { KEY_PROG1 } }, @@ -81,6 +99,7 @@ static struct asus_wmi_driver asus_nb_wmi_driver = { .keymap = asus_nb_wmi_keymap, .input_name = "Asus WMI hotkeys", .input_phys = ASUS_NB_WMI_FILE "/input0", + .quirks = asus_nb_wmi_quirks, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 3b9fb91..8787dfa 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1413,6 +1413,12 @@ static int asus_wmi_platform_init(struct asus_wmi *asus) return -ENODEV; } + /* CWAP allow to define the behavior of the Fn+F2 key, + * this method doesn't seems to be present on Eee PCs */ + if (asus->driver->wapf >= 0) + asus_wmi_set_devstate(ASUS_WMI_DEVID_CWAP, + asus->driver->wapf, NULL); + return asus_wmi_sysfs_init(asus->platform_device); } diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 4da6103..8147c10 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -37,6 +37,7 @@ struct asus_wmi; struct asus_wmi_driver { bool hotplug_wireless; + int wapf; const char *name; struct module *owner; diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c index f41a977..9f6e643 100644 --- a/drivers/platform/x86/eeepc-wmi.c +++ b/drivers/platform/x86/eeepc-wmi.c @@ -166,6 +166,7 @@ static void eeepc_dmi_check(struct asus_wmi_driver *driver) static void eeepc_wmi_quirks(struct asus_wmi_driver *driver) { driver->hotplug_wireless = 
hotplug_wireless; + driver->wapf = -1; eeepc_dmi_check(driver); } -- cgit v1.1 From 43be8bde1fdfbe3f4dedfd04ca4f9d6dd1e04b46 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:40 +0200 Subject: asus-wmi: add gps rfkill support Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 8787dfa..57514de 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -194,6 +194,7 @@ struct asus_wmi { struct asus_rfkill bluetooth; struct asus_rfkill wimax; struct asus_rfkill wwan3g; + struct asus_rfkill gps; struct hotplug_slot *hotplug_slot; struct mutex hotplug_lock; @@ -835,6 +836,11 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus) rfkill_destroy(asus->wwan3g.rfkill); asus->wwan3g.rfkill = NULL; } + if (asus->gps.rfkill) { + rfkill_unregister(asus->gps.rfkill); + rfkill_destroy(asus->gps.rfkill); + asus->gps.rfkill = NULL; + } } static int asus_wmi_rfkill_init(struct asus_wmi *asus) @@ -869,6 +875,12 @@ static int asus_wmi_rfkill_init(struct asus_wmi *asus) if (result && result != -ENODEV) goto exit; + result = asus_new_rfkill(asus, &asus->gps, "asus-gps", + RFKILL_TYPE_GPS, ASUS_WMI_DEVID_GPS); + + if (result && result != -ENODEV) + goto exit; + if (!asus->driver->hotplug_wireless) goto exit; @@ -1721,6 +1733,10 @@ static int asus_hotk_restore(struct device *device) bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_WWAN3G); rfkill_set_sw_state(asus->wwan3g.rfkill, bl); } + if (asus->gps.rfkill) { + bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPS); + rfkill_set_sw_state(asus->gps.rfkill, bl); + } return 0; } -- cgit v1.1 From a912d329102aa13c572989c6adc103685c9d24ae Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Fri, 1 Jul 2011 11:34:41 +0200 Subject: asus-wmi: add uwb rfkill support Signed-off-by: Corentin Chary Signed-off-by: Matthew Garrett --- drivers/platform/x86/asus-wmi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 57514de..95cba9e 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -195,6 +195,7 @@ struct asus_wmi { struct asus_rfkill wimax; struct asus_rfkill wwan3g; struct asus_rfkill gps; + struct asus_rfkill uwb; struct hotplug_slot *hotplug_slot; struct mutex hotplug_lock; @@ -841,6 +842,11 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus) rfkill_destroy(asus->gps.rfkill); asus->gps.rfkill = NULL; } + if (asus->uwb.rfkill) { + rfkill_unregister(asus->uwb.rfkill); + rfkill_destroy(asus->uwb.rfkill); + asus->uwb.rfkill = NULL; + } } static int asus_wmi_rfkill_init(struct asus_wmi *asus) @@ -881,6 +887,12 @@ static int asus_wmi_rfkill_init(struct asus_wmi *asus) if (result && result != -ENODEV) goto exit; + result = asus_new_rfkill(asus, &asus->uwb, "asus-uwb", + RFKILL_TYPE_UWB, ASUS_WMI_DEVID_UWB); + + if (result && result != -ENODEV) + goto exit; + if (!asus->driver->hotplug_wireless) goto exit; @@ -1737,6 +1749,10 @@ static int asus_hotk_restore(struct device *device) bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPS); rfkill_set_sw_state(asus->gps.rfkill, bl); } + if (asus->uwb.rfkill) { + bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_UWB); + rfkill_set_sw_state(asus->uwb.rfkill, bl); + } return 0; } -- cgit v1.1 From 4e2441c046737d3fb452ae46f0e4b12e18cf440d Mon Sep 
17 00:00:00 2001 From: J Witteveen Date: Sun, 3 Jul 2011 13:15:44 +0200 Subject: Samsung Laptop platform driver: support N510 The N510 benefits from this code as well. Below is a patch to include support. Signed-off-by: Jouke Witteveen Signed-off-by: Matthew Garrett --- drivers/platform/x86/samsung-laptop.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index d347116..e12c88e 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -521,6 +521,16 @@ static struct dmi_system_id __initdata samsung_dmi_table[] = { .callback = dmi_check_cb, }, { + .ident = "N510", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "SAMSUNG ELECTRONICS CO., LTD."), + DMI_MATCH(DMI_PRODUCT_NAME, "N510"), + DMI_MATCH(DMI_BOARD_NAME, "N510"), + }, + .callback = dmi_check_cb, + }, + { .ident = "X125", .matches = { DMI_MATCH(DMI_SYS_VENDOR, -- cgit v1.1 From 94ee48b9287c5c22c90754a48dbcf80b2df99d5f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 6 Jul 2011 11:05:20 +0800 Subject: platform-drivers-x86: msi-wmi: add missing sparse_keymap_free in msi_wmi_init error path Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/msi-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index c832e33..6f40bf2 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -272,6 +272,7 @@ static int __init msi_wmi_init(void) err_free_backlight: backlight_device_unregister(backlight); err_free_input: + sparse_keymap_free(msi_wmi_input_dev); input_unregister_device(msi_wmi_input_dev); err_uninstall_notifier: wmi_remove_notify_handler(MSIWMI_EVENT_GUID); -- cgit v1.1 From 03f8952cf63b6059e56074be6ed450dc7739cdee Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 6 Jul 2011 23:40:51 +0800 Subject: platform-drivers-x86: intel_mid_thermal: fix memory leak The memory for td_info which is allocated in initialize_sensor() should be properly kfreed in mid_thermal_probe() error patch and mid_thermal_remove(). 
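In the fixed code the devdata of each registered zone is exactly the td_info pointer handed to thermal_zone_device_register(), so both cleanup paths can use the same two-step pattern: free ->devdata, then unregister the zone. Condensed from the hunks below (a sketch with added comments, not a further change), the probe error path becomes:

	err:
		while (--i >= 0) {
			/* devdata is the td_info allocated by initialize_sensor(i) */
			kfree(pinfo->tzd[i]->devdata);
			thermal_zone_device_unregister(pinfo->tzd[i]);
		}

and mid_thermal_remove() runs the same loop body over all MSIC_THERMAL_SENSORS entries.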
Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_mid_thermal.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index 3a57832..ccd7b1f 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -493,20 +493,30 @@ static int mid_thermal_probe(struct platform_device *pdev) /* Register each sensor with the generic thermal framework*/ for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { + struct thermal_device_info *td_info = initialize_sensor(i); + + if (!td_info) { + ret = -ENOMEM; + goto err; + } pinfo->tzd[i] = thermal_zone_device_register(name[i], - 0, initialize_sensor(i), &tzd_ops, 0, 0, 0, 0); - if (IS_ERR(pinfo->tzd[i])) - goto reg_fail; + 0, td_info, &tzd_ops, 0, 0, 0, 0); + if (IS_ERR(pinfo->tzd[i])) { + kfree(td_info); + ret = PTR_ERR(pinfo->tzd[i]); + goto err; + } } pinfo->pdev = pdev; platform_set_drvdata(pdev, pinfo); return 0; -reg_fail: - ret = PTR_ERR(pinfo->tzd[i]); - while (--i >= 0) +err: + while (--i >= 0) { + kfree(pinfo->tzd[i]->devdata); thermal_zone_device_unregister(pinfo->tzd[i]); + } configure_adc(0); kfree(pinfo); return ret; @@ -524,8 +534,10 @@ static int mid_thermal_remove(struct platform_device *pdev) int i; struct platform_info *pinfo = platform_get_drvdata(pdev); - for (i = 0; i < MSIC_THERMAL_SENSORS; i++) + for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { + kfree(pinfo->tzd[i]->devdata); thermal_zone_device_unregister(pinfo->tzd[i]); + } kfree(pinfo); platform_set_drvdata(pdev, NULL); -- cgit v1.1 From c8bb2ebd6267d6c1288201344d7e7ee987d74467 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 7 Jul 2011 10:05:49 +0800 Subject: platform-drivers-x86: intel_menlow: add missing return AE_OK for intel_menlow_register_sensor() Otherwise, the error path will always be executed. Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_menlow.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index 809adea4..abddc83 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -477,6 +477,8 @@ static acpi_status intel_menlow_register_sensor(acpi_handle handle, u32 lvl, return AE_ERROR; } + return AE_OK; + aux1_not_found: if (status == AE_NOT_FOUND) return AE_OK; -- cgit v1.1 From b73210aec44ddf73ca856cbca2d25daeccf47025 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 7 Jul 2011 10:21:15 +0800 Subject: platform-drivers-x86: intel_rar_register: convert to DEFINE_PCI_DEVICE_TABLE And also remove unused variable 'my_id_table'. 
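For context, DEFINE_PCI_DEVICE_TABLE() in kernels of this era expanded to roughly the following (paraphrased from memory rather than quoted from this tree):

	/* approximate expansion of the helper macro */
	#define DEFINE_PCI_DEVICE_TABLE(_table) \
		const struct pci_device_id _table[] __devinitconst

so writing "static DEFINE_PCI_DEVICE_TABLE(rar_pci_id_tbl)" keeps the table const, makes it static, and adds the section annotation, at which point the unreferenced my_id_table pointer has no reason to exist.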
Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_rar_register.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/platform/x86/intel_rar_register.c b/drivers/platform/x86/intel_rar_register.c index bde47e9..c8a6aed 100644 --- a/drivers/platform/x86/intel_rar_register.c +++ b/drivers/platform/x86/intel_rar_register.c @@ -637,15 +637,13 @@ end_function: return error; } -const struct pci_device_id rar_pci_id_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(rar_pci_id_tbl) = { { PCI_VDEVICE(INTEL, 0x4110) }, { 0 } }; MODULE_DEVICE_TABLE(pci, rar_pci_id_tbl); -const struct pci_device_id *my_id_table = rar_pci_id_tbl; - /* field for registering driver to PCI device */ static struct pci_driver rar_pci_driver = { .name = "rar_register_driver", -- cgit v1.1 From daa7769641019e9a0da180170cc1cd1b8ebd1641 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 7 Jul 2011 10:22:46 +0800 Subject: platform-drivers-x86: intel_scu_ipc: convert to DEFINE_PCI_DEVICE_TABLE Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_scu_ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 940accb..c866653 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -725,7 +725,7 @@ static void ipc_remove(struct pci_dev *pdev) intel_scu_devices_destroy(); } -static const struct pci_device_id pci_ids[] = { +static DEFINE_PCI_DEVICE_TABLE(pci_ids) = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080e)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x082a)}, { 0,} -- cgit v1.1 From 78542e18335e35136c2dcb6c904f3f554241ae97 Mon Sep 17 00:00:00 2001 From: Frederick van der Wyck Date: Thu, 7 Jul 2011 22:05:25 +0100 Subject: Platform: Samsung Q10 backlight driver This adds backlight control on the Samsung Q10 laptop, which does not support the SABI interface. Also tested successfully on the Dell Latitude X200. Signed-off-by: Frederick van der Wyck Signed-off-by: Matthew Garrett --- drivers/platform/x86/Kconfig | 8 ++ drivers/platform/x86/Makefile | 1 + drivers/platform/x86/samsung-q10.c | 196 +++++++++++++++++++++++++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 drivers/platform/x86/samsung-q10.c diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 45e0191..1e88d47 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -769,4 +769,12 @@ config INTEL_OAKTRAIL enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y here; it will only load on supported platforms. +config SAMSUNG_Q10 + tristate "Samsung Q10 Extras" + depends on SERIO_I8042 + select BACKLIGHT_CLASS_DEVICE + ---help--- + This driver provides support for backlight control on Samsung Q10 + and related laptops, including Dell Latitude X200. 
+ endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index afc1f83..293a320 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -44,3 +44,4 @@ obj-$(CONFIG_SAMSUNG_LAPTOP) += samsung-laptop.o obj-$(CONFIG_MXM_WMI) += mxm-wmi.o obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o +obj-$(CONFIG_SAMSUNG_Q10) += samsung-q10.o diff --git a/drivers/platform/x86/samsung-q10.c b/drivers/platform/x86/samsung-q10.c new file mode 100644 index 0000000..54cf3d6 --- /dev/null +++ b/drivers/platform/x86/samsung-q10.c @@ -0,0 +1,196 @@ +/* + * Driver for Samsung Q10 and related laptops: controls the backlight + * + * Copyright (c) 2011 Frederick van der Wyck + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#define SAMSUNGQ10_BL_MAX_INTENSITY 255 +#define SAMSUNGQ10_BL_DEFAULT_INTENSITY 185 + +#define SAMSUNGQ10_BL_8042_CMD 0xbe +#define SAMSUNGQ10_BL_8042_DATA { 0x89, 0x91 } + +static int samsungq10_bl_brightness; + +static bool force; +module_param(force, bool, 0); +MODULE_PARM_DESC(force, + "Disable the DMI check and force the driver to be loaded"); + +static int samsungq10_bl_set_intensity(struct backlight_device *bd) +{ + + int brightness = bd->props.brightness; + unsigned char c[3] = SAMSUNGQ10_BL_8042_DATA; + + c[2] = (unsigned char)brightness; + i8042_lock_chip(); + i8042_command(c, (0x30 << 8) | SAMSUNGQ10_BL_8042_CMD); + i8042_unlock_chip(); + samsungq10_bl_brightness = brightness; + + return 0; +} + +static int samsungq10_bl_get_intensity(struct backlight_device *bd) +{ + return samsungq10_bl_brightness; +} + +static const struct backlight_ops samsungq10_bl_ops = { + .get_brightness = samsungq10_bl_get_intensity, + .update_status = samsungq10_bl_set_intensity, +}; + +#ifdef CONFIG_PM_SLEEP +static int samsungq10_suspend(struct device *dev) +{ + return 0; +} + +static int samsungq10_resume(struct device *dev) +{ + + struct backlight_device *bd = dev_get_drvdata(dev); + + samsungq10_bl_set_intensity(bd); + return 0; +} +#else +#define samsungq10_suspend NULL +#define samsungq10_resume NULL +#endif + +static SIMPLE_DEV_PM_OPS(samsungq10_pm_ops, + samsungq10_suspend, samsungq10_resume); + +static int __devinit samsungq10_probe(struct platform_device *pdev) +{ + + struct backlight_properties props; + struct backlight_device *bd; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = SAMSUNGQ10_BL_MAX_INTENSITY; + bd = backlight_device_register("samsung", &pdev->dev, NULL, + &samsungq10_bl_ops, &props); + if (IS_ERR(bd)) + return PTR_ERR(bd); + + platform_set_drvdata(pdev, bd); + + bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY; + samsungq10_bl_set_intensity(bd); + + return 0; +} + +static int __devexit samsungq10_remove(struct platform_device *pdev) +{ + + struct backlight_device *bd = platform_get_drvdata(pdev); + + bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY; + samsungq10_bl_set_intensity(bd); + + backlight_device_unregister(bd); + + return 0; +} + +static struct platform_driver samsungq10_driver = { + .driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .pm = &samsungq10_pm_ops, + }, + .probe = samsungq10_probe, + .remove = 
__devexit_p(samsungq10_remove), +}; + +static struct platform_device *samsungq10_device; + +static int __init dmi_check_callback(const struct dmi_system_id *id) +{ + printk(KERN_INFO KBUILD_MODNAME ": found model '%s'\n", id->ident); + return 0; +} + +static struct dmi_system_id __initdata samsungq10_dmi_table[] = { + { + .ident = "Samsung Q10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Samsung"), + DMI_MATCH(DMI_PRODUCT_NAME, "SQ10"), + }, + .callback = dmi_check_callback, + }, + { + .ident = "Samsung Q20", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG Electronics"), + DMI_MATCH(DMI_PRODUCT_NAME, "SENS Q20"), + }, + .callback = dmi_check_callback, + }, + { + .ident = "Samsung Q25", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG Electronics"), + DMI_MATCH(DMI_PRODUCT_NAME, "NQ25"), + }, + .callback = dmi_check_callback, + }, + { + .ident = "Dell Latitude X200", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "X200"), + }, + .callback = dmi_check_callback, + }, + { }, +}; +MODULE_DEVICE_TABLE(dmi, samsungq10_dmi_table); + +static int __init samsungq10_init(void) +{ + if (!force && !dmi_check_system(samsungq10_dmi_table)) + return -ENODEV; + + samsungq10_device = platform_create_bundle(&samsungq10_driver, + samsungq10_probe, + NULL, 0, NULL, 0); + + if (IS_ERR(samsungq10_device)) + return PTR_ERR(samsungq10_device); + + return 0; +} + +static void __exit samsungq10_exit(void) +{ + platform_device_unregister(samsungq10_device); + platform_driver_unregister(&samsungq10_driver); +} + +module_init(samsungq10_init); +module_exit(samsungq10_exit); + +MODULE_AUTHOR("Frederick van der Wyck "); +MODULE_DESCRIPTION("Samsung Q10 Driver"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 046f828b410837d304137cec9e4e0d72e50fc454 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Jul 2011 16:08:21 +0800 Subject: platform-drivers-x86: samsung-q10: make dmi_check_callback return 1 We only care about if there is any successful match from the dmi table or no match at all, we can make dmi_check_system return immediately if we have a successful match instead of iterate thorough the whole table. Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/samsung-q10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/samsung-q10.c b/drivers/platform/x86/samsung-q10.c index 54cf3d6..1e54ae7 100644 --- a/drivers/platform/x86/samsung-q10.c +++ b/drivers/platform/x86/samsung-q10.c @@ -127,7 +127,7 @@ static struct platform_device *samsungq10_device; static int __init dmi_check_callback(const struct dmi_system_id *id) { printk(KERN_INFO KBUILD_MODNAME ": found model '%s'\n", id->ident); - return 0; + return 1; } static struct dmi_system_id __initdata samsungq10_dmi_table[] = { -- cgit v1.1 From f1566f0dc07ec9b5409b348070f5a700032d7881 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Mon, 1 Aug 2011 15:46:10 -0500 Subject: dell-wmi: Add keys for Dell XPS L502X All of these keys are being reported on the keyboard controller but are also generating WMI events. Add them to the legacy keymap to silence the noise. 
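A KE_IGNORE entry marks the scancode as known to the sparse-keymap code without forwarding it as an input event, so the driver stops complaining about unknown keys yet does not generate duplicate key presses on top of what the keyboard controller already reports. One of the entries added below, for illustration:

	/* delivered by the keyboard controller as well, so swallow the WMI event */
	{ KE_IGNORE, 0xe0f7, { KEY_MUTE } },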
BugLink: http://bugs.launchpad.net/bugs/815914 Signed-off-by: Seth Forshee Signed-off-by: Matthew Garrett --- drivers/platform/x86/dell-wmi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index ce79082..fa9a217 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -54,6 +54,8 @@ MODULE_ALIAS("wmi:"DELL_EVENT_GUID); */ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = { + { KE_IGNORE, 0x003a, { KEY_CAPSLOCK } }, + { KE_KEY, 0xe045, { KEY_PROG1 } }, { KE_KEY, 0xe009, { KEY_EJECTCD } }, @@ -85,6 +87,11 @@ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = { { KE_IGNORE, 0xe013, { KEY_RESERVED } }, { KE_IGNORE, 0xe020, { KEY_MUTE } }, + + /* Shortcut and audio panel keys */ + { KE_IGNORE, 0xe025, { KEY_RESERVED } }, + { KE_IGNORE, 0xe026, { KEY_RESERVED } }, + { KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } }, { KE_IGNORE, 0xe030, { KEY_VOLUMEUP } }, { KE_IGNORE, 0xe033, { KEY_KBDILLUMUP } }, @@ -92,6 +99,9 @@ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = { { KE_IGNORE, 0xe03a, { KEY_CAPSLOCK } }, { KE_IGNORE, 0xe045, { KEY_NUMLOCK } }, { KE_IGNORE, 0xe046, { KEY_SCROLLLOCK } }, + { KE_IGNORE, 0xe0f7, { KEY_MUTE } }, + { KE_IGNORE, 0xe0f8, { KEY_VOLUMEDOWN } }, + { KE_IGNORE, 0xe0f9, { KEY_VOLUMEUP } }, { KE_END, 0 } }; -- cgit v1.1 From 78a7539b881eb557494a7c810625c0307b27296c Mon Sep 17 00:00:00 2001 From: Thomas Courbon Date: Wed, 20 Jul 2011 22:57:44 +0200 Subject: Platform: fix samsung-laptop DMI identification for N150/N210/220/N230 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some samsung latop of the N150/N2{10,20,30} serie are badly detected by the samsung-laptop platform driver, see bug # 36082. It appears that N230 identifies itself as N150/N210/N220/N230 whereas the other identify themselves as N150/N210/220. This patch attemtp fix #36082 allowing correct identification for all the said netbook model. Reported-by: Daniel Eklöf Signed-off-by: Thomas Courbon Signed-off-by: Matthew Garrett --- drivers/platform/x86/samsung-laptop.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index e12c88e..3591630 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -611,6 +611,16 @@ static struct dmi_system_id __initdata samsung_dmi_table[] = { .callback = dmi_check_cb, }, { + .ident = "N150/N210/N220", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "SAMSUNG ELECTRONICS CO., LTD."), + DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220"), + DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220"), + }, + .callback = dmi_check_cb, + }, + { .ident = "N150/N210/N220/N230", .matches = { DMI_MATCH(DMI_SYS_VENDOR, -- cgit v1.1 From 70fda70a724c737b0df4195763f9124d181fe64d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 22 Jul 2011 09:21:36 -0700 Subject: x86 driver: fix typo in TDP override enabling When enabling turbo, we need to set both the TDC and TDP bits. IIRC only the TDC one actually matters, but fix it up anyway since the current code is confusing. 
Signed-off-by: Jesse Barnes Signed-off-by: Matthew Garrett --- drivers/platform/x86/intel_ips.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 5ffe7c3..809a3ae 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -403,7 +403,7 @@ static void ips_cpu_raise(struct ips_driver *ips) thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8); - turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN; + turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN; wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); turbo_override &= ~TURBO_TDP_MASK; @@ -438,7 +438,7 @@ static void ips_cpu_lower(struct ips_driver *ips) thm_writew(THM_MPCPC, (new_limit * 10) / 8); - turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN; + turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN; wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); turbo_override &= ~TURBO_TDP_MASK; -- cgit v1.1 From 7451a55af08d40fd6b28802f4e26a13264114430 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 27 Jul 2011 15:27:34 +0800 Subject: platform-drivers-x86: ideapad-laptop: add missing ideapad_input_exit in ideapad_acpi_add error path In the case of ideapad_backlight_init() failure, we need to free the resources allocated by ideapad_input_init(). Aslo drop __devexit annotation for ideapad_input_exit() because we also call it in ideapad_acpi_add() error path. Signed-off-by: Axel Lin Signed-off-by: Matthew Garrett --- drivers/platform/x86/ideapad-laptop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 8811c68..0c59541 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -406,7 +406,7 @@ err_free_dev: return error; } -static void __devexit ideapad_input_exit(struct ideapad_private *priv) +static void ideapad_input_exit(struct ideapad_private *priv) { sparse_keymap_free(priv->inputdev); input_unregister_device(priv->inputdev); @@ -563,6 +563,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice) backlight_failed: for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) ideapad_unregister_rfkill(adevice, i); + ideapad_input_exit(priv); input_failed: ideapad_platform_exit(priv); platform_failed: -- cgit v1.1 From 2c3422d9c475c62adae82743d8507651342925ae Mon Sep 17 00:00:00 2001 From: "Anton V. Boyarshinov" Date: Thu, 28 Jul 2011 18:05:35 +0400 Subject: acerhdf.c: spaces in aliased changed to * It seems that aliases shouldn't contain spaces, as module-init-tools uses them as delimeters in module.alias file Signed-off-by: Anton V. 
Boyarshinov Signed-off-by: Matthew Garrett --- drivers/platform/x86/acerhdf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index d339756..760c6d7 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -704,15 +704,15 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Peter Feuerer"); MODULE_DESCRIPTION("Aspire One temperature and fan driver"); MODULE_ALIAS("dmi:*:*Acer*:pnAOA*:"); -MODULE_ALIAS("dmi:*:*Acer*:pnAspire 1410*:"); -MODULE_ALIAS("dmi:*:*Acer*:pnAspire 1810*:"); +MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1410*:"); +MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1810*:"); MODULE_ALIAS("dmi:*:*Acer*:pnAO531*:"); MODULE_ALIAS("dmi:*:*Gateway*:pnAOA*:"); MODULE_ALIAS("dmi:*:*Gateway*:pnLT31*:"); -MODULE_ALIAS("dmi:*:*Packard Bell*:pnAOA*:"); -MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOA*:"); -MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOTMU*:"); -MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOTMA*:"); +MODULE_ALIAS("dmi:*:*Packard*Bell*:pnAOA*:"); +MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOA*:"); +MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMU*:"); +MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMA*:"); module_init(acerhdf_init); module_exit(acerhdf_exit); -- cgit v1.1 From 15b956a0b5651bbb1217ec374fdd67291dabb2af Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sat, 30 Jul 2011 17:00:45 +0800 Subject: acer-wmi: support Lenovo ideapad S205 wifi switch The AMW0 function in acer-wmi works on Lenovo ideapad S205 for control the wifi hardware state. We also found there have a 0x78 EC register exposes the state of wifi hardware switch on the machine. So, add this patch to support Lenovo ideapad S205 wifi hardware switch in acer-wmi driver. Reference: bko#37892 https://bugzilla.kernel.org/show_bug.cgi?id=37892 Cc: Carlos Corbacho Cc: Matthew Garrett Cc: Dmitry Torokhov Cc: Corentin Chary Cc: Thomas Renninger Tested-by: Florian Heyer Signed-off-by: Lee, Chun-Yi Signed-off-by: Matthew Garrett --- drivers/platform/x86/acer-wmi.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index fa715302..af2bb20 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -305,6 +305,10 @@ static struct quirk_entry quirk_fujitsu_amilo_li_1718 = { .wireless = 2, }; +static struct quirk_entry quirk_lenovo_ideapad_s205 = { + .wireless = 3, +}; + /* The Aspire One has a dummy ACPI-WMI interface - disable it */ static struct dmi_system_id __devinitdata acer_blacklist[] = { { @@ -451,6 +455,15 @@ static struct dmi_system_id acer_quirks[] = { }, .driver_data = &quirk_medion_md_98300, }, + { + .callback = dmi_matched, + .ident = "Lenovo Ideapad S205", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "10382LG"), + }, + .driver_data = &quirk_lenovo_ideapad_s205, + }, {} }; @@ -543,6 +556,12 @@ struct wmi_interface *iface) return AE_ERROR; *value = result & 0x1; return AE_OK; + case 3: + err = ec_read(0x78, &result); + if (err) + return AE_ERROR; + *value = result & 0x1; + return AE_OK; default: err = ec_read(0xA, &result); if (err) @@ -1267,8 +1286,13 @@ static void acer_rfkill_update(struct work_struct *ignored) acpi_status status; status = get_u32(&state, ACER_CAP_WIRELESS); - if (ACPI_SUCCESS(status)) - rfkill_set_sw_state(wireless_rfkill, !state); + if (ACPI_SUCCESS(status)) { + if (quirks->wireless == 3) { + rfkill_set_hw_state(wireless_rfkill, !state); + } else { + 
rfkill_set_sw_state(wireless_rfkill, !state); + } + } if (has_cap(ACER_CAP_BLUETOOTH)) { status = get_u32(&state, ACER_CAP_BLUETOOTH); -- cgit v1.1 From fe202fde50a986a8510c62a76dc8733c1a8fac86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Aug 2011 21:45:11 +0200 Subject: PM / Domains: Fix pm_genpd_poweron() The local variable ret is defined twice in pm_genpd_poweron(), which causes this function to always return 0, even if the PM domain's .power_on() callback fails, in which case an error code should be returned. Remove the wrong second definition of ret and additionally remove an unnecessary definition of wait from pm_genpd_poweron(). Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index be8714a..e18566a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -80,7 +80,6 @@ static void genpd_set_active(struct generic_pm_domain *genpd) int pm_genpd_poweron(struct generic_pm_domain *genpd) { struct generic_pm_domain *parent = genpd->parent; - DEFINE_WAIT(wait); int ret = 0; start: @@ -112,7 +111,7 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) } if (genpd->power_on) { - int ret = genpd->power_on(genpd); + ret = genpd->power_on(genpd); if (ret) goto out; } -- cgit v1.1 From 02b26774afebb2d62695ba3230319d70d8c6cc2d Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 5 Aug 2011 21:45:20 +0200 Subject: PM / Runtime: Allow _put_sync() from interrupts-disabled context Currently the use of pm_runtime_put_sync() is not safe from interrupts-disabled context because rpm_idle() will release the spinlock and enable interrupts for the idle callbacks. This enables interrupts during a time where interrupts were expected to be disabled, and can have strange side effects on drivers that expected interrupts to be disabled. This is not a bug since the documentation clearly states that only _put_sync_suspend() is safe in IRQ-safe mode. However, pm_runtime_put_sync() could be made safe when in IRQ-safe mode by releasing the spinlock but not re-enabling interrupts, which is what this patch aims to do. Problem was found when using some buggy drivers that set pm_runtime_irq_safe() and used _put_sync() in interrupts-disabled context. Reported-by: Colin Cross Tested-by: Nishanth Menon Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 10 +++++----- drivers/base/power/runtime.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 14dd3c6..4ce5450 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -54,11 +54,10 @@ referred to as subsystem-level callbacks in what follows. By default, the callbacks are always invoked in process context with interrupts enabled. However, subsystems can use the pm_runtime_irq_safe() helper function to tell the PM core that a device's ->runtime_suspend() and ->runtime_resume() -callbacks should be invoked in atomic context with interrupts disabled -(->runtime_idle() is still invoked the default way). This implies that these -callback routines must not block or sleep, but it also means that the -synchronous helper functions listed at the end of Section 4 can be used within -an interrupt handler or in an atomic context. +callbacks should be invoked in atomic context with interrupts disabled. 
+This implies that these callback routines must not block or sleep, but it also +means that the synchronous helper functions listed at the end of Section 4 can +be used within an interrupt handler or in an atomic context. The subsystem-level suspend callback is _entirely_ _responsible_ for handling the suspend of the device as appropriate, which may, but need not include @@ -483,6 +482,7 @@ pm_runtime_suspend() pm_runtime_autosuspend() pm_runtime_resume() pm_runtime_get_sync() +pm_runtime_put_sync() pm_runtime_put_sync_suspend() 5. Runtime PM Initialization, Device Probing and Removal diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8dc247c..acb3f83 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -226,11 +226,17 @@ static int rpm_idle(struct device *dev, int rpmflags) callback = NULL; if (callback) { - spin_unlock_irq(&dev->power.lock); + if (dev->power.irq_safe) + spin_unlock(&dev->power.lock); + else + spin_unlock_irq(&dev->power.lock); callback(dev); - spin_lock_irq(&dev->power.lock); + if (dev->power.irq_safe) + spin_lock(&dev->power.lock); + else + spin_lock_irq(&dev->power.lock); } dev->power.idle_notification = false; -- cgit v1.1 From 69d94ec6d83d84044252d9ba03f6a8970816e350 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 6 Aug 2011 01:34:08 +0300 Subject: Battery: sysfs_remove_battery(): possible circular locking Commit 9c921c22a7f33397a6774d7fa076db9b6a0fd669 Author: Lan Tianyu ACPI / Battery: Resolve the race condition in the sysfs_remove_battery() fixed BUG https://bugzilla.kernel.org/show_bug.cgi?id=35642 , but as a side effect made lockdep unhappy with sysfs_remove_battery(): [14818.477168] [14818.477170] ======================================================= [14818.477200] [ INFO: possible circular locking dependency detected ] [14818.477221] 3.1.0-dbg-07865-g1280ea8-dirty #668 [14818.477236] ------------------------------------------------------- [14818.477257] s2ram/1599 is trying to acquire lock: [14818.477276] (s_active#8){++++.+}, at: [] sysfs_addrm_finish+0x31/0x5a [14818.477323] [14818.477325] but task is already holding lock: [14818.477350] (&battery->lock){+.+.+.}, at: [] sysfs_remove_battery+0x10/0x4b [battery] [14818.477395] [14818.477397] which lock already depends on the new lock. [14818.477399] [..] [14818.479121] stack backtrace: [14818.479148] Pid: 1599, comm: s2ram Not tainted 3.1.0-dbg-07865-g1280ea8-dirty #668 [14818.479175] Call Trace: [14818.479198] [] print_circular_bug+0x293/0x2a4 [14818.479228] [] __lock_acquire+0xfe4/0x164b [14818.479260] [] ? sysfs_addrm_finish+0x31/0x5a [14818.479288] [] lock_acquire+0x138/0x1ac [14818.479316] [] ? sysfs_addrm_finish+0x31/0x5a [14818.479345] [] sysfs_deactivate+0x9b/0xec [14818.479373] [] ? 
sysfs_addrm_finish+0x31/0x5a [14818.479405] [] sysfs_addrm_finish+0x31/0x5a [14818.479433] [] sysfs_hash_and_remove+0x54/0x77 [14818.479461] [] sysfs_remove_file+0x12/0x14 [14818.479488] [] device_remove_file+0x12/0x14 [14818.479516] [] device_del+0x119/0x17c [14818.479542] [] device_unregister+0xe/0x1a [14818.479570] [] power_supply_unregister+0x23/0x27 [14818.479601] [] sysfs_remove_battery+0x34/0x4b [battery] [14818.479632] [] battery_notify+0x2c/0x3a [battery] [14818.479662] [] notifier_call_chain+0x74/0xa1 [14818.479692] [] __blocking_notifier_call_chain+0x6c/0x89 [14818.479722] [] blocking_notifier_call_chain+0xf/0x11 [14818.479751] [] pm_notifier_call_chain+0x15/0x27 [14818.479770] [] enter_state+0xa7/0xd5 [14818.479782] [] state_store+0xaa/0xc0 [14818.479795] [] ? pm_async_store+0x45/0x45 [14818.479807] [] kobj_attr_store+0x17/0x19 [14818.479820] [] sysfs_write_file+0x103/0x13f [14818.479834] [] vfs_write+0xad/0x13d [14818.479847] [] sys_write+0x45/0x6c [14818.479860] [] system_call_fastpath+0x16/0x1b This patch introduces separate lock to struct acpi_battery to grab in sysfs_remove_battery() instead of battery->lock. So fix by Lan Tianyu is still there, we just grab independent lock. Signed-off-by: Sergey Senozhatsky Tested-by: Lan Tianyu Signed-off-by: Len Brown --- drivers/acpi/battery.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ffce2f0..8953b70 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -99,6 +99,7 @@ enum { struct acpi_battery { struct mutex lock; + struct mutex sysfs_lock; struct power_supply bat; struct acpi_device *device; struct notifier_block pm_nb; @@ -573,16 +574,16 @@ static int sysfs_add_battery(struct acpi_battery *battery) static void sysfs_remove_battery(struct acpi_battery *battery) { - mutex_lock(&battery->lock); + mutex_lock(&battery->sysfs_lock); if (!battery->bat.dev) { - mutex_unlock(&battery->lock); + mutex_unlock(&battery->sysfs_lock); return; } device_remove_file(battery->bat.dev, &alarm_attr); power_supply_unregister(&battery->bat); battery->bat.dev = NULL; - mutex_unlock(&battery->lock); + mutex_unlock(&battery->sysfs_lock); } /* @@ -982,6 +983,7 @@ static int acpi_battery_add(struct acpi_device *device) strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS); device->driver_data = battery; mutex_init(&battery->lock); + mutex_init(&battery->sysfs_lock); if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, "_BIX", &handle))) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); @@ -1010,6 +1012,7 @@ static int acpi_battery_add(struct acpi_device *device) fail: sysfs_remove_battery(battery); mutex_destroy(&battery->lock); + mutex_destroy(&battery->sysfs_lock); kfree(battery); return result; } @@ -1027,6 +1030,7 @@ static int acpi_battery_remove(struct acpi_device *device, int type) #endif sysfs_remove_battery(battery); mutex_destroy(&battery->lock); + mutex_destroy(&battery->sysfs_lock); kfree(battery); return 0; } -- cgit v1.1 From 0785a8e87be0202744d8681363aecbd4ffbb5f5a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 6 Aug 2011 05:26:35 -0700 Subject: sparc: Fix build with DEBUG_PAGEALLOC enabled. arch/sparc/mm/init_64.c:1622:22: error: unused variable '__swapper_4m_tsb_phys_patch_end' [-Werror=unused-variable] arch/sparc/mm/init_64.c:1621:22: error: unused variable '__swapper_4m_tsb_phys_patch' [-Werror=unused-variable] Signed-off-by: David S. 
Miller --- arch/sparc/mm/init_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index adfac23..581531d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1618,18 +1618,20 @@ static void ktsb_phys_patch(void) { extern unsigned int __swapper_tsb_phys_patch; extern unsigned int __swapper_tsb_phys_patch_end; - extern unsigned int __swapper_4m_tsb_phys_patch; - extern unsigned int __swapper_4m_tsb_phys_patch_end; unsigned long ktsb_pa; ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); patch_one_ktsb_phys(&__swapper_tsb_phys_patch, &__swapper_tsb_phys_patch_end, ktsb_pa); #ifndef CONFIG_DEBUG_PAGEALLOC + { + extern unsigned int __swapper_4m_tsb_phys_patch; + extern unsigned int __swapper_4m_tsb_phys_patch_end; ktsb_pa = (kern_base + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch, &__swapper_4m_tsb_phys_patch_end, ktsb_pa); + } #endif } -- cgit v1.1 From 1eb19a12bd2214cdcad5273d472b062a4ba97fa1 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Fri, 5 Aug 2011 18:46:27 -0700 Subject: lib/sha1: use the git implementation of SHA-1 For ChromiumOS, we use SHA-1 to verify the integrity of the root filesystem. The speed of the kernel sha-1 implementation has a major impact on our boot performance. To improve boot performance, we investigated using the heavily optimized sha-1 implementation used in git. With the git sha-1 implementation, we see a 11.7% improvement in boot time. 10 reboots, remove slowest/fastest. Before: Mean: 6.58 seconds Stdev: 0.14 After (with git sha-1, this patch): Mean: 5.89 seconds Stdev: 0.07 The other cool thing about the git SHA-1 implementation is that it only needs 64 bytes of stack for the workspace while the original kernel implementation needed 320 bytes. Signed-off-by: Mandeep Singh Baines Cc: Ramsay Jones Cc: Nicolas Pitre Cc: Herbert Xu Cc: David S. Miller Cc: linux-crypto@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/cryptohash.h | 2 +- lib/sha1.c | 212 +++++++++++++++++++++++++++++++++------------ 2 files changed, 159 insertions(+), 55 deletions(-) diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h index ec78a4b..f945218 100644 --- a/include/linux/cryptohash.h +++ b/include/linux/cryptohash.h @@ -3,7 +3,7 @@ #define SHA_DIGEST_WORDS 5 #define SHA_MESSAGE_BYTES (512 /*bits*/ / 8) -#define SHA_WORKSPACE_WORDS 80 +#define SHA_WORKSPACE_WORDS 16 void sha_init(__u32 *buf); void sha_transform(__u32 *digest, const char *data, __u32 *W); diff --git a/lib/sha1.c b/lib/sha1.c index 4c45fd5..f33271d 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -1,31 +1,72 @@ /* - * SHA transform algorithm, originally taken from code written by - * Peter Gutmann, and placed in the public domain. + * SHA1 routine optimized to do word accesses rather than byte accesses, + * and to avoid unnecessary copies into the context array. + * + * This was based on the git SHA1 implementation. */ #include #include -#include +#include +#include -/* The SHA f()-functions. */ +/* + * If you have 32 registers or more, the compiler can (and should) + * try to change the array[] accesses into registers. However, on + * machines with less than ~25 registers, that won't really work, + * and at least gcc will make an unholy mess of it. 
+ * + * So to avoid that mess which just slows things down, we force + * the stores to memory to actually happen (we might be better off + * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as + * suggested by Artur Skawina - that will also make gcc unable to + * try to do the silly "optimize away loads" part because it won't + * see what the value will be). + * + * Ben Herrenschmidt reports that on PPC, the C version comes close + * to the optimized asm with this (ie on PPC you don't want that + * 'volatile', since there are lots of registers). + * + * On ARM we get the best code generation by forcing a full memory barrier + * between each SHA_ROUND, otherwise gcc happily get wild with spilling and + * the stack frame size simply explode and performance goes down the drain. + */ -#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */ -#define f2(x,y,z) (x ^ y ^ z) /* XOR */ -#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */ +#ifdef CONFIG_X86 + #define setW(x, val) (*(volatile __u32 *)&W(x) = (val)) +#elif defined(CONFIG_ARM) + #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0) +#else + #define setW(x, val) (W(x) = (val)) +#endif -/* The SHA Mysterious Constants */ +/* This "rolls" over the 512-bit array */ +#define W(x) (array[(x)&15]) -#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ -#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ -#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ -#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ +/* + * Where do we get the source from? The first 16 iterations get it from + * the input data, the next mix it from the 512-bit array. + */ +#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t) +#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1) + +#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \ + __u32 TEMP = input(t); setW(t, TEMP); \ + E += TEMP + rol32(A,5) + (fn) + (constant); \ + B = ror32(B, 2); } while (0) + +#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E ) +#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E ) +#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E ) +#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E ) /** * sha_transform - single block SHA1 transform * * @digest: 160 bit digest to update * @data: 512 bits of data to hash - * @W: 80 words of workspace (see note) + * @array: 16 words of workspace (see note) * * This function generates a SHA1 digest for a single 512-bit block. * Be warned, it does not handle padding and message digest, do not @@ -36,47 +77,111 @@ * to clear the workspace. This is left to the caller to avoid * unnecessary clears between chained hashing operations. 
*/ -void sha_transform(__u32 *digest, const char *in, __u32 *W) +void sha_transform(__u32 *digest, const char *data, __u32 *array) { - __u32 a, b, c, d, e, t, i; - - for (i = 0; i < 16; i++) - W[i] = be32_to_cpu(((const __be32 *)in)[i]); - - for (i = 0; i < 64; i++) - W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - for (i = 0; i < 20; i++) { - t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 40; i ++) { - t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 60; i ++) { - t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - for (; i < 80; i ++) { - t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i]; - e = d; d = c; c = rol32(b, 30); b = a; a = t; - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; + __u32 A, B, C, D, E; + + A = digest[0]; + B = digest[1]; + C = digest[2]; + D = digest[3]; + E = digest[4]; + + /* Round 1 - iterations 0-16 take their input from 'data' */ + T_0_15( 0, A, B, C, D, E); + T_0_15( 1, E, A, B, C, D); + T_0_15( 2, D, E, A, B, C); + T_0_15( 3, C, D, E, A, B); + T_0_15( 4, B, C, D, E, A); + T_0_15( 5, A, B, C, D, E); + T_0_15( 6, E, A, B, C, D); + T_0_15( 7, D, E, A, B, C); + T_0_15( 8, C, D, E, A, B); + T_0_15( 9, B, C, D, E, A); + T_0_15(10, A, B, C, D, E); + T_0_15(11, E, A, B, C, D); + T_0_15(12, D, E, A, B, C); + T_0_15(13, C, D, E, A, B); + T_0_15(14, B, C, D, E, A); + T_0_15(15, A, B, C, D, E); + + /* Round 1 - tail. Input from 512-bit mixing array */ + T_16_19(16, E, A, B, C, D); + T_16_19(17, D, E, A, B, C); + T_16_19(18, C, D, E, A, B); + T_16_19(19, B, C, D, E, A); + + /* Round 2 */ + T_20_39(20, A, B, C, D, E); + T_20_39(21, E, A, B, C, D); + T_20_39(22, D, E, A, B, C); + T_20_39(23, C, D, E, A, B); + T_20_39(24, B, C, D, E, A); + T_20_39(25, A, B, C, D, E); + T_20_39(26, E, A, B, C, D); + T_20_39(27, D, E, A, B, C); + T_20_39(28, C, D, E, A, B); + T_20_39(29, B, C, D, E, A); + T_20_39(30, A, B, C, D, E); + T_20_39(31, E, A, B, C, D); + T_20_39(32, D, E, A, B, C); + T_20_39(33, C, D, E, A, B); + T_20_39(34, B, C, D, E, A); + T_20_39(35, A, B, C, D, E); + T_20_39(36, E, A, B, C, D); + T_20_39(37, D, E, A, B, C); + T_20_39(38, C, D, E, A, B); + T_20_39(39, B, C, D, E, A); + + /* Round 3 */ + T_40_59(40, A, B, C, D, E); + T_40_59(41, E, A, B, C, D); + T_40_59(42, D, E, A, B, C); + T_40_59(43, C, D, E, A, B); + T_40_59(44, B, C, D, E, A); + T_40_59(45, A, B, C, D, E); + T_40_59(46, E, A, B, C, D); + T_40_59(47, D, E, A, B, C); + T_40_59(48, C, D, E, A, B); + T_40_59(49, B, C, D, E, A); + T_40_59(50, A, B, C, D, E); + T_40_59(51, E, A, B, C, D); + T_40_59(52, D, E, A, B, C); + T_40_59(53, C, D, E, A, B); + T_40_59(54, B, C, D, E, A); + T_40_59(55, A, B, C, D, E); + T_40_59(56, E, A, B, C, D); + T_40_59(57, D, E, A, B, C); + T_40_59(58, C, D, E, A, B); + T_40_59(59, B, C, D, E, A); + + /* Round 4 */ + T_60_79(60, A, B, C, D, E); + T_60_79(61, E, A, B, C, D); + T_60_79(62, D, E, A, B, C); + T_60_79(63, C, D, E, A, B); + T_60_79(64, B, C, D, E, A); + T_60_79(65, A, B, C, D, E); + T_60_79(66, E, A, B, C, D); + T_60_79(67, D, E, A, B, C); + T_60_79(68, C, D, E, A, B); + T_60_79(69, B, C, D, E, A); + T_60_79(70, A, B, C, D, E); + T_60_79(71, E, A, B, C, D); + T_60_79(72, D, E, A, B, C); + T_60_79(73, C, D, E, A, B); + T_60_79(74, B, C, D, E, A); + T_60_79(75, 
A, B, C, D, E); + T_60_79(76, E, A, B, C, D); + T_60_79(77, D, E, A, B, C); + T_60_79(78, C, D, E, A, B); + T_60_79(79, B, C, D, E, A); + + digest[0] += A; + digest[1] += B; + digest[2] += C; + digest[3] += D; + digest[4] += E; } EXPORT_SYMBOL(sha_transform); @@ -92,4 +197,3 @@ void sha_init(__u32 *buf) buf[3] = 0x10325476; buf[4] = 0xc3d2e1f0; } - -- cgit v1.1 From c21427043dec93d40e3a1af970831d1f5f15ce5d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Aug 2011 11:43:08 -0700 Subject: oom_ajd: don't use WARN_ONCE, just use printk_once WARN_ONCE() is very annoying, in that it shows the stack trace that we don't care about at all, and also triggers various user-level "kernel oopsed" logic that we really don't care about. And it's not like the user can do anything about the applications (sshd) in question, it's a distro issue. Requested-by: Andi Kleen (and many others) Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 08e3ecc..73a562b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1118,7 +1118,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, * Warn that /proc/pid/oom_adj is deprecated, see * Documentation/feature-removal-schedule.txt. */ - WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); task->signal->oom_adj = oom_adjust; -- cgit v1.1 From 1117f72ea0217ba0cc19f05adbbd8b9a397f5ab7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Aug 2011 11:51:33 -0700 Subject: vfs: show O_CLOEXE bit properly in /proc//fdinfo/ files The CLOEXE bit is magical, and for performance (and semantic) reasons we don't actually maintain it in the file descriptor itself, but in a separate bit array. Which means that when we show f_flags, the CLOEXE status is shown incorrectly: we show the status not as it is now, but as it was when the file was opened. Fix that by looking up the bit properly in the 'fdt->close_on_exec' bit array. Uli needs this in order to re-implement the pfiles program: "For normal file descriptors (not sockets) this was the last piece of information which wasn't available. This is all part of my 'give Solaris users no reason to not switch' effort. I intend to offer the code to the util-linux-ng maintainers." 
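With this fix the flags field in /proc/<pid>/fdinfo/<fd> tracks the live close-on-exec state instead of the flags recorded at open() time. A small hypothetical test program (not part of the patch; it assumes the asm-generic O_CLOEXEC value of octal 02000000 exposed by <fcntl.h>) shows the effect:

	#include <stdio.h>
	#include <fcntl.h>

	int main(void)
	{
		char path[64], line[128];
		unsigned int flags = 0;
		FILE *f;
		int fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC);

		if (fd < 0)
			return 1;
		snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
		f = fopen(path, "r");
		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (sscanf(line, "flags: %o", &flags) == 1)
				break;
		fclose(f);
		/* with the patch, flags includes O_CLOEXEC for this descriptor */
		printf("flags 0%o, cloexec: %s\n", flags,
		       (flags & O_CLOEXEC) ? "yes" : "no");
		return 0;
	}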
Requested-by: Ulrich Drepper Signed-off-by: Linus Torvalds --- fs/proc/base.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 73a562b..5eb0206 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1919,6 +1919,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { + unsigned int f_flags; + struct fdtable *fdt; + + fdt = files_fdtable(files); + f_flags = file->f_flags & ~O_CLOEXEC; + if (FD_ISSET(fd, fdt->close_on_exec)) + f_flags |= O_CLOEXEC; + if (path) { *path = file->f_path; path_get(&file->f_path); @@ -1928,7 +1936,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) "pos:\t%lli\n" "flags:\t0%o\n", (long long) file->f_pos, - file->f_flags); + f_flags); spin_unlock(&files->file_lock); put_files_struct(files); return 0; -- cgit v1.1 From 6f76b6fcaa6025bc9b2c00e055c7ccd39730568d Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 3 Aug 2011 12:19:07 -0700 Subject: CodingStyle: Document the exception of not splitting user-visible strings, for grepping Patch reviewers now recommend not splitting long user-visible strings, such as printk messages, even if they exceed 80 columns. This avoids breaking grep. However, that recommendation did not actually appear anywhere in Documentation/CodingStyle. See, for example, the thread at http://news.gmane.org/find-root.php?message_id=%3c1312215262.11635.15.camel%40Joe%2dLaptop%3e Signed-off-by: Josh Triplett Signed-off-by: Linus Torvalds --- Documentation/CodingStyle | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index fa6e25b..c940239 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -80,22 +80,13 @@ available tools. The limit on the length of lines is 80 columns and this is a strongly preferred limit. -Statements longer than 80 columns will be broken into sensible chunks. -Descendants are always substantially shorter than the parent and are placed -substantially to the right. The same applies to function headers with a long -argument list. Long strings are as well broken into shorter strings. The -only exception to this is where exceeding 80 columns significantly increases -readability and does not hide information. - -void fun(int a, int b, int c) -{ - if (condition) - printk(KERN_WARNING "Warning this is a long printk with " - "3 parameters a: %u b: %u " - "c: %u \n", a, b, c); - else - next_statement; -} +Statements longer than 80 columns will be broken into sensible chunks, unless +exceeding 80 columns significantly increases readability and does not hide +information. Descendants are always substantially shorter than the parent and +are placed substantially to the right. The same applies to function headers +with a long argument list. However, never break user-visible strings such as +printk messages, because that breaks the ability to grep for them. + Chapter 3: Placing Braces and Spaces -- cgit v1.1 From 4b00e4b3940eabb38adeec0823751820fe2d6fda Mon Sep 17 00:00:00 2001 From: John Stanley Date: Wed, 3 Aug 2011 20:41:00 -0400 Subject: savagedb: Fix typo causing regression in savage4 series video chip detection Two additional savage4 variants were added, but the S3_SAVAGE4_SERIES macro was incompletely modified, resulting in a false positive detection of a savage4 card regardless of which savage card is actually present. 
For non-savage4 series cards, such as a Savage/IX-MV card, this results in garbled video and/or a hard-hang at boot time. Fix this by changing an '||' to an '&&' in the S3_SAVAGE4_SERIES macro. Signed-off-by: John P. Stanley Reviewed-by: Tormod Volden [ The macros have incomplete parenthesis too, but whatever .. -Linus ] Signed-off-by: Linus Torvalds --- drivers/video/savage/savagefb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/savage/savagefb.h b/drivers/video/savage/savagefb.h index 32549d1..dcaab90 100644 --- a/drivers/video/savage/savagefb.h +++ b/drivers/video/savage/savagefb.h @@ -55,7 +55,7 @@ #define S3_SAVAGE3D_SERIES(chip) ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX)) -#define S3_SAVAGE4_SERIES(chip) ((chip>=S3_SAVAGE4) || (chip<=S3_PROSAVAGEDDR)) +#define S3_SAVAGE4_SERIES(chip) ((chip>=S3_SAVAGE4) && (chip<=S3_PROSAVAGEDDR)) #define S3_SAVAGE_MOBILE_SERIES(chip) ((chip==S3_SAVAGE_MX) || (chip==S3_SUPERSAVAGE)) -- cgit v1.1 From bc0b96b54a21246e377122d54569eef71cec535f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Aug 2011 19:45:10 -0700 Subject: crypto: Move md5_transform to lib/md5.c We are going to use this for TCP/IP sequence number and fragment ID generation. Signed-off-by: David S. Miller --- crypto/md5.c | 92 +------------------------------------------- include/linux/cryptohash.h | 5 +++ lib/Makefile | 2 +- lib/md5.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 92 deletions(-) create mode 100644 lib/md5.c diff --git a/crypto/md5.c b/crypto/md5.c index 30efc7d..7febeaa 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -21,99 +21,9 @@ #include #include #include +#include #include -#define F1(x, y, z) (z ^ (x & (y ^ z))) -#define F2(x, y, z) F1(z, x, y) -#define F3(x, y, z) (x ^ y ^ z) -#define F4(x, y, z) (y ^ (x | ~z)) - -#define MD5STEP(f, w, x, y, z, in, s) \ - (w += f(x, y, z) + in, w = (w<>(32-s)) + x) - -static void md5_transform(u32 *hash, u32 const *in) -{ - u32 a, b, c, d; - - a = hash[0]; - b = hash[1]; - c = hash[2]; - d = hash[3]; - - MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); - - MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, in[13] + 
0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); - - MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); - MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); - - MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); - - hash[0] += a; - hash[1] += b; - hash[2] += c; - hash[3] += d; -} - /* XXX: this stuff can be optimized */ static inline void le32_to_cpu_array(u32 *buf, unsigned int words) { diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h index ec78a4b..d2984fb 100644 --- a/include/linux/cryptohash.h +++ b/include/linux/cryptohash.h @@ -8,6 +8,11 @@ void sha_init(__u32 *buf); void sha_transform(__u32 *digest, const char *data, __u32 *W); +#define MD5_DIGEST_WORDS 4 +#define MD5_MESSAGE_BYTES 64 + +void md5_transform(__u32 *hash, __u32 const *in); + __u32 half_md4_transform(__u32 buf[4], __u32 const in[8]); #endif diff --git a/lib/Makefile b/lib/Makefile index 6457af4a..d5d175c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,7 +10,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o prio_tree.o \ - sha1.o irq_regs.o reciprocal_div.o argv_split.o \ + sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o find_next_bit.o diff --git a/lib/md5.c b/lib/md5.c new file mode 100644 index 0000000..c777180 --- /dev/null +++ b/lib/md5.c @@ -0,0 +1,95 @@ +#include +#include +#include + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w<>(32-s)) + x) + +void md5_transform(__u32 *hash, __u32 const *in) +{ + u32 a, b, c, d; + + a = hash[0]; + b = hash[1]; + c = hash[2]; + d = hash[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 
12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + hash[0] += a; + hash[1] += b; + hash[2] += c; + hash[3] += d; +} +EXPORT_SYMBOL(md5_transform); -- cgit v1.1 From 6e5714eaf77d79ae1c8b47e3e040ff5411b717ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Aug 2011 20:50:44 -0700 Subject: net: Compute protocol sequence numbers and fragment IDs using MD5. 
Computers have become a lot faster since we compromised on the partial MD4 hash which we use currently for performance reasons. MD5 is a much safer choice, and is inline with both RFC1948 and other ISS generators (OpenBSD, Solaris, etc.) Furthermore, only having 24-bits of the sequence number be truly unpredictable is a very serious limitation. So the periodic regeneration and 8-bit counter have been removed. We compute and use a full 32-bit sequence number. For ipv6, DCCP was found to use a 32-bit truncated initial sequence number (it needs 43-bits) and that is fixed here as well. Reported-by: Dan Kaminsky Tested-by: Willy Tarreau Signed-off-by: David S. Miller --- drivers/char/random.c | 349 +------------------------------ include/linux/random.h | 12 -- include/net/secure_seq.h | 20 ++ net/core/Makefile | 2 +- net/core/secure_seq.c | 184 ++++++++++++++++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 9 +- net/ipv4/inet_hashtables.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/netfilter/nf_nat_proto_common.c | 1 + net/ipv4/route.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/inet6_hashtables.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 14 files changed, 223 insertions(+), 361 deletions(-) create mode 100644 include/net/secure_seq.h create mode 100644 net/core/secure_seq.c diff --git a/drivers/char/random.c b/drivers/char/random.c index 7292819..c35a785 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1300,345 +1300,14 @@ ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -/******************************************************************** - * - * Random functions for networking - * - ********************************************************************/ - -/* - * TCP initial sequence number picking. This uses the random number - * generator to pick an initial secret value. This value is hashed - * along with the TCP endpoint information to provide a unique - * starting point for each pair of TCP endpoints. This defeats - * attacks which rely on guessing the initial TCP sequence number. - * This algorithm was suggested by Steve Bellovin. - * - * Using a very strong hash was taking an appreciable amount of the total - * TCP connection establishment time, so this is a weaker hash, - * compensated for by changing the secret periodically. - */ - -/* F, G and H are basic MD4 functions: selection, majority, parity */ -#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - -/* - * The generic round function. The application is so specific that - * we don't bother protecting all the arguments with parens, as is generally - * good macro practice, in favor of extra legibility. 
- * Rotation is separate from addition to prevent recomputation - */ -#define ROUND(f, a, b, c, d, x, s) \ - (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s))) -#define K1 0 -#define K2 013240474631UL -#define K3 015666365641UL - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - -static __u32 twothirdsMD4Transform(__u32 const buf[4], __u32 const in[12]) -{ - __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; - - /* Round 1 */ - ROUND(F, a, b, c, d, in[ 0] + K1, 3); - ROUND(F, d, a, b, c, in[ 1] + K1, 7); - ROUND(F, c, d, a, b, in[ 2] + K1, 11); - ROUND(F, b, c, d, a, in[ 3] + K1, 19); - ROUND(F, a, b, c, d, in[ 4] + K1, 3); - ROUND(F, d, a, b, c, in[ 5] + K1, 7); - ROUND(F, c, d, a, b, in[ 6] + K1, 11); - ROUND(F, b, c, d, a, in[ 7] + K1, 19); - ROUND(F, a, b, c, d, in[ 8] + K1, 3); - ROUND(F, d, a, b, c, in[ 9] + K1, 7); - ROUND(F, c, d, a, b, in[10] + K1, 11); - ROUND(F, b, c, d, a, in[11] + K1, 19); - - /* Round 2 */ - ROUND(G, a, b, c, d, in[ 1] + K2, 3); - ROUND(G, d, a, b, c, in[ 3] + K2, 5); - ROUND(G, c, d, a, b, in[ 5] + K2, 9); - ROUND(G, b, c, d, a, in[ 7] + K2, 13); - ROUND(G, a, b, c, d, in[ 9] + K2, 3); - ROUND(G, d, a, b, c, in[11] + K2, 5); - ROUND(G, c, d, a, b, in[ 0] + K2, 9); - ROUND(G, b, c, d, a, in[ 2] + K2, 13); - ROUND(G, a, b, c, d, in[ 4] + K2, 3); - ROUND(G, d, a, b, c, in[ 6] + K2, 5); - ROUND(G, c, d, a, b, in[ 8] + K2, 9); - ROUND(G, b, c, d, a, in[10] + K2, 13); - - /* Round 3 */ - ROUND(H, a, b, c, d, in[ 3] + K3, 3); - ROUND(H, d, a, b, c, in[ 7] + K3, 9); - ROUND(H, c, d, a, b, in[11] + K3, 11); - ROUND(H, b, c, d, a, in[ 2] + K3, 15); - ROUND(H, a, b, c, d, in[ 6] + K3, 3); - ROUND(H, d, a, b, c, in[10] + K3, 9); - ROUND(H, c, d, a, b, in[ 1] + K3, 11); - ROUND(H, b, c, d, a, in[ 5] + K3, 15); - ROUND(H, a, b, c, d, in[ 9] + K3, 3); - ROUND(H, d, a, b, c, in[ 0] + K3, 9); - ROUND(H, c, d, a, b, in[ 4] + K3, 11); - ROUND(H, b, c, d, a, in[ 8] + K3, 15); - - return buf[1] + b; /* "most hashed" word */ - /* Alternative: return sum of all words? */ -} -#endif - -#undef ROUND -#undef F -#undef G -#undef H -#undef K1 -#undef K2 -#undef K3 - -/* This should not be decreased so low that ISNs wrap too fast. */ -#define REKEY_INTERVAL (300 * HZ) -/* - * Bit layout of the tcp sequence numbers (before adding current time): - * bit 24-31: increased after every key exchange - * bit 0-23: hash(source,dest) - * - * The implementation is similar to the algorithm described - * in the Appendix of RFC 1185, except that - * - it uses a 1 MHz clock instead of a 250 kHz clock - * - it performs a rekey every 5 minutes, which is equivalent - * to a (source,dest) tulple dependent forward jump of the - * clock by 0..2^(HASH_BITS+1) - * - * Thus the average ISN wraparound time is 68 minutes instead of - * 4.55 hours. - * - * SMP cleanup and lock avoidance with poor man's RCU. - * Manfred Spraul - * - */ -#define COUNT_BITS 8 -#define COUNT_MASK ((1 << COUNT_BITS) - 1) -#define HASH_BITS 24 -#define HASH_MASK ((1 << HASH_BITS) - 1) +static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static struct keydata { - __u32 count; /* already shifted to the final position */ - __u32 secret[12]; -} ____cacheline_aligned ip_keydata[2]; - -static unsigned int ip_cnt; - -static void rekey_seq_generator(struct work_struct *work); - -static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator); - -/* - * Lock avoidance: - * The ISN generation runs lockless - it's just a hash over random data. 
- * State changes happen every 5 minutes when the random key is replaced. - * Synchronization is performed by having two copies of the hash function - * state and rekey_seq_generator always updates the inactive copy. - * The copy is then activated by updating ip_cnt. - * The implementation breaks down if someone blocks the thread - * that processes SYN requests for more than 5 minutes. Should never - * happen, and even if that happens only a not perfectly compliant - * ISN is generated, nothing fatal. - */ -static void rekey_seq_generator(struct work_struct *work) +static int __init random_int_secret_init(void) { - struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)]; - - get_random_bytes(keyptr->secret, sizeof(keyptr->secret)); - keyptr->count = (ip_cnt & COUNT_MASK) << HASH_BITS; - smp_wmb(); - ip_cnt++; - schedule_delayed_work(&rekey_work, - round_jiffies_relative(REKEY_INTERVAL)); -} - -static inline struct keydata *get_keyptr(void) -{ - struct keydata *keyptr = &ip_keydata[ip_cnt & 1]; - - smp_rmb(); - - return keyptr; -} - -static __init int seqgen_init(void) -{ - rekey_seq_generator(NULL); + get_random_bytes(random_int_secret, sizeof(random_int_secret)); return 0; } -late_initcall(seqgen_init); - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport) -{ - __u32 seq; - __u32 hash[12]; - struct keydata *keyptr = get_keyptr(); - - /* The procedure is the same as for IPv4, but addresses are longer. - * Thus we must use twothirdsMD4Transform. - */ - - memcpy(hash, saddr, 16); - hash[4] = ((__force u16)sport << 16) + (__force u16)dport; - memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); - - seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK; - seq += keyptr->count; - - seq += ktime_to_ns(ktime_get_real()); - - return seq; -} -EXPORT_SYMBOL(secure_tcpv6_sequence_number); -#endif - -/* The code below is shamelessly stolen from secure_tcp_sequence_number(). - * All blames to Andrey V. Savochkin . - */ -__u32 secure_ip_id(__be32 daddr) -{ - struct keydata *keyptr; - __u32 hash[4]; - - keyptr = get_keyptr(); - - /* - * Pick a unique starting offset for each IP destination. - * The dest ip address is placed in the starting vector, - * which is then hashed with random data. - */ - hash[0] = (__force __u32)daddr; - hash[1] = keyptr->secret[9]; - hash[2] = keyptr->secret[10]; - hash[3] = keyptr->secret[11]; - - return half_md4_transform(hash, keyptr->secret); -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - const struct keydata *keyptr; - __u32 hash[4]; - - keyptr = get_keyptr(); - - hash[0] = (__force __u32)daddr[0]; - hash[1] = (__force __u32)daddr[1]; - hash[2] = (__force __u32)daddr[2]; - hash[3] = (__force __u32)daddr[3]; - - return half_md4_transform(hash, keyptr->secret); -} - -#ifdef CONFIG_INET - -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - __u32 seq; - __u32 hash[4]; - struct keydata *keyptr = get_keyptr(); - - /* - * Pick a unique starting offset for each TCP connection endpoints - * (saddr, daddr, sport, dport). - * Note that the words are placed into the starting vector, which is - * then mixed with a partial MD4 over random data. 
- */ - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = ((__force u16)sport << 16) + (__force u16)dport; - hash[3] = keyptr->secret[11]; - - seq = half_md4_transform(hash, keyptr->secret) & HASH_MASK; - seq += keyptr->count; - /* - * As close as possible to RFC 793, which - * suggests using a 250 kHz clock. - * Further reading shows this assumes 2 Mb/s networks. - * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. - * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but - * we also need to limit the resolution so that the u32 seq - * overlaps less than one time per MSL (2 minutes). - * Choosing a clock of 64 ns period is OK. (period of 274 s) - */ - seq += ktime_to_ns(ktime_get_real()) >> 6; - - return seq; -} - -/* Generate secure starting point for ephemeral IPV4 transport port search */ -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) -{ - struct keydata *keyptr = get_keyptr(); - u32 hash[4]; - - /* - * Pick a unique starting offset for each ephemeral port search - * (saddr, daddr, dport) and 48bits of random data. - */ - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = (__force u32)dport ^ keyptr->secret[10]; - hash[3] = keyptr->secret[11]; - - return half_md4_transform(hash, keyptr->secret); -} -EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport) -{ - struct keydata *keyptr = get_keyptr(); - u32 hash[12]; - - memcpy(hash, saddr, 16); - hash[4] = (__force u32)dport; - memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); - - return twothirdsMD4Transform((const __u32 *)daddr, hash); -} -#endif - -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) -/* Similar to secure_tcp_sequence_number but generate a 48 bit value - * bit's 32-47 increase every key exchange - * 0-31 hash(source, dest) - */ -u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - u64 seq; - __u32 hash[4]; - struct keydata *keyptr = get_keyptr(); - - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = ((__force u16)sport << 16) + (__force u16)dport; - hash[3] = keyptr->secret[11]; - - seq = half_md4_transform(hash, keyptr->secret); - seq |= ((u64)keyptr->count) << (32 - HASH_BITS); - - seq += ktime_to_ns(ktime_get_real()); - seq &= (1ull << 48) - 1; - - return seq; -} -EXPORT_SYMBOL(secure_dccp_sequence_number); -#endif - -#endif /* CONFIG_INET */ - +late_initcall(random_int_secret_init); /* * Get a random word for internal kernel use only. 
Similar to urandom but @@ -1646,17 +1315,15 @@ EXPORT_SYMBOL(secure_dccp_sequence_number); * value is not cryptographically secure but for several uses the cost of * depleting entropy is too high */ -DEFINE_PER_CPU(__u32 [4], get_random_int_hash); +DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); unsigned int get_random_int(void) { - struct keydata *keyptr; __u32 *hash = get_cpu_var(get_random_int_hash); - int ret; + unsigned int ret; - keyptr = get_keyptr(); hash[0] += current->pid + jiffies + get_cycles(); - - ret = half_md4_transform(hash, keyptr->secret); + md5_transform(hash, random_int_secret); + ret = hash[0]; put_cpu_var(get_random_int_hash); return ret; diff --git a/include/linux/random.h b/include/linux/random.h index ce29a04..d13059f3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -57,18 +57,6 @@ extern void add_interrupt_randomness(int irq); extern void get_random_bytes(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); -extern __u32 secure_ip_id(__be32 daddr); -extern __u32 secure_ipv6_id(const __be32 daddr[4]); -extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport); -extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport); -extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); - #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; #endif diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h new file mode 100644 index 0000000..d97f689 --- /dev/null +++ b/include/net/secure_seq.h @@ -0,0 +1,20 @@ +#ifndef _NET_SECURE_SEQ +#define _NET_SECURE_SEQ + +#include + +extern __u32 secure_ip_id(__be32 daddr); +extern __u32 secure_ipv6_id(const __be32 daddr[4]); +extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); + +#endif /* _NET_SECURE_SEQ */ diff --git a/net/core/Makefile b/net/core/Makefile index 8a04dd2..0d357b1 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c new file mode 100644 index 0000000..45329d7 --- /dev/null +++ b/net/core/secure_seq.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; + +static int __init net_secret_init(void) +{ + get_random_bytes(net_secret, sizeof(net_secret)); + return 0; +} +late_initcall(net_secret_init); + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, 
which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. (period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +#endif + +#ifdef CONFIG_INET +__u32 secure_ip_id(__be32 daddr) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force __u32) daddr; + hash[1] = net_secret[13]; + hash[2] = net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_ipv6_id(const __be32 daddr[4]) +{ + __u32 hash[4]; + + memcpy(hash, daddr, 16); + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} +EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); +#endif + +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccp_sequence_number); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = 
net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccpv6_sequence_number); +#endif +#endif diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8c36adf..332639b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8dc4348..b74f761 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "dccp.h" #include "ipv6.h" @@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) -{ - return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); -} - -static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a..984ec65 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include #include +#include #include /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e382138..86f13c67 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Theory of operations. diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf..f52d41e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6afc4eb..e3dec1c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include #endif #include +#include #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e6..1c12b8e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b531972..73f1a00 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -20,6 +20,7 @@ #include #include #include +#include #include int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 78aa534..d1fb63f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -61,6 +61,7 @@ #include #include #include +#include #include -- cgit v1.1 From e1042ba0991aab80ced34f7dade6ec25f22b4304 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 16 Nov 2010 20:09:58 +0200 Subject: exofs: Add offset/length to exofs_get_io_state In future raid code we will need to know the IO offset/length and if it's a read or write to determine some of the array sizes we'll need. So add a new exofs_get_rw_state() API for use when writeing/reading. 
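As a condensed sketch of the intended call pattern (mirroring the read_exec hunk in the diff below; the function name here is made up), the io_state is now taken only once the byte range is known, with the direction recorded up front:

static int example_read_exec(struct page_collect *pcol)
{
	struct exofs_io_state *ios;
	int ret;

	/* Allocate the io_state only when offset/length are known */
	ret = exofs_get_rw_state(&pcol->sbi->layout, true /* is_reading */,
				 pcol->pg_first << PAGE_CACHE_SHIFT,
				 pcol->length, &ios);
	if (ret)
		return ret;

	ios->pages = pcol->pages;
	ios->nr_pages = pcol->nr_pages;
	/* ... hand off to exofs_oi_read() as before; offset and length
	 * no longer need to be filled in by hand ...
	 */
	return 0;
}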
All other simple cases are left using the old way. The major change to this is that now we need to call exofs_get_io_state later at inode.c::read_exec and inode.c::write_exec when we actually know these things. So this patch is kept separate so I can test things apart from other changes. Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 6 +++++- fs/exofs/inode.c | 34 +++++++++++++++++++++------------- fs/exofs/ios.c | 14 ++++++++++++-- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 9f62349..fd913dd 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -109,7 +109,7 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) } struct exofs_io_state; -typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); +typedef void (*exofs_io_done_fn)(struct exofs_io_state *ios, void *private); struct exofs_io_state { struct kref kref; @@ -137,6 +137,8 @@ struct exofs_io_state { unsigned out_attr_len; struct osd_attr *out_attr; + bool reading; + /* Variable array of size numdevs */ unsigned numdevs; struct exofs_per_dev_state { @@ -218,6 +220,8 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, u64 offset, void *p, unsigned length); +int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, + u64 offset, u64 length, struct exofs_io_state **ios); int exofs_get_io_state(struct exofs_layout *layout, struct exofs_io_state **ios); void exofs_put_io_state(struct exofs_io_state *ios); diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 8472c09..ba9f0be 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol) { unsigned pages; - if (!pcol->ios) { /* First time allocate io_state */ - int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); - - if (ret) - return ret; - } - /* TODO: easily support bio chaining */ pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); @@ -269,17 +262,25 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) static int read_exec(struct page_collect *pcol) { struct exofs_i_info *oi = exofs_i(pcol->inode); - struct exofs_io_state *ios = pcol->ios; + struct exofs_io_state *ios; struct page_collect *pcol_copy = NULL; int ret; if (!pcol->pages) return 0; + if (!pcol->ios) { + int ret = exofs_get_rw_state(&pcol->sbi->layout, true, + pcol->pg_first << PAGE_CACHE_SHIFT, + pcol->length, &pcol->ios); + + if (ret) + return ret; + } + + ios = pcol->ios; ios->pages = pcol->pages; ios->nr_pages = pcol->nr_pages; - ios->length = pcol->length; - ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; if (pcol->read_4_write) { exofs_oi_read(oi, pcol->ios); @@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p) static int write_exec(struct page_collect *pcol) { struct exofs_i_info *oi = exofs_i(pcol->inode); - struct exofs_io_state *ios = pcol->ios; + struct exofs_io_state *ios; struct page_collect *pcol_copy = NULL; int ret; if (!pcol->pages) return 0; + BUG_ON(pcol->ios); + ret = exofs_get_rw_state(&pcol->sbi->layout, false, + pcol->pg_first << PAGE_CACHE_SHIFT, + pcol->length, &pcol->ios); + + if (unlikely(ret)) + goto err; + pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); if (!pcol_copy) { EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); @@ -523,10 +532,9 @@ static int write_exec(struct page_collect *pcol) *pcol_copy = *pcol; + ios = pcol->ios; ios->pages = pcol_copy->pages; 
ios->nr_pages = pcol_copy->nr_pages; - ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; - ios->length = pcol_copy->length; ios->done = writepages_done; ios->private = pcol_copy; diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index fbb47ba..096405e5 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -69,8 +69,8 @@ out: return ret; } -int exofs_get_io_state(struct exofs_layout *layout, - struct exofs_io_state **pios) +int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, + u64 offset, u64 length, struct exofs_io_state **pios) { struct exofs_io_state *ios; @@ -87,10 +87,20 @@ int exofs_get_io_state(struct exofs_layout *layout, ios->layout = layout; ios->obj.partition = layout->s_pid; + ios->offset = offset; + ios->length = length; + ios->reading = is_reading; + *pios = ios; return 0; } +int exofs_get_io_state(struct exofs_layout *layout, + struct exofs_io_state **ios) +{ + return exofs_get_rw_state(layout, true, 0, 0, ios); +} + void exofs_put_io_state(struct exofs_io_state *ios) { if (ios) { -- cgit v1.1 From 85e44df4748670a1a7d8441b2d75843cdebc478a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 16 May 2011 15:26:47 +0300 Subject: exofs: Move exofs specific osd operations out of ios.c ios.c will be moving to an external library, for use by the objects-layout-driver. Remove from it some exofs specific functions. Also g_attr_logical_length is used both by inode.c and ios.c move definition to the later, to keep it independent Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 17 +++++++++----- fs/exofs/inode.c | 3 --- fs/exofs/ios.c | 64 +++------------------------------------------------- fs/exofs/super.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 79 insertions(+), 73 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index fd913dd..5a77fc7 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -205,6 +205,14 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) */ unsigned exofs_layout_od_id(struct exofs_layout *layout, osd_id obj_no, unsigned layout_index); + +static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, + unsigned layout_index) +{ + return ios->layout->s_ods[ + exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; +} + /* * Maximum count of links to a file */ @@ -215,11 +223,6 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout, *************************/ /* ios.c */ -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], - const struct osd_obj_id *obj); -int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, - u64 offset, void *p, unsigned length); - int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, u64 offset, u64 length, struct exofs_io_state **ios); int exofs_get_io_state(struct exofs_layout *layout, @@ -234,6 +237,7 @@ int exofs_sbi_write(struct exofs_io_state *ios); int exofs_sbi_read(struct exofs_io_state *ios); int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); +extern const struct osd_attr g_attr_logical_length; int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); static inline int exofs_oi_write(struct exofs_i_info *oi, @@ -278,6 +282,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, struct inode *); /* super.c */ +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], + const struct osd_obj_id *obj); int exofs_sbi_write_stats(struct exofs_sb_info *sbi); /********************* @@ -292,7 +298,6 @@ extern const struct file_operations exofs_file_operations; /* 
inode.c */ extern const struct address_space_operations exofs_aops; -extern const struct osd_attr g_attr_logical_length; /* namei.c */ extern const struct inode_operations exofs_dir_inode_operations; diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ba9f0be..abcdac9 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -852,9 +852,6 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode) return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); } -const struct osd_attr g_attr_logical_length = ATTR_DEF( - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); - static int _do_truncate(struct inode *inode, loff_t newsize) { struct exofs_i_info *oi = exofs_i(inode); diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 096405e5..21d6130 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -31,44 +31,6 @@ #define EXOFS_DBGMSG2(M...) do {} while (0) /* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */ -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) -{ - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); -} - -int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, - u64 offset, void *p, unsigned length) -{ - struct osd_request *or = osd_start_request(od, GFP_KERNEL); -/* struct osd_sense_info osi = {.key = 0};*/ - int ret; - - if (unlikely(!or)) { - EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); - return -ENOMEM; - } - ret = osd_req_read_kern(or, obj, offset, p, length); - if (unlikely(ret)) { - EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); - goto out; - } - - ret = osd_finalize_request(or, 0, cred, NULL); - if (unlikely(ret)) { - EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); - goto out; - } - - ret = osd_execute_request(or); - if (unlikely(ret)) - EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); - /* osd_req_decode_sense(or, ret); */ - -out: - osd_end_request(or); - return ret; -} - int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, u64 offset, u64 length, struct exofs_io_state **pios) { @@ -119,29 +81,6 @@ void exofs_put_io_state(struct exofs_io_state *ios) } } -unsigned exofs_layout_od_id(struct exofs_layout *layout, - osd_id obj_no, unsigned layout_index) -{ -/* switch (layout->lay_func) { - case LAYOUT_MOVING_WINDOW: - {*/ - unsigned dev_mod = obj_no; - - return (layout_index + dev_mod * layout->mirrors_p1) % - layout->s_numdevs; -/* } - case LAYOUT_FUNC_IMPLICT: - return layout->devs[layout_index]; - }*/ -} - -static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, - unsigned layout_index) -{ - return ios->layout->s_ods[ - exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; -} - static void _sync_done(struct exofs_io_state *ios, void *p) { struct completion *waiting = p; @@ -844,3 +783,6 @@ out: exofs_put_io_state(ios); return ret; } + +const struct osd_attr g_attr_logical_length = ATTR_DEF( + OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 65fe5de..8783f3d 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -40,6 +40,8 @@ #include "exofs.h" +#define EXOFS_DBGMSG2(M...) 
do {} while (0) + /****************************************************************************** * MOUNT OPTIONS *****************************************************************************/ @@ -208,10 +210,64 @@ static void destroy_inodecache(void) } /****************************************************************************** - * SUPERBLOCK FUNCTIONS + * Some osd helpers *****************************************************************************/ -static const struct super_operations exofs_sops; -static const struct export_operations exofs_export_ops; +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) +{ + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); +} + +static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, + u64 offset, void *p, unsigned length) +{ + struct osd_request *or = osd_start_request(od, GFP_KERNEL); +/* struct osd_sense_info osi = {.key = 0};*/ + int ret; + + if (unlikely(!or)) { + EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); + return -ENOMEM; + } + ret = osd_req_read_kern(or, obj, offset, p, length); + if (unlikely(ret)) { + EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); + goto out; + } + + ret = osd_finalize_request(or, 0, cred, NULL); + if (unlikely(ret)) { + EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); + goto out; + } + + ret = osd_execute_request(or); + if (unlikely(ret)) + EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); + /* osd_req_decode_sense(or, ret); */ + +out: + osd_end_request(or); + EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%p ret=>%d\n", + _LLU(obj->id), _LLU(offset), _LLU(length), od, ret); + return ret; +} + +unsigned exofs_layout_od_id(struct exofs_layout *layout, + osd_id obj_no, unsigned layout_index) +{ +/* switch (layout->lay_func) { + case LAYOUT_MOVING_WINDOW: + {*/ + unsigned dev_mod = obj_no; + + return (layout_index + dev_mod * layout->mirrors_p1) % + layout->s_numdevs; +/* } + case LAYOUT_FUNC_IMPLICT: + return layout->devs[layout_index]; + }*/ +} static const struct osd_attr g_attr_sb_stats = ATTR_DEF( EXOFS_APAGE_SB_DATA, @@ -308,6 +364,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) return ret; } +/****************************************************************************** + * SUPERBLOCK FUNCTIONS + *****************************************************************************/ +static const struct super_operations exofs_sops; +static const struct export_operations exofs_export_ops; + /* * Write the superblock to the OSD */ -- cgit v1.1 From 9e9db45649eb5d3ee5622fdad741914ecf1016a0 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Fri, 5 Aug 2011 15:06:04 -0700 Subject: exofs: ios: Move to a per inode components & device-table Exofs raid engine was saving on memory space by having a single layout-info, single pid, and a single device-table, global to the filesystem. Then passing a credential and object_id info at the io_state level, private for each inode. It would also devise this contraption of rotating the device table view for each inode->ino to spread out the device usage. This is not compatible with the pnfs-objects standard, demanding that each inode can have it's own layout-info, device-table, and each object component it's own pid, oid and creds. So: Bring exofs raid engine to be usable for generic pnfs-objects use by: * Define an exofs_comp structure that holds obj_id and credential info. 
* Break up exofs_layout struct to an exofs_components structure that holds a possible array of exofs_comp and the array of devices + the size of the arrays. * Add a "comps" parameter to get_io_state() that specifies the ids creds and device array to use for each IO. This enables to keep the layout global, but the device-table view, creds and IDs at the inode level. It only adds two 64bit to each inode, since some of these members already existed in another form. * ios raid engine now access layout-info and comps-info through the passed pointers. Everything is pre-prepared by caller for generic access of these structures and arrays. At the exofs Level: * Super block holds an exofs_components struct that holds the device array, previously in layout. The devices there are in device-table order. The device-array is twice bigger and repeats the device-table twice so now each inode's device array can point to a random device and have a round-robin view of the table, making it compatible to previous exofs versions. * Each inode has an exofs_components struct that is initialized at load time, with it's own view of the device table IDs and creds. When doing IO this gets passed to the io_state together with the layout. While preforming this change. Bugs where found where credentials with the wrong IDs where used to access the different SB objects (super.c). As well as some dead code. It was never noticed because the target we use does not check the credentials. Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 104 ++++++++++++++++++++++++------------------ fs/exofs/inode.c | 59 +++++++++++------------- fs/exofs/ios.c | 102 +++++++++++++++++++++++++---------------- fs/exofs/super.c | 136 ++++++++++++++++++++++++++++--------------------------- 4 files changed, 218 insertions(+), 183 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 5a77fc7..39ad501 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -52,9 +52,12 @@ /* u64 has problems with printk this will cast it to unsigned long long */ #define _LLU(x) (unsigned long long)(x) -struct exofs_layout { - osd_id s_pid; /* partition ID of file system*/ +struct exofs_comp { + struct osd_obj_id obj; + u8 cred[OSD_CAP_LEN]; +}; +struct exofs_layout { /* Our way of looking at the data_map */ unsigned stripe_unit; unsigned mirrors_p1; @@ -62,11 +65,18 @@ struct exofs_layout { unsigned group_width; u64 group_depth; unsigned group_count; +}; - enum exofs_inode_layout_gen_functions lay_func; - - unsigned s_numdevs; /* Num of devices in array */ - struct osd_dev **s_ods; /* osd_dev array */ +struct exofs_components { + unsigned numdevs; /* Num of devices in array */ + /* If @single_comp == EC_SINGLE_COMP, @comps points to a single + * component. else there are @numdevs components + */ + enum EC_COMP_USAGE { + EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff + } single_comp; + struct exofs_comp *comps; + struct osd_dev **ods; /* osd_dev array */ }; /* @@ -81,12 +91,13 @@ struct exofs_sb_info { spinlock_t s_next_gen_lock; /* spinlock for gen # update */ u32 s_next_generation; /* next gen # to use */ atomic_t s_curr_pending; /* number of pending commands */ - uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ struct pnfs_osd_data_map data_map; /* Default raid to use * FIXME: Needed ? 
*/ struct exofs_layout layout; /* Default files layout */ + struct exofs_comp one_comp; /* id & cred of partition id=0*/ + struct exofs_components comps; /* comps for the partition */ struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ }; @@ -100,7 +111,8 @@ struct exofs_i_info { uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ uint32_t i_dir_start_lookup; /* which page to start lookup */ uint64_t i_commit_size; /* the object's written length */ - uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ + struct exofs_comp one_comp; /* same component for all devices */ + struct exofs_components comps; /* inode view of the device table */ }; static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) @@ -118,8 +130,7 @@ struct exofs_io_state { exofs_io_done_fn done; struct exofs_layout *layout; - struct osd_obj_id obj; - u8 *cred; + struct exofs_components *comps; /* Global read/write IO*/ loff_t offset; @@ -200,20 +211,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) } /* - * Given a layout, object_number and stripe_index return the associated global - * dev_index - */ -unsigned exofs_layout_od_id(struct exofs_layout *layout, - osd_id obj_no, unsigned layout_index); - -static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, - unsigned layout_index) -{ - return ios->layout->s_ods[ - exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; -} - -/* * Maximum count of links to a file */ #define EXOFS_LINK_MAX 32000 @@ -223,10 +220,13 @@ static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, *************************/ /* ios.c */ -int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, - u64 offset, u64 length, struct exofs_io_state **ios); -int exofs_get_io_state(struct exofs_layout *layout, - struct exofs_io_state **ios); +int exofs_get_rw_state(struct exofs_layout *layout, + struct exofs_components *comps, + bool is_reading, u64 offset, u64 length, + struct exofs_io_state **ios); +int exofs_get_io_state(struct exofs_layout *layout, + struct exofs_components *comps, + struct exofs_io_state **ios); void exofs_put_io_state(struct exofs_io_state *ios); int exofs_check_io(struct exofs_io_state *ios, u64 *resid); @@ -235,27 +235,12 @@ int exofs_sbi_create(struct exofs_io_state *ios); int exofs_sbi_remove(struct exofs_io_state *ios); int exofs_sbi_write(struct exofs_io_state *ios); int exofs_sbi_read(struct exofs_io_state *ios); +int exofs_truncate(struct exofs_layout *layout, struct exofs_components *comps, + u64 size); int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); extern const struct osd_attr g_attr_logical_length; -int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); -static inline int exofs_oi_write(struct exofs_i_info *oi, - struct exofs_io_state *ios) -{ - ios->obj.id = exofs_oi_objno(oi); - ios->cred = oi->i_cred; - return exofs_sbi_write(ios); -} - -static inline int exofs_oi_read(struct exofs_i_info *oi, - struct exofs_io_state *ios) -{ - ios->obj.id = exofs_oi_objno(oi); - ios->cred = oi->i_cred; - return exofs_sbi_read(ios); -} - /* inode.c */ unsigned exofs_max_io_pages(struct exofs_layout *layout, unsigned expected_pages); @@ -307,4 +292,33 @@ extern const struct inode_operations exofs_special_inode_operations; extern const struct inode_operations exofs_symlink_inode_operations; extern const struct inode_operations exofs_fast_symlink_inode_operations; +/* exofs_init_comps will initialize an exofs_components device array + * pointing to a single 
exofs_comp struct, and a round-robin view + * of the device table. + * The first device of each inode is the [inode->ino % num_devices] + * and the rest of the devices sequentially following where the + * first device is after the last device. + * It is assumed that the global device array at @sbi is twice + * bigger and that the device table repeats twice. + * See: exofs_read_lookup_dev_table() + */ +static inline void exofs_init_comps(struct exofs_components *comps, + struct exofs_comp *one_comp, + struct exofs_sb_info *sbi, osd_id oid) +{ + unsigned dev_mod = (unsigned)oid, first_dev; + + one_comp->obj.partition = sbi->one_comp.obj.partition; + one_comp->obj.id = oid; + exofs_make_credential(one_comp->cred, &one_comp->obj); + + comps->numdevs = sbi->comps.numdevs; + comps->single_comp = EC_SINGLE_COMP; + comps->comps = one_comp; + + /* Round robin device view of the table */ + first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs; + comps->ods = sbi->comps.ods + first_dev; +} + #endif diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index abcdac9..3cc24f2 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -270,7 +270,7 @@ static int read_exec(struct page_collect *pcol) return 0; if (!pcol->ios) { - int ret = exofs_get_rw_state(&pcol->sbi->layout, true, + int ret = exofs_get_rw_state(&pcol->sbi->layout, &oi->comps, true, pcol->pg_first << PAGE_CACHE_SHIFT, pcol->length, &pcol->ios); @@ -283,7 +283,7 @@ static int read_exec(struct page_collect *pcol) ios->nr_pages = pcol->nr_pages; if (pcol->read_4_write) { - exofs_oi_read(oi, pcol->ios); + exofs_sbi_read(pcol->ios); return __readpages_done(pcol); } @@ -296,14 +296,14 @@ static int read_exec(struct page_collect *pcol) *pcol_copy = *pcol; ios->done = readpages_done; ios->private = pcol_copy; - ret = exofs_oi_read(oi, ios); + ret = exofs_sbi_read(ios); if (unlikely(ret)) goto err; atomic_inc(&pcol->sbi->s_curr_pending); EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", - ios->obj.id, _LLU(ios->offset), pcol->length); + oi->one_comp.obj.id, _LLU(ios->offset), pcol->length); /* pages ownership was passed to pcol_copy */ _pcol_reset(pcol); @@ -516,7 +516,7 @@ static int write_exec(struct page_collect *pcol) return 0; BUG_ON(pcol->ios); - ret = exofs_get_rw_state(&pcol->sbi->layout, false, + ret = exofs_get_rw_state(&pcol->sbi->layout, &oi->comps, false, pcol->pg_first << PAGE_CACHE_SHIFT, pcol->length, &pcol->ios); @@ -538,9 +538,9 @@ static int write_exec(struct page_collect *pcol) ios->done = writepages_done; ios->private = pcol_copy; - ret = exofs_oi_write(oi, ios); + ret = exofs_sbi_write(ios); if (unlikely(ret)) { - EXOFS_ERR("write_exec: exofs_oi_write() Failed\n"); + EXOFS_ERR("write_exec: exofs_sbi_write() Failed\n"); goto err; } @@ -855,11 +855,12 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode) static int _do_truncate(struct inode *inode, loff_t newsize) { struct exofs_i_info *oi = exofs_i(inode); + struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; int ret; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ret = exofs_oi_truncate(oi, (u64)newsize); + ret = exofs_truncate(&sbi->layout, &oi->comps, (u64)newsize); if (likely(!ret)) truncate_setsize(inode, newsize); @@ -926,18 +927,14 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, struct exofs_on_disk_inode_layout *layout; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { EXOFS_ERR("%s: exofs_get_io_state 
failed.\n", __func__); return ret; } - ios->obj.id = exofs_oi_objno(oi); - exofs_make_credential(oi->i_cred, &ios->obj); - ios->cred = oi->i_cred; - - attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); - attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); + attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); + attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); @@ -945,7 +942,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, ret = exofs_sbi_read(ios); if (unlikely(ret)) { EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", - _LLU(ios->obj.id), ret); + _LLU(oi->one_comp.obj.id), ret); memset(inode, 0, sizeof(*inode)); inode->i_mode = 0040000 | (0777 & ~022); /* If object is lost on target we might as well enable it's @@ -1021,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) return inode; oi = exofs_i(inode); __oi_init(oi); + exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, + exofs_oi_objno(oi)); /* read the inode from the osd */ ret = exofs_get_inode(sb, oi, &fcb); @@ -1126,7 +1125,8 @@ static void create_done(struct exofs_io_state *ios, void *p) if (unlikely(ret)) { EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", - _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); + _LLU(exofs_oi_objno(oi)), + _LLU(oi->one_comp.obj.partition)); /*TODO: When FS is corrupted creation can fail, object already * exist. Get rid of this asynchronous creation, if exist * increment the obj counter and try the next object. Until we @@ -1145,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p) */ struct inode *exofs_new_inode(struct inode *dir, int mode) { - struct super_block *sb; + struct super_block *sb = dir->i_sb; + struct exofs_sb_info *sbi = sb->s_fs_info; struct inode *inode; struct exofs_i_info *oi; - struct exofs_sb_info *sbi; struct exofs_io_state *ios; int ret; - sb = dir->i_sb; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1162,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) set_obj_2bcreated(oi); - sbi = sb->s_fs_info; - inode->i_mapping->backing_dev_info = sb->s_bdi; inode_init_owner(inode, dir, mode); inode->i_ino = sbi->s_nextid++; @@ -1175,22 +1172,21 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) spin_unlock(&sbi->s_next_gen_lock); insert_inode_hash(inode); + exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, + exofs_oi_objno(oi)); exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ mark_inode_dirty(inode); - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); return ERR_PTR(ret); } - ios->obj.id = exofs_oi_objno(oi); - exofs_make_credential(oi->i_cred, &ios->obj); - ios->done = create_done; ios->private = inode; - ios->cred = oi->i_cred; + ret = exofs_sbi_create(ios); if (ret) { exofs_put_io_state(ios); @@ -1271,7 +1267,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) } else memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); goto free_args; @@ -1290,7 +1286,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) 
ios->private = args; } - ret = exofs_oi_write(oi, ios); + ret = exofs_sbi_write(ios); if (!do_sync && !ret) { atomic_inc(&sbi->s_curr_pending); goto out; /* deallocation in updatei_done */ @@ -1354,16 +1350,15 @@ void exofs_evict_inode(struct inode *inode) /* ignore the error, attempt a remove anyway */ /* Now Remove the OSD objects */ - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); return; } - ios->obj.id = exofs_oi_objno(oi); ios->done = delete_done; ios->private = sbi; - ios->cred = oi->i_cred; + ret = exofs_sbi_remove(ios); if (ret) { EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 21d6130..f9d5c5a 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -31,24 +31,41 @@ #define EXOFS_DBGMSG2(M...) do {} while (0) /* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */ -int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, - u64 offset, u64 length, struct exofs_io_state **pios) +static u8 *_ios_cred(struct exofs_io_state *ios, unsigned index) +{ + return ios->comps->comps[index & ios->comps->single_comp].cred; +} + +static struct osd_obj_id *_ios_obj(struct exofs_io_state *ios, unsigned index) +{ + return &ios->comps->comps[index & ios->comps->single_comp].obj; +} + +static struct osd_dev *_ios_od(struct exofs_io_state *ios, unsigned index) +{ + return ios->comps->ods[index]; +} + +int exofs_get_rw_state(struct exofs_layout *layout, + struct exofs_components *comps, + bool is_reading, u64 offset, u64 length, + struct exofs_io_state **pios) { struct exofs_io_state *ios; /*TODO: Maybe use kmem_cach per sbi of size * exofs_io_state_size(layout->s_numdevs) */ - ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); + ios = kzalloc(exofs_io_state_size(comps->numdevs), GFP_KERNEL); if (unlikely(!ios)) { EXOFS_DBGMSG("Failed kzalloc bytes=%d\n", - exofs_io_state_size(layout->s_numdevs)); + exofs_io_state_size(comps->numdevs)); *pios = NULL; return -ENOMEM; } ios->layout = layout; - ios->obj.partition = layout->s_pid; + ios->comps = comps; ios->offset = offset; ios->length = length; ios->reading = is_reading; @@ -58,9 +75,10 @@ int exofs_get_rw_state(struct exofs_layout *layout, bool is_reading, } int exofs_get_io_state(struct exofs_layout *layout, + struct exofs_components *comps, struct exofs_io_state **ios) { - return exofs_get_rw_state(layout, true, 0, 0, ios); + return exofs_get_rw_state(layout, comps, true, 0, 0, ios); } void exofs_put_io_state(struct exofs_io_state *ios) @@ -119,7 +137,7 @@ static int exofs_io_execute(struct exofs_io_state *ios) if (unlikely(!or)) continue; - ret = osd_finalize_request(or, 0, ios->cred, NULL); + ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); if (unlikely(ret)) { EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); @@ -300,7 +318,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, { unsigned pg = *cur_pg; struct request_queue *q = - osd_request_queue(exofs_ios_od(ios, per_dev->dev)); + osd_request_queue(_ios_od(ios, per_dev->dev)); per_dev->length += cur_len; @@ -440,10 +458,10 @@ int exofs_sbi_create(struct exofs_io_state *ios) { int i, ret; - for (i = 0; i < ios->layout->s_numdevs; i++) { + for (i = 0; i < ios->comps->numdevs; i++) { struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: 
osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -452,7 +470,7 @@ int exofs_sbi_create(struct exofs_io_state *ios) ios->per_dev[i].or = or; ios->numdevs++; - osd_req_create_object(or, &ios->obj); + osd_req_create_object(or, _ios_obj(ios, i)); } ret = exofs_io_execute(ios); @@ -464,10 +482,10 @@ int exofs_sbi_remove(struct exofs_io_state *ios) { int i, ret; - for (i = 0; i < ios->layout->s_numdevs; i++) { + for (i = 0; i < ios->comps->numdevs; i++) { struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -476,7 +494,7 @@ int exofs_sbi_remove(struct exofs_io_state *ios) ios->per_dev[i].or = or; ios->numdevs++; - osd_req_remove_object(or, &ios->obj); + osd_req_remove_object(or, _ios_obj(ios, i)); } ret = exofs_io_execute(ios); @@ -498,7 +516,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -533,25 +551,29 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) bio->bi_rw |= REQ_WRITE; } - osd_req_write(or, &ios->obj, per_dev->offset, bio, - per_dev->length); + osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, + bio, per_dev->length); EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(_ios_obj(ios, dev)->id), + _LLU(per_dev->offset), _LLU(per_dev->length), dev); } else if (ios->kern_buff) { - ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, - ios->kern_buff, ios->length); + ret = osd_req_write_kern(or, _ios_obj(ios, dev), + per_dev->offset, + ios->kern_buff, ios->length); if (unlikely(ret)) goto out; EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(_ios_obj(ios, dev)->id), + _LLU(per_dev->offset), _LLU(ios->length), dev); } else { - osd_req_set_attributes(or, &ios->obj); + osd_req_set_attributes(or, _ios_obj(ios, dev)); EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", - _LLU(ios->obj.id), ios->out_attr_len, dev); + _LLU(_ios_obj(ios, dev)->id), + ios->out_attr_len, dev); } if (ios->out_attr) @@ -590,13 +612,14 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) { struct osd_request *or; struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; - unsigned first_dev = (unsigned)ios->obj.id; + struct osd_obj_id *obj = _ios_obj(ios, cur_comp); + unsigned first_dev = (unsigned)obj->id; if (ios->pages && !per_dev->length) return 0; /* Just an empty slot */ first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; - or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; @@ -604,25 +627,26 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) per_dev->or = or; if (ios->pages) { - osd_req_read(or, &ios->obj, per_dev->offset, + osd_req_read(or, obj, per_dev->offset, per_dev->bio, per_dev->length); EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" - " dev=%d\n", _LLU(ios->obj.id), + " 
dev=%d\n", _LLU(obj->id), _LLU(per_dev->offset), _LLU(per_dev->length), first_dev); } else if (ios->kern_buff) { - int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, + int ret = osd_req_read_kern(or, obj, per_dev->offset, ios->kern_buff, ios->length); EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d ret=>%d\n", - _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(obj->id), _LLU(per_dev->offset), _LLU(ios->length), first_dev, ret); if (unlikely(ret)) return ret; } else { - osd_req_get_attributes(or, &ios->obj); + osd_req_get_attributes(or, obj); EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", - _LLU(ios->obj.id), ios->in_attr_len, first_dev); + _LLU(obj->id), + ios->in_attr_len, first_dev); } if (ios->out_attr) osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); @@ -682,14 +706,14 @@ static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; } per_dev->or = or; - osd_req_set_attributes(or, &ios->obj); + osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); osd_req_add_set_attr_list(or, attr, 1); } @@ -721,9 +745,9 @@ void _calc_trunk_info(struct exofs_layout *layout, u64 file_offset, ti->max_devs = layout->group_width * layout->group_count; } -int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) +int exofs_truncate(struct exofs_layout *layout, struct exofs_components *comps, + u64 size) { - struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; struct exofs_io_state *ios; struct exofs_trunc_attr { struct osd_attr attr; @@ -732,7 +756,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) struct _trunc_info ti; int i, ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(layout, comps, &ios); if (unlikely(ret)) return ret; @@ -745,9 +769,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) goto out; } - ios->obj.id = exofs_oi_objno(oi); - ios->cred = oi->i_cred; - ios->numdevs = ios->layout->s_numdevs; + ios->numdevs = ios->comps->numdevs; for (i = 0; i < ti.max_devs; ++i) { struct exofs_trunc_attr *size_attr = &size_attrs[i]; @@ -770,7 +792,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) size_attr->attr.val_ptr = &size_attr->newsize; EXOFS_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(obj_size), i); + _LLU(comps->comps->obj.id), _LLU(obj_size), i); ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, &size_attr->attr); if (unlikely(ret)) diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 8783f3d..4d68779 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -253,22 +253,6 @@ out: return ret; } -unsigned exofs_layout_od_id(struct exofs_layout *layout, - osd_id obj_no, unsigned layout_index) -{ -/* switch (layout->lay_func) { - case LAYOUT_MOVING_WINDOW: - {*/ - unsigned dev_mod = obj_no; - - return (layout_index + dev_mod * layout->mirrors_p1) % - layout->s_numdevs; -/* } - case LAYOUT_FUNC_IMPLICT: - return layout->devs[layout_index]; - }*/ -} - static const struct osd_attr g_attr_sb_stats = ATTR_DEF( EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS, @@ -282,14 +266,12 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) struct exofs_io_state *ios; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = 
exofs_get_io_state(&sbi->layout, &sbi->comps, &ios); if (unlikely(ret)) { EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); return ret; } - ios->cred = sbi->s_cred; - ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); @@ -339,7 +321,7 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) struct exofs_io_state *ios; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios); if (unlikely(ret)) { EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); return ret; @@ -349,7 +331,7 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); attrs[0].val_ptr = &sbi->s_ess; - ios->cred = sbi->s_cred; + ios->done = stats_done; ios->private = sbi; ios->out_attr = attrs; @@ -377,6 +359,8 @@ int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; + struct exofs_comp one_comp; + struct exofs_components comps; struct exofs_io_state *ios; int ret = -ENOMEM; @@ -393,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait) * version). Otherwise the exofs_fscb is read-only from mkfs time. All * the writeable info is set in exofs_sbi_write_stats() above. */ - ret = exofs_get_io_state(&sbi->layout, &ios); + + exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID); + + ret = exofs_get_io_state(&sbi->layout, &comps, &ios); if (unlikely(ret)) goto out; @@ -407,10 +394,8 @@ int exofs_sync_fs(struct super_block *sb, int wait) fscb->s_newfs = 0; fscb->s_version = EXOFS_FSCB_VER; - ios->obj.id = EXOFS_SUPER_ID; ios->offset = 0; ios->kern_buff = fscb; - ios->cred = sbi->s_cred; ret = exofs_sbi_write(ios); if (unlikely(ret)) @@ -446,17 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path, void exofs_free_sbi(struct exofs_sb_info *sbi) { - while (sbi->layout.s_numdevs) { - int i = --sbi->layout.s_numdevs; - struct osd_dev *od = sbi->layout.s_ods[i]; + while (sbi->comps.numdevs) { + int i = --sbi->comps.numdevs; + struct osd_dev *od = sbi->comps.ods[i]; if (od) { - sbi->layout.s_ods[i] = NULL; + sbi->comps.ods[i] = NULL; osduld_put_device(od); } } - if (sbi->layout.s_ods != sbi->_min_one_dev) - kfree(sbi->layout.s_ods); + if (sbi->comps.ods != sbi->_min_one_dev) + kfree(sbi->comps.ods); kfree(sbi); } @@ -483,8 +468,8 @@ static void exofs_put_super(struct super_block *sb) msecs_to_jiffies(100)); } - _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], - sbi->layout.s_pid); + _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0], + sbi->one_comp.obj.partition); bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); @@ -624,8 +609,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, struct osd_dev *fscb_od, unsigned table_count) { - struct osd_obj_id obj = {.partition = sbi->layout.s_pid, - .id = EXOFS_DEVTABLE_ID}; + struct exofs_comp comp; struct exofs_device_table *dt; unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + sizeof(*dt); @@ -639,8 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, return -ENOMEM; } - sbi->layout.s_numdevs = 0; - ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); + sbi->comps.numdevs = 0; + + comp.obj.partition = sbi->one_comp.obj.partition; + comp.obj.id = EXOFS_DEVTABLE_ID; + exofs_make_credential(comp.cred, &comp.obj); + + ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt, + table_bytes); if (unlikely(ret)) { EXOFS_ERR("ERROR: reading device table\n"); goto out; @@ -658,10 +648,13 @@ 
static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, goto out; if (likely(numdevs > 1)) { - unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); + unsigned size = numdevs * sizeof(sbi->comps.ods[0]); - sbi->layout.s_ods = kzalloc(size, GFP_KERNEL); - if (unlikely(!sbi->layout.s_ods)) { + /* Twice bigger table: See exofs_init_comps() and below + * comment + */ + sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL); + if (unlikely(!sbi->comps.ods)) { EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", numdevs); ret = -ENOMEM; @@ -688,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, * line. We always keep them in device-table order. */ if (fscb_od && osduld_device_same(fscb_od, &odi)) { - sbi->layout.s_ods[i] = fscb_od; - ++sbi->layout.s_numdevs; + sbi->comps.ods[i] = fscb_od; + ++sbi->comps.numdevs; fscb_od = NULL; continue; } @@ -702,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, goto out; } - sbi->layout.s_ods[i] = od; - ++sbi->layout.s_numdevs; + sbi->comps.ods[i] = od; + ++sbi->comps.numdevs; /* Read the fscb of the other devices to make sure the FS * partition is there. */ - ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, + ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb)); if (unlikely(ret)) { EXOFS_ERR("ERROR: Malformed participating device " @@ -725,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, out: kfree(dt); - if (unlikely(!ret && fscb_od)) { - EXOFS_ERR( - "ERROR: Bad device-table container device not present\n"); - osduld_put_device(fscb_od); - ret = -EINVAL; - } + if (likely(!ret)) { + unsigned numdevs = sbi->comps.numdevs; + if (unlikely(fscb_od)) { + EXOFS_ERR("ERROR: Bad device-table container device not present\n"); + osduld_put_device(fscb_od); + return -EINVAL; + } + /* exofs round-robins the device table view according to inode + * number. We hold a: twice bigger table hence inodes can point + * to any device and have a sequential view of the table + * starting at this device. 
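 *
 * (Worked example, with values assumed purely for illustration:
 *  for numdevs = 4 and mirrors_p1 = 1 the copy loop below leaves
 *      sbi->comps.ods[] = { A, B, C, D, A, B, C }
 *  i.e. the four real devices followed by copies of the first three.
 *  exofs_init_comps() computes first_dev = (oid * 1) % 4, so an inode
 *  whose object id gives first_dev == 2 gets comps->ods == &ods[2]
 *  and the sequential view { C, D, A, B }; the doubled table keeps
 *  ods[first_dev .. first_dev + numdevs - 1] in bounds without any
 *  per-IO modulo arithmetic.)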
See exofs_init_comps() + */ + for (i = 0; i < numdevs - 1; ++i) + sbi->comps.ods[i + numdevs] = sbi->comps.ods[i]; + } return ret; } @@ -745,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) struct exofs_sb_info *sbi; /*extended info */ struct osd_dev *od; /* Master device */ struct exofs_fscb fscb; /*on-disk superblock info */ - struct osd_obj_id obj; + struct exofs_comp comp; unsigned table_count; int ret; @@ -776,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sbi->layout.group_width = 1; sbi->layout.group_depth = -1; sbi->layout.group_count = 1; - sbi->layout.s_ods = sbi->_min_one_dev; - sbi->layout.s_numdevs = 1; - sbi->layout.s_pid = opts->pid; sbi->s_timeout = opts->timeout; + sbi->one_comp.obj.partition = opts->pid; + sbi->one_comp.obj.id = 0; + exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); + sbi->comps.numdevs = 1; + sbi->comps.single_comp = EC_SINGLE_COMP; + sbi->comps.comps = &sbi->one_comp; + sbi->comps.ods = sbi->_min_one_dev; + /* fill in some other data by hand */ memset(sb->s_id, 0, sizeof(sb->s_id)); strcpy(sb->s_id, "exofs"); @@ -791,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sb->s_bdev = NULL; sb->s_dev = 0; - obj.partition = sbi->layout.s_pid; - obj.id = EXOFS_SUPER_ID; - exofs_make_credential(sbi->s_cred, &obj); + comp.obj.partition = sbi->one_comp.obj.partition; + comp.obj.id = EXOFS_SUPER_ID; + exofs_make_credential(comp.cred, &comp.obj); - ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); + ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb)); if (unlikely(ret)) goto free_sbi; @@ -828,7 +835,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) if (unlikely(ret)) goto free_sbi; } else { - sbi->layout.s_ods[0] = od; + sbi->comps.ods[0] = od; } __sbi_read_stats(sbi); @@ -868,13 +875,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], - sbi->layout.s_pid); + _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0], + sbi->one_comp.obj.partition); return 0; free_sbi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", - opts->dev_name, sbi->layout.s_pid, ret); + opts->dev_name, sbi->one_comp.obj.partition, ret); exofs_free_sbi(sbi); return ret; } @@ -915,17 +922,14 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) }; uint64_t capacity = ULLONG_MAX; uint64_t used = ULLONG_MAX; - uint8_t cred_a[OSD_CAP_LEN]; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios); if (ret) { EXOFS_DBGMSG("exofs_get_io_state failed.\n"); return ret; } - exofs_make_credential(cred_a, &ios->obj); - ios->cred = sbi->s_cred; ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); -- cgit v1.1 From 8ff660ab85f524bdc7652eb5d38aaef1d66aa9c7 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sat, 6 Aug 2011 19:26:31 -0700 Subject: exofs: Rename raid engine from exofs/ios.c => ore ORE stands for "Objects Raid Engine" This patch is a mechanical rename of everything that was in ios.c and its API declaration to an ore.c and an osd_ore.h header. The ore engine will later be used by the pnfs objects layout driver. * File ios.c => ore.c * Declaration of types and API are moved from exofs.h to a new osd_ore.h * All used types are prefixed by ore_ from their exofs_ name. 
* Shift includes from exofs.h to osd_ore.h so osd_ore.h is independent, include it from exofs.h. Other than a pure rename there are no other changes. Next patch will move the ore into it's own module and will export the API to be used by exofs and later the layout driver Signed-off-by: Boaz Harrosh --- fs/exofs/Kbuild | 2 +- fs/exofs/exofs.h | 119 +------ fs/exofs/inode.c | 88 +++--- fs/exofs/ios.c | 810 ------------------------------------------------ fs/exofs/ore.c | 820 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/exofs/super.c | 56 ++-- include/scsi/osd_ore.h | 125 ++++++++ 7 files changed, 1030 insertions(+), 990 deletions(-) delete mode 100644 fs/exofs/ios.c create mode 100644 fs/exofs/ore.c create mode 100644 include/scsi/osd_ore.h diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index 2d0f757..c1af221 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild @@ -12,5 +12,5 @@ # Kbuild - Gets included from the Kernels Makefile and build system # -exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o +exofs-y := ore.o inode.o file.o symlink.o namei.o dir.o super.o obj-$(CONFIG_EXOFS_FS) += exofs.o diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 39ad501..f4e442e 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -36,7 +36,8 @@ #include #include #include -#include +#include + #include "common.h" #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) @@ -52,33 +53,6 @@ /* u64 has problems with printk this will cast it to unsigned long long */ #define _LLU(x) (unsigned long long)(x) -struct exofs_comp { - struct osd_obj_id obj; - u8 cred[OSD_CAP_LEN]; -}; - -struct exofs_layout { - /* Our way of looking at the data_map */ - unsigned stripe_unit; - unsigned mirrors_p1; - - unsigned group_width; - u64 group_depth; - unsigned group_count; -}; - -struct exofs_components { - unsigned numdevs; /* Num of devices in array */ - /* If @single_comp == EC_SINGLE_COMP, @comps points to a single - * component. else there are @numdevs components - */ - enum EC_COMP_USAGE { - EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff - } single_comp; - struct exofs_comp *comps; - struct osd_dev **ods; /* osd_dev array */ -}; - /* * our extension to the in-memory superblock */ @@ -95,9 +69,9 @@ struct exofs_sb_info { struct pnfs_osd_data_map data_map; /* Default raid to use * FIXME: Needed ? 
*/ - struct exofs_layout layout; /* Default files layout */ - struct exofs_comp one_comp; /* id & cred of partition id=0*/ - struct exofs_components comps; /* comps for the partition */ + struct ore_layout layout; /* Default files layout */ + struct ore_comp one_comp; /* id & cred of partition id=0*/ + struct ore_components comps; /* comps for the partition */ struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ }; @@ -111,8 +85,8 @@ struct exofs_i_info { uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ uint32_t i_dir_start_lookup; /* which page to start lookup */ uint64_t i_commit_size; /* the object's written length */ - struct exofs_comp one_comp; /* same component for all devices */ - struct exofs_components comps; /* inode view of the device table */ + struct ore_comp one_comp; /* same component for all devices */ + struct ore_components comps; /* inode view of the device table */ }; static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) @@ -120,53 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; } -struct exofs_io_state; -typedef void (*exofs_io_done_fn)(struct exofs_io_state *ios, void *private); - -struct exofs_io_state { - struct kref kref; - - void *private; - exofs_io_done_fn done; - - struct exofs_layout *layout; - struct exofs_components *comps; - - /* Global read/write IO*/ - loff_t offset; - unsigned long length; - void *kern_buff; - - struct page **pages; - unsigned nr_pages; - unsigned pgbase; - unsigned pages_consumed; - - /* Attributes */ - unsigned in_attr_len; - struct osd_attr *in_attr; - unsigned out_attr_len; - struct osd_attr *out_attr; - - bool reading; - - /* Variable array of size numdevs */ - unsigned numdevs; - struct exofs_per_dev_state { - struct osd_request *or; - struct bio *bio; - loff_t offset; - unsigned length; - unsigned dev; - } per_dev[]; -}; - -static inline unsigned exofs_io_state_size(unsigned numdevs) -{ - return sizeof(struct exofs_io_state) + - sizeof(struct exofs_per_dev_state) * numdevs; -} - /* * our inode flags */ @@ -219,30 +146,8 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) * function declarations * *************************/ -/* ios.c */ -int exofs_get_rw_state(struct exofs_layout *layout, - struct exofs_components *comps, - bool is_reading, u64 offset, u64 length, - struct exofs_io_state **ios); -int exofs_get_io_state(struct exofs_layout *layout, - struct exofs_components *comps, - struct exofs_io_state **ios); -void exofs_put_io_state(struct exofs_io_state *ios); - -int exofs_check_io(struct exofs_io_state *ios, u64 *resid); - -int exofs_sbi_create(struct exofs_io_state *ios); -int exofs_sbi_remove(struct exofs_io_state *ios); -int exofs_sbi_write(struct exofs_io_state *ios); -int exofs_sbi_read(struct exofs_io_state *ios); -int exofs_truncate(struct exofs_layout *layout, struct exofs_components *comps, - u64 size); - -int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); -extern const struct osd_attr g_attr_logical_length; - /* inode.c */ -unsigned exofs_max_io_pages(struct exofs_layout *layout, +unsigned exofs_max_io_pages(struct ore_layout *layout, unsigned expected_pages); int exofs_setattr(struct dentry *, struct iattr *); int exofs_write_begin(struct file *file, struct address_space *mapping, @@ -292,8 +197,8 @@ extern const struct inode_operations exofs_special_inode_operations; extern const struct inode_operations exofs_symlink_inode_operations; extern const struct inode_operations 
exofs_fast_symlink_inode_operations; -/* exofs_init_comps will initialize an exofs_components device array - * pointing to a single exofs_comp struct, and a round-robin view +/* exofs_init_comps will initialize an ore_components device array + * pointing to a single ore_comp struct, and a round-robin view * of the device table. * The first device of each inode is the [inode->ino % num_devices] * and the rest of the devices sequentially following where the @@ -302,8 +207,8 @@ extern const struct inode_operations exofs_fast_symlink_inode_operations; * bigger and that the device table repeats twice. * See: exofs_read_lookup_dev_table() */ -static inline void exofs_init_comps(struct exofs_components *comps, - struct exofs_comp *one_comp, +static inline void exofs_init_comps(struct ore_components *comps, + struct ore_comp *one_comp, struct exofs_sb_info *sbi, osd_id oid) { unsigned dev_mod = (unsigned)oid, first_dev; diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 3cc24f2..f39a38f 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), }; -unsigned exofs_max_io_pages(struct exofs_layout *layout, +unsigned exofs_max_io_pages(struct ore_layout *layout, unsigned expected_pages) { unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); @@ -58,7 +58,7 @@ struct page_collect { struct exofs_sb_info *sbi; struct inode *inode; unsigned expected_pages; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct page **pages; unsigned alloc_pages; @@ -133,7 +133,7 @@ static void pcol_free(struct page_collect *pcol) pcol->pages = NULL; if (pcol->ios) { - exofs_put_io_state(pcol->ios); + ore_put_io_state(pcol->ios); pcol->ios = NULL; } } @@ -193,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol) u64 resid; u64 good_bytes; u64 length = 0; - int ret = exofs_check_io(pcol->ios, &resid); + int ret = ore_check_io(pcol->ios, &resid); if (likely(!ret)) good_bytes = pcol->length; @@ -234,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol) } /* callback of async reads */ -static void readpages_done(struct exofs_io_state *ios, void *p) +static void readpages_done(struct ore_io_state *ios, void *p) { struct page_collect *pcol = p; @@ -262,7 +262,7 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) static int read_exec(struct page_collect *pcol) { struct exofs_i_info *oi = exofs_i(pcol->inode); - struct exofs_io_state *ios; + struct ore_io_state *ios; struct page_collect *pcol_copy = NULL; int ret; @@ -270,7 +270,7 @@ static int read_exec(struct page_collect *pcol) return 0; if (!pcol->ios) { - int ret = exofs_get_rw_state(&pcol->sbi->layout, &oi->comps, true, + int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true, pcol->pg_first << PAGE_CACHE_SHIFT, pcol->length, &pcol->ios); @@ -283,7 +283,7 @@ static int read_exec(struct page_collect *pcol) ios->nr_pages = pcol->nr_pages; if (pcol->read_4_write) { - exofs_sbi_read(pcol->ios); + ore_read(pcol->ios); return __readpages_done(pcol); } @@ -296,7 +296,7 @@ static int read_exec(struct page_collect *pcol) *pcol_copy = *pcol; ios->done = readpages_done; ios->private = pcol_copy; - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) goto err; @@ -458,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page) } /* Callback for osd_write. 
All writes are asynchronous */ -static void writepages_done(struct exofs_io_state *ios, void *p) +static void writepages_done(struct ore_io_state *ios, void *p) { struct page_collect *pcol = p; int i; u64 resid; u64 good_bytes; u64 length = 0; - int ret = exofs_check_io(ios, &resid); + int ret = ore_check_io(ios, &resid); atomic_dec(&pcol->sbi->s_curr_pending); @@ -508,7 +508,7 @@ static void writepages_done(struct exofs_io_state *ios, void *p) static int write_exec(struct page_collect *pcol) { struct exofs_i_info *oi = exofs_i(pcol->inode); - struct exofs_io_state *ios; + struct ore_io_state *ios; struct page_collect *pcol_copy = NULL; int ret; @@ -516,7 +516,7 @@ static int write_exec(struct page_collect *pcol) return 0; BUG_ON(pcol->ios); - ret = exofs_get_rw_state(&pcol->sbi->layout, &oi->comps, false, + ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false, pcol->pg_first << PAGE_CACHE_SHIFT, pcol->length, &pcol->ios); @@ -538,9 +538,9 @@ static int write_exec(struct page_collect *pcol) ios->done = writepages_done; ios->private = pcol_copy; - ret = exofs_sbi_write(ios); + ret = ore_write(ios); if (unlikely(ret)) { - EXOFS_ERR("write_exec: exofs_sbi_write() Failed\n"); + EXOFS_ERR("write_exec: ore_write() Failed\n"); goto err; } @@ -860,7 +860,7 @@ static int _do_truncate(struct inode *inode, loff_t newsize) inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ret = exofs_truncate(&sbi->layout, &oi->comps, (u64)newsize); + ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize); if (likely(!ret)) truncate_setsize(inode, newsize); @@ -923,13 +923,13 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, [1] = g_attr_inode_file_layout, [2] = g_attr_inode_dir_layout, }; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct exofs_on_disk_inode_layout *layout; int ret; - ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); return ret; } @@ -939,7 +939,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) { EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", _LLU(oi->one_comp.obj.id), ret); @@ -992,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, } out: - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } @@ -1111,15 +1111,15 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) * set the obj_created flag so that other methods know that the object exists on * the OSD. 
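 *
 * (For orientation, a summary sketch of the io_state completion
 *  contract implemented by ore_io_execute(), not text from the patch:
 *  leaving ios->done == NULL makes ore_create/ore_read/ore_write
 *  synchronous, i.e. submission waits on an on-stack completion and
 *  returns ore_check_io(); setting a callback, as in
 *
 *      ios->done = create_done;    <- runs once all devices finish
 *      ios->private = inode;       <- handed back as the 2nd argument
 *      ret = ore_create(ios);      <- returns right after submission
 *
 *  makes the call asynchronous, and the callback is then expected to
 *  check the result and release the io_state, as create_done() below
 *  does with ore_check_io() and ore_put_io_state().)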
*/ -static void create_done(struct exofs_io_state *ios, void *p) +static void create_done(struct ore_io_state *ios, void *p) { struct inode *inode = p; struct exofs_i_info *oi = exofs_i(inode); struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; int ret; - ret = exofs_check_io(ios, NULL); - exofs_put_io_state(ios); + ret = ore_check_io(ios, NULL); + ore_put_io_state(ios); atomic_dec(&sbi->s_curr_pending); @@ -1149,7 +1149,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) struct exofs_sb_info *sbi = sb->s_fs_info; struct inode *inode; struct exofs_i_info *oi; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; inode = new_inode(sb); @@ -1178,18 +1178,18 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) mark_inode_dirty(inode); - ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); + EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); return ERR_PTR(ret); } ios->done = create_done; ios->private = inode; - ret = exofs_sbi_create(ios); + ret = ore_create(ios); if (ret) { - exofs_put_io_state(ios); + ore_put_io_state(ios); return ERR_PTR(ret); } atomic_inc(&sbi->s_curr_pending); @@ -1208,11 +1208,11 @@ struct updatei_args { /* * Callback function from exofs_update_inode(). */ -static void updatei_done(struct exofs_io_state *ios, void *p) +static void updatei_done(struct ore_io_state *ios, void *p) { struct updatei_args *args = p; - exofs_put_io_state(ios); + ore_put_io_state(ios); atomic_dec(&args->sbi->s_curr_pending); @@ -1228,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) struct exofs_i_info *oi = exofs_i(inode); struct super_block *sb = inode->i_sb; struct exofs_sb_info *sbi = sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct osd_attr attr; struct exofs_fcb *fcb; struct updatei_args *args; @@ -1267,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync) } else memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); - ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); goto free_args; } @@ -1286,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync) ios->private = args; } - ret = exofs_sbi_write(ios); + ret = ore_write(ios); if (!do_sync && !ret) { atomic_inc(&sbi->s_curr_pending); goto out; /* deallocation in updatei_done */ } - exofs_put_io_state(ios); + ore_put_io_state(ios); free_args: kfree(args); out: @@ -1311,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) * Callback function from exofs_delete_inode() - don't have much cleaning up to * do. 
*/ -static void delete_done(struct exofs_io_state *ios, void *p) +static void delete_done(struct ore_io_state *ios, void *p) { struct exofs_sb_info *sbi = p; - exofs_put_io_state(ios); + ore_put_io_state(ios); atomic_dec(&sbi->s_curr_pending); } @@ -1330,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode) struct exofs_i_info *oi = exofs_i(inode); struct super_block *sb = inode->i_sb; struct exofs_sb_info *sbi = sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; truncate_inode_pages(&inode->i_data, 0); @@ -1350,19 +1350,19 @@ void exofs_evict_inode(struct inode *inode) /* ignore the error, attempt a remove anyway */ /* Now Remove the OSD objects */ - ret = exofs_get_io_state(&sbi->layout, &oi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); return; } ios->done = delete_done; ios->private = sbi; - ret = exofs_sbi_remove(ios); + ret = ore_remove(ios); if (ret) { - EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); - exofs_put_io_state(ios); + EXOFS_ERR("%s: ore_remove failed\n", __func__); + ore_put_io_state(ios); return; } atomic_inc(&sbi->s_curr_pending); diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c deleted file mode 100644 index f9d5c5a..0000000 --- a/fs/exofs/ios.c +++ /dev/null @@ -1,810 +0,0 @@ -/* - * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) - * Copyright (C) 2008, 2009 - * Boaz Harrosh - * - * This file is part of exofs. - * - * exofs is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation. Since it is based on ext2, and the only - * valid version of GPL for the Linux kernel is version 2, the only valid - * version of GPL for exofs is version 2. - * - * exofs is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with exofs; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include - -#include "exofs.h" - -#define EXOFS_DBGMSG2(M...) 
do {} while (0) -/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */ - -static u8 *_ios_cred(struct exofs_io_state *ios, unsigned index) -{ - return ios->comps->comps[index & ios->comps->single_comp].cred; -} - -static struct osd_obj_id *_ios_obj(struct exofs_io_state *ios, unsigned index) -{ - return &ios->comps->comps[index & ios->comps->single_comp].obj; -} - -static struct osd_dev *_ios_od(struct exofs_io_state *ios, unsigned index) -{ - return ios->comps->ods[index]; -} - -int exofs_get_rw_state(struct exofs_layout *layout, - struct exofs_components *comps, - bool is_reading, u64 offset, u64 length, - struct exofs_io_state **pios) -{ - struct exofs_io_state *ios; - - /*TODO: Maybe use kmem_cach per sbi of size - * exofs_io_state_size(layout->s_numdevs) - */ - ios = kzalloc(exofs_io_state_size(comps->numdevs), GFP_KERNEL); - if (unlikely(!ios)) { - EXOFS_DBGMSG("Failed kzalloc bytes=%d\n", - exofs_io_state_size(comps->numdevs)); - *pios = NULL; - return -ENOMEM; - } - - ios->layout = layout; - ios->comps = comps; - ios->offset = offset; - ios->length = length; - ios->reading = is_reading; - - *pios = ios; - return 0; -} - -int exofs_get_io_state(struct exofs_layout *layout, - struct exofs_components *comps, - struct exofs_io_state **ios) -{ - return exofs_get_rw_state(layout, comps, true, 0, 0, ios); -} - -void exofs_put_io_state(struct exofs_io_state *ios) -{ - if (ios) { - unsigned i; - - for (i = 0; i < ios->numdevs; i++) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; - - if (per_dev->or) - osd_end_request(per_dev->or); - if (per_dev->bio) - bio_put(per_dev->bio); - } - - kfree(ios); - } -} - -static void _sync_done(struct exofs_io_state *ios, void *p) -{ - struct completion *waiting = p; - - complete(waiting); -} - -static void _last_io(struct kref *kref) -{ - struct exofs_io_state *ios = container_of( - kref, struct exofs_io_state, kref); - - ios->done(ios, ios->private); -} - -static void _done_io(struct osd_request *or, void *p) -{ - struct exofs_io_state *ios = p; - - kref_put(&ios->kref, _last_io); -} - -static int exofs_io_execute(struct exofs_io_state *ios) -{ - DECLARE_COMPLETION_ONSTACK(wait); - bool sync = (ios->done == NULL); - int i, ret; - - if (sync) { - ios->done = _sync_done; - ios->private = &wait; - } - - for (i = 0; i < ios->numdevs; i++) { - struct osd_request *or = ios->per_dev[i].or; - if (unlikely(!or)) - continue; - - ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); - if (unlikely(ret)) { - EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", - ret); - return ret; - } - } - - kref_init(&ios->kref); - - for (i = 0; i < ios->numdevs; i++) { - struct osd_request *or = ios->per_dev[i].or; - if (unlikely(!or)) - continue; - - kref_get(&ios->kref); - osd_execute_request_async(or, _done_io, ios); - } - - kref_put(&ios->kref, _last_io); - ret = 0; - - if (sync) { - wait_for_completion(&wait); - ret = exofs_check_io(ios, NULL); - } - return ret; -} - -static void _clear_bio(struct bio *bio) -{ - struct bio_vec *bv; - unsigned i; - - __bio_for_each_segment(bv, bio, i, 0) { - unsigned this_count = bv->bv_len; - - if (likely(PAGE_SIZE == this_count)) - clear_highpage(bv->bv_page); - else - zero_user(bv->bv_page, bv->bv_offset, this_count); - } -} - -int exofs_check_io(struct exofs_io_state *ios, u64 *resid) -{ - enum osd_err_priority acumulated_osd_err = 0; - int acumulated_lin_err = 0; - int i; - - for (i = 0; i < ios->numdevs; i++) { - struct osd_sense_info osi; - struct osd_request *or = ios->per_dev[i].or; - int ret; - - if (unlikely(!or)) - 
continue; - - ret = osd_req_decode_sense(or, &osi); - if (likely(!ret)) - continue; - - if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { - /* start read offset passed endof file */ - _clear_bio(ios->per_dev[i].bio); - EXOFS_DBGMSG("start read offset passed end of file " - "offset=0x%llx, length=0x%llx\n", - _LLU(ios->per_dev[i].offset), - _LLU(ios->per_dev[i].length)); - - continue; /* we recovered */ - } - - if (osi.osd_err_pri >= acumulated_osd_err) { - acumulated_osd_err = osi.osd_err_pri; - acumulated_lin_err = ret; - } - } - - /* TODO: raid specific residual calculations */ - if (resid) { - if (likely(!acumulated_lin_err)) - *resid = 0; - else - *resid = ios->length; - } - - return acumulated_lin_err; -} - -/* - * L - logical offset into the file - * - * U - The number of bytes in a stripe within a group - * - * U = stripe_unit * group_width - * - * T - The number of bytes striped within a group of component objects - * (before advancing to the next group) - * - * T = stripe_unit * group_width * group_depth - * - * S - The number of bytes striped across all component objects - * before the pattern repeats - * - * S = stripe_unit * group_width * group_depth * group_count - * - * M - The "major" (i.e., across all components) stripe number - * - * M = L / S - * - * G - Counts the groups from the beginning of the major stripe - * - * G = (L - (M * S)) / T [or (L % S) / T] - * - * H - The byte offset within the group - * - * H = (L - (M * S)) % T [or (L % S) % T] - * - * N - The "minor" (i.e., across the group) stripe number - * - * N = H / U - * - * C - The component index coresponding to L - * - * C = (H - (N * U)) / stripe_unit + G * group_width - * [or (L % U) / stripe_unit + G * group_width] - * - * O - The component offset coresponding to L - * - * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit - */ -struct _striping_info { - u64 obj_offset; - u64 group_length; - u64 M; /* for truncate */ - unsigned dev; - unsigned unit_off; -}; - -static void _calc_stripe_info(struct exofs_layout *layout, u64 file_offset, - struct _striping_info *si) -{ - u32 stripe_unit = layout->stripe_unit; - u32 group_width = layout->group_width; - u64 group_depth = layout->group_depth; - - u32 U = stripe_unit * group_width; - u64 T = U * group_depth; - u64 S = T * layout->group_count; - u64 M = div64_u64(file_offset, S); - - /* - G = (L - (M * S)) / T - H = (L - (M * S)) % T - */ - u64 LmodS = file_offset - M * S; - u32 G = div64_u64(LmodS, T); - u64 H = LmodS - G * T; - - u32 N = div_u64(H, U); - - /* "H - (N * U)" is just "H % U" so it's bound to u32 */ - si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; - si->dev *= layout->mirrors_p1; - - div_u64_rem(file_offset, stripe_unit, &si->unit_off); - - si->obj_offset = si->unit_off + (N * stripe_unit) + - (M * group_depth * stripe_unit); - - si->group_length = T - H; - si->M = M; -} - -static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, - unsigned pgbase, struct exofs_per_dev_state *per_dev, - int cur_len) -{ - unsigned pg = *cur_pg; - struct request_queue *q = - osd_request_queue(_ios_od(ios, per_dev->dev)); - - per_dev->length += cur_len; - - if (per_dev->bio == NULL) { - unsigned pages_in_stripe = ios->layout->group_width * - (ios->layout->stripe_unit / PAGE_SIZE); - unsigned bio_size = (ios->nr_pages + pages_in_stripe) / - ios->layout->group_width; - - per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); - if (unlikely(!per_dev->bio)) { - EXOFS_DBGMSG("Failed to allocate BIO size=%u\n", - bio_size); - 
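/*
 * (Worked instance of the striping formulas in the comment above,
 *  with values assumed purely for illustration: stripe_unit = 64K,
 *  group_width = 4, group_depth = 2, group_count = 1, mirrors_p1 = 1
 *  and a file offset L = 640K.
 *
 *      U = 64K * 4              = 256K
 *      T = 256K * 2             = 512K
 *      S = 512K * 1             = 512K
 *      M = 640K / 512K          = 1
 *      H = 640K % 512K          = 128K,  G = 128K / 512K = 0
 *      N = 128K / 256K          = 0
 *      C = 128K / 64K + 0 * 4   = 2
 *      O = 0 + 0 + 1 * 2 * 64K  = 128K
 *
 *  So file byte 640K lands on component 2 (si->dev = 2 * mirrors_p1)
 *  at object offset 128K, and _calc_stripe_info() reports
 *  group_length = T - H = 384K remaining in the current group.)
 */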
return -ENOMEM; - } - } - - while (cur_len > 0) { - unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); - unsigned added_len; - - BUG_ON(ios->nr_pages <= pg); - cur_len -= pglen; - - added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], - pglen, pgbase); - if (unlikely(pglen != added_len)) - return -ENOMEM; - pgbase = 0; - ++pg; - } - BUG_ON(cur_len); - - *cur_pg = pg; - return 0; -} - -static int _prepare_one_group(struct exofs_io_state *ios, u64 length, - struct _striping_info *si) -{ - unsigned stripe_unit = ios->layout->stripe_unit; - unsigned mirrors_p1 = ios->layout->mirrors_p1; - unsigned devs_in_group = ios->layout->group_width * mirrors_p1; - unsigned dev = si->dev; - unsigned first_dev = dev - (dev % devs_in_group); - unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; - unsigned cur_pg = ios->pages_consumed; - int ret = 0; - - while (length) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; - unsigned cur_len, page_off = 0; - - if (!per_dev->length) { - per_dev->dev = dev; - if (dev < si->dev) { - per_dev->offset = si->obj_offset + stripe_unit - - si->unit_off; - cur_len = stripe_unit; - } else if (dev == si->dev) { - per_dev->offset = si->obj_offset; - cur_len = stripe_unit - si->unit_off; - page_off = si->unit_off & ~PAGE_MASK; - BUG_ON(page_off && (page_off != ios->pgbase)); - } else { /* dev > si->dev */ - per_dev->offset = si->obj_offset - si->unit_off; - cur_len = stripe_unit; - } - - if (max_comp < dev) - max_comp = dev; - } else { - cur_len = stripe_unit; - } - if (cur_len >= length) - cur_len = length; - - ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, - cur_len); - if (unlikely(ret)) - goto out; - - dev += mirrors_p1; - dev = (dev % devs_in_group) + first_dev; - - length -= cur_len; - } -out: - ios->numdevs = max_comp + mirrors_p1; - ios->pages_consumed = cur_pg; - return ret; -} - -static int _prepare_for_striping(struct exofs_io_state *ios) -{ - u64 length = ios->length; - u64 offset = ios->offset; - struct _striping_info si; - int ret = 0; - - if (!ios->pages) { - if (ios->kern_buff) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; - - _calc_stripe_info(ios->layout, ios->offset, &si); - per_dev->offset = si.obj_offset; - per_dev->dev = si.dev; - - /* no cross device without page array */ - BUG_ON((ios->layout->group_width > 1) && - (si.unit_off + ios->length > - ios->layout->stripe_unit)); - } - ios->numdevs = ios->layout->mirrors_p1; - return 0; - } - - while (length) { - _calc_stripe_info(ios->layout, offset, &si); - - if (length < si.group_length) - si.group_length = length; - - ret = _prepare_one_group(ios, si.group_length, &si); - if (unlikely(ret)) - goto out; - - offset += si.group_length; - length -= si.group_length; - } - -out: - return ret; -} - -int exofs_sbi_create(struct exofs_io_state *ios) -{ - int i, ret; - - for (i = 0; i < ios->comps->numdevs; i++) { - struct osd_request *or; - - or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - ret = -ENOMEM; - goto out; - } - ios->per_dev[i].or = or; - ios->numdevs++; - - osd_req_create_object(or, _ios_obj(ios, i)); - } - ret = exofs_io_execute(ios); - -out: - return ret; -} - -int exofs_sbi_remove(struct exofs_io_state *ios) -{ - int i, ret; - - for (i = 0; i < ios->comps->numdevs; i++) { - struct osd_request *or; - - or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - ret = -ENOMEM; 
- goto out; - } - ios->per_dev[i].or = or; - ios->numdevs++; - - osd_req_remove_object(or, _ios_obj(ios, i)); - } - ret = exofs_io_execute(ios); - -out: - return ret; -} - -static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) -{ - struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp]; - unsigned dev = ios->per_dev[cur_comp].dev; - unsigned last_comp = cur_comp + ios->layout->mirrors_p1; - int ret = 0; - - if (ios->pages && !master_dev->length) - return 0; /* Just an empty slot */ - - for (; cur_comp < last_comp; ++cur_comp, ++dev) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; - struct osd_request *or; - - or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - ret = -ENOMEM; - goto out; - } - per_dev->or = or; - per_dev->offset = master_dev->offset; - - if (ios->pages) { - struct bio *bio; - - if (per_dev != master_dev) { - bio = bio_kmalloc(GFP_KERNEL, - master_dev->bio->bi_max_vecs); - if (unlikely(!bio)) { - EXOFS_DBGMSG( - "Failed to allocate BIO size=%u\n", - master_dev->bio->bi_max_vecs); - ret = -ENOMEM; - goto out; - } - - __bio_clone(bio, master_dev->bio); - bio->bi_bdev = NULL; - bio->bi_next = NULL; - per_dev->length = master_dev->length; - per_dev->bio = bio; - per_dev->dev = dev; - } else { - bio = master_dev->bio; - /* FIXME: bio_set_dir() */ - bio->bi_rw |= REQ_WRITE; - } - - osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, - bio, per_dev->length); - EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " - "length=0x%llx dev=%d\n", - _LLU(_ios_obj(ios, dev)->id), - _LLU(per_dev->offset), - _LLU(per_dev->length), dev); - } else if (ios->kern_buff) { - ret = osd_req_write_kern(or, _ios_obj(ios, dev), - per_dev->offset, - ios->kern_buff, ios->length); - if (unlikely(ret)) - goto out; - EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " - "length=0x%llx dev=%d\n", - _LLU(_ios_obj(ios, dev)->id), - _LLU(per_dev->offset), - _LLU(ios->length), dev); - } else { - osd_req_set_attributes(or, _ios_obj(ios, dev)); - EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", - _LLU(_ios_obj(ios, dev)->id), - ios->out_attr_len, dev); - } - - if (ios->out_attr) - osd_req_add_set_attr_list(or, ios->out_attr, - ios->out_attr_len); - - if (ios->in_attr) - osd_req_add_get_attr_list(or, ios->in_attr, - ios->in_attr_len); - } - -out: - return ret; -} - -int exofs_sbi_write(struct exofs_io_state *ios) -{ - int i; - int ret; - - ret = _prepare_for_striping(ios); - if (unlikely(ret)) - return ret; - - for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { - ret = _sbi_write_mirror(ios, i); - if (unlikely(ret)) - return ret; - } - - ret = exofs_io_execute(ios); - return ret; -} - -static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) -{ - struct osd_request *or; - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; - struct osd_obj_id *obj = _ios_obj(ios, cur_comp); - unsigned first_dev = (unsigned)obj->id; - - if (ios->pages && !per_dev->length) - return 0; /* Just an empty slot */ - - first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; - or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - return -ENOMEM; - } - per_dev->or = or; - - if (ios->pages) { - osd_req_read(or, obj, per_dev->offset, - per_dev->bio, per_dev->length); - EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" - " dev=%d\n", _LLU(obj->id), - _LLU(per_dev->offset), 
_LLU(per_dev->length), - first_dev); - } else if (ios->kern_buff) { - int ret = osd_req_read_kern(or, obj, per_dev->offset, - ios->kern_buff, ios->length); - EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " - "length=0x%llx dev=%d ret=>%d\n", - _LLU(obj->id), _LLU(per_dev->offset), - _LLU(ios->length), first_dev, ret); - if (unlikely(ret)) - return ret; - } else { - osd_req_get_attributes(or, obj); - EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", - _LLU(obj->id), - ios->in_attr_len, first_dev); - } - if (ios->out_attr) - osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); - - if (ios->in_attr) - osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len); - - return 0; -} - -int exofs_sbi_read(struct exofs_io_state *ios) -{ - int i; - int ret; - - ret = _prepare_for_striping(ios); - if (unlikely(ret)) - return ret; - - for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { - ret = _sbi_read_mirror(ios, i); - if (unlikely(ret)) - return ret; - } - - ret = exofs_io_execute(ios); - return ret; -} - -int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) -{ - struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ - void *iter = NULL; - int nelem; - - do { - nelem = 1; - osd_req_decode_get_attr_list(ios->per_dev[0].or, - &cur_attr, &nelem, &iter); - if ((cur_attr.attr_page == attr->attr_page) && - (cur_attr.attr_id == attr->attr_id)) { - attr->len = cur_attr.len; - attr->val_ptr = cur_attr.val_ptr; - return 0; - } - } while (iter); - - return -EIO; -} - -static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, - struct osd_attr *attr) -{ - int last_comp = cur_comp + ios->layout->mirrors_p1; - - for (; cur_comp < last_comp; ++cur_comp) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; - struct osd_request *or; - - or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - return -ENOMEM; - } - per_dev->or = or; - - osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); - osd_req_add_set_attr_list(or, attr, 1); - } - - return 0; -} - -struct _trunc_info { - struct _striping_info si; - u64 prev_group_obj_off; - u64 next_group_obj_off; - - unsigned first_group_dev; - unsigned nex_group_dev; - unsigned max_devs; -}; - -void _calc_trunk_info(struct exofs_layout *layout, u64 file_offset, - struct _trunc_info *ti) -{ - unsigned stripe_unit = layout->stripe_unit; - - _calc_stripe_info(layout, file_offset, &ti->si); - - ti->prev_group_obj_off = ti->si.M * stripe_unit; - ti->next_group_obj_off = ti->si.M ? 
(ti->si.M - 1) * stripe_unit : 0; - - ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); - ti->nex_group_dev = ti->first_group_dev + layout->group_width; - ti->max_devs = layout->group_width * layout->group_count; -} - -int exofs_truncate(struct exofs_layout *layout, struct exofs_components *comps, - u64 size) -{ - struct exofs_io_state *ios; - struct exofs_trunc_attr { - struct osd_attr attr; - __be64 newsize; - } *size_attrs; - struct _trunc_info ti; - int i, ret; - - ret = exofs_get_io_state(layout, comps, &ios); - if (unlikely(ret)) - return ret; - - _calc_trunk_info(ios->layout, size, &ti); - - size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), - GFP_KERNEL); - if (unlikely(!size_attrs)) { - ret = -ENOMEM; - goto out; - } - - ios->numdevs = ios->comps->numdevs; - - for (i = 0; i < ti.max_devs; ++i) { - struct exofs_trunc_attr *size_attr = &size_attrs[i]; - u64 obj_size; - - if (i < ti.first_group_dev) - obj_size = ti.prev_group_obj_off; - else if (i >= ti.nex_group_dev) - obj_size = ti.next_group_obj_off; - else if (i < ti.si.dev) /* dev within this group */ - obj_size = ti.si.obj_offset + - ios->layout->stripe_unit - ti.si.unit_off; - else if (i == ti.si.dev) - obj_size = ti.si.obj_offset; - else /* i > ti.dev */ - obj_size = ti.si.obj_offset - ti.si.unit_off; - - size_attr->newsize = cpu_to_be64(obj_size); - size_attr->attr = g_attr_logical_length; - size_attr->attr.val_ptr = &size_attr->newsize; - - EXOFS_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", - _LLU(comps->comps->obj.id), _LLU(obj_size), i); - ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, - &size_attr->attr); - if (unlikely(ret)) - goto out; - } - ret = exofs_io_execute(ios); - -out: - kfree(size_attrs); - exofs_put_io_state(ios); - return ret; -} - -const struct osd_attr g_attr_logical_length = ATTR_DEF( - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c new file mode 100644 index 0000000..3a00008 --- /dev/null +++ b/fs/exofs/ore.c @@ -0,0 +1,820 @@ +/* + * Copyright (C) 2005, 2006 + * Avishay Traeger (avishay@gmail.com) + * Copyright (C) 2008, 2009 + * Boaz Harrosh + * + * This file is part of exofs. + * + * exofs is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. Since it is based on ext2, and the only + * valid version of GPL for the Linux kernel is version 2, the only valid + * version of GPL for exofs is version 2. + * + * exofs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with exofs; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +#include + +#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a) + +#ifdef CONFIG_EXOFS_DEBUG +#define ORE_DBGMSG(fmt, a...) \ + printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a) +#else +#define ORE_DBGMSG(fmt, a...) \ + do { if (0) printk(fmt, ##a); } while (0) +#endif + +/* u64 has problems with printk this will cast it to unsigned long long */ +#define _LLU(x) (unsigned long long)(x) + +#define ORE_DBGMSG2(M...) 
do {} while (0) +/* #define ORE_DBGMSG2 ORE_DBGMSG */ + +static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) +{ + return ios->comps->comps[index & ios->comps->single_comp].cred; +} + +static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) +{ + return &ios->comps->comps[index & ios->comps->single_comp].obj; +} + +static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) +{ + return ios->comps->ods[index]; +} + +int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, + bool is_reading, u64 offset, u64 length, + struct ore_io_state **pios) +{ + struct ore_io_state *ios; + + /*TODO: Maybe use kmem_cach per sbi of size + * exofs_io_state_size(layout->s_numdevs) + */ + ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL); + if (unlikely(!ios)) { + ORE_DBGMSG("Failed kzalloc bytes=%d\n", + ore_io_state_size(comps->numdevs)); + *pios = NULL; + return -ENOMEM; + } + + ios->layout = layout; + ios->comps = comps; + ios->offset = offset; + ios->length = length; + ios->reading = is_reading; + + *pios = ios; + return 0; +} + +int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, + struct ore_io_state **ios) +{ + return ore_get_rw_state(layout, comps, true, 0, 0, ios); +} + +void ore_put_io_state(struct ore_io_state *ios) +{ + if (ios) { + unsigned i; + + for (i = 0; i < ios->numdevs; i++) { + struct ore_per_dev_state *per_dev = &ios->per_dev[i]; + + if (per_dev->or) + osd_end_request(per_dev->or); + if (per_dev->bio) + bio_put(per_dev->bio); + } + + kfree(ios); + } +} + +static void _sync_done(struct ore_io_state *ios, void *p) +{ + struct completion *waiting = p; + + complete(waiting); +} + +static void _last_io(struct kref *kref) +{ + struct ore_io_state *ios = container_of( + kref, struct ore_io_state, kref); + + ios->done(ios, ios->private); +} + +static void _done_io(struct osd_request *or, void *p) +{ + struct ore_io_state *ios = p; + + kref_put(&ios->kref, _last_io); +} + +static int ore_io_execute(struct ore_io_state *ios) +{ + DECLARE_COMPLETION_ONSTACK(wait); + bool sync = (ios->done == NULL); + int i, ret; + + if (sync) { + ios->done = _sync_done; + ios->private = &wait; + } + + for (i = 0; i < ios->numdevs; i++) { + struct osd_request *or = ios->per_dev[i].or; + if (unlikely(!or)) + continue; + + ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); + if (unlikely(ret)) { + ORE_DBGMSG("Failed to osd_finalize_request() => %d\n", + ret); + return ret; + } + } + + kref_init(&ios->kref); + + for (i = 0; i < ios->numdevs; i++) { + struct osd_request *or = ios->per_dev[i].or; + if (unlikely(!or)) + continue; + + kref_get(&ios->kref); + osd_execute_request_async(or, _done_io, ios); + } + + kref_put(&ios->kref, _last_io); + ret = 0; + + if (sync) { + wait_for_completion(&wait); + ret = ore_check_io(ios, NULL); + } + return ret; +} + +static void _clear_bio(struct bio *bio) +{ + struct bio_vec *bv; + unsigned i; + + __bio_for_each_segment(bv, bio, i, 0) { + unsigned this_count = bv->bv_len; + + if (likely(PAGE_SIZE == this_count)) + clear_highpage(bv->bv_page); + else + zero_user(bv->bv_page, bv->bv_offset, this_count); + } +} + +int ore_check_io(struct ore_io_state *ios, u64 *resid) +{ + enum osd_err_priority acumulated_osd_err = 0; + int acumulated_lin_err = 0; + int i; + + for (i = 0; i < ios->numdevs; i++) { + struct osd_sense_info osi; + struct osd_request *or = ios->per_dev[i].or; + int ret; + + if (unlikely(!or)) + continue; + + ret = osd_req_decode_sense(or, &osi); + if (likely(!ret)) + 
continue; + + if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { + /* start read offset passed endof file */ + _clear_bio(ios->per_dev[i].bio); + ORE_DBGMSG("start read offset passed end of file " + "offset=0x%llx, length=0x%llx\n", + _LLU(ios->per_dev[i].offset), + _LLU(ios->per_dev[i].length)); + + continue; /* we recovered */ + } + + if (osi.osd_err_pri >= acumulated_osd_err) { + acumulated_osd_err = osi.osd_err_pri; + acumulated_lin_err = ret; + } + } + + /* TODO: raid specific residual calculations */ + if (resid) { + if (likely(!acumulated_lin_err)) + *resid = 0; + else + *resid = ios->length; + } + + return acumulated_lin_err; +} + +/* + * L - logical offset into the file + * + * U - The number of bytes in a stripe within a group + * + * U = stripe_unit * group_width + * + * T - The number of bytes striped within a group of component objects + * (before advancing to the next group) + * + * T = stripe_unit * group_width * group_depth + * + * S - The number of bytes striped across all component objects + * before the pattern repeats + * + * S = stripe_unit * group_width * group_depth * group_count + * + * M - The "major" (i.e., across all components) stripe number + * + * M = L / S + * + * G - Counts the groups from the beginning of the major stripe + * + * G = (L - (M * S)) / T [or (L % S) / T] + * + * H - The byte offset within the group + * + * H = (L - (M * S)) % T [or (L % S) % T] + * + * N - The "minor" (i.e., across the group) stripe number + * + * N = H / U + * + * C - The component index coresponding to L + * + * C = (H - (N * U)) / stripe_unit + G * group_width + * [or (L % U) / stripe_unit + G * group_width] + * + * O - The component offset coresponding to L + * + * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit + */ +struct _striping_info { + u64 obj_offset; + u64 group_length; + u64 M; /* for truncate */ + unsigned dev; + unsigned unit_off; +}; + +static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset, + struct _striping_info *si) +{ + u32 stripe_unit = layout->stripe_unit; + u32 group_width = layout->group_width; + u64 group_depth = layout->group_depth; + + u32 U = stripe_unit * group_width; + u64 T = U * group_depth; + u64 S = T * layout->group_count; + u64 M = div64_u64(file_offset, S); + + /* + G = (L - (M * S)) / T + H = (L - (M * S)) % T + */ + u64 LmodS = file_offset - M * S; + u32 G = div64_u64(LmodS, T); + u64 H = LmodS - G * T; + + u32 N = div_u64(H, U); + + /* "H - (N * U)" is just "H % U" so it's bound to u32 */ + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; + si->dev *= layout->mirrors_p1; + + div_u64_rem(file_offset, stripe_unit, &si->unit_off); + + si->obj_offset = si->unit_off + (N * stripe_unit) + + (M * group_depth * stripe_unit); + + si->group_length = T - H; + si->M = M; +} + +static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, + unsigned pgbase, struct ore_per_dev_state *per_dev, + int cur_len) +{ + unsigned pg = *cur_pg; + struct request_queue *q = + osd_request_queue(_ios_od(ios, per_dev->dev)); + + per_dev->length += cur_len; + + if (per_dev->bio == NULL) { + unsigned pages_in_stripe = ios->layout->group_width * + (ios->layout->stripe_unit / PAGE_SIZE); + unsigned bio_size = (ios->nr_pages + pages_in_stripe) / + ios->layout->group_width; + + per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); + if (unlikely(!per_dev->bio)) { + ORE_DBGMSG("Failed to allocate BIO size=%u\n", + bio_size); + return -ENOMEM; + } + } + + while (cur_len > 0) { + unsigned pglen = min_t(unsigned, 
PAGE_SIZE - pgbase, cur_len); + unsigned added_len; + + BUG_ON(ios->nr_pages <= pg); + cur_len -= pglen; + + added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], + pglen, pgbase); + if (unlikely(pglen != added_len)) + return -ENOMEM; + pgbase = 0; + ++pg; + } + BUG_ON(cur_len); + + *cur_pg = pg; + return 0; +} + +static int _prepare_one_group(struct ore_io_state *ios, u64 length, + struct _striping_info *si) +{ + unsigned stripe_unit = ios->layout->stripe_unit; + unsigned mirrors_p1 = ios->layout->mirrors_p1; + unsigned devs_in_group = ios->layout->group_width * mirrors_p1; + unsigned dev = si->dev; + unsigned first_dev = dev - (dev % devs_in_group); + unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; + unsigned cur_pg = ios->pages_consumed; + int ret = 0; + + while (length) { + struct ore_per_dev_state *per_dev = &ios->per_dev[dev]; + unsigned cur_len, page_off = 0; + + if (!per_dev->length) { + per_dev->dev = dev; + if (dev < si->dev) { + per_dev->offset = si->obj_offset + stripe_unit - + si->unit_off; + cur_len = stripe_unit; + } else if (dev == si->dev) { + per_dev->offset = si->obj_offset; + cur_len = stripe_unit - si->unit_off; + page_off = si->unit_off & ~PAGE_MASK; + BUG_ON(page_off && (page_off != ios->pgbase)); + } else { /* dev > si->dev */ + per_dev->offset = si->obj_offset - si->unit_off; + cur_len = stripe_unit; + } + + if (max_comp < dev) + max_comp = dev; + } else { + cur_len = stripe_unit; + } + if (cur_len >= length) + cur_len = length; + + ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, + cur_len); + if (unlikely(ret)) + goto out; + + dev += mirrors_p1; + dev = (dev % devs_in_group) + first_dev; + + length -= cur_len; + } +out: + ios->numdevs = max_comp + mirrors_p1; + ios->pages_consumed = cur_pg; + return ret; +} + +static int _prepare_for_striping(struct ore_io_state *ios) +{ + u64 length = ios->length; + u64 offset = ios->offset; + struct _striping_info si; + int ret = 0; + + if (!ios->pages) { + if (ios->kern_buff) { + struct ore_per_dev_state *per_dev = &ios->per_dev[0]; + + _calc_stripe_info(ios->layout, ios->offset, &si); + per_dev->offset = si.obj_offset; + per_dev->dev = si.dev; + + /* no cross device without page array */ + BUG_ON((ios->layout->group_width > 1) && + (si.unit_off + ios->length > + ios->layout->stripe_unit)); + } + ios->numdevs = ios->layout->mirrors_p1; + return 0; + } + + while (length) { + _calc_stripe_info(ios->layout, offset, &si); + + if (length < si.group_length) + si.group_length = length; + + ret = _prepare_one_group(ios, si.group_length, &si); + if (unlikely(ret)) + goto out; + + offset += si.group_length; + length -= si.group_length; + } + +out: + return ret; +} + +int ore_create(struct ore_io_state *ios) +{ + int i, ret; + + for (i = 0; i < ios->comps->numdevs; i++) { + struct osd_request *or; + + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); + if (unlikely(!or)) { + ORE_ERR("%s: osd_start_request failed\n", __func__); + ret = -ENOMEM; + goto out; + } + ios->per_dev[i].or = or; + ios->numdevs++; + + osd_req_create_object(or, _ios_obj(ios, i)); + } + ret = ore_io_execute(ios); + +out: + return ret; +} + +int ore_remove(struct ore_io_state *ios) +{ + int i, ret; + + for (i = 0; i < ios->comps->numdevs; i++) { + struct osd_request *or; + + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); + if (unlikely(!or)) { + ORE_ERR("%s: osd_start_request failed\n", __func__); + ret = -ENOMEM; + goto out; + } + ios->per_dev[i].or = or; + ios->numdevs++; + + osd_req_remove_object(or, _ios_obj(ios, i)); + } 
+ ret = ore_io_execute(ios); + +out: + return ret; +} + +static int _write_mirror(struct ore_io_state *ios, int cur_comp) +{ + struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp]; + unsigned dev = ios->per_dev[cur_comp].dev; + unsigned last_comp = cur_comp + ios->layout->mirrors_p1; + int ret = 0; + + if (ios->pages && !master_dev->length) + return 0; /* Just an empty slot */ + + for (; cur_comp < last_comp; ++cur_comp, ++dev) { + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct osd_request *or; + + or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); + if (unlikely(!or)) { + ORE_ERR("%s: osd_start_request failed\n", __func__); + ret = -ENOMEM; + goto out; + } + per_dev->or = or; + per_dev->offset = master_dev->offset; + + if (ios->pages) { + struct bio *bio; + + if (per_dev != master_dev) { + bio = bio_kmalloc(GFP_KERNEL, + master_dev->bio->bi_max_vecs); + if (unlikely(!bio)) { + ORE_DBGMSG( + "Failed to allocate BIO size=%u\n", + master_dev->bio->bi_max_vecs); + ret = -ENOMEM; + goto out; + } + + __bio_clone(bio, master_dev->bio); + bio->bi_bdev = NULL; + bio->bi_next = NULL; + per_dev->length = master_dev->length; + per_dev->bio = bio; + per_dev->dev = dev; + } else { + bio = master_dev->bio; + /* FIXME: bio_set_dir() */ + bio->bi_rw |= REQ_WRITE; + } + + osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, + bio, per_dev->length); + ORE_DBGMSG("write(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d\n", + _LLU(_ios_obj(ios, dev)->id), + _LLU(per_dev->offset), + _LLU(per_dev->length), dev); + } else if (ios->kern_buff) { + ret = osd_req_write_kern(or, _ios_obj(ios, dev), + per_dev->offset, + ios->kern_buff, ios->length); + if (unlikely(ret)) + goto out; + ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d\n", + _LLU(_ios_obj(ios, dev)->id), + _LLU(per_dev->offset), + _LLU(ios->length), dev); + } else { + osd_req_set_attributes(or, _ios_obj(ios, dev)); + ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", + _LLU(_ios_obj(ios, dev)->id), + ios->out_attr_len, dev); + } + + if (ios->out_attr) + osd_req_add_set_attr_list(or, ios->out_attr, + ios->out_attr_len); + + if (ios->in_attr) + osd_req_add_get_attr_list(or, ios->in_attr, + ios->in_attr_len); + } + +out: + return ret; +} + +int ore_write(struct ore_io_state *ios) +{ + int i; + int ret; + + ret = _prepare_for_striping(ios); + if (unlikely(ret)) + return ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + ret = _write_mirror(ios, i); + if (unlikely(ret)) + return ret; + } + + ret = ore_io_execute(ios); + return ret; +} + +static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) +{ + struct osd_request *or; + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct osd_obj_id *obj = _ios_obj(ios, cur_comp); + unsigned first_dev = (unsigned)obj->id; + + if (ios->pages && !per_dev->length) + return 0; /* Just an empty slot */ + + first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; + or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); + if (unlikely(!or)) { + ORE_ERR("%s: osd_start_request failed\n", __func__); + return -ENOMEM; + } + per_dev->or = or; + + if (ios->pages) { + osd_req_read(or, obj, per_dev->offset, + per_dev->bio, per_dev->length); + ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" + " dev=%d\n", _LLU(obj->id), + _LLU(per_dev->offset), _LLU(per_dev->length), + first_dev); + } else if (ios->kern_buff) { + int ret = osd_req_read_kern(or, obj, per_dev->offset, + ios->kern_buff, ios->length); + 
ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d ret=>%d\n", + _LLU(obj->id), _LLU(per_dev->offset), + _LLU(ios->length), first_dev, ret); + if (unlikely(ret)) + return ret; + } else { + osd_req_get_attributes(or, obj); + ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", + _LLU(obj->id), + ios->in_attr_len, first_dev); + } + if (ios->out_attr) + osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); + + if (ios->in_attr) + osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len); + + return 0; +} + +int ore_read(struct ore_io_state *ios) +{ + int i; + int ret; + + ret = _prepare_for_striping(ios); + if (unlikely(ret)) + return ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + ret = _read_mirror(ios, i); + if (unlikely(ret)) + return ret; + } + + ret = ore_io_execute(ios); + return ret; +} + +int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) +{ + struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ + void *iter = NULL; + int nelem; + + do { + nelem = 1; + osd_req_decode_get_attr_list(ios->per_dev[0].or, + &cur_attr, &nelem, &iter); + if ((cur_attr.attr_page == attr->attr_page) && + (cur_attr.attr_id == attr->attr_id)) { + attr->len = cur_attr.len; + attr->val_ptr = cur_attr.val_ptr; + return 0; + } + } while (iter); + + return -EIO; +} + +static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, + struct osd_attr *attr) +{ + int last_comp = cur_comp + ios->layout->mirrors_p1; + + for (; cur_comp < last_comp; ++cur_comp) { + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct osd_request *or; + + or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); + if (unlikely(!or)) { + ORE_ERR("%s: osd_start_request failed\n", __func__); + return -ENOMEM; + } + per_dev->or = or; + + osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); + osd_req_add_set_attr_list(or, attr, 1); + } + + return 0; +} + +struct _trunc_info { + struct _striping_info si; + u64 prev_group_obj_off; + u64 next_group_obj_off; + + unsigned first_group_dev; + unsigned nex_group_dev; + unsigned max_devs; +}; + +void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, + struct _trunc_info *ti) +{ + unsigned stripe_unit = layout->stripe_unit; + + _calc_stripe_info(layout, file_offset, &ti->si); + + ti->prev_group_obj_off = ti->si.M * stripe_unit; + ti->next_group_obj_off = ti->si.M ? 
(ti->si.M - 1) * stripe_unit : 0; + + ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); + ti->nex_group_dev = ti->first_group_dev + layout->group_width; + ti->max_devs = layout->group_width * layout->group_count; +} + +int ore_truncate(struct ore_layout *layout, struct ore_components *comps, + u64 size) +{ + struct ore_io_state *ios; + struct exofs_trunc_attr { + struct osd_attr attr; + __be64 newsize; + } *size_attrs; + struct _trunc_info ti; + int i, ret; + + ret = ore_get_io_state(layout, comps, &ios); + if (unlikely(ret)) + return ret; + + _calc_trunk_info(ios->layout, size, &ti); + + size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), + GFP_KERNEL); + if (unlikely(!size_attrs)) { + ret = -ENOMEM; + goto out; + } + + ios->numdevs = ios->comps->numdevs; + + for (i = 0; i < ti.max_devs; ++i) { + struct exofs_trunc_attr *size_attr = &size_attrs[i]; + u64 obj_size; + + if (i < ti.first_group_dev) + obj_size = ti.prev_group_obj_off; + else if (i >= ti.nex_group_dev) + obj_size = ti.next_group_obj_off; + else if (i < ti.si.dev) /* dev within this group */ + obj_size = ti.si.obj_offset + + ios->layout->stripe_unit - ti.si.unit_off; + else if (i == ti.si.dev) + obj_size = ti.si.obj_offset; + else /* i > ti.dev */ + obj_size = ti.si.obj_offset - ti.si.unit_off; + + size_attr->newsize = cpu_to_be64(obj_size); + size_attr->attr = g_attr_logical_length; + size_attr->attr.val_ptr = &size_attr->newsize; + + ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", + _LLU(comps->comps->obj.id), _LLU(obj_size), i); + ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, + &size_attr->attr); + if (unlikely(ret)) + goto out; + } + ret = ore_io_execute(ios); + +out: + kfree(size_attrs); + ore_put_io_state(ios); + return ret; +} + +const struct osd_attr g_attr_logical_length = ATTR_DEF( + OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 4d68779..2748940 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -263,19 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) struct osd_attr attrs[] = { [0] = g_attr_sb_stats, }; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; - ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); return ret; } ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) { EXOFS_ERR("Error reading super_block stats => %d\n", ret); goto out; @@ -302,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) } out: - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } -static void stats_done(struct exofs_io_state *ios, void *p) +static void stats_done(struct ore_io_state *ios, void *p) { - exofs_put_io_state(ios); + ore_put_io_state(ios); /* Good thanks nothing to do anymore */ } @@ -318,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) struct osd_attr attrs[] = { [0] = g_attr_sb_stats, }; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; - ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); return ret; } @@ -337,10 +337,10 @@ int 
exofs_sbi_write_stats(struct exofs_sb_info *sbi) ios->out_attr = attrs; ios->out_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_write(ios); + ret = ore_write(ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); - exofs_put_io_state(ios); + EXOFS_ERR("%s: ore_write failed.\n", __func__); + ore_put_io_state(ios); } return ret; @@ -359,9 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; - struct exofs_comp one_comp; - struct exofs_components comps; - struct exofs_io_state *ios; + struct ore_comp one_comp; + struct ore_components comps; + struct ore_io_state *ios; int ret = -ENOMEM; fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); @@ -380,7 +380,7 @@ int exofs_sync_fs(struct super_block *sb, int wait) exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID); - ret = exofs_get_io_state(&sbi->layout, &comps, &ios); + ret = ore_get_io_state(&sbi->layout, &comps, &ios); if (unlikely(ret)) goto out; @@ -397,9 +397,9 @@ int exofs_sync_fs(struct super_block *sb, int wait) ios->offset = 0; ios->kern_buff = fscb; - ret = exofs_sbi_write(ios); + ret = ore_write(ios); if (unlikely(ret)) - EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); + EXOFS_ERR("%s: ore_write failed.\n", __func__); else sb->s_dirt = 0; @@ -407,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait) unlock_super(sb); out: EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); - exofs_put_io_state(ios); + ore_put_io_state(ios); kfree(fscb); return ret; } @@ -562,7 +562,7 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, return 0; } -static unsigned __ra_pages(struct exofs_layout *layout) +static unsigned __ra_pages(struct ore_layout *layout) { const unsigned _MIN_RA = 32; /* min 128K read-ahead */ unsigned ra_pages = layout->group_width * layout->stripe_unit / @@ -609,7 +609,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, struct osd_dev *fscb_od, unsigned table_count) { - struct exofs_comp comp; + struct ore_comp comp; struct exofs_device_table *dt; unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + sizeof(*dt); @@ -747,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) struct exofs_sb_info *sbi; /*extended info */ struct osd_dev *od; /* Master device */ struct exofs_fscb fscb; /*on-disk superblock info */ - struct exofs_comp comp; + struct ore_comp comp; unsigned table_count; int ret; @@ -913,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct exofs_sb_info *sbi = sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct osd_attr attrs[] = { ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), @@ -924,16 +924,16 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) uint64_t used = ULLONG_MAX; int ret; - ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios); + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); if (ret) { - EXOFS_DBGMSG("exofs_get_io_state failed.\n"); + EXOFS_DBGMSG("ore_get_io_state failed.\n"); return ret; } ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) goto out; @@ -962,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = EXOFS_NAME_LEN; out: - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } diff 
--git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h new file mode 100644 index 0000000..c5c5e00 --- /dev/null +++ b/include/scsi/osd_ore.h @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2011 + * Boaz Harrosh + * + * Public Declarations of the ORE API + * + * This file is part of the ORE (Object Raid Engine) library. + * + * ORE is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. (GPL v2) + * + * ORE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the ORE; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __ORE_H__ +#define __ORE_H__ + +#include +#include +#include +#include + +struct ore_comp { + struct osd_obj_id obj; + u8 cred[OSD_CAP_LEN]; +}; + +struct ore_layout { + /* Our way of looking at the data_map */ + unsigned stripe_unit; + unsigned mirrors_p1; + + unsigned group_width; + u64 group_depth; + unsigned group_count; +}; + +struct ore_components { + unsigned numdevs; /* Num of devices in array */ + /* If @single_comp == EC_SINGLE_COMP, @comps points to a single + * component. else there are @numdevs components + */ + enum EC_COMP_USAGE { + EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff + } single_comp; + struct ore_comp *comps; + struct osd_dev **ods; /* osd_dev array */ +}; + +struct ore_io_state; +typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private); + +struct ore_io_state { + struct kref kref; + + void *private; + ore_io_done_fn done; + + struct ore_layout *layout; + struct ore_components *comps; + + /* Global read/write IO*/ + loff_t offset; + unsigned long length; + void *kern_buff; + + struct page **pages; + unsigned nr_pages; + unsigned pgbase; + unsigned pages_consumed; + + /* Attributes */ + unsigned in_attr_len; + struct osd_attr *in_attr; + unsigned out_attr_len; + struct osd_attr *out_attr; + + bool reading; + + /* Variable array of size numdevs */ + unsigned numdevs; + struct ore_per_dev_state { + struct osd_request *or; + struct bio *bio; + loff_t offset; + unsigned length; + unsigned dev; + } per_dev[]; +}; + +static inline unsigned ore_io_state_size(unsigned numdevs) +{ + return sizeof(struct ore_io_state) + + sizeof(struct ore_per_dev_state) * numdevs; +} + +/* ore.c */ +int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, + bool is_reading, u64 offset, u64 length, + struct ore_io_state **ios); +int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, + struct ore_io_state **ios); +void ore_put_io_state(struct ore_io_state *ios); + +int ore_check_io(struct ore_io_state *ios, u64 *resid); + +int ore_create(struct ore_io_state *ios); +int ore_remove(struct ore_io_state *ios); +int ore_write(struct ore_io_state *ios); +int ore_read(struct ore_io_state *ios); +int ore_truncate(struct ore_layout *layout, struct ore_components *comps, + u64 size); + +int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr); + +extern const struct osd_attr g_attr_logical_length; + +#endif -- cgit v1.1 From cf283ade08c454e884394a4720f22421dd33a715 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sat, 6 Aug 2011 19:22:06 -0700 Subject: ore: Make 
ore its own module Export everything from ore need exporting. Change Kbuild and Kconfig to build ore.ko as an independent module. Import ore from exofs Signed-off-by: Boaz Harrosh --- fs/exofs/Kbuild | 5 ++++- fs/exofs/Kconfig | 4 ++++ fs/exofs/ore.c | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index c1af221..c5a5855 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild @@ -12,5 +12,8 @@ # Kbuild - Gets included from the Kernels Makefile and build system # -exofs-y := ore.o inode.o file.o symlink.o namei.o dir.o super.o +# ore module library +obj-$(CONFIG_ORE) += ore.o + +exofs-y := inode.o file.o symlink.o namei.o dir.o super.o obj-$(CONFIG_EXOFS_FS) += exofs.o diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index 86194b2..70bae41 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig @@ -1,6 +1,10 @@ +config ORE + tristate + config EXOFS_FS tristate "exofs: OSD based file system support" depends on SCSI_OSD_ULD + select ORE help EXOFS is a file system that uses an OSD storage device, as its backing storage. diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 3a00008..25305af 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -43,6 +43,10 @@ #define ORE_DBGMSG2(M...) do {} while (0) /* #define ORE_DBGMSG2 ORE_DBGMSG */ +MODULE_AUTHOR("Boaz Harrosh "); +MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); +MODULE_LICENSE("GPL"); + static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) { return ios->comps->comps[index & ios->comps->single_comp].cred; @@ -84,12 +88,14 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, *pios = ios; return 0; } +EXPORT_SYMBOL(ore_get_rw_state); int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, struct ore_io_state **ios) { return ore_get_rw_state(layout, comps, true, 0, 0, ios); } +EXPORT_SYMBOL(ore_get_io_state); void ore_put_io_state(struct ore_io_state *ios) { @@ -108,6 +114,7 @@ void ore_put_io_state(struct ore_io_state *ios) kfree(ios); } } +EXPORT_SYMBOL(ore_put_io_state); static void _sync_done(struct ore_io_state *ios, void *p) { @@ -236,6 +243,7 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid) return acumulated_lin_err; } +EXPORT_SYMBOL(ore_check_io); /* * L - logical offset into the file @@ -487,6 +495,7 @@ int ore_create(struct ore_io_state *ios) out: return ret; } +EXPORT_SYMBOL(ore_create); int ore_remove(struct ore_io_state *ios) { @@ -511,6 +520,7 @@ int ore_remove(struct ore_io_state *ios) out: return ret; } +EXPORT_SYMBOL(ore_remove); static int _write_mirror(struct ore_io_state *ios, int cur_comp) { @@ -617,6 +627,7 @@ int ore_write(struct ore_io_state *ios) ret = ore_io_execute(ios); return ret; } +EXPORT_SYMBOL(ore_write); static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) { @@ -685,6 +696,7 @@ int ore_read(struct ore_io_state *ios) ret = ore_io_execute(ios); return ret; } +EXPORT_SYMBOL(ore_read); int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) { @@ -706,6 +718,7 @@ int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) return -EIO; } +EXPORT_SYMBOL(extract_attr_from_ios); static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, struct osd_attr *attr) @@ -815,6 +828,8 @@ out: ore_put_io_state(ios); return ret; } +EXPORT_SYMBOL(ore_truncate); const struct osd_attr g_attr_logical_length = ATTR_DEF( OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); +EXPORT_SYMBOL(g_attr_logical_length); -- cgit v1.1 From 
830c0f0edca67403d361fe976a25b17356c11f19 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Aug 2011 22:41:50 -0700 Subject: vfs: renumber DCACHE_xyz flags, remove some stale ones Gcc tends to generate better code with small integers, including the DCACHE_xyz flag tests - so move the common ones to be first in the list. Also just remove the unused DCACHE_INOTIFY_PARENT_WATCHED and DCACHE_AUTOFS_PENDING values, their users no longer exists in the source tree. And add a "unlikely()" to the DCACHE_OP_COMPARE test, since we want the common case to be a nice straight-line fall-through. Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- include/linux/dcache.h | 30 +++++++++++++----------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index c83cae1..a88948b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1729,7 +1729,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_flags & DCACHE_OP_COMPARE) { + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d37d2a7..62157c0 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,12 +180,12 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ -#define DCACHE_NFSFS_RENAMED 0x0002 - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ +#define DCACHE_OP_HASH 0x0001 +#define DCACHE_OP_COMPARE 0x0002 +#define DCACHE_OP_REVALIDATE 0x0004 +#define DCACHE_OP_DELETE 0x0008 -#define DCACHE_DISCONNECTED 0x0004 +#define DCACHE_DISCONNECTED 0x0010 /* This dentry is possibly not currently connected to the dcache tree, in * which case its parent will either be itself, or will have this flag as * well. nfsd will not use a dentry with this bit set, but will first @@ -196,22 +196,18 @@ struct dentry_operations { * dentry into place and return that dentry rather than the passed one, * typically using d_splice_alias. */ -#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -#define DCACHE_RCUACCESS 0x0010 /* Entry has ever been RCU-visible */ -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 - /* Parent inode is watched by inotify */ - -#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 - /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_REFERENCED 0x0020 /* Recently used, don't discard. 
*/ +#define DCACHE_RCUACCESS 0x0040 /* Entry has ever been RCU-visible */ #define DCACHE_CANT_MOUNT 0x0100 #define DCACHE_GENOCIDE 0x0200 -#define DCACHE_OP_HASH 0x1000 -#define DCACHE_OP_COMPARE 0x2000 -#define DCACHE_OP_REVALIDATE 0x4000 -#define DCACHE_OP_DELETE 0x8000 +#define DCACHE_NFSFS_RENAMED 0x1000 + /* this dentry has been "silly renamed" and has to be deleted on the last + * dput() */ +#define DCACHE_COOKIE 0x2000 /* For use by dcookie subsystem */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000 + /* Parent inode is watched by some fsnotify listener */ #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ -- cgit v1.1 From 3ddcd0569cd68f00f3beae9a7959b72918bb91f4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Aug 2011 22:45:50 -0700 Subject: vfs: optimize inode cache access patterns The inode structure layout is largely random, and some of the vfs paths really do care. The path lookup in particular is already quite D$ intensive, and profiles show that accessing the 'inode->i_op->xyz' fields is quite costly. We already optimized the dcache to not unnecessarily load the d_op structure for members that are often NULL using the DCACHE_OP_xyz bits in dentry->d_flags, and this does something very similar for the inode ops that are used during pathname lookup. It also re-orders the fields so that the fields accessed by 'stat' are together at the beginning of the inode structure, and roughly in the order accessed. The effect of this seems to be in the 1-2% range for an empty kernel "make -j" run (which is fairly kernel-intensive, mostly in filename lookup), so it's visible. The numbers are fairly noisy, though, and likely depend a lot on exact microarchitecture. So there's more tuning to be done. Signed-off-by: Linus Torvalds --- fs/inode.c | 1 + fs/namei.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++------- fs/stat.c | 4 +-- include/linux/fs.h | 59 ++++++++++++++++++++++++++---------------- 4 files changed, 106 insertions(+), 34 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 5aab80d..73920d5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_op = &empty_iops; inode->i_fop = &empty_fops; inode->i_nlink = 1; + inode->i_opflags = 0; inode->i_uid = 0; inode->i_gid = 0; atomic_set(&inode->i_writecount, 0); diff --git a/fs/namei.c b/fs/namei.c index 3d607bd..4a98bf1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -308,6 +308,26 @@ int generic_permission(struct inode *inode, int mask) return -EACCES; } +/* + * We _really_ want to just do "generic_permission()" without + * even looking at the inode->i_op values. So we keep a cache + * flag in inode->i_opflags, that says "this has not special + * permission function, use the fast case". 
+ */ +static inline int do_inode_permission(struct inode *inode, int mask) +{ + if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { + if (likely(inode->i_op->permission)) + return inode->i_op->permission(inode, mask); + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_FASTPERM; + spin_unlock(&inode->i_lock); + } + return generic_permission(inode, mask); +} + /** * inode_permission - check for access rights to a given inode * @inode: inode to check permission on @@ -322,7 +342,7 @@ int inode_permission(struct inode *inode, int mask) { int retval; - if (mask & MAY_WRITE) { + if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; /* @@ -339,11 +359,7 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - if (inode->i_op->permission) - retval = inode->i_op->permission(inode, mask); - else - retval = generic_permission(inode, mask); - + retval = do_inode_permission(inode, mask); if (retval) return retval; @@ -1245,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd) } } +/* + * Do we need to follow links? We _really_ want to be able + * to do this check without having to look at inode->i_op, + * so we keep a cache of "no, this doesn't need follow_link" + * for the common case. + */ +static inline int do_follow_link(struct inode *inode, int follow) +{ + if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (likely(inode->i_op->follow_link)) + return follow; + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_NOFOLLOW; + spin_unlock(&inode->i_lock); + } + return 0; +} + static inline int walk_component(struct nameidata *nd, struct path *path, struct qstr *name, int type, int follow) { @@ -1267,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, terminate_walk(nd); return -ENOENT; } - if (unlikely(inode->i_op->follow_link) && follow) { + if (do_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { terminate_walk(nd); @@ -1320,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) } /* + * We really don't want to look at inode->i_op->lookup + * when we don't have to. So we keep a cache bit in + * the inode ->i_opflags field that says "yes, we can + * do lookup on this inode". + */ +static inline int can_lookup(struct inode *inode) +{ + if (likely(inode->i_opflags & IOP_LOOKUP)) + return 1; + if (likely(!inode->i_op->lookup)) + return 0; + + /* We do this once for the lifetime of the inode */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_LOOKUP; + spin_unlock(&inode->i_lock); + return 1; +} + +/* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. 
@@ -1398,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } + if (can_lookup(nd->inode)) + continue; err = -ENOTDIR; - if (!nd->inode->i_op->lookup) - break; - continue; + break; /* here ends the main loop */ last_component: diff --git a/fs/stat.c b/fs/stat.c index 9610391..ba5316f 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->uid = inode->i_uid; stat->gid = inode->i_gid; stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->size = i_size_read(inode); - stat->blocks = inode->i_blocks; stat->blksize = (1 << inode->i_blkbits); + stat->blocks = inode->i_blocks; } EXPORT_SYMBOL(generic_fillattr); diff --git a/include/linux/fs.h b/include/linux/fs.h index 786b3b1..178cdb4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -738,22 +738,54 @@ static inline int mapping_writably_mapped(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +#define IOP_FASTPERM 0x0001 +#define IOP_LOOKUP 0x0002 +#define IOP_NOFOLLOW 0x0004 + +/* + * Keep mostly read-only and often accessed (especially for + * the RCU path lookup and 'stat' data) fields at the beginning + * of the 'struct inode' + */ struct inode { - /* RCU path lookup touches following: */ umode_t i_mode; + unsigned short i_opflags; uid_t i_uid; gid_t i_gid; + unsigned int i_flags; + +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + const struct inode_operations *i_op; struct super_block *i_sb; + struct address_space *i_mapping; - spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - unsigned int i_flags; - unsigned long i_state; #ifdef CONFIG_SECURITY void *i_security; #endif - struct mutex i_mutex; + /* Stat data, not accessed from path walking */ + unsigned long i_ino; + unsigned int i_nlink; + dev_t i_rdev; + loff_t i_size; + struct timespec i_atime; + struct timespec i_mtime; + struct timespec i_ctime; + unsigned int i_blkbits; + blkcnt_t i_blocks; + +#ifdef __NEED_I_SIZE_ORDERED + seqcount_t i_size_seqcount; +#endif + + /* Misc */ + unsigned long i_state; + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + struct mutex i_mutex; unsigned long dirtied_when; /* jiffies of first dirtying */ @@ -765,25 +797,12 @@ struct inode { struct list_head i_dentry; struct rcu_head i_rcu; }; - unsigned long i_ino; atomic_t i_count; - unsigned int i_nlink; - dev_t i_rdev; - unsigned int i_blkbits; u64 i_version; - loff_t i_size; -#ifdef __NEED_I_SIZE_ORDERED - seqcount_t i_size_seqcount; -#endif - struct timespec i_atime; - struct timespec i_mtime; - struct timespec i_ctime; - blkcnt_t i_blocks; unsigned short i_bytes; atomic_t i_dio_count; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ struct file_lock *i_flock; - struct address_space *i_mapping; struct address_space i_data; #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; @@ -806,10 +825,6 @@ struct inode { atomic_t i_readcount; /* struct files open RO */ #endif atomic_t i_writecount; -#ifdef CONFIG_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif void *i_private; /* fs or device private pointer */ }; -- cgit v1.1 From 206b1d09a56dcd2db1052245c4131879c410eaf8 Mon Sep 17 00:00:00 2001 From: Ari Savolainen Date: Sat, 6 Aug 2011 19:43:07 +0300 Subject: Fix POSIX ACL permission check After commit 
3567866bf261: "RCUify freeing acls, let check_acl() go ahead in RCU mode if acl is cached" posix_acl_permission is being called with an unsupported flag and the permission check fails. This patch fixes the issue. Signed-off-by: Ari Savolainen Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 4a98bf1..fc13609 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -186,7 +186,7 @@ static int check_acl(struct inode *inode, int mask) /* no ->get_acl() calls in RCU mode... */ if (acl == ACL_NOT_CACHED) return -ECHILD; - return posix_acl_permission(inode, acl, mask); + return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); } acl = get_cached_acl(inode, ACL_TYPE_ACCESS); -- cgit v1.1 From 7813b94a54987571082ff19e9d87eabbfec23b4e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Aug 2011 09:53:20 -0700 Subject: vfs: rename 'do_follow_link' to 'should_follow_link' Al points out that the do_follow_link() helper function really is misnamed - it's about whether we should try to follow a symlink or not, not about actually doing the following. Signed-off-by: Linus Torvalds --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index fc13609..2826db3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1267,7 +1267,7 @@ static void terminate_walk(struct nameidata *nd) * so we keep a cache of "no, this doesn't need follow_link" * for the common case. */ -static inline int do_follow_link(struct inode *inode, int follow) +static inline int should_follow_link(struct inode *inode, int follow) { if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { if (likely(inode->i_op->follow_link)) @@ -1303,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, terminate_walk(nd); return -ENOENT; } - if (do_follow_link(inode, follow)) { + if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { terminate_walk(nd); -- cgit v1.1 From 3295514841c2112d94451ba5deaf54f5afb78ea9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Aug 2011 18:55:11 +0100 Subject: fix rcu annotations noise in cred.h task->cred is declared as __rcu, and access to other tasks' ->cred is, indeed, protected. Access to current->cred does not need rcu_dereference() at all, since only the task itself can change its ->cred. sparse, of course, has no way of knowing that... Add force-cast in current_cred(), make current_fsuid() et.al. use it. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/cred.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index 48e82af..98f46ef 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -265,10 +265,11 @@ static inline void put_cred(const struct cred *_cred) /** * current_cred - Access the current task's subjective credentials * - * Access the subjective credentials of the current task. + * Access the subjective credentials of the current task. RCU-safe, + * since nobody else can modify it. 
*/ #define current_cred() \ - (current->cred) + (*(__force struct cred **)¤t->cred) /** * __task_cred - Access a task's objective credentials @@ -307,7 +308,7 @@ static inline void put_cred(const struct cred *_cred) ({ \ struct user_struct *__u; \ struct cred *__cred; \ - __cred = (struct cred *) current_cred(); \ + __cred = current_cred(); \ __u = get_uid(__cred->user); \ __u; \ }) @@ -322,7 +323,7 @@ static inline void put_cred(const struct cred *_cred) ({ \ struct group_info *__groups; \ struct cred *__cred; \ - __cred = (struct cred *) current_cred(); \ + __cred = current_cred(); \ __groups = get_group_info(__cred->group_info); \ __groups; \ }) @@ -341,7 +342,7 @@ static inline void put_cred(const struct cred *_cred) #define current_cred_xxx(xxx) \ ({ \ - current->cred->xxx; \ + current_cred()->xxx; \ }) #define current_uid() (current_cred_xxx(uid)) -- cgit v1.1 From 4d4487140d34c1b9b321889d2d209321b0da6643 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Aug 2011 14:07:03 -0700 Subject: arm: remove "optimized" SHA1 routines Since commit 1eb19a12bd22 ("lib/sha1: use the git implementation of SHA-1"), the ARM SHA1 routines no longer work. The reason? They depended on the larger 320-byte workspace, and now the sha1 workspace is just 16 words (64 bytes). So the assembly version would overwrite the stack randomly. The optimized asm version is also probably slower than the new improved C version, so there's no reason to keep it around. At least that was the case in git, where what appears to be the same assembly language version was removed two years ago because the optimized C BLK_SHA1 code was faster. Reported-and-tested-by: Joachim Eastwood Cc: Andreas Schwab Cc: Nicolas Pitre Signed-off-by: Linus Torvalds --- arch/arm/lib/Makefile | 2 +- arch/arm/lib/sha1.S | 211 -------------------------------------------------- 2 files changed, 1 insertion(+), 212 deletions(-) delete mode 100644 arch/arm/lib/sha1.S diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 59ff42d..cf73a7f 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -12,7 +12,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ strchr.o strrchr.o \ testchangebit.o testclearbit.o testsetbit.o \ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ - ucmpdi2.o lib1funcs.o div64.o sha1.o \ + ucmpdi2.o lib1funcs.o div64.o \ io-readsb.o io-writesb.o io-readsl.o io-writesl.o mmu-y := clear_user.o copy_page.o getuser.o putuser.o diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S deleted file mode 100644 index eb0edb8..0000000 --- a/arch/arm/lib/sha1.S +++ /dev/null @@ -1,211 +0,0 @@ -/* - * linux/arch/arm/lib/sha1.S - * - * SHA transform optimized for ARM - * - * Copyright: (C) 2005 by Nicolas Pitre - * Created: September 17, 2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * The reference implementation for this code is linux/lib/sha1.c - */ - -#include - - .text - - -/* - * void sha_transform(__u32 *digest, const char *in, __u32 *W) - * - * Note: the "in" ptr may be unaligned. 
- */ - -ENTRY(sha_transform) - - stmfd sp!, {r4 - r8, lr} - - @ for (i = 0; i < 16; i++) - @ W[i] = be32_to_cpu(in[i]); - -#ifdef __ARMEB__ - mov r4, r0 - mov r0, r2 - mov r2, #64 - bl memcpy - mov r2, r0 - mov r0, r4 -#else - mov r3, r2 - mov lr, #16 -1: ldrb r4, [r1], #1 - ldrb r5, [r1], #1 - ldrb r6, [r1], #1 - ldrb r7, [r1], #1 - subs lr, lr, #1 - orr r5, r5, r4, lsl #8 - orr r6, r6, r5, lsl #8 - orr r7, r7, r6, lsl #8 - str r7, [r3], #4 - bne 1b -#endif - - @ for (i = 0; i < 64; i++) - @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31); - - sub r3, r2, #4 - mov lr, #64 -2: ldr r4, [r3, #4]! - subs lr, lr, #1 - ldr r5, [r3, #8] - ldr r6, [r3, #32] - ldr r7, [r3, #52] - eor r4, r4, r5 - eor r4, r4, r6 - eor r4, r4, r7 - mov r4, r4, ror #31 - str r4, [r3, #64] - bne 2b - - /* - * The SHA functions are: - * - * f1(B,C,D) = (D ^ (B & (C ^ D))) - * f2(B,C,D) = (B ^ C ^ D) - * f3(B,C,D) = ((B & C) | (D & (B | C))) - * - * Then the sub-blocks are processed as follows: - * - * A' = ror(A, 27) + f(B,C,D) + E + K + *W++ - * B' = A - * C' = ror(B, 2) - * D' = C - * E' = D - * - * We therefore unroll each loop 5 times to avoid register shuffling. - * Also the ror for C (and also D and E which are successivelyderived - * from it) is applied in place to cut on an additional mov insn for - * each round. - */ - - .macro sha_f1, A, B, C, D, E - ldr r3, [r2], #4 - eor ip, \C, \D - add \E, r1, \E, ror #2 - and ip, \B, ip, ror #2 - add \E, \E, \A, ror #27 - eor ip, ip, \D, ror #2 - add \E, \E, r3 - add \E, \E, ip - .endm - - .macro sha_f2, A, B, C, D, E - ldr r3, [r2], #4 - add \E, r1, \E, ror #2 - eor ip, \B, \C, ror #2 - add \E, \E, \A, ror #27 - eor ip, ip, \D, ror #2 - add \E, \E, r3 - add \E, \E, ip - .endm - - .macro sha_f3, A, B, C, D, E - ldr r3, [r2], #4 - add \E, r1, \E, ror #2 - orr ip, \B, \C, ror #2 - add \E, \E, \A, ror #27 - and ip, ip, \D, ror #2 - add \E, \E, r3 - and r3, \B, \C, ror #2 - orr ip, ip, r3 - add \E, \E, ip - .endm - - ldmia r0, {r4 - r8} - - mov lr, #4 - ldr r1, .L_sha_K + 0 - - /* adjust initial values */ - mov r6, r6, ror #30 - mov r7, r7, ror #30 - mov r8, r8, ror #30 - -3: subs lr, lr, #1 - sha_f1 r4, r5, r6, r7, r8 - sha_f1 r8, r4, r5, r6, r7 - sha_f1 r7, r8, r4, r5, r6 - sha_f1 r6, r7, r8, r4, r5 - sha_f1 r5, r6, r7, r8, r4 - bne 3b - - ldr r1, .L_sha_K + 4 - mov lr, #4 - -4: subs lr, lr, #1 - sha_f2 r4, r5, r6, r7, r8 - sha_f2 r8, r4, r5, r6, r7 - sha_f2 r7, r8, r4, r5, r6 - sha_f2 r6, r7, r8, r4, r5 - sha_f2 r5, r6, r7, r8, r4 - bne 4b - - ldr r1, .L_sha_K + 8 - mov lr, #4 - -5: subs lr, lr, #1 - sha_f3 r4, r5, r6, r7, r8 - sha_f3 r8, r4, r5, r6, r7 - sha_f3 r7, r8, r4, r5, r6 - sha_f3 r6, r7, r8, r4, r5 - sha_f3 r5, r6, r7, r8, r4 - bne 5b - - ldr r1, .L_sha_K + 12 - mov lr, #4 - -6: subs lr, lr, #1 - sha_f2 r4, r5, r6, r7, r8 - sha_f2 r8, r4, r5, r6, r7 - sha_f2 r7, r8, r4, r5, r6 - sha_f2 r6, r7, r8, r4, r5 - sha_f2 r5, r6, r7, r8, r4 - bne 6b - - ldmia r0, {r1, r2, r3, ip, lr} - add r4, r1, r4 - add r5, r2, r5 - add r6, r3, r6, ror #2 - add r7, ip, r7, ror #2 - add r8, lr, r8, ror #2 - stmia r0, {r4 - r8} - - ldmfd sp!, {r4 - r8, pc} - -ENDPROC(sha_transform) - - .align 2 -.L_sha_K: - .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 - - -/* - * void sha_init(__u32 *buf) - */ - - .align 2 -.L_sha_initial_digest: - .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 - -ENTRY(sha_init) - - str lr, [sp, #-4]! 
- adr r1, .L_sha_initial_digest - ldmia r1, {r1, r2, r3, ip, lr} - stmia r0, {r1, r2, r3, ip, lr} - ldr pc, [sp], #4 - -ENDPROC(sha_init) -- cgit v1.1 From f23c126bfabef88c201c8cc56bd3ccd9d59c51e0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Aug 2011 15:49:11 -0700 Subject: arm: remove stale export of 'sha_transform' The generic library code already exports the generic function, this was left-over from the ARM-specific version that just got removed. Signed-off-by: Linus Torvalds --- arch/arm/kernel/armksyms.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index acca35a..aeef960 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -112,9 +112,6 @@ EXPORT_SYMBOL(__put_user_4); EXPORT_SYMBOL(__put_user_8); #endif - /* crypto hash */ -EXPORT_SYMBOL(sha_transform); - /* gcc lib functions */ EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); -- cgit v1.1 From fc97114b8d67819fadcc5af855da9a3e6a6a329b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Aug 2011 00:26:50 +0200 Subject: sh: Fix boot crash related to SCI Commit d006199e72a9 ("serial: sh-sci: Regtype probing doesn't need to be fatal.") made sci_init_single() return when sci_probe_regmap() succeeds, although it should return when sci_probe_regmap() fails. This causes systems using the serial sh-sci driver to crash during boot. Fix the problem by using the right return condition. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index d0a5623..2ec57b2 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1889,7 +1889,7 @@ static int __devinit sci_init_single(struct platform_device *dev, if (p->regtype == SCIx_PROBE_REGTYPE) { ret = sci_probe_regmap(p); - if (unlikely(!ret)) + if (unlikely(ret)) return ret; } -- cgit v1.1 From 322a8b034003c0d46d39af85bf24fee27b902f48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Aug 2011 18:23:30 -0700 Subject: Linux 3.1-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f676d15..b4ca4e1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 -PATCHLEVEL = 0 +PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Sneaky Weasel # *DOCUMENTATION* -- cgit v1.1
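
As a worked illustration of the striping arithmetic documented in the ore.c comment earlier in this series (the L, U, T, S, M, G, H, N, C and O quantities), here is a minimal standalone user-space sketch that recomputes the component index and object offset for a given file offset. It is not part of any patch above: the struct and variable names only mirror that comment rather than the kernel's ore structs, the layout numbers are invented examples, and plain integer division stands in for the kernel's div64 helpers.

#include <stdio.h>
#include <stdint.h>

struct layout {
	uint32_t stripe_unit;	/* bytes written to one component per stripe */
	uint32_t group_width;	/* components striped across within a group */
	uint64_t group_depth;	/* stripes within a group before moving on */
	uint32_t group_count;	/* number of groups */
	uint32_t mirrors_p1;	/* mirrors + 1 */
};

static void calc_stripe(const struct layout *l, uint64_t L)
{
	uint64_t U = (uint64_t)l->stripe_unit * l->group_width;
	uint64_t T = U * l->group_depth;
	uint64_t S = T * l->group_count;

	uint64_t M = L / S;		/* major stripe number */
	uint64_t G = (L % S) / T;	/* group within the major stripe */
	uint64_t H = (L % S) % T;	/* byte offset within the group */
	uint64_t N = H / U;		/* minor stripe number */

	uint64_t unit_off = L % l->stripe_unit;
	/* C = (H % U) / stripe_unit + G * group_width, then mirror-expanded */
	uint64_t dev = (H % U) / l->stripe_unit + G * l->group_width;
	/* O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit */
	uint64_t obj_offset = unit_off + N * l->stripe_unit +
			      M * l->group_depth * l->stripe_unit;

	printf("L=%llu -> dev=%llu, obj_offset=%llu\n",
	       (unsigned long long)L,
	       (unsigned long long)(dev * l->mirrors_p1),
	       (unsigned long long)obj_offset);
}

int main(void)
{
	/* example layout only: 64K stripe unit, 4-wide groups, no mirrors */
	struct layout l = {
		.stripe_unit = 65536, .group_width = 4,
		.group_depth = 16, .group_count = 2, .mirrors_p1 = 1,
	};

	calc_stripe(&l, 0);
	calc_stripe(&l, 3 * 65536 + 100);	/* lands 100 bytes into the 4th component */
	return 0;
}

With this example layout, an offset of 3 * 65536 + 100 prints dev=3 and obj_offset=100, which is the same mapping _calc_stripe_info() in ore.c would produce before the per-device state is filled in.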